# Create Your Own Chatbot
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer
from nltk.corpus import stopwords
import random
import json
import pickle
import numpy as np
import string
import timeit
import csv
import nltk
from nltk.stem import WordNetLemmatizer
class AIChatbot:
    """Retrieval chatbot that replies with the stored answer of the closest question.

    A CSV file supplies the knowledge base: column 0 of each row is the text
    the TF-IDF model is fitted on, column 1 is the reply returned for that
    row.  The fitted vectorizer and the training matrix are cached as pickle
    files and rebuilt from the CSV whenever they cannot be loaded.
    """

    # Locations used by ``previous_chats`` and as the default CSV source.
    DEFAULT_QA_CSV_PATH = "/root/chatbot/path/QA-WA.csv"
    DEFAULT_VECTORIZER_PICKLE_PATH = "/root/chatbot/contents/ai_vectorizer.pickle"
    DEFAULT_MATRIX_PICKLE_PATH = "/root/chatbot/contents/ai_matrix_train.pickle"

    # Reply sent when no stored question scores above the caller's threshold.
    FALLBACK_REPLY = "If you want to connect to our agent, Please reply with Y or Yes."

    # Failures that mean "the cached model is unusable, retrain instead".
    # Deliberately excludes KeyboardInterrupt/SystemExit, which a bare
    # ``except`` would also have swallowed.
    _MODEL_LOAD_ERRORS = (
        OSError,
        pickle.UnpicklingError,
        AttributeError,
        EOFError,
        ImportError,
        IndexError,
    )

    def clean_up_sentence(self, sentence):
        """Tokenize *sentence* with NLTK and lemmatize every token.

        Args:
            sentence: Text to split into words.

        Returns:
            The lemmatized tokens as a list, in their original order.
        """
        lemmatizer = WordNetLemmatizer()
        return [lemmatizer.lemmatize(word) for word in nltk.word_tokenize(sentence)]

    def _load_model(self, vectorizer_path, matrix_path):
        """Load the pickled vectorizer and training matrix.

        SECURITY: ``pickle.load`` can execute arbitrary code embedded in the
        file, so both paths must point at files this application wrote itself.

        Args:
            vectorizer_path: Pickle file holding the fitted vectorizer.
            matrix_path: Pickle file holding the TF-IDF training matrix.

        Returns:
            A ``(vectorizer, training_matrix)`` tuple.

        Raises:
            OSError, pickle.UnpicklingError and the other unpickling errors
            listed in ``_MODEL_LOAD_ERRORS`` when a file is missing or invalid.
        """
        with open(vectorizer_path, "rb") as handle:
            vectorizer = pickle.load(handle)
        with open(matrix_path, "rb") as handle:
            training_matrix = pickle.load(handle)
        return vectorizer, training_matrix

    def _lookup_answer(self, csv_path, row_index):
        """Return the reply stored in column 1 of the *row_index*-th CSV row.

        The file is parsed exactly like ``train_chat`` parses it (default CSV
        dialect, blank rows skipped) so that a row number here refers to the
        same record as the matching row of the fitted matrix.

        Args:
            csv_path: Question/answer CSV file.
            row_index: Zero-based index among the non-blank rows.

        Returns:
            The reply text, or ``None`` when the row does not exist or has no
            second column.
        """
        with open(csv_path, "r", newline="") as data:
            records = (row for row in csv.reader(data) if row)
            for position, row in enumerate(records):
                if position == row_index:
                    return row[1] if len(row) > 1 else None
        return None

    def train_chat(self, json_file_path, tfidf_vectorizer_pikle_path, tfidf_matrix_train_pikle_path):
        """Fit a TF-IDF model on the CSV's first column and cache it on disk.

        Args:
            json_file_path: Path of the question/answer file.  Despite the
                name it is read with ``csv.reader``.
            tfidf_vectorizer_pikle_path: Where the fitted vectorizer is pickled.
            tfidf_matrix_train_pikle_path: Where the training matrix is pickled.

        Returns:
            A ``(tfidf_vectorizer, tfidf_matrix_train)`` tuple.
        """
        with open(json_file_path, "r", newline="") as data:
            # Blank rows carry no question; skipping them here and in
            # _lookup_answer keeps row numbers aligned.
            questions = [row[0] for row in csv.reader(data) if row]

        start = timeit.default_timer()
        tfidf_vectorizer = TfidfVectorizer()
        tfidf_matrix_train = tfidf_vectorizer.fit_transform(questions)
        stop = timeit.default_timer()
        print("training time took was : ")
        print(stop - start)

        with open(tfidf_vectorizer_pikle_path, "wb") as handle:
            pickle.dump(tfidf_vectorizer, handle)
        with open(tfidf_matrix_train_pikle_path, "wb") as handle:
            pickle.dump(tfidf_matrix_train, handle)
        return tfidf_vectorizer, tfidf_matrix_train

    def talk_to_cb_primary(self, message, minimum_score, tfidf_vectorizer_pikle_path,
                           tfidf_matrix_train_pikle_path, json_file_path=None):
        """Answer *message* with the reply of the most similar stored question.

        The cached model is loaded from the two pickle paths; when that fails
        the model is retrained from the CSV and the pickles are rewritten.

        Args:
            message: Incoming user text.
            minimum_score: Cosine-similarity threshold; the best match must
                score strictly above it to be used.
            tfidf_vectorizer_pikle_path: Pickle file of the fitted vectorizer.
            tfidf_matrix_train_pikle_path: Pickle file of the training matrix.
            json_file_path: Question/answer CSV; defaults to
                ``DEFAULT_QA_CSV_PATH`` when omitted.

        Returns:
            The stored reply of the best-matching row, ``FALLBACK_REPLY`` when
            no row scores above *minimum_score*, or ``None`` when the matching
            row cannot be found in the CSV.
        """
        if json_file_path is None:
            json_file_path = self.DEFAULT_QA_CSV_PATH

        try:
            tfidf_vectorizer, tfidf_matrix_train = self._load_model(
                tfidf_vectorizer_pikle_path, tfidf_matrix_train_pikle_path
            )
        except self._MODEL_LOAD_ERRORS:
            # No usable cache: rebuild it from the CSV.
            tfidf_vectorizer, tfidf_matrix_train = self.train_chat(
                json_file_path, tfidf_vectorizer_pikle_path, tfidf_matrix_train_pikle_path
            )

        query_vector = tfidf_vectorizer.transform([message])
        # One query document, so row 0 holds its similarity to every stored question.
        scores = cosine_similarity(query_vector, tfidf_matrix_train)[0]
        if scores.max() > minimum_score:
            # argmax yields the first row reaching the maximum score.
            return self._lookup_answer(json_file_path, int(scores.argmax()))
        return self.FALLBACK_REPLY

    def previous_chats(self, query):
        """Answer *query* using the default model files and a 0.55 threshold.

        Args:
            query: Incoming user text.

        Returns:
            Whatever ``talk_to_cb_primary`` returns for *query*.
        """
        minimum_score = 0.55
        # NOTE(review): this method used to pass "aii_previous_*.pickle" names,
        # but they were overwritten before use; the defaults below are the
        # files that were actually read and written.
        return self.talk_to_cb_primary(
            query,
            minimum_score,
            self.DEFAULT_VECTORIZER_PICKLE_PATH,
            self.DEFAULT_MATRIX_PICKLE_PATH,
        )

    def runbot(self, msg):
        """Public entry point: return the bot's reply to *msg*."""
        return self.previous_chats(msg)