-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathchatbot.py
372 lines (318 loc) · 12.8 KB
/
chatbot.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
#!/usr/bin/env python
# -*- coding: utf-8 -*-
'''Trains a memory network on the bAbI dataset.
References:
- Jason Weston, Antoine Bordes, Sumit Chopra, Tomas Mikolov, Alexander M. Rush,
"Towards AI-Complete Question Answering: A Set of Prerequisite Toy Tasks",
http://arxiv.org/abs/1502.05698
- Sainbayar Sukhbaatar, Arthur Szlam, Jason Weston, Rob Fergus,
"End-To-End Memory Networks",
http://arxiv.org/abs/1503.08895
Reaches 98.6% accuracy on task 'single_supporting_fact_10k' after 120 epochs.
Time per epoch: 3s on CPU (core i7).
Code by Eibriel, Siraj Raval and fchollet.
'''
import os
import sys
import random as rd
from keras.layers import add
from keras.layers import dot
from keras.layers import LSTM
from keras.layers import Dense
from keras.layers import Input
from keras.layers import Dropout
from keras.layers import Permute
from keras.layers import Activation
from keras.layers import concatenate
from keras.models import Model
from keras.models import Sequential
from keras.layers.embeddings import Embedding
from helpers import *
from telegram import telegram
try:
    # Telegram credentials live in an optional, project-local config module.
    from config import Config
except ImportError:
    # Only a missing/unimportable config is tolerated; any other failure
    # (e.g. a syntax error inside config.py) should surface, not be hidden.
    print("Missing config.py, no Telegram support")
# Generate the dataset on the fly

# Commands to add an item
add_commands = [
    'add',
    'i would like',
    'i want',
]
# Commands to remove an item
remove_commands = [
    'remove',
    'i dont want',
]
# Commands to change an item for another.
# The ':' separates the words placed before each of the two flavors.
change_commands = [
    'change:for',
]
# List of ice cream flavors
flavors = [
    'chocolate',
    'lemon',
    'cherry',
    'coffee',
]

# Amount of stories to generate
stories_count = 40000


def _generate_story():
    """Build one random (story, question, answer) training sample.

    A story is 1 to 6 random add/remove/change commands. The question asks
    whether one random flavor is in the order, and the answer ("yes"/"no")
    is the tracked state of that flavor after replaying every command.

    Returns:
        A tuple (story_tokens, question_tokens, answer) where the first two
        items are lists of words and the last one is a string.
    """
    last_flavor = len(flavors) - 1
    is_flavor = [False] * len(flavors)  # Holds flavor selection status
    sentences = []
    for _ in range(rd.randint(1, 6)):
        random_action = rd.randint(0, 2)  # 0: add, 1: remove, 2: change
        random_flavor = rd.randint(0, last_flavor)
        random_flavor_b = rd.randint(0, last_flavor)
        if random_action == 0:  # add
            is_flavor[random_flavor] = True
            text = "{} {} .".format(rd.choice(add_commands),
                                    flavors[random_flavor])
        elif random_action == 1:  # remove
            is_flavor[random_flavor] = False
            text = "{} {} .".format(rd.choice(remove_commands),
                                    flavors[random_flavor])
        else:  # change
            # Removal happens first, so changing a flavor for itself
            # leaves that flavor selected.
            is_flavor[random_flavor] = False
            is_flavor[random_flavor_b] = True
            before, between = rd.choice(change_commands).split(':')
            text = "{} {} {} {} .".format(before, flavors[random_flavor],
                                          between, flavors[random_flavor_b])
        sentences.append(text)
    asked_flavor = rd.randint(0, last_flavor)  # Flavor for the question
    question = "is {} in the order ?".format(flavors[asked_flavor])
    answer = "yes" if is_flavor[asked_flavor] else "no"
    return (" ".join(sentences).split(" "), question.split(" "), answer)


generated_dataset = [_generate_story() for _ in range(stories_count)]
# Split the dataset in half: the first half trains, the second half tests.
# Stories are generated at random, so both halves may contain duplicated
# stories and the validation score is therefore not fully reliable.
split_idx = stories_count // 2
train_stories, test_stories = (generated_dataset[:split_idx],
                               generated_dataset[split_idx:])

# Collect every distinct word used in stories, questions and answers
vocab = set()
for story, q, answer in train_stories + test_stories:
    vocab.update(story, q, [answer])
vocab = sorted(vocab)

# Index 0 is reserved for masking via pad_sequences
vocab_size = 1 + len(vocab)
story_maxlen = max(len(s) for s, _, _ in train_stories + test_stories)
query_maxlen = max(len(s) for _, s, _ in train_stories + test_stories)
# Summary of the generated dataset
print('-')
print('Vocab size:', vocab_size, 'unique words')
print('Story max length:', story_maxlen, 'words')
print('Query max length:', query_maxlen, 'words')
print('Number of training stories:', len(train_stories))
print('Number of test stories:', len(test_stories))

print('-')
print('Here\'s what a "story" tuple looks like (input, query, answer):')
print(train_stories[0])

print('-')
print('Vectorizing the word sequences...')

# Word -> integer id; ids start at 1 because 0 is kept for padding
word_idx = {word: position
            for position, word in enumerate(vocab, start=1)}

inputs_train, queries_train, answers_train = vectorize_stories(
    train_stories, word_idx, story_maxlen, query_maxlen)
inputs_test, queries_test, answers_test = vectorize_stories(
    test_stories, word_idx, story_maxlen, query_maxlen)

# Shapes of the vectorized tensors
print('-')
print('inputs: integer tensor of shape (samples, max_length)')
print('inputs_train shape:', inputs_train.shape)
print('inputs_test shape:', inputs_test.shape)

print('-')
print('queries: integer tensor of shape (samples, max_length)')
print('queries_train shape:', queries_train.shape)
print('queries_test shape:', queries_test.shape)

print('-')
print('answers: binary (1 or 0) tensor of shape (samples, vocab_size)')
print('answers_train shape:', answers_train.shape)
print('answers_test shape:', answers_test.shape)

print('-')
print('Compiling...')

embed_dim = 64
# NOTE: despite its name, this value is handed to Dropout as its first
# argument, which Keras treats as the fraction of units to drop.
keep_prob = 0.3

# Placeholders for the two model inputs (word-id sequences)
input_sequence = Input(shape=(story_maxlen,))
question = Input(shape=(query_maxlen,))

# Encoders

# Story -> sequence of embed_dim vectors
# output: (samples, story_maxlen, embedding_dim)
input_encoder_m = Sequential([
    Embedding(input_dim=vocab_size, output_dim=embed_dim),
    Dropout(keep_prob),
])

# Story -> sequence of query_maxlen-sized vectors
# output: (samples, story_maxlen, query_maxlen)
input_encoder_c = Sequential([
    Embedding(input_dim=vocab_size, output_dim=query_maxlen),
    Dropout(keep_prob),
])

# Question -> sequence of embed_dim vectors
# output: (samples, query_maxlen, embedding_dim)
question_encoder = Sequential([
    Embedding(input_dim=vocab_size,
              output_dim=embed_dim,
              input_length=query_maxlen),
    Dropout(keep_prob),
])

# Turn the index sequences into sequences of dense vectors
input_encoded_m = input_encoder_m(input_sequence)
input_encoded_c = input_encoder_c(input_sequence)
question_encoded = question_encoder(question)

# 'Match' between the first story encoding and the question encoding
# shape: `(samples, story_maxlen, query_maxlen)`
match = dot([input_encoded_m, question_encoded], axes=(2, 2))
match = Activation('softmax')(match)

# Add the match matrix to the second story encoding, then swap the two
# sequence axes so the result lines up with the question encoding
response = add([match, input_encoded_c])  # (samples, story_maxlen, query_maxlen)
response = Permute((2, 1))(response)  # (samples, query_maxlen, story_maxlen)

# Join the response with the question vector sequence
answer = concatenate([response, question_encoded])

# The original paper reduces with a matrix multiplication;
# an RNN is used here instead.
answer = LSTM(32)(answer)  # (samples, 32)

# A single regularization layer -- more would probably be needed.
answer = Dropout(keep_prob)(answer)
answer = Dense(vocab_size)(answer)  # (samples, vocab_size)
# Output a probability distribution over the vocabulary
answer = Activation('softmax')(answer)

# Assemble and compile the final model
model = Model([input_sequence, question], answer)
model.compile(loss='categorical_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])
# Reuse previously saved weights when available; otherwise train from
# scratch and store the resulting weights for the next run.
model_filepath = 'model.hdf5'
if os.path.isfile(model_filepath):
    model.load_weights(model_filepath)
else:
    # train
    model.fit(
        [inputs_train, queries_train],
        answers_train,
        batch_size=32 * 8,
        epochs=120,
        validation_data=([inputs_test, queries_test], answers_test),
    )
    model.save_weights(model_filepath)
# Returns an order, given a story
def order_from_story(story):
    """Ask the model which flavors are in the order described by `story`.

    For every entry of `flavors` the model is queried with
    "is <flavor> in the order ?" and the flavor is kept when the predicted
    answer is "yes".

    Args:
        story: list of words; every word must be a key of `word_idx`
            (callers filter input with `known_words` first).

    Returns:
        The Markdown-formatted order summary string.

    Raises:
        KeyError: if `story` contains a word missing from `word_idx`.
    """
    # The model input is exactly `story_maxlen` ids wide, so pad/truncate to
    # that width rather than to a hard-coded length. Over-long stories keep
    # their most recent tokens; older commands are dropped.
    story_int = [word_idx[word] for word in story][-story_maxlen:]
    # Left-pad with 0, the id reserved for padding.
    story_int = [0] * (story_maxlen - len(story_int)) + story_int
    story_batch = np.array(story_int).reshape(1, -1)

    order = []
    for f in flavors:
        query = ["is", f, "in", "the", "order", "?"]
        query_int = [word_idx[word] for word in query]
        prediction = model.predict(
            [story_batch, np.array(query_int).reshape(1, -1)])
        predicted = int(np.argmax(prediction))
        # Class 0 is the padding slot and has no word; without this guard
        # `vocab[predicted - 1]` would wrap around to the last word.
        if predicted > 0 and vocab[predicted - 1] == "yes":
            order.append(f)
    order = ", ".join(order)
    return "Your order: *{}* 🍦\n\n(restarting order)\n\n".format(order)
# Sends a message to Telegram
def send_to_telegram(chat_id, answer):
    """Send `answer` as a Markdown message to the chat `chat_id`.

    Uses the module-level `telegram_conection`, which only exists when the
    script runs with the Telegram UI.
    """
    telegram_conection.send_to_bot(
        'sendMessage',
        data=dict(chat_id=chat_id, parse_mode='Markdown', text=answer),
    )
# Checks if a list contains only known words from the vocabulary
def known_words(sentence):
    """Return True when every word of `sentence` is an accepted command word.

    A word is accepted when it is a key of `word_idx` and is not one of the
    words that only appear in the generated questions and answers.
    """
    reserved = ("order", "yes", "no", "is", "in", "the")
    return all(word in word_idx and word not in reserved
               for word in sentence)
# Help text in Telegram Markdown. The CLI prints it at start-up with the
# '*' and '_' markers stripped; the Telegram UI sends it whenever a message
# contains words the bot does not accept.
welcome_text = """
*Welcome to the End-to-End Ice Cream Truck, please place your order.*
I understand the following commands:
*add flavor* / *i would like flavor* / *i want flavor*
To select a new flavor
*remove flavor* / *i dont want flavor*
To remove a selected flavor
*change flavor for flavor*
To change one flavor to another
*done* - To print your current order
*quit* - To exit
Today flavors: _chocolate_ - _lemon_ - _cherry_ - _coffee_
"""
# Pick the UI: Telegram when "telegram" is passed as a command-line
# argument, the command-line interface otherwise
ui = "telegram" if "telegram" in sys.argv else "cli"
# Command-line UI: read commands from stdin and print the order on demand
if ui == "cli":
    input_text = ""
    print(welcome_text.replace("*", "").replace("_", ""))
    while True:
        story = []  # Restart story
        while True:
            # Normalise the same way as the Telegram UI: the vocabulary is
            # lowercase, so "Add Chocolate" must match "add chocolate".
            input_text = input(">").strip().lower()  # Read the input
            if input_text in ("done", "quit", "order"):  # Stop command
                break
            sentence = input_text.split()
            if not sentence:  # Blank line: nothing to interpret
                print("Unknown command")
                continue
            # The model expects a period at the end of every sentence
            if sentence[-1] != ".":
                sentence.append(".")
            if not known_words(sentence):  # Contains unknown words
                print("Unknown command")
                continue
            story = story + sentence
        if input_text == "quit":
            break
        if not story:
            continue
        # Print order
        print("\n")
        print(order_from_story(story).replace("*", "").replace("_", ""))
# Telegram UI: poll for updates and keep one story per chat
elif ui == "telegram":
    if "Config" not in globals():
        # config.py could not be imported at start-up; fail with a clear
        # message instead of a NameError on Config below.
        sys.exit("Telegram UI requested but config.py is missing")
    print("\nListening for Telegram messages")
    # Configure Telegram connection
    telegram_conection = telegram("eibriel_icecream_bot", Config.telegram_token, "8979")
    chat_history = {}  # Holds the story of the Telegram users
    while True:
        telegram_conection.open_session()
        try:
            r = telegram_conection.get_update()  # Listen for new messages
            if not r:
                continue  # If no messages continue loop
            r_json = r.json()
        finally:
            # Always release the session, including when there was no
            # update, so sessions do not pile up across iterations.
            telegram_conection.close_session()
        # A response without a "result" entry carries no messages
        for result in r_json.get("result", []):
            if not ("message" in result and "text" in result["message"]):
                continue  # Sanity check on the message
            chat_id = result["message"]["chat"]["id"]  # Get user id
            sentence = result["message"]["text"].strip().lower()  # Get input text
            print(sentence)
            if sentence == "/restart":
                # If restart command, empty user story
                chat_history[chat_id] = []
                send_to_telegram(chat_id, "Order restarted")
                continue
            if sentence in ("done", "quit", "order"):
                # If stop command, send the order (when there is one)
                if not chat_history.get(chat_id):
                    continue
                answer = order_from_story(chat_history[chat_id])
                send_to_telegram(chat_id, answer)
                chat_history[chat_id] = []
                continue
            # Input text to list, the model expects a period
            # at the end of a sentence
            sentence = sentence.split()
            if sentence and sentence[-1] != ".":
                sentence.append(".")
            # If the sentence is empty or includes unknown words, send help
            if not sentence or not known_words(sentence):
                send_to_telegram(chat_id, welcome_text)
                continue
            # Add the new sentence to the story
            chat_history.setdefault(chat_id, []).extend(sentence)
            send_to_telegram(chat_id, "Ok!")