-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathCARL.py
114 lines (95 loc) · 3.5 KB
/
CARL.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
import json, os, sys, random, re
# Base directory for persisted data; answer() reads and writes the per-channel
# JSON storage at ROOT_DIR + "/channels/<channel>.json".
ROOT_DIR = "/srv/CARL"
def getLeastUsed(links, excludeIdx):
    """Pick a random index among the entries with the fewest links.

    Args:
        links: List of link lists; ``links[i]`` holds the indices linked
            from entry ``i``.
        excludeIdx: Index that must not be returned. A value that is not a
            valid non-negative index (e.g. ``-1``) excludes nothing.

    Returns:
        A randomly chosen index whose link list is the shortest among all
        non-excluded entries. If the excluded entry is the only entry there
        is no alternative, so its index (0) is returned.

    Raises:
        ValueError: If ``links`` is empty.
    """
    if not links:
        raise ValueError("links must not be empty")

    candidates = [i for i in range(len(links)) if i != excludeIdx]
    if not candidates:
        # Only possible when links has a single entry and it is the excluded one.
        return 0

    fewest = min(len(links[i]) for i in candidates)
    # Choose only among non-excluded entries, so the excluded index is never
    # returned even when its link count ties for the minimum.
    return random.choice([i for i in candidates if len(links[i]) == fewest])
# Splits text on runs of characters that are neither word characters nor
# apostrophes, so contractions such as "don't" stay in one piece.
splitter = re.compile(r"[^\w']+")


def spellcheckPhrase(p, phrases):
    """Find the stored phrase that most closely matches ``p`` word-for-word.

    Both texts are split into lower-cased words. For each stored phrase the
    score is ``(input word count / phrase word count) * (number of input
    words present in the phrase / phrase word count)``. A phrase is skipped
    when its length differs from the input by at least 20% relatively and by
    at least 3 words absolutely. The score is not capped at 1.

    Args:
        p: The text to look up.
        phrases: Sequence of known phrase strings.

    Returns:
        A ``(bestIdx, best)`` tuple: the index of the highest-scoring phrase
        and its score. When nothing scores above zero (including when
        ``phrases`` is empty) the result is ``(0, 0)``, so callers must check
        the score before trusting the index.
    """
    # The input's words do not depend on the phrase; tokenise once.
    inputWords = [w.lower() for w in splitter.split(p) if w]
    inputLen = len(inputWords)

    best = 0
    bestIdx = 0
    for i, phrase in enumerate(phrases):
        phraseWords = [w.lower() for w in splitter.split(phrase) if w]
        phraseLen = len(phraseWords)
        if phraseLen == 0:
            # A phrase without words always scores 0 and can never beat `best`.
            continue

        lenFactor = 1.0 * inputLen / phraseLen
        if abs(lenFactor - 1) >= 0.2 and abs(inputLen - phraseLen) >= 3:
            continue

        known = set(phraseWords)
        # Every occurrence of an input word counts, duplicates included.
        foundCount = sum(1 for word in inputWords if word in known)
        wordsFactor = 1.0 * foundCount / phraseLen

        finalFactor = lenFactor * wordsFactor
        if finalFactor > best:
            best = finalFactor
            bestIdx = i
    return bestIdx, best
def _nextPhraseIdx(links, idx):
    """Return a random phrase linked from ``idx``, or a least-used one if it has no links."""
    if links[idx]:
        return random.choice(links[idx])
    return getLeastUsed(links, idx)  # exclude idx itself


def answer(carlAsked, userAnswered, allowProfanity):
    """Record the user's reply to CARL's previous phrase and pick CARL's next phrase.

    The channel's storage (a JSON file with parallel ``phrases`` and ``links``
    lists) is loaded, updated and written back on every call.

    Args:
        carlAsked: The phrase CARL said last. If it is a stored phrase, a
            link from it to the user's reply is recorded.
        userAnswered: The user's reply. Bracket/slash-like characters are
            stripped, a trailing '.' is added when the text does not end in
            punctuation or a quote, and the result is cut to 250 characters.
        allowProfanity: When true the "E2" channel is used and replies are
            stored unfiltered; otherwise the "default" channel is used and a
            new reply is stored only if ``ProfanityFilter.is_clean`` accepts it.

    Returns:
        The phrase CARL should say next. If the reply matches a stored phrase
        (exactly, or with a spellcheck score above 0.6) this is a phrase
        linked from the match, falling back to a least-used phrase. Otherwise
        it is a least-used phrase. With empty storage the newly stored reply
        itself is returned, or ``""`` if it was rejected and nothing is known.
    """
    if allowProfanity:
        channel = "E2"
        pf = None  # never consulted: the check below short-circuits
    else:
        channel = "default"
        # Only needed when filtering, so the dependency is imported here.
        from profanity_filter import ProfanityFilter
        pf = ProfanityFilter()

    storageFile = ROOT_DIR + "/channels/" + channel + ".json"
    if os.path.isfile(storageFile):
        with open(storageFile, 'r') as storageIn:
            storage = json.load(storageIn)
    else:
        storage = {
            'phrases': [],
            'links': [],
        }

    # Drop characters that are not allowed in stored phrases.
    stripIllegal = str.maketrans("", "", "{}[]()|\\<>/")
    carlAsked = carlAsked.translate(stripIllegal)
    userAnswered = userAnswered.translate(stripIllegal)

    phrases = storage['phrases']  # a list of phrases
    links = storage['links']  # a list of links to other phrases from each phrase

    if len(userAnswered) == 0 or userAnswered[-1] not in ('.', '!', '?', '"', "'"):
        userAnswered += '.'
    if len(userAnswered) > 250:
        userAnswered = userAnswered[:250]

    askIdx = phrases.index(carlAsked) if carlAsked in phrases else -1

    if userAnswered in phrases:
        matchIdx = phrases.index(userAnswered)
        matched = True
    else:
        matchIdx, score = spellcheckPhrase(userAnswered, phrases)
        matched = score > 0.6

    futureAskIdx = -1
    if matched:
        # Choose the follow-up before recording the new link so the choice
        # is based on what was known before this exchange.
        futureAskIdx = _nextPhraseIdx(links, matchIdx)
        if askIdx != -1:
            links[askIdx].append(matchIdx)
    else:
        # With empty storage there is nothing to choose from yet; calling
        # getLeastUsed on an empty list would raise.
        if links:
            futureAskIdx = getLeastUsed(links, matchIdx)  # exclude the weak match
        if allowProfanity or pf.is_clean(userAnswered):
            if askIdx != -1:
                links[askIdx].append(len(phrases))
            links.append([])
            phrases.append(userAnswered)

    with open(storageFile, 'w') as storageOut:
        json.dump(storage, storageOut)

    if futureAskIdx == -1:
        # First exchange on a fresh channel: echo the phrase just learned,
        # or say nothing if it was rejected and no phrase exists.
        return phrases[-1] if phrases else ""
    return phrases[futureAskIdx]