-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathembedder.py
71 lines (63 loc) · 2.42 KB
/
embedder.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
# Embedder (GloVe wrapper)
# Phrases of two or more words are combined into a single vector by
# successive pairwise averaging of their word embeddings (not concatenated).
# ===== imports =====
import json
import re

try:  # Python 2
    from urllib import urlopen
    from urllib import quote
except ImportError:  # Python 3
    from urllib.parse import quote
    from urllib.request import urlopen

import numpy as np
from glove import Glove
# ===== definitions =====
class Embedder:
    """Look up GloVe vectors for single words and whitespace-separated phrases.

    Wraps a ``Glove`` model loaded from a Stanford-format text file. Words are
    matched case-insensitively. Anything that cannot be resolved maps to an
    all-zero vector with ``length`` components.

    Attributes:
        gloveModel: The loaded ``Glove`` model. This class reads its
            ``dictionary`` (word -> row index) and ``word_vectors``.
        length: Number of components in every vector this class returns.
        fixPatterns: When true, an unknown word is retried once using the
            first query suggestion returned by the Google suggest endpoint.
    """

    # Class-level defaults; __init__ overwrites all three per instance.
    gloveModel = None
    length = 50
    fixPatterns = False

    def __init__(self, path='glove.6B.50d.txt', fixPatterns=False):
        """Load the model stored at *path*.

        Args:
            path: Stanford-format GloVe file. The vector size is read from a
                ``.<N>d`` tag in the name (for example ``.50d``); when the
                name carries no such tag it is measured from the model.
            fixPatterns: Enable the online suggestion fallback for words that
                are missing from the vocabulary.
        """
        self.gloveModel = Glove.load_stanford(path)
        # Raw string: '\.' inside a plain literal is an invalid escape.
        size_tags = re.findall(r'\.[0-9][0-9][0-9]?d', path)
        if size_tags:
            # Strip the leading '.' and the trailing 'd'.
            self.length = int(size_tags[0][1:-1])
        else:
            # NOTE(review): assumes word_vectors holds at least one row - confirm.
            self.length = len(self.gloveModel.word_vectors[0])
        self.fixPatterns = fixPatterns

    def getVector(self, word):
        """Return the embedding of *word*, which may also be a phrase.

        Args:
            word: A single word or several whitespace-separated words.

        Returns:
            A numpy vector with ``length`` components. For a single known
            word this is the model's own row (not a copy). Unknown words and
            empty or whitespace-only input give an all-zero vector. For a
            phrase, each word with a non-zero vector is folded in as
            ``result = (result + vector) / 2`` starting from zeros.
        """
        tokens = word.split()
        if not tokens:
            # Keep the result shape uniform for empty or blank input.
            return np.zeros(self.length)

        if len(tokens) > 1:
            # NOTE: this running fold weights later words more heavily than
            # earlier ones; it is not an arithmetic mean. The arithmetic is
            # kept unchanged so previously computed vectors stay reproducible.
            combined = np.zeros(self.length)
            for token in tokens:
                vector = self.getVector(token)
                if vector.any():  # skip words that resolved to zeros
                    combined = (combined + vector) / 2.
            return combined

        vocabulary = self.gloveModel.dictionary
        # Use the split token so surrounding whitespace cannot defeat the lookup.
        key = tokens[0].lower()
        if key not in vocabulary and self.fixPatterns:
            suggestion = self.__use_suggessions__(tokens[0]).split()
            # Only a single-word suggestion can replace a single word.
            if len(suggestion) == 1:
                key = suggestion[0].lower()
        if key not in vocabulary:
            return np.zeros(self.length)
        return self.gloveModel.word_vectors[vocabulary[key]]

    def __use_suggessions__(self, phrase):
        """Return the first online query suggestion for *phrase*.

        Best effort: *phrase* is returned unchanged when the request fails,
        the response is not valid JSON, the response does not have the
        expected layout, or there are no suggestions.

        Args:
            phrase: The text to look up.

        Returns:
            The first suggestion, or *phrase* itself.
        """
        url = "http://suggestqueries.google.com/complete/search?output=firefox&q="
        # Encode first: quote() accepts bytes on Python 3, and on Python 2 it
        # cannot handle non-ASCII unicode text directly.
        raw = phrase if isinstance(phrase, bytes) else phrase.encode('utf-8')
        try:
            response = urlopen(url + quote(raw))
            try:
                webpage = response.read()
            finally:
                response.close()
        except IOError:
            # Network trouble must not break an embedding lookup.
            return phrase
        try:
            payload = json.loads(webpage)
        except ValueError:
            return phrase
        # The code relies on the layout [query, [suggestion, ...]].
        if not isinstance(payload, list) or len(payload) < 2:
            return phrase
        suggestions = payload[1]
        if not isinstance(suggestions, list) or not suggestions:
            return phrase
        return suggestions[0]
# ===== main testing =====
if __name__ == "__main__":
    # Manual smoke test: embed one sample sentence and print the vector.
    sample_sentence = 'Hello yaser you are the man'
    glove_wrapper = Embedder('../glove.6B.50d.txt')
    sentence_vector = glove_wrapper.getVector(sample_sentence)
    print(sentence_vector)