-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathviterbi.py
62 lines (41 loc) · 1.26 KB
/
viterbi.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
import counter
# Sentence to tag. It is split on whitespace, so the final "." is a token of
# its own.
sentence = "she wanted to race ."
words = sentence.split()
# Output list that receives "<word>-<TAG>" strings. It is a copy, not an alias,
# so appending tags to it does not also rewrite the tokens held in `words`.
tagged_words = list(words)
# Tag inventory. A state's position in this list is the integer index used for
# table columns and backpointers. '$' is the state the decoder starts from
# (see `curr_state` below).
states = ['NOUN', 'VERB', 'ADJ', 'ADP', 'ADV', 'CONJ', 'DET', 'PRT', 'PRON',
          'NUM', 'X', '$', '.', ',']
# State the decoder is currently in; decoding begins in the '$' state.
curr_state = '$'
# Viterbi table: one list per processed word is appended to it during decoding.
table = []
def selectFirst(obj):
    """Return the first element of *obj*.

    Used as a ``key=`` function so that (probability, backpointer) pairs are
    compared by their probability only.
    """
    return obj[0]
# Viterbi decoding of `words`.
#
# One column is appended to `table` per word. Column entry s (s = position of
# a state in `states`) is a pair (probability, backpointer):
#   * probability -- score of the best path that ends in state s at this word;
#   * backpointer -- index (into `states`) of the previous state on that path,
#                    or the string '$' in the first column, which has no
#                    predecessor.
# `counter.probabilities` is looked up with (from_state, to_state, word) keys;
# a missing key is treated as probability 0.0.
# NOTE(review): presumably each value already combines the transition and the
# emission probability -- confirm against the `counter` module.
for word in words:
    observation = []
    if not table:
        # First word: the only possible predecessor is the start state '$'.
        # An empty table (rather than `curr_state == '$'`) marks this step, so
        # a mid-sentence '$' can never restart the recursion and leave string
        # backpointers inside the table.
        for state in states:
            try:
                start_prob = counter.probabilities['$', state, word]
            except KeyError:
                start_prob = 0.0
            observation.append((start_prob, '$'))
    else:
        previous = table[-1]
        for toState in states:
            currProbs = []
            for from_index, fromState in enumerate(states):
                try:
                    prob = counter.probabilities[fromState, toState, word]
                except KeyError:
                    prob = 0.0
                currProbs.append(previous[from_index][0] * prob)
            # Ties go to the earliest state in `states`: list.index returns
            # the first position holding the maximum.
            best_prob = max(currProbs)
            observation.append((best_prob, currProbs.index(best_prob)))
    # Best state for this word taken on its own. It is not used by the
    # decoding below; the name is kept up to date for anyone inspecting it.
    curr_state = states[observation.index(max(observation, key=selectFirst))]
    table.append(observation)

# Backtrace: start from the most probable final state and follow the
# backpointers to the first word, tagging every word on the way.
if table:  # nothing to tag for an empty sentence
    index = table[-1].index(max(table[-1], key=selectFirst))
    for i in range(len(table) - 1, -1, -1):
        tagged_words[i] += '-' + states[index]
        index = table[i][index][1]

# Parenthesised form: same output under Python 2, and valid Python 3.
print(tagged_words)