-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathhmmdecode.py
95 lines (89 loc) · 2.91 KB
/
hmmdecode.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
import sys
import ast
import operator
class State(object):
    """One step of the decoded tag sequence.

    Holds the tag chosen at this step, the tag chosen at the step before
    it, and the score that was assigned to this step.
    """

    def __init__(self, currentState, prevState, probability):
        """Store the three values on the instance unchanged."""
        (self.currentState,
         self.prevState,
         self.probability) = (currentState, prevState, probability)
def containsNum(s):
    """Return True if at least one character of ``s`` is a digit."""
    for ch in s:
        if ch.isdigit():
            return True
    return False
# Load the trained model from 'hmmmodel.txt'.
#
# The file has two sections, each introduced by a header line:
#   'Transition Probability' -- lines of the form  <previous tag>$$$$$<dict literal>
#   'Emission Probability'   -- lines of the form  <word>$$$$$<dict literal>
# Each dict literal is parsed with ast.literal_eval (literals only, no code
# execution) and stored as-is:
#   transitionProbability[previous tag] -> {tag: value}
#   wordTagProbability[word]            -> {tag: value}
transitionProbability = {}
wordTagProbability = {}
tp = 'Transition Probability'
ep = 'Emission Probability'
previousState = ''
tempMap = {}
# 'with' guarantees the file is closed even when a malformed line raises.
with open('hmmmodel.txt', 'r') as hmmmodel:
    # First section: read transition rows until the emission header appears.
    for line in hmmmodel:
        line = line.strip()
        if line == ep:
            break
        # Skip the section header and blank lines (a blank line has no
        # '$$$$$' separator and would otherwise fail on tokens[1]).
        if not line or line == tp:
            continue
        tokens = line.split('$$$$$', 1)
        previousState = tokens[0]
        transitionProbability[previousState] = ast.literal_eval(tokens[1])
    # Second section: the same file iterator continues right after the
    # emission header, so every remaining line is an emission row.
    for line in hmmmodel:
        line = line.strip()
        if not line:
            continue
        # rsplit: split at the LAST separator so that a '$$$$$' sequence
        # occurring inside the word itself stays part of the key.
        tokens = line.rsplit('$$$$$', 1)
        word = tokens[0]
        wordTagProbability[word] = ast.literal_eval(tokens[1])
# Tag every line of the input file (path given as sys.argv[1]) and write the
# result to 'hmmoutput.txt', one output line per input line, formatted as
# "word/TAG word/TAG ...".
#
# Decoding is greedy and left to right: for each word the single best tag is
# chosen given the tag chosen for the previous word, starting from the
# artificial start state 'q0'.  Scores are combined by addition
# (presumably log-probabilities -- confirm against the model writer).
#
# Fixes relative to the previous version:
#   * tags are paired with words by position, so a word that occurs more than
#     once in a sentence keeps the tag chosen at each occurrence (a dict keyed
#     by the word text used to overwrite earlier occurrences);
#   * unknown words use the transitions out of the tag chosen for the previous
#     word instead of always using the transitions out of 'q0';
#   * the 'ZZ' branch converts the transition value with float() and adds the
#     running score, like the other branches;
#   * 'in' / '.items()' replace 'has_key' / 'iteritems' so the script runs on
#     both Python 2 and Python 3;
#   * both files are closed by 'with', also when an error is raised.
#
# NOTE(review): the unknown-word branches now require the transition table to
# have a row for every tag that can be chosen (and a 'ZZ' entry in each row),
# as the known-word branch already did -- confirm the model is written that way.

# Never read by this script; kept so these module-level names stay defined.
backPointer = {}
probability = {}

# The output file is opened first, as before, so it is created (empty) even
# when the input path is missing or unreadable.
with open('hmmoutput.txt', 'w') as hmmoutput, open(sys.argv[1]) as inputFile:
    for line in inputFile:
        words = line.split()
        previousState = State('q0', None, 0.0)
        states = [previousState]
        for word in words:
            # The model is looked up with the lower-cased form; the original
            # spelling is what gets written to the output.
            key = word.lower()
            prevTag = previousState.currentState
            if key in wordTagProbability:
                # Known word: score each tag seen with this word as
                # emission + running score + transition from the previous tag.
                candidates = {}
                for candidateTag, emission in wordTagProbability[key].items():
                    candidates[candidateTag] = (
                        float(emission)
                        + previousState.probability
                        + float(transitionProbability[prevTag][candidateTag])
                    )
                bestTag, bestScore = max(candidates.items(),
                                         key=operator.itemgetter(1))
            elif containsNum(key):
                # Unknown word containing a digit: always tagged 'ZZ'.
                bestTag = 'ZZ'
                bestScore = (previousState.probability
                             + float(transitionProbability[prevTag]['ZZ']))
            else:
                # Any other unknown word: take the most likely transition out
                # of the previous tag.  Compare as floats so the choice is
                # numeric even if the model stored the values as strings.
                bestTag, bestTransition = max(
                    transitionProbability[prevTag].items(),
                    key=lambda item: float(item[1]))
                bestScore = previousState.probability + float(bestTransition)
            previousState = State(bestTag, prevTag, bestScore)
            states.append(previousState)
        # states[0] is the start state, so states[1:] lines up with words.
        tagged = [word + '/' + state.currentState
                  for word, state in zip(words, states[1:])]
        hmmoutput.write(' '.join(tagged))
        hmmoutput.write('\n')