-
Notifications
You must be signed in to change notification settings - Fork 0
/
topicGraph.py
152 lines (122 loc) · 4.69 KB
/
topicGraph.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
from collections import defaultdict
import heapq
from operator import itemgetter
import wordsUtil
class topicGraph():
    """Undirected weighted graph of topic words with synonym folding.

    Edges are stored symmetrically in ``self.graph`` as
    ``graph[u][v] == graph[v][u] == weight``.  Words are lower-cased and,
    when a synonym of the word is already a node, folded onto that node
    before an edge is written.  Strongly connected nodes can be merged into
    a single multi-word node with ``clusterSimiilarWords``.
    """

    # Constructor
    def __init__(self, iterates=5):
        """Create an empty graph.

        Args:
            iterates: number of clustering passes run by
                ``clusterSimiilarWords``.
        """
        # node -> {neighbour -> weight}; missing nodes are created on access.
        self.graph = defaultdict(dict)
        # node -> summed weight of its edges, rebuilt by getNodes().
        self.nodes = {}
        self.iterates = iterates
        # word -> graph node it was folded onto by findSynonym().
        self.synonyms = {}
        # word -> graph node it was folded onto by findHypernym().
        self.hypernym = {}
        # first -> {second -> number of times (first, second) was passed to
        # addEdge/incrementEdgeWeight in that order}.
        # NOTE(review): keyed by the raw tokens as passed in, whereas graph
        # nodes are lower-cased / synonym-resolved - confirm this is intended.
        self.word_order = {}

    # Get Nodes from graph
    def getNodes(self, maxNodes=-1):
        """Recompute node weights and return them.

        A node's weight is the sum of the weights of its edges, ignoring
        self-loops.  Nodes without any non-loop edge are omitted.

        Args:
            maxNodes: when >= 1, return only the ``maxNodes`` heaviest nodes
                as a new dict; otherwise return ``self.nodes`` itself.

        Returns:
            dict mapping node name to its weight.
        """
        self.nodes = {}
        for node, edges in self.graph.items():
            for neighbour, weight in edges.items():
                if neighbour == node:
                    continue
                if node in self.nodes:
                    self.nodes[node] += weight
                else:
                    self.nodes[node] = weight
        if maxNodes >= 1:
            heaviest = heapq.nlargest(
                maxNodes, self.nodes.items(), key=itemgetter(1))
            return dict(heaviest)
        return self.nodes

    # Print Graph
    def printGraph(self):
        """Print the adjacency mapping as a plain dict."""
        print(dict(self.graph))

    def findSynonym(self, word):
        """Resolve ``word`` to an existing node through its synonyms.

        Returns:
            ``(node, matched)``.  ``matched`` is True only when ``word`` was
            replaced by a synonym (freshly found or cached); when ``word`` is
            already a node, or nothing matches, ``(word, False)`` is returned.
        """
        if word in self.graph:
            return word, False
        if word in self.synonyms:
            return self.synonyms[word], True
        # presumably yields candidate synonym strings - verify in wordsUtil
        for synonym in wordsUtil.getSynonyms(word):
            if synonym in self.graph:
                print('Synonym Matched', word, '=>', synonym)
                self.synonyms[word] = synonym
                return synonym, True
        return word, False

    def findHypernym(self, word):
        """Resolve ``word`` to an existing node that ``wordsUtil.isHypernym``
        accepts for it.

        Returns:
            ``(node, matched)`` with the same convention as ``findSynonym``.
        """
        if word in self.graph:
            return word, False
        if word in self.hypernym:
            return self.hypernym[word], True
        for node in self.graph:
            if wordsUtil.isHypernym(word, node):
                print('Hypernym Matched', word, '=>', node)
                self.hypernym[word] = node
                return node, True
        return word, False

    def _recordWordOrder(self, u, v):
        """Count one occurrence of ``u`` appearing before ``v``."""
        forward = self.word_order.setdefault(u, {})
        forward[v] = forward.get(v, 0) + 1
        # Make sure the reverse counter exists so both directions compare.
        self.word_order.setdefault(v, {}).setdefault(u, 0)

    def _resolveWord(self, word):
        """Return the graph node name used for ``word``."""
        node, _ = self.findSynonym(word.lower())
        return node

    def _setEdge(self, u, v, weight):
        """Write the symmetric edge ``u`` <-> ``v`` with ``weight``."""
        self.graph[u][v] = weight
        self.graph[v][u] = weight

    # add an edge to graph
    def addEdge(self, u, v, weight=1):
        """Set the weight of the edge between ``u`` and ``v``.

        The (u, v) order is counted in ``self.word_order`` under the names as
        given; the edge itself is stored under the lower-cased,
        synonym-resolved names.  An existing weight is overwritten.
        Hypernym folding (``findHypernym``) is not applied here.
        """
        self._recordWordOrder(u, v)
        # Resolve u before v is looked up, and both before the edge is
        # written, so neither lookup sees the node the other may create.
        u = self._resolveWord(u)
        v = self._resolveWord(v)
        self._setEdge(u, v, weight)

    # get weight of an edge
    def getEdgeWeight(self, u, v):
        """Return the stored weight of edge ``u`` -> ``v``, or 0 if absent.

        Names are looked up exactly as given (no lower-casing or synonym
        resolution) and the graph is not modified.
        """
        if u not in self.graph or v not in self.graph[u]:
            return 0
        weight = self.graph[u][v]
        return weight if weight else 0

    # increment weight in an edge
    def incrementEdgeWeight(self, u, v, weight):
        """Add ``weight`` to the edge between ``u`` and ``v``.

        The current weight is read under the same resolved node names the
        edge is written under, so repeated calls accumulate even when the
        words are capitalised or folded onto a synonym.
        """
        self._recordWordOrder(u, v)
        u = self._resolveWord(u)
        v = self._resolveWord(v)
        self._setEdge(u, v, self.getEdgeWeight(u, v) + weight)

    def isNeighbourNodes(self, u, v):
        """Return True if a positive-weight edge exists in either direction."""
        if u in self.graph and v in self.graph[u] and self.graph[u][v] > 0:
            return True
        if v in self.graph and u in self.graph[v] and self.graph[v][u] > 0:
            return True
        return False

    # Cluster Similar words
    def clusterSimiilarWords(self, maxNodes=20, minEdgeWeight=2):
        """Merge heavily connected nodes, repeating ``self.iterates`` times.

        Args:
            maxNodes: only the ``maxNodes`` heaviest nodes (see ``getNodes``)
                are considered in each pass.
            minEdgeWeight: two nodes are merged when their edge weight is
                strictly greater than this value.
        """
        for _ in range(self.iterates):
            # Snapshot the candidates: merging mutates the graph.
            for i in list(self.getNodes(maxNodes)):
                if i not in self.graph:
                    # Already merged into another node earlier in this pass.
                    continue
                for j in self.getNodes(maxNodes):
                    if i != j and self.getEdgeWeight(i, j) > minEdgeWeight:
                        self.mergeNodes(i, j)
                        # i no longer exists; the merged node is picked up
                        # by the next pass.
                        break

    # Merge 2 Nodes
    def mergeNodes(self, u, v):
        """Replace nodes ``u`` and ``v`` with one node named ``"<u> <v>"``.

        The two names are swapped first when ``self.word_order`` has seen
        the last word of ``v`` before the first word of ``u`` more often
        than the other way round.  Edges to a common neighbour are summed,
        the edge between ``u`` and ``v`` (and any self-loops) is dropped, and
        every neighbour is re-pointed at the merged node so the graph stays
        symmetric.  Merging a node with itself is a no-op.
        """
        if u == v:
            return
        print(f'Merging Nodes "{u}" & "{v}"')

        # Find Word Order
        firstWord = u.split(' ')[0]
        lastWord = v.split(' ')[-1]
        # word_order may have no entry for these names (it is keyed by the
        # raw tokens given to addEdge), so never index it directly.
        forward = self.word_order.get(firstWord, {}).get(lastWord)
        backward = self.word_order.get(lastWord, {}).get(firstWord, 0)
        if forward is not None and forward < backward:
            u, v = v, u

        mergedName = f'{u} {v}'
        mergedEdges = self.graph.pop(u, {})
        absorbedEdges = self.graph.pop(v, {})
        # Edges among the two merged nodes have no meaning afterwards.
        for edges in (mergedEdges, absorbedEdges):
            edges.pop(u, None)
            edges.pop(v, None)

        for neighbour, weight in absorbedEdges.items():
            if neighbour in mergedEdges:
                mergedEdges[neighbour] += weight
            else:
                mergedEdges[neighbour] = weight

        # Re-point each neighbour from the old nodes to the merged one.
        for neighbour, weight in mergedEdges.items():
            backEdges = self.graph[neighbour]
            backEdges.pop(u, None)
            backEdges.pop(v, None)
            backEdges[mergedName] = weight

        self.graph[mergedName] = mergedEdges