Skip to content

Commit

Permalink
Semantic Similarity Files
Browse files Browse the repository at this point in the history
  • Loading branch information
sayanmutd committed Apr 1, 2017
1 parent ac83708 commit 294e372
Show file tree
Hide file tree
Showing 14 changed files with 2,528 additions and 0 deletions.
50 changes: 50 additions & 0 deletions BrownIC.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import nltk
from nltk.corpus import wordnet as wn
from nltk.corpus import wordnet_ic
import math,csv
import scipy

# Information-content table loaded from the 'ic-brown.dat' resource; it is the
# `ic` argument passed to lin_similarity / res_similarity by the functions below.
brown_ic = wordnet_ic.ic('ic-brown.dat')

def sim_lin(syns1, syns2):
    """Return the highest Lin similarity over all pairs of synsets.

    Every synset in ``syns1`` is scored against every synset in ``syns2``
    with ``lin_similarity`` and the module-level ``brown_ic`` table.

    Args:
        syns1: iterable of synsets for the first word.
        syns2: iterable of synsets for the second word.

    Returns:
        The maximum pairwise score, or ``None`` when there is no pair to
        score (either input is empty).
    """
    scores = [s1.lin_similarity(s2, brown_ic) for s1 in syns1 for s2 in syns2]
    if not scores:
        # No synset pair exists; callers must treat None as "no score".
        return None
    return max(scores)

def sim_resnik(syns1, syns2):
    """Return the highest Resnik similarity over all pairs of synsets.

    Every synset in ``syns1`` is scored against every synset in ``syns2``
    with ``res_similarity`` and the module-level ``brown_ic`` table.

    Args:
        syns1: iterable of synsets for the first word.
        syns2: iterable of synsets for the second word.

    Returns:
        The maximum pairwise score, or ``None`` when there is no pair to
        score (either input is empty).
    """
    scores = [s1.res_similarity(s2, brown_ic) for s1 in syns1 for s2 in syns2]
    if not scores:
        # No synset pair exists; callers must treat None as "no score".
        return None
    return max(scores)

# Evaluation script: read word pairs and human ratings from rg.csv, score each
# pair with sim_resnik, and print the Spearman rank correlation between the
# computed scores and the human ratings.

# `import scipy` alone does not guarantee that the stats submodule is loaded.
import scipy.stats

word1 = []
word2 = []
hr = []    # human ratings, as floats
LinS = []  # computed scores (Resnik similarity, despite the name)

# The csv module wants a binary file on Python 2 and a text file on Python 3;
# `str is bytes` is only true on Python 2.
with open("rg.csv", "rb" if str is bytes else "r") as rg_file:
    train = csv.reader(rg_file, delimiter=';')
    next(train, None)  # the first row is not data (header) and is skipped
    for row in train:
        if not row:
            # Tolerate blank lines, e.g. a trailing newline at end of file.
            continue
        word1.append(row[0])
        word2.append(row[1])
        # Convert to float so the ratings are ranked numerically rather than
        # as strings.
        hr.append(float(row[2]))

for first, second in zip(word1, word2):
    a = wn.synsets(first, pos="n")
    b = wn.synsets(second, pos="n")
    # NOTE: sim_resnik returns None when a word has no noun synsets.
    LinS.append(sim_resnik(a, b))

print(scipy.stats.spearmanr(LinS, hr))
Loading

0 comments on commit 294e372

Please sign in to comment.