diff --git a/BrownIC.py b/BrownIC.py
new file mode 100644
index 0000000..2c61fb4
--- /dev/null
+++ b/BrownIC.py
@@ -0,0 +1,50 @@
+# Lin/Resnik similarity over WordNet with Brown-corpus information content,
+# scored against the human ratings in mc.csv via Spearman correlation.
+import nltk
+from nltk.corpus import wordnet as wn
+from nltk.corpus import wordnet_ic
+import math,csv
+import scipy.stats
+import Hyponym  # local helper module providing leaf_nodes()/hyponym_paths()
+
+brown_ic = wordnet_ic.ic('ic-brown.dat')
+
+def hypernym_paths(s):
+    # assumed helper (definition not visible in this hunk): wraps the NLTK Synset method
+    return s.hypernym_paths()
+
+def sim_lin(syns1,syns2):
+    # best Lin similarity over all synset pairs of the two words
+    maxSim=None
+    for s1 in syns1:
+        for s2 in syns2:
+            sim=s1.lin_similarity(s2,brown_ic)
+            if maxSim==None or maxSim<sim:
+                maxSim=sim
+    return maxSim
+
+def LCS(a,b):
+    # least common subsumer: the common hypernym with the most hypernym paths;
+    # the loop structure here is an assumed reconstruction
+    dpt=0
+    for i in range(len(a)):
+        for j in range(len(b)):
+            for i1 in a[i].common_hypernyms(b[j]):
+                if len(hypernym_paths(i1))>dpt:
+                    dpt=len(hypernym_paths(i1))
+                    item=i1
+                    it1=i
+                    it2=j
+    return item,a[it1],b[it2]
+
+def IC(a):
+    if len(hypernym_paths(a))==0:
+        return (-1)*math.log(0.9999866)
+    return (-1)*math.log((float(len(Hyponym.leaf_nodes(a))/len(hypernym_paths(a)))+1)/74898)
+
+def Prob(a):
+    if len(hypernym_paths(a))==0:
+        return math.exp(math.log(0.9999866))
+    return math.exp(math.log((float(len(Hyponym.leaf_nodes(a))/len(hypernym_paths(a)))+1)/74898))
+
+def Lin_Sim(item,it1,it2):
+    return 2*IC(item)/(IC(it1)+IC(it2))
+
+def Res_Sim(item):
+    return IC(item)
+
+train = csv.reader(open("mc.csv",'rb'),delimiter=';')
+
+word1=[]
+word2=[]
+hr=[]
+LinS=[]
+
+for row in train:
+    word1.append(row[0])
+    word2.append(row[1])
+    hr.append(row[2])
+
+#f=open("DavidModel.txt","w")
+
+for i in range(1,len(hr)):
+    a=wn.synsets(word1[i])
+    b=wn.synsets(word2[i])
+    [item,it1,it2]=LCS(a,b)
+    LinS.append(Lin_Sim(item,it1,it2)*(1-Prob(item)))
+    #print Prob(item)
+    #f.write("%s\t%s\t%.5s\t%.5s\t%.5s\n"%(word1[i],word2[i],Lin_Sim(item,it1,it2),Res_Sim(item),hr[i]))
+
+#f.close()
+hr.pop(0)
+hr=[float(x) for x in hr]  # ratings are read in as strings
+print scipy.stats.spearmanr(LinS,hr)
diff --git a/GenesisIC.py b/GenesisIC.py
new file mode 100644
index 0000000..fafc002
--- /dev/null
+++ b/GenesisIC.py
@@ -0,0 +1,50 @@
+# Same setup with information content computed from the Genesis corpus,
+# scored against the human ratings in rg.csv via Spearman correlation.
+import nltk
+from nltk.corpus import wordnet as wn
+from nltk.corpus import genesis
+import math,csv
+import scipy.stats
+import Hyponym  # local helper module providing hyponym_paths()
+
+genesis_ic=wn.ic(genesis, False, 0.0)
+
+def hypernym_paths(s):
+    # assumed helper (definition not visible in this hunk): wraps the NLTK Synset method
+    return s.hypernym_paths()
+
+def sim_lin(syns1,syns2):
+    # best Lin similarity over all synset pairs of the two words
+    maxSim=None
+    for s1 in syns1:
+        for s2 in syns2:
+            sim=s1.lin_similarity(s2,genesis_ic)
+            if maxSim==None or maxSim<sim:
+                maxSim=sim
+    return maxSim
+
+def LCS(a,b):
+    # least common subsumer: the common hypernym with the most hypernym paths;
+    # the loop structure here is an assumed reconstruction
+    dpt=0
+    for i in range(len(a)):
+        for j in range(len(b)):
+            for i1 in a[i].common_hypernyms(b[j]):
+                if len(hypernym_paths(i1))>dpt:
+                    dpt=len(hypernym_paths(i1))
+                    item=i1
+                    it1=i
+                    it2=j
+    return item,a[it1],b[it2]
+
+def IC(a):
+    # node_max (total node count) is assumed to be defined elsewhere in the original file
+    if (Hyponym.hyponym_paths(a))==[]:
+        return 1-(math.log10(1)/math.log10(node_max))
+    return 1-(math.log10(len(Hyponym.hyponym_paths(a)))/math.log10(node_max))
+
+def Prob(a):
+    if (Hyponym.hyponym_paths(a))==[]:
+        return 1-(math.log10(1)/math.log10(node_max))
+    return (math.log10(len(Hyponym.hyponym_paths(a))+1)/math.log10(node_max+1))
+    """(1/(1-(math.log10(len(Hyponym.hyponym_paths(a))+2)/math.log10(node_max+1))))
+    """
+
+def Lin_Sim(item,it1,it2):
+    return 2*IC(item)/(IC(it1)+IC(it2))
+
+def Res_Sim(item):
+    return IC(item)
+
+train = csv.reader(open("rg.csv",'rb'),delimiter=';')
+
+word1=[]
+word2=[]
+hr=[]
+LinS=[]
+
+for row in train:
+    word1.append(row[0])
+    word2.append(row[1])
+    hr.append(row[2])
+
+#f=open("NunoModel.txt","w")
+
+for i in range(1,len(hr)):
+    a=wn.synsets(word1[i])
+    b=wn.synsets(word2[i])
+    [item,it1,it2]=LCS(a,b)
+    LinS.append(Res_Sim(item))
+    #print Prob(item),Lin_Sim(item,it1,it2)
+    #f.write("%s\t%s\t%.5s\t%.5s\t%.5s\n"%(word1[i],word2[i],Lin_Sim(item,it1,it2),Res_Sim(item),hr[i]))
+
+#f.close()
+hr.pop(0)
+hr=[float(x) for x in hr]  # ratings are read in as strings
+#print len(LinS), len(hr)
+print scipy.stats.spearmanr(LinS,hr)
diff --git a/SemcorIC.py b/SemcorIC.py
new file mode 100644
index 0000000..729d79d
--- /dev/null
+++ b/SemcorIC.py
@@ -0,0 +1,49 @@
+# Lin similarity with the SemCor information-content file.
+import nltk
+from nltk.corpus import wordnet as wn
+from nltk.corpus import wordnet_ic
+import math,csv
+import scipy
+
+semcor_ic = wordnet_ic.ic('ic-semcor.dat')
+
+def sim_lin(syns1,syns2):
+    # best Lin similarity over all synset pairs of the two words
+    maxSim=None
+    for s1 in syns1:
+        for s2 in syns2:
+            sim=s1.lin_similarity(s2,semcor_ic)
+            if maxSim==None or maxSim<sim: