Skip to content

Commit

Permalink
Modify implicit feedback to adapt to different file types
Browse files Browse the repository at this point in the history
  • Loading branch information
Ykid committed Aug 2, 2013
1 parent 41c7a18 commit 339145b
Show file tree
Hide file tree
Showing 3 changed files with 94 additions and 73 deletions.
4 changes: 2 additions & 2 deletions config.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@
#['rmt','FM','RelatedMovieTag',['2']]

#['basicSVD','SVD','Basic',[]]
#['ImplicitFeedbackSVD','SVD','ImplicitFeedback',[]]
['NeighborhoodMovieTag', 'SVD' , 'Neighborhood' , ['MovieTag']]
['ImplicitFeedbackSVD','SVD','ImplicitFeedback',[]]
#['NeighborhoodMovieTag', 'SVD' , 'Neighborhood' , ['MovieTag']]
]

# Defining models:
Expand Down
92 changes: 80 additions & 12 deletions utils/ImplicitFeedbackFunctions.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,19 +13,36 @@
the output format: rate \t number of user group \t number of user implicit feedback \t fid1:fvalue1, fid2:fvalue2 ... \n
'''

def reIndex_Implicit(fin):
print("Reindexing Origin Data Set and Building the Correspondence Dics")
fi = open( fin, 'r' ) #training set
#extract from input file
def reIndex_Implicit(ftrain,fCV,ftest,ftrainOut,fCVOut,ftestOut):
print("Reindexing Data Sets and Building the Correspondence Dics")
bootTrainFile = open(ftrain, 'r')
bootCVFile = open(fCV , 'r')
bootTestFile = open(ftest , 'r')
tmpTrainFile = open(ftrainOut, 'w')
tmpTestFile = open(ftestOut, 'w')
tmpCVFile = open(fCVOut, 'w')

############# Write reindexed tmp files ###############
trainLines = bootTrainFile.readlines()
CVLines = bootCVFile.readlines()
testLines = bootTestFile.readlines()

fullInput = []
fullInput.append(trainLines)
fullInput.append(CVLines)
fullInput.append(testLines)

uidDic={}
iidDic={}
newuid=1
newiid=1
ctr=0 # is the counter of the total number.
sum=0.0

for line in fi:
arr = line.split()
#Build dictionary

for line in trainLines:
arr = line.rsplit('\t')
uid = int(arr[0].strip())
iid = int(arr[1].strip())
rating = int(float(arr[2].strip()))
Expand All @@ -41,10 +58,61 @@ def reIndex_Implicit(fin):
if iid not in iidDic:
iidDic[iid]=newiid
newiid+=1

fi.close()
#calculate different parameter.

for line in CVLines:
arr = line.rsplit('\t')
uid = int(arr[0].strip())
iid = int(arr[1].strip())
#this part for reindexing the user ID
if uid not in uidDic:
uidDic[uid]=newuid
newuid+=1
#this part for reindexing the item ID
if iid not in iidDic:
iidDic[iid]=newiid
newiid+=1

for line in testLines:
arr = line.rsplit('\t')
uid = int(arr[0].strip())
iid = int(arr[1].strip())
#this part for reindexing the user ID
if uid not in uidDic:
uidDic[uid]=newuid
newuid+=1
#this part for reindexing the item ID
if iid not in iidDic:
iidDic[iid]=newiid
newiid+=1

#Re-index
for line in trainLines:
arr = line.split()
uid = int(arr[0].strip())
iid = int(arr[1].strip())
rating = int(float(arr[2].strip()))
tmpTrainFile.write('%d\t%d\t%d\n' %(uidDic[uid],iidDic[iid],rating))
for line in CVLines:
arr = line.split()
uid = int(arr[0].strip())
iid = int(arr[1].strip())
rating = int(float(arr[2].strip()))
tmpCVFile.write('%d\t%d\t%d\n' %(uidDic[uid],iidDic[iid],rating))
for line in testLines:
arr = line.split()
uid = int(arr[0].strip())
iid = int(arr[1].strip())
rating = int(float(arr[2].strip()))
tmpTestFile.write('%d\t%d\t%d\n' %(uidDic[uid],iidDic[iid],rating))

avg=sum/ctr
#Close files
bootTrainFile.close()
bootTestFile.close()
bootCVFile.close()
tmpTrainFile.close()
tmpTestFile.close()
tmpCVFile.close()
print("Finished")
return(uidDic,iidDic,avg)

Expand Down Expand Up @@ -77,8 +145,8 @@ def userfeedback(fname):
feedback = {}
for line in fi:
attr = line.strip().split('\t')
uid = int(attr[0])-1
iid = int(attr[1])-1
uid = int(attr[0])-1#uid actually start from 0
iid = int(attr[1])-1#mid actually start from 0
if uid in feedback:
feedback[uid].append(iid)
else:
Expand All @@ -94,7 +162,7 @@ def usergroup(fname):
lastuid = -1
for line in fi:
attr = line.strip().split('\t')
uid = int(attr[0])-1
uid = int(attr[0])-1 #uid actually start from 0
if uid in groupnum:
groupnum[uid] += 1
else:
Expand Down
71 changes: 12 additions & 59 deletions utils/SVDModel.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,12 +36,13 @@ def __init__(self,configModel,utils,config,strTrial):


### Neighborhood Model Files###
if self.misc[0] == "MovieTag":
self.TagFilePath = self.movieTagPath
self.TagFileReindexPath = utils.MODEL_TMP_PATH + self.tag + \
'_' + self.misc[0] + '_t' + strTrial
self.ShareTagPath = utils.MODEL_TMP_PATH + self.tag + \
'_share_' + self.misc[0] + '_t' + strTrial
if len(self.misc) > 0:
if self.misc[0] == "MovieTag":
self.TagFilePath = self.movieTagPath
self.TagFileReindexPath = utils.MODEL_TMP_PATH + self.tag + \
'_' + self.misc[0] + '_t' + strTrial
self.ShareTagPath = utils.MODEL_TMP_PATH + self.tag + \
'_share_' + self.misc[0] + '_t' + strTrial
### End Neighborhood Model Files###
### End Baidu Specific ###

Expand All @@ -61,6 +62,10 @@ def __init__(self,configModel,utils,config,strTrial):
self.SVDFeatureSVDPPRandOrder = utils.SVDFEATURE_SVDPP_RANDORDER
self.formatType = 0
self.numUserFeedback = 0
self.numUser= 0
self.numMovie= 0
self.numGlobal = 0
self.avg= 0
self.originDataSet = utils.ORIGINAL_DATA_PATH
# 0 is the default value

Expand Down Expand Up @@ -271,17 +276,10 @@ def basicConvert(self,fin,fout):

def setupImplicitFeatures(self):
import os

#reindex the training files and build two dicts
Udic,ItemDic,avg=IFF.reIndex_Implicit(self.originDataSet)
Udic,ItemDic,avg=IFF.reIndex_Implicit(self.bootTrain, self.bootCV, self.bootTest, self.tmpTrain, self.tmpCV, self.tmpTest)
#reindex the history
IFF.translate(self.userHistoryPath, self.userHistoryReindexPath, Udic, ItemDic)
#reindex CV file
IFF.translate(self.bootCV, self.tmpCV, Udic, ItemDic)
#reindex Testfile
IFF.translate(self.bootTest, self.tmpTest, Udic, ItemDic)
#reindex the training
IFF.translate(self.bootTrain,self.tmpTrain,Udic,ItemDic)

#make group training files
os.system(self.SVDFeatureSVDPPRandOrder +' '+ self.tmpTrain + ' ' + self.tmpLineOrder)
Expand Down Expand Up @@ -335,51 +333,6 @@ def fixRun(self):
self.prependUserMovieToPredictions(self.bootCV,self.predCVTmp,self.predCV)
self.prependUserMovieToPredictions(self.bootTest,self.predTestTmp,self.predTest)

def setupImplicitFeatures(self):
# Prepare every input needed for the implicit-feedback run: reindex the data
# files, build the grouped train/CV files with the external SVDFeature tools,
# convert them to basic and implicit feature files, and store the resulting
# model parameters on self. Returns nothing; all results are files on disk
# and attributes on self.
import os
# Reindex the original data set; returns the user-id dict, the item-id dict
# and an average value (presumably the mean rating -- confirm in IFF).
Udic,ItemDic,avg=IFF.reIndex_Implicit(self.originDataSet)
# Rewrite the user history file using the two dicts.
IFF.translate(self.userHistoryPath, self.userHistoryReindexPath, Udic, ItemDic)
# Rewrite the CV file using the two dicts.
IFF.translate(self.bootCV, self.tmpCV, Udic, ItemDic)
# Rewrite the test file using the two dicts.
IFF.translate(self.bootTest, self.tmpTest, Udic, ItemDic)
# Rewrite the training file using the two dicts.
IFF.translate(self.bootTrain,self.tmpTrain,Udic,ItemDic)

# Build the grouped training file: the first tool reads tmpTrain and writes
# tmpLineOrder; the second reads tmpTrain plus tmpLineOrder and writes tmpGpTrain.
# NOTE(review): the commands are built by string concatenation and run through
# the shell, so paths containing spaces or shell metacharacters will break them.
os.system(self.SVDFeatureSVDPPRandOrder +' '+ self.tmpTrain + ' ' + self.tmpLineOrder)
os.system(self.SVDFeatureLineReorder + ' ' + self.tmpTrain + ' ' + self.tmpLineOrder + ' ' + self.tmpGpTrain)

# Build the grouped CV file the same way. The same tmpLineOrder path is
# reused, so the ordering file written for the training set is overwritten.
os.system(self.SVDFeatureSVDPPRandOrder +' '+ self.tmpCV + \
' '+ self.tmpLineOrder)
os.system(self.SVDFeatureLineReorder + ' ' + self.tmpCV + \
' ' + self.tmpLineOrder + ' ' + self.tmpGpCV)

# Make the basic feature files. Train and CV use the grouped files; the test
# set is converted directly from tmpTest (no grouped version is built for it).
self.basicConvert(self.tmpGpTrain,self.featTrain)
self.basicConvert(self.tmpGpCV, self.featCV)
self.basicConvert(self.tmpTest, self.featTest)

# Make the implicit feature files from the reindexed user history, one per
# data split (train, test, CV).
IFF.mkImplicitFeatureFile(self.userHistoryReindexPath,self.tmpGpTrain,self.ImfeatTrain)
IFF.mkImplicitFeatureFile(self.userHistoryReindexPath,self.tmpTest,self.ImfeatTest)
IFF.mkImplicitFeatureFile(self.userHistoryReindexPath,self.tmpGpCV,self.ImfeatCV)


# Record the model parameters derived from the reindexing step.
self.numUser=len(Udic)
self.numMovie=len(ItemDic)
self.avg=avg
self.numGlobal = 0
self.activeType = '0'
self.formatType = 1
# The number of user-feedback features is set to the number of distinct items.
self.numUserFeedback = len(ItemDic)




def NeighborhoodSetup(self):
#second
Expand Down

0 comments on commit 339145b

Please sign in to comment.