-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcode
126 lines (119 loc) · 5.26 KB
/
code
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
# -*-coding=utf-8 -*-
import sys
import math from texttable import Texttable
#计算余弦距离
def getCosDist(user1, user2):
sum_x = 0.0
sum_y = 0.0
sum_xy = 0.0
for key1 in user1:
for key2 in user2:
if key1[0] == key2[0]:
sum_x += key1[1] * key1[1]
sum_y += key2[1] * key2[1]
sum_xy += key1[1] * key2[1]
if sum_xy == 0.0:
return 0
demo = math.sqrt(sum_x * sum_y)
return sum_xy / demo
#读取文件,读取以行为单位,每一行是列表里的一个元素
def readFile(filename):
contents = []
f = open(filename, "r")
contents = f.readlines()
f.close()
return contents
#数据格式化为二维数组
def getRatingInfo(ratings):
rates = []
for line in ratings:
rate = line.split("\t")
rates.append([int(rate[0]), int(rate[1]), int(rate[2])])
return rates
#生成用户评分数据结构
def getUserScoreDataStructure(rates):
#userDict[2]=[(1,5),(4,2)].... 表示用户2对电影1的评分是5,对电影4的评分是2
userDict = {}
itemUser = {}
for k in rates:
user_rank = (k[1], k[2])
if k[0] in userDict:
userDict[k[0]].append(user_rank)
else:
userDict[k[0]] = [user_rank]
if k[1] in itemUser:
itemUser[k[1]].append(k[0])
else:
itemUser[k[1]] = [k[0]]
return userDict, itemUser
#计算与指定用户最相近的邻居
def getNearestNeighbor(userId, userDict, itemUser):
neighbors = []
for item in userDict[userId]:
for neighbor in itemUser[item[0]]:
if neighbor != userId and neighbor not in neighbors:
neighbors.append(neighbor)
neighbors_dist = []
for neighbor in neighbors:
dist = getCosDist(userDict[userId], userDict[neighbor])
neighbors_dist.append([dist, neighbor])
neighbors_dist.sort(reverse = True)
return neighbors_dist
#使用UserFC进行推荐,输入:文件名,用户ID,邻居数量
def recommendByUserFC(filename, userId, k = 5):
#读取文件
contents = readFile(filename)
#文件格式数据转化为二维数组
rates = getRatingInfo(contents)
#格式化成字典数据
userDict, itemUser = getUserScoreDataStructure(rates)
#找邻居
neighbors = getNearestNeighbor(userId, userDict, itemUser)[:5]
#建立推荐字典
recommand_dict = {}
for neighbor in neighbors:
neighbor_user_id = neighbor[1]
movies = userDict[neighbor_user_id]
for movie in movies:
if movie[0] not in recommand_dict:
recommand_dict[movie[0]] = neighbor[0]
else:
recommand_dict[movie[0]] += neighbor[0]
#建立推荐列表
recommand_list = []
for key in recommand_dict:
recommand_list.append([recommand_dict[key], key])
recommand_list.sort(reverse = True)
user_movies = [k[0] for k in userDict[userId]]
return [k[1] for k in recommand_list], user_movies, itemUser, neighbors
#获取电影的列表
def getMovieList(filename):
contents = readFile(filename)
movies_info = {}
for movie in contents:
single_info = movie.split("|")
movies_info[int(single_info[0])] = single_info[1:]
return movies_info
#从这里开始运行
if __name__ == '__main__':
reload(sys)
sys.setdefaultencoding('utf-8')
#获取所有电影的列表
movies = getMovieList("u.item")
recommend_list, user_movie, items_movie, neighbors = recommendByUserFC("u.data", 50, 80)
neighbors_id=[ i[1] for i in neighbors]
table = Texttable()
table.set_deco(Texttable.HEADER)
table.set_cols_dtype(['t', 't', 't'])
table.set_cols_align(["l", "l", "l"])
rows=[]
rows.append([u"movie name",u"release", u"from userid"])
for movie_id in recommend_list[:20]:
from_user=[]
for user_id in items_movie[movie_id]:
if user_id in neighbors_id:
from_user.append(user_id)
rows.append([movies[movie_id][0],movies[movie_id][1],""])
table.add_rows(rows)
print table.draw()