# python_utils.py (forked from recommenders-team/recommenders)
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
import logging
import numpy as np
from scipy import sparse
# Module-scoped logger: records still propagate to the root logger's handlers,
# but applications can now filter or configure this module's output by name
# instead of having a library write directly through the root logger.
logger = logging.getLogger(__name__)
def exponential_decay(value, max_val, half_life):
    """Return the exponential decay factor of `value` relative to `max_val`.

    The factor is ``0.5 ** ((max_val - value) / half_life)`` capped at 1, so it
    halves for every `half_life` that `value` lies below `max_val`.
    Values greater than `max_val` yield a factor of 1.

    Args:
        value (numeric): value to calculate decay factor
        max_val (numeric): value at which decay factor will be 1
        half_life (numeric): distance below `max_val` at which decay factor will be 0.5

    Returns:
        float: decay factor
    """
    half_lives_elapsed = (max_val - value) / half_life
    uncapped = np.power(0.5, half_lives_elapsed)
    # A negative exponent (value beyond max_val) gives a factor above 1; cap it.
    return np.minimum(1.0, uncapped)
def jaccard(cooccurrence):
    """Compute the Jaccard similarity for every pair of items.

    Entry (i, j) of the result is ``c[i, j] / (c[i, i] + c[j, j] - c[i, j])``,
    where ``c`` is the co-occurrence matrix and the per-item totals are read
    from its diagonal.

    Args:
        cooccurrence (np.array): the symmetric matrix of co-occurrences of items.

    Returns:
        np.array: The matrix of Jaccard similarities between any two items.
    """
    item_counts = cooccurrence.diagonal()
    counts_as_row = np.expand_dims(item_counts, axis=0)
    counts_as_col = np.expand_dims(item_counts, axis=1)

    # Zero denominators are tolerated: the floating-point warnings for the
    # division are suppressed rather than raised or printed.
    with np.errstate(divide="ignore", invalid="ignore"):
        union = counts_as_row + counts_as_col - cooccurrence
        similarity = cooccurrence / union

    return np.array(similarity)
def lift(cooccurrence):
    """Compute the Lift for every pair of items.

    Entry (i, j) of the result is ``c[i, j] / (c[i, i] * c[j, j])``, where
    ``c`` is the co-occurrence matrix and the per-item totals are read from
    its diagonal.

    Args:
        cooccurrence (np.array): the symmetric matrix of co-occurrences of items.

    Returns:
        np.array: The matrix of Lifts between any two items.
    """
    item_counts = cooccurrence.diagonal()

    # Zero denominators are tolerated: the floating-point warnings for the
    # division are suppressed rather than raised or printed.
    with np.errstate(divide="ignore", invalid="ignore"):
        count_products = np.expand_dims(item_counts, axis=0) * np.expand_dims(
            item_counts, axis=1
        )
        lifted = cooccurrence / count_products

    return np.array(lifted)
def get_top_k_scored_items(scores, top_k, sort_top_k=False):
    """Extract top K items from a matrix of scores for each user-item pair, optionally sort results per user.

    If `top_k` exceeds the number of items, a warning is logged and all items
    are returned. If `top_k` is 0 (or the matrix has no item columns), empty
    ``(users x 0)`` arrays are returned.

    Args:
        scores (np.array): score matrix (users x items); dense or scipy sparse.
        top_k (int): number of top items to recommend.
        sort_top_k (bool): flag to sort top k results (descending score). When
            False the order of the K items within each row is unspecified.

    Returns:
        np.array, np.array: indices into score matrix for each users top items, scores corresponding to top items.

    Raises:
        ValueError: if `top_k` is negative.
    """
    if top_k < 0:
        raise ValueError("top_k must be a non-negative integer")

    # ensure we're working with a dense ndarray (toarray() yields an ndarray,
    # unlike todense() which yields np.matrix for sparse matrices)
    if sparse.issparse(scores):
        scores = scores.toarray()
    scores = np.asarray(scores)

    n_users, n_items = scores.shape
    if n_items < top_k:
        logger.warning(
            "Number of items is less than top_k, limiting top_k to number of items"
        )
    k = min(top_k, n_items)

    if k == 0:
        # With k == 0 the slice `[:, -k:]` below would be `[:, 0:]`, i.e. every
        # column, so return explicitly empty results instead.
        return (
            np.empty((n_users, 0), dtype=np.intp),
            np.empty((n_users, 0), dtype=scores.dtype),
        )

    # column vector of row indices, broadcast against (users x k) item indices
    test_user_idx = np.arange(n_users)[:, None]

    # get top K items and scores
    # this determines the un-ordered top-k item indices for each user
    top_items = np.argpartition(scores, -k, axis=1)[:, -k:]
    top_scores = scores[test_user_idx, top_items]

    if sort_top_k:
        # argsort on negated scores orders each row from highest to lowest
        sort_ind = np.argsort(-top_scores)
        top_items = top_items[test_user_idx, sort_ind]
        top_scores = top_scores[test_user_idx, sort_ind]

    return np.array(top_items), np.array(top_scores)