-
Notifications
You must be signed in to change notification settings - Fork 2
/
scoredict.py
66 lines (51 loc) · 1.75 KB
/
scoredict.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
from collections import defaultdict
from math import log
from settings import settings
# This module exists because my machine can handle a dict of 100 million ints,
# but not a dict of 100 million tuples. Storing the scores in CouchDB instead
# would make things orders of magnitude slower.
#
# The scores dict contains int bitfields:
# bit 0-13: mention_score
# bit 14-27: rfriends_score
# bit 28-29: state - NEW, LOOKUP, DONE, FAILED
# Not referenced anywhere else in this file; presumably the number of
# distinct log_score() buckets -- TODO confirm against callers.
BUCKETS = 29

# Each score lives in a 14-bit field, so stored scores lie in [0, MAX_SCORE).
MAX_SCORE = 2 ** 14

# Multiplier that lifts the state above both 14-bit score fields (bit 28 up).
STATE_FIELD = MAX_SCORE ** 2

# State codes stored in the top bits of every entry.
NEW, LOOKUP, DONE, FAILED = range(4)
def log_score(rfs, ats, weight=None):
    """Return the base-sqrt(2) log of the weighted average of two scores.

    rfs    -- first score, weighted by (1 - weight)
    ats    -- second score, weighted by weight
    weight -- share given to ats; when None, settings.mention_weight is used

    The result is truncated to an int. An average below 1 yields 0, which
    also keeps log() away from zero and negative inputs.
    """
    share = settings.mention_weight if weight is None else weight
    blended = (1 - share) * rfs + share * ats
    # log base sqrt(2) of x equals 2 * log base 2 of x.
    return 0 if blended < 1 else int(2 * log(blended, 2))
class Scores(defaultdict):
    """Map an integer user id to one int packing two scores and a state.

    Every value has the form ``state*STATE_FIELD + rfs*MAX_SCORE + ats``:
    the lowest field is the mention score, the next field the rfriends
    score, and everything above them the state (NEW, LOOKUP, DONE or
    FAILED). A missing id reads as 0, i.e. state NEW with both scores zero.

    Only constructs that behave identically on Python 2 and Python 3 are
    used (floor division, ``file.write``, plain key iteration), so the
    packed fields stay integers on either interpreter.
    """

    def __init__(self):
        defaultdict.__init__(self, int)

    def split(self, uid):
        """Return ``(state, rfriends_score, mention_score)`` for uid.

        Reading an unknown uid goes through defaultdict, so it inserts a
        zero entry for that uid as a side effect.
        """
        val = self[uid]
        # Floor division keeps the fields integral; true division would
        # turn them into floats on Python 3.
        state = val // STATE_FIELD
        rfs = (val // MAX_SCORE) % MAX_SCORE
        ats = val % MAX_SCORE
        return state, rfs, ats

    def increment(self, uid, rfriends, mentions):
        """Add rfriends and mentions to uid's scores, keeping its state.

        A falsy argument (None, 0) adds nothing. Each score saturates at
        MAX_SCORE-1 so it can never spill into the neighbouring field.
        """
        state, old_rfs, old_ats = self.split(uid)
        rfs = min(old_rfs + (rfriends or 0), MAX_SCORE - 1)
        ats = min(old_ats + (mentions or 0), MAX_SCORE - 1)
        self[uid] = state * STATE_FIELD + rfs * MAX_SCORE + ats

    def set_state(self, uid, state):
        """Replace uid's state while leaving both scores untouched."""
        _, rfs, ats = self.split(uid)
        self[uid] = state * STATE_FIELD + rfs * MAX_SCORE + ats

    def dump(self, path):
        """Write every entry to path as ``uid<TAB>packed_value`` lines."""
        with open(path, 'w') as f:
            # Iterating the keys is lazy on both Python 2 and 3, so no
            # second copy of a very large dict is built.
            for uid in self:
                f.write("%d\t%d\n" % (uid, self[uid]))

    def read(self, path):
        """Load ``uid<TAB>packed_value`` lines from path into this dict.

        Entries already present are overwritten when their uid appears in
        the file. Blank lines are skipped; any other malformed line raises
        ValueError.
        """
        with open(path, 'r') as f:
            for line in f:
                line = line.strip()
                if not line:
                    # Tolerate a trailing or stray empty line.
                    continue
                k, v = [int(s) for s in line.split('\t')]
                self[k] = v

    def count_lookups(self):
        """Count the entries whose state is anything other than NEW."""
        return sum(1 for uid in self if self[uid] // STATE_FIELD)