-
Notifications
You must be signed in to change notification settings - Fork 1
/
entropy.pyx
43 lines (40 loc) · 930 Bytes
/
entropy.pyx
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
import math
def getRepeats(s):
    """Count overlapping 2-, 3- and 4-symbol sequences found in ``s``.

    Every element of ``s`` is converted with ``chr()``, so ``s`` must be an
    indexable sequence of integers (presumably a ``bytes`` object -- confirm
    against the caller).

    Only start positions where a full 4-symbol window fits are visited, so
    the trailing 2- and 3-symbol sequences are not counted. As a result the
    three tables always hold the same total number of occurrences.

    Returns:
        A tuple ``(two, three, four)`` of dicts mapping each sequence
        (as a ``str``) to the number of times it occurred.
    """
    pairs, triples, quads = {}, {}, {}
    tables_by_width = ((pairs, 2), (triples, 3), (quads, 4))
    for start in range(len(s) - 3):
        # Build the 4-symbol window once; the shorter keys are its prefixes.
        window = "".join(chr(s[start + offset]) for offset in range(4))
        for table, width in tables_by_width:
            key = window[:width]
            table[key] = table.get(key, 0) + 1
    return pairs, triples, quads
def getConditionalEntropy(context, shorter, longer):
    """Return the conditional entropy, in bits, of the symbol after ``context``.

    Args:
        context: The preceding sequence, as a ``str``.
        shorter: Mapping from sequences of ``len(context)`` symbols to counts.
        longer: Mapping from sequences one symbol longer to counts.

    Returns:
        ``0`` when ``context`` is absent from ``shorter``; otherwise
        ``-sum(p * log2(p))`` over every code point 0..255 whose extension
        ``context + chr(code)`` appears in ``longer``, where
        ``p = longer[extension] / shorter[context]``.
    """
    # Conditional entropy: a context that was never seen contributes nothing.
    if context not in shorter:
        return 0
    context_count = shorter[context]
    result = 0
    # Only the 256 single-byte code points are tried as the next symbol.
    for code in range(256):
        candidate = context + chr(code)
        if candidate not in longer:
            continue
        prob = longer[candidate] / context_count
        result -= prob * math.log2(prob)
    return result
def getOrderEntropy(shorter, longer, amount):
    """Return the conditional entropy averaged over all contexts.

    Each context in ``shorter`` contributes its conditional entropy (as
    computed by ``getConditionalEntropy``) weighted by
    ``shorter[context] / amount``.

    Args:
        shorter: Mapping from context sequences to counts.
        longer: Mapping from sequences one symbol longer to counts.
        amount: Divisor for the weights (presumably the total number of
            counted contexts -- confirm against the caller).

    Returns:
        The weighted sum, or ``0`` when ``shorter`` is empty.
    """
    total = 0
    for ctx in shorter:
        conditional = getConditionalEntropy(ctx, shorter, longer)
        weight = shorter[ctx] / amount
        total += conditional * weight
    return total