-
Notifications
You must be signed in to change notification settings - Fork 2
/
performance.py
107 lines (97 loc) · 2.37 KB
/
performance.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
"""
Script to measure the performance
"""
import timeit
import sys
# Number of timed executions per snippet: defaults to 1000 and can be
# overridden by the first command-line argument (parsed as an integer).
iterations = int(sys.argv[1]) if len(sys.argv) > 1 else 1000
# Setup code handed to timeit: it is passed as `setup` to each
# timeit.timeit() call, so it runs once per measured snippet (not once per
# iteration) and its run time is not part of the reported timings.
setup = "\n".join((
    "",
    "import pandas as pd",
    "import association_measures.frequencies as fq",
    "import association_measures.measures as am",
    "df = pd.read_csv('tests/data/brown.csv')",
    "df = fq.observed_frequencies(df)",
    "df = fq.expected_frequencies(df, observed=True)",
    "",
))
# Statements whose execution time is measured, as (name, statement) pairs.
# Each statement is evaluated in the namespace created by the setup code.
_SNIPPETS = (
    ('contingency_table', 'fq.observed_frequencies(df)'),
    ('expected_frequencies', 'fq.expected_frequencies(df)'),
    # asymptotic hypothesis tests
    ('z_score', 'am.z_score(df)'),
    ('t_score', 'am.t_score(df)'),
    ('log_likelihood', 'am.log_likelihood(df)'),
    ('simple_ll', 'am.simple_ll(df)'),
    # point estimates of association strength
    ('min_sensitivity', 'am.min_sensitivity(df)'),
    ('liddell', 'am.liddell(df)'),
    ('dice', 'am.dice(df)'),
    ('log_ratio', 'am.log_ratio(df)'),
    # likelihood measures
    # disabled, too slow for the default run (~2.5s for 1x ~25,000):
    # ('hypergeometric_likelihood', 'am.hypergeometric_likelihood(df)'),
    ('binomial_likelihood', 'am.binomial_likelihood(df)'),
    # conservative estimates
    ('conservative_log_ratio',
     'am.conservative_log_ratio(df, boundary="normal")'),
    ('conservative_log_ratio_poisson',
     'am.conservative_log_ratio(df, boundary="poisson")'),
    # information theory
    ('mutual_information', 'am.mutual_information(df)'),
    ('local_mutual_information', 'am.local_mutual_information(df)'),
)

# Expand the table into the list of {'name': ..., 'code': ...} records.
codes = [{'name': label, 'code': stmt} for label, stmt in _SNIPPETS]
# Size reported in the header line as df_size. It is a fixed constant, not
# derived from the loaded data -- presumably the row count of
# tests/data/brown.csv; confirm whenever the data file changes.
size = 24167
print(f'settings: iterations={iterations}, df_size={size}')

# Time every registered snippet; timeit.timeit returns the total number of
# seconds taken by `iterations` executions of the statement.
for func, stmt in ((entry['name'], entry['code']) for entry in codes):
    res = timeit.timeit(stmt, setup, number=iterations)
    print(f'- {res:7.4f} :: {func}')