Skip to content

Commit

Permalink
benchmarks for similar_keys
Browse files Browse the repository at this point in the history
  • Loading branch information
kmike committed Jan 2, 2013
1 parent 7d96b0d commit 60f66c9
Showing 1 changed file with 46 additions and 2 deletions.
48 changes: 46 additions & 2 deletions bench/speed.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,12 @@ def prefixes1k(words, prefix_len):
_words = [w[:prefix_len] for w in words[::every_nth]]
return _words[:1000]

def leet_words(words, replaces):
    """Return ``words`` with every substitution in ``replaces`` applied.

    ``replaces`` maps a substring to its replacement.  The substitutions
    are applied one after another, in the mapping's iteration order, to
    every word; a later substitution therefore also sees the output of
    the earlier ones.

    When ``replaces`` is empty the input ``words`` object is returned
    unchanged; otherwise a new list is returned.
    """
    for key, value in replaces.items():
        # Rebuild the whole list for each pair so that subsequent pairs
        # operate on the already-substituted words.
        words = [w.replace(key, value) for w in words]
    return words


WORDS100k = words100k()
MIXED_WORDS100k = truncated_words(WORDS100k)
NON_WORDS100k = random_words(100000)
Expand All @@ -46,6 +52,20 @@ def prefixes1k(words, prefix_len):
PREFIXES_8_1k = prefixes1k(WORDS100k, 8)
PREFIXES_15_1k = prefixes1k(WORDS100k, 15)

# Character -> look-alike substitutions used to produce "l33t" spellings.
# The same mapping is fed to leet_words() and to DAWG.compile_replaces()
# in the benchmark setup strings.
LEET_REPLACES = {
    # rendered as "0"
    'o': '0',
    'O': '0',
    'u': '0',
    # rendered as "1"
    'l': '1',
    'i': '1',
    # rendered as "3"
    'e': '3',
    'E': '3',
    # rendered as "4"
    'A': '4',
    'a': '4',
    'h': '4',
    # rendered as "z"
    's': 'z',
}
# "l33t"-spelled variants of the first 50000 entries of WORDS100k, obtained
# by applying the LEET_REPLACES substitutions through leet_words().
LEET_50k = leet_words(WORDS100k[:50000], LEET_REPLACES)

def format_result(key, value, text_width):
key = key.ljust(text_width)
Expand Down Expand Up @@ -86,6 +106,9 @@ def create_int_dawg():
values = [len(word) for word in words]
return dawg.IntDAWG(zip(words, values))

def create_leet_dawg():
    """Build a ``dawg.DAWG`` whose keys are the ``LEET_50k`` words.

    Called from the benchmark setup code (``leet_dawg_setup``) to create
    the structure that the "l33t" ``similar_keys`` benchmark queries.
    """
    return dawg.DAWG(LEET_50k)


def benchmark():
print('\n====== Benchmarks (100k unique unicode words) =======\n')
Expand All @@ -102,17 +125,19 @@ def benchmark():
]

common_setup = """
from __main__ import create_dawg, create_bytes_dawg, create_record_dawg, create_int_dawg
from __main__ import create_dawg, create_bytes_dawg, create_record_dawg, create_int_dawg, create_leet_dawg
from __main__ import WORDS100k, NON_WORDS100k, MIXED_WORDS100k
from __main__ import PREFIXES_3_1k, PREFIXES_5_1k, PREFIXES_8_1k, PREFIXES_15_1k
from __main__ import LEET_50k, LEET_REPLACES
NON_WORDS_10k = NON_WORDS100k[:10000]
NON_WORDS_1k = ['ыва', 'xyz', 'соы', 'Axx', 'avы']*200
"""
dict_setup = common_setup + 'data = dict((word, len(word)) for word in WORDS100k);'
dawg_setup = common_setup + 'data = create_dawg();'
dawg_setup = common_setup + 'data = create_dawg(); repl = data.compile_replaces(LEET_REPLACES);'
bytes_dawg_setup = common_setup + 'data = create_bytes_dawg();'
record_dawg_setup = common_setup + 'data = create_record_dawg();'
int_dawg_setup = common_setup + 'data = create_int_dawg();'
leet_dawg_setup = common_setup + 'data = create_leet_dawg(); repl = data.compile_replaces(LEET_REPLACES);'

structures = [
('dict', dict_setup),
Expand All @@ -128,6 +153,25 @@ def benchmark():
bench(full_test_name, timer, descr, op_count, repeats, 9)

# DAWG-specific benchmarks

# benchmark for similar_keys
bench(
"DAWG.similar_keys (no replaces)",
timeit.Timer(
"for word in WORDS100k[:50000]: data.similar_keys(word, repl)",
setup=dawg_setup,
),
op_count=0.05
)
bench(
"DAWG.similar_keys (l33t)",
timeit.Timer(
"for word in WORDS100k[:50000]: data.similar_keys(word, repl)",
setup=leet_dawg_setup,
),
op_count=0.05
)

for struct_name, setup in structures[1:]:

# prefixes of a given key
Expand Down

0 comments on commit 60f66c9

Please sign in to comment.