Skip to content

Commit

Permalink
Change test on compatibility given threading
Browse files: browse the repository at this point in the history
  • Loading branch information
softwaredoug committed May 15, 2024
1 parent 93d5e6d commit 604c678
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 1 deletion.
1 change: 1 addition & 0 deletions searcharray/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,7 @@ def _process_batches(term_doc, batch_size,
doc_lens=None,
truncate=False):
batch_results = [None] * len(futures)
batch_beg = 0
for future in as_completed(futures):
try:
batch_beg, batch_term_doc, batch_bit_posns, batch_doc_lens = future.result()
Expand Down
7 changes: 6 additions & 1 deletion test/test_tmdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,12 @@ def test_slice_then_search(tmdb_data):
def test_batch_sizes_give_same(tmdb_data):
with_batch_10k = SearchArray.index(tmdb_data['overview'], batch_size=10000)
with_batch_5k = SearchArray.index(tmdb_data['overview'], batch_size=5000)
assert np.all(with_batch_10k == with_batch_5k)
# We don't expect the full arrays to compare equal: with threading, the term
# dict may assign different term ids. Individual docs should still be the same.
assert np.all(with_batch_10k[-1] == with_batch_5k[-1])
assert np.all(with_batch_10k[100] == with_batch_5k[100])
assert np.all(with_batch_10k[5000] == with_batch_5k[5000])
assert np.all(with_batch_10k[5001] == with_batch_5k[5001])


tmdb_term_matches = [
Expand Down

0 comments on commit 604c678

Please sign in to comment.