From 604c678f398d4c9553227c4aafba8bd3e8c11411 Mon Sep 17 00:00:00 2001
From: Doug Turnbull <softwaredoug@gmail.com>
Date: Wed, 15 May 2024 09:56:57 -0400
Subject: [PATCH] Change test on compatibility given threading

---
 searcharray/indexing.py | 1 +
 test/test_tmdb.py       | 7 ++++++-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/searcharray/indexing.py b/searcharray/indexing.py
index f804b8b..13ef53d 100644
--- a/searcharray/indexing.py
+++ b/searcharray/indexing.py
@@ -153,6 +153,7 @@ def _process_batches(term_doc, batch_size,
                      doc_lens=None,
                      truncate=False):
     batch_results = [None] * len(futures)
+    batch_beg = 0
     for future in as_completed(futures):
         try:
             batch_beg, batch_term_doc, batch_bit_posns, batch_doc_lens = future.result()
diff --git a/test/test_tmdb.py b/test/test_tmdb.py
index 6c01c26..63ddf98 100644
--- a/test/test_tmdb.py
+++ b/test/test_tmdb.py
@@ -102,7 +102,12 @@ def test_slice_then_search(tmdb_data):
 def test_batch_sizes_give_same(tmdb_data):
     with_batch_10k = SearchArray.index(tmdb_data['overview'], batch_size=10000)
     with_batch_5k = SearchArray.index(tmdb_data['overview'], batch_size=5000)
-    assert np.all(with_batch_10k == with_batch_5k)
+    # We don't expect the full array to be compatible given term dict assigned
+    # different term ids given threading, but individual docs should be the same
+    assert np.all(with_batch_10k[-1] == with_batch_5k[-1])
+    assert np.all(with_batch_10k[100] == with_batch_5k[100])
+    assert np.all(with_batch_10k[5000] == with_batch_5k[5000])
+    assert np.all(with_batch_10k[5001] == with_batch_5k[5001])
 
 
 tmdb_term_matches = [