Avoid copying posns / term dict
softwaredoug committed Dec 24, 2023
1 parent 9cd6dde commit 9659037
Showing 2 changed files with 10 additions and 5 deletions.
4 changes: 2 additions & 2 deletions Makefile
@@ -40,10 +40,10 @@ benchmark: deps
 	open ./.benchmarks/histogram.svg

 benchmark_graph: deps
-	python scripts/graph_benchmarks.py $(TEST)
+	python scripts/graph_benchmarks.py "$(TEST)"

 profile:
-	python -m pytest -x --benchmark-disable $(TEST)
+	python -m pytest -x --benchmark-disable "$(TEST)"
 	snakeviz ./.benchmarks/last.prof


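A note on the quoting: with "$(TEST)" in quotes, a test selector containing spaces or shell metacharacters, for example a parametrized node id such as tests/test_postings.py::test_match[phrase query] (a hypothetical selection), reaches pytest as a single argument instead of being word-split or glob-expanded by the shell.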
11 changes: 8 additions & 3 deletions searcharray/postings.py
@@ -298,11 +298,12 @@ class PostingsArray(ExtensionArray):

     dtype = PostingsDtype()

-    def __init__(self, postings, tokenizer=ws_tokenizer):
+    def __init__(self, postings, tokenizer=ws_tokenizer, avoid_copies=True):
         # Check dtype, raise TypeError
         if not is_list_like(postings):
             raise TypeError("Expected list-like object, got {}".format(type(postings)))

+        self.avoid_copies = avoid_copies
         self.tokenizer = tokenizer
         self.term_mat, self.posns, \
             self.term_dict, self.avg_doc_length, \
@@ -544,10 +545,14 @@ def take(self, indices, allow_fill=False, fill_value=None):
     def copy(self):
         postings_arr = PostingsArray([], tokenizer=self.tokenizer)
         postings_arr.doc_lens = self.doc_lens.copy()
-        postings_arr.posns = self.posns.copy()
         postings_arr.term_mat = self.term_mat.copy()
-        postings_arr.term_dict = self.term_dict.copy()
+        postings_arr.posns = self.posns
+        postings_arr.term_dict = self.term_dict
         postings_arr.avg_doc_length = self.avg_doc_length
+
+        if not self.avoid_copies:
+            postings_arr.posns = self.posns.copy()
+            postings_arr.term_dict = self.term_dict.copy()
         return postings_arr

     @classmethod
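For context, here is a rough usage sketch of the new flag, not code from the commit. It assumes the constructor accepts a list of raw document strings to tokenize (the diff only shows that postings must be list-like), and the docs value below is purely illustrative.

    # Minimal sketch of the avoid_copies behavior; assumptions noted above.
    from searcharray.postings import PostingsArray

    docs = ["the quick brown fox", "jumps over the lazy dog"]  # hypothetical input

    shared = PostingsArray(docs)     # avoid_copies defaults to True
    dup = shared.copy()
    # copy() still duplicates doc_lens and term_mat, but the positions
    # index and term dictionary are now shared with the source array.
    assert dup.posns is shared.posns
    assert dup.term_dict is shared.term_dict

    deep = PostingsArray(docs, avoid_copies=False)
    dup2 = deep.copy()
    # Opting out restores the previous behavior: posns and term_dict are
    # deep-copied as well.
    assert dup2.posns is not deep.posns
    assert dup2.term_dict is not deep.term_dict

Presumably posns and the term dictionary are the heavy, mostly read-only structures, so sharing them makes pandas-style copies much cheaper; callers that need fully isolated copies can pass avoid_copies=False.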
