Skip to content

Commit

Permalink
Handled index meta directory being a tmpfile in testing by adding it to the cache key
Browse files Browse the repository at this point in the history
  • Loading branch information
jamesbraza committed Oct 22, 2024
1 parent 05ee983 commit 7b39d7a
Showing 1 changed file with 16 additions and 9 deletions.
25 changes: 16 additions & 9 deletions paperqa/agents/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,10 +104,12 @@ def read_from_string(self, data: str | bytes) -> BaseModel | SupportsPickle:
return pickle.loads(data) # type: ignore[arg-type] # noqa: S301


# Cache of index name to a two-tuple of an opened Index instance and the count
# of SearchIndex instances currently referencing that Index
_OPENED_INDEX_CACHE: dict[str, tuple[Index, int]] = {}
ENV_VAR_MATCH: Collection[str] = {"1", "true"}

# Cache keys are a two-tuple of index name and absolute index directory
# Cache values are a two-tuple of an opened Index instance and the count
# of SearchIndex instances currently referencing that Index
_OPENED_INDEX_CACHE: dict[tuple[str, str], tuple[Index, int]] = {}
DONT_USE_OPENED_INDEX_CACHE = (
os.environ.get("PQA_INDEX_DONT_CACHE_INDEXES", "").lower() in ENV_VAR_MATCH
)
Expand Down Expand Up @@ -197,21 +199,26 @@ async def index(self) -> Index:
if DONT_USE_OPENED_INDEX_CACHE:
self._index = Index.open(path=str(index_meta_directory))
else:
if self.index_name not in _OPENED_INDEX_CACHE: # open a new Index
key = self.index_name, str(await index_meta_directory.absolute())
if key not in _OPENED_INDEX_CACHE: # open a new Index
self._index = Index.open(path=str(index_meta_directory))
prev_count: int = 0
else: # reuse Index
self._index, prev_count = _OPENED_INDEX_CACHE[self.index_name]
_OPENED_INDEX_CACHE[self.index_name] = self._index, prev_count + 1
self._index, prev_count = _OPENED_INDEX_CACHE[key]
_OPENED_INDEX_CACHE[key] = self._index, prev_count + 1
else:
# NOTE: this creates the above meta.json file
self._index = Index(self.schema, path=str(index_meta_directory))
return self._index

def __del__(self) -> None:
if self.index_name in _OPENED_INDEX_CACHE:
index, count = _OPENED_INDEX_CACHE[self.index_name]
_OPENED_INDEX_CACHE[self.index_name] = index, count - 1
index_meta_directory = (
pathlib.Path(self._index_directory) / self.index_name / "index"
)
key = self.index_name, str(index_meta_directory.absolute())
if key in _OPENED_INDEX_CACHE:
index, count = _OPENED_INDEX_CACHE[key]
_OPENED_INDEX_CACHE[key] = index, count - 1

@property
async def searcher(self) -> Searcher:
Expand Down

0 comments on commit 7b39d7a

Please sign in to comment.