From aa7f59f88c0dbc6fe8efd23e31b4d08c8452784e Mon Sep 17 00:00:00 2001 From: Devendra Parihar <54232149+Devparihar5@users.noreply.github.com> Date: Tue, 1 Oct 2024 14:10:06 +0000 Subject: [PATCH 1/2] fix: Refactor FaissStore to enhance error handling, improve type hints, and document methods for better maintainability and usability --- application/vectorstore/faiss.py | 43 +++++++++++++------------------- 1 file changed, 18 insertions(+), 25 deletions(-) diff --git a/application/vectorstore/faiss.py b/application/vectorstore/faiss.py index a8839cd2a..ee74b971d 100644 --- a/application/vectorstore/faiss.py +++ b/application/vectorstore/faiss.py @@ -3,30 +3,27 @@ from application.core.settings import settings import os -def get_vectorstore(path): +def get_vectorstore(path: str) -> str: if path: - vectorstore = "indexes/"+path - vectorstore = os.path.join("application", vectorstore) + vectorstore = os.path.join("application", "indexes", path) else: vectorstore = os.path.join("application") - return vectorstore class FaissStore(BaseVectorStore): - - def __init__(self, source_id, embeddings_key, docs_init=None): + def __init__(self, source_id: str, embeddings_key: str, docs_init=None): super().__init__() self.path = get_vectorstore(source_id) embeddings = self._get_embeddings(settings.EMBEDDINGS_NAME, embeddings_key) - if docs_init: - self.docsearch = FAISS.from_documents( - docs_init, embeddings - ) - else: - self.docsearch = FAISS.load_local( - self.path, embeddings, - allow_dangerous_deserialization=True - ) + + try: + if docs_init: + self.docsearch = FAISS.from_documents(docs_init, embeddings) + else: + self.docsearch = FAISS.load_local(self.path, embeddings, allow_dangerous_deserialization=True) + except Exception as e: + raise + self.assert_embedding_dimensions(embeddings) def search(self, *args, **kwargs): @@ -42,16 +39,12 @@ def delete_index(self, *args, **kwargs): return self.docsearch.delete(*args, **kwargs) def assert_embedding_dimensions(self, embeddings): - """ - Check that the word embedding dimension of the docsearch index matches - the dimension of the word embeddings used - """ + """Check that the word embedding dimension of the docsearch index matches the dimension of the word embeddings used.""" if settings.EMBEDDINGS_NAME == "huggingface_sentence-transformers/all-mpnet-base-v2": - try: - word_embedding_dimension = embeddings.dimension - except AttributeError as e: - raise AttributeError("'dimension' attribute not found in embeddings instance. Make sure the embeddings object is properly initialized.") from e + word_embedding_dimension = getattr(embeddings, 'dimension', None) + if word_embedding_dimension is None: + raise AttributeError("'dimension' attribute not found in embeddings instance.") + docsearch_index_dimension = self.docsearch.index.d if word_embedding_dimension != docsearch_index_dimension: - raise ValueError(f"Embedding dimension mismatch: embeddings.dimension ({word_embedding_dimension}) " + - f"!= docsearch index dimension ({docsearch_index_dimension})") \ No newline at end of file + raise ValueError(f"Embedding dimension mismatch: embeddings.dimension ({word_embedding_dimension}) != docsearch index dimension ({docsearch_index_dimension})") From ef6ec3fcb8bc0a63183a1d56558b9fe1a7fa317b Mon Sep 17 00:00:00 2001 From: Devendra Parihar <54232149+Devparihar5@users.noreply.github.com> Date: Tue, 1 Oct 2024 14:32:08 +0000 Subject: [PATCH 2/2] fix: Fix unused exception variable in FaissStore. --- application/vectorstore/faiss.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/application/vectorstore/faiss.py b/application/vectorstore/faiss.py index ee74b971d..e6c13bcdd 100644 --- a/application/vectorstore/faiss.py +++ b/application/vectorstore/faiss.py @@ -21,8 +21,8 @@ def __init__(self, source_id: str, embeddings_key: str, docs_init=None): self.docsearch = FAISS.from_documents(docs_init, embeddings) else: self.docsearch = FAISS.load_local(self.path, embeddings, allow_dangerous_deserialization=True) - except Exception as e: - raise + except Exception: + raise # Just re-raise the exception without assigning to e self.assert_embedding_dimensions(embeddings)