Merge pull request #326 from Unobtainiumrock/cosine-similarity-to-probability-problem-fix

Fix to Cosine Similarity To Probability Clipping
SylphAI-Inc · Jan 28, 2025 · 45fa558 · 45fa558
2 parents f2abc34 + a18b99f
commit 45fa558
Showing 1 changed file with 10 additions and 5 deletions.
diff --git a/adalflow/adalflow/components/retriever/faiss_retriever.py b/adalflow/adalflow/components/retriever/faiss_retriever.py
@@ -1,5 +1,6 @@
 """Semantic search/embedding-based retriever using FAISS."""
 
+import faiss
 from typing import (
     List,
     Optional,
@@ -29,17 +30,18 @@
 from adalflow.utils.lazy_import import safe_import, OptionalPackages
 
 safe_import(OptionalPackages.FAISS.value[0], OptionalPackages.FAISS.value[1])
-import faiss
 
 log = logging.getLogger(__name__)
 
-FAISSRetrieverDocumentEmbeddingType = Union[List[float], np.ndarray]  # single embedding
+# single embedding
+FAISSRetrieverDocumentEmbeddingType = Union[List[float], np.ndarray]
 FAISSRetrieverDocumentsType = Sequence[FAISSRetrieverDocumentEmbeddingType]
 
 FAISSRetrieverEmbeddingQueryType = Union[
     List[float], List[List[float]], np.ndarray
 ]  # single embedding or list of embeddings
-FAISSRetrieverQueryType = Union[RetrieverStrQueryType, FAISSRetrieverEmbeddingQueryType]
+FAISSRetrieverQueryType = Union[RetrieverStrQueryType,
+                                FAISSRetrieverEmbeddingQueryType]
 FAISSRetrieverQueriesType = Sequence[FAISSRetrieverQueryType]
 FAISSRetrieverQueriesStrType = Sequence[RetrieverStrQueryType]
 FAISSRetrieverQueriesEmbeddingType = Sequence[FAISSRetrieverEmbeddingQueryType]
@@ -161,7 +163,8 @@ def build_index_from_documents(
         If you are using Document format, pass them as [doc.vector for doc in documents]
         """
         if document_map_func:
-            assert callable(document_map_func), "document_map_func should be callable"
+            assert callable(
+                document_map_func), "document_map_func should be callable"
             documents = [document_map_func(doc) for doc in documents]
         try:
             self.documents = documents
@@ -194,6 +197,7 @@ def build_index_from_documents(
             raise e
 
     def _convert_cosine_similarity_to_probability(self, D: np.ndarray) -> np.ndarray:
+        D = np.clip(D, -1, 1)
         D = (D + 1) / 2
         D = np.round(D, 3)
         return D
@@ -295,7 +299,8 @@ def retrieve_string_queries(
         output: RetrieverOutputType = [
             RetrieverOutput(doc_indices=[], query=query) for query in queries
         ]
-        retrieved_output: RetrieverOutputType = self._to_retriever_output(Ind, D)
+        retrieved_output: RetrieverOutputType = self._to_retriever_output(
+            Ind, D)
 
         # fill in the doc_indices and score for valid queries
         for i, per_query_output in enumerate(retrieved_output):