Skip to content

Commit

Permalink
Merge pull request #326 from Unobtainiumrock/cosine-similarity-to-pro…
Browse files Browse the repository at this point in the history
…bability-problem-fix

Fix to Cosine Similarity To Probability Clipping
  • Loading branch information
liyin2015 authored Jan 28, 2025
2 parents f2abc34 + a18b99f commit 45fa558
Showing 1 changed file with 10 additions and 5 deletions.
15 changes: 10 additions & 5 deletions adalflow/adalflow/components/retriever/faiss_retriever.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""Semantic search/embedding-based retriever using FAISS."""

import faiss
from typing import (
List,
Optional,
Expand Down Expand Up @@ -29,17 +30,18 @@
from adalflow.utils.lazy_import import safe_import, OptionalPackages

safe_import(OptionalPackages.FAISS.value[0], OptionalPackages.FAISS.value[1])
import faiss

log = logging.getLogger(__name__)

FAISSRetrieverDocumentEmbeddingType = Union[List[float], np.ndarray] # single embedding
# single embedding
FAISSRetrieverDocumentEmbeddingType = Union[List[float], np.ndarray]
FAISSRetrieverDocumentsType = Sequence[FAISSRetrieverDocumentEmbeddingType]

FAISSRetrieverEmbeddingQueryType = Union[
List[float], List[List[float]], np.ndarray
] # single embedding or list of embeddings
FAISSRetrieverQueryType = Union[RetrieverStrQueryType, FAISSRetrieverEmbeddingQueryType]
FAISSRetrieverQueryType = Union[RetrieverStrQueryType,
FAISSRetrieverEmbeddingQueryType]
FAISSRetrieverQueriesType = Sequence[FAISSRetrieverQueryType]
FAISSRetrieverQueriesStrType = Sequence[RetrieverStrQueryType]
FAISSRetrieverQueriesEmbeddingType = Sequence[FAISSRetrieverEmbeddingQueryType]
Expand Down Expand Up @@ -161,7 +163,8 @@ def build_index_from_documents(
If you are using Document format, pass them as [doc.vector for doc in documents]
"""
if document_map_func:
assert callable(document_map_func), "document_map_func should be callable"
assert callable(
document_map_func), "document_map_func should be callable"
documents = [document_map_func(doc) for doc in documents]
try:
self.documents = documents
Expand Down Expand Up @@ -194,6 +197,7 @@ def build_index_from_documents(
raise e

def _convert_cosine_similarity_to_probability(self, D: np.ndarray) -> np.ndarray:
    """Rescale cosine-similarity scores into the [0, 1] range.

    Args:
        D: Array of similarity scores. Any value outside [-1, 1] is
            clamped to that interval before rescaling.

    Returns:
        A new array where each score ``s`` becomes ``(s + 1) / 2``,
        rounded to three decimal places. The input array is not
        modified.
    """
    # Clamp first so the linear mapping below can never leave [0, 1].
    bounded = np.clip(D, -1, 1)
    # Shift [-1, 1] to [0, 2], then halve to land in [0, 1].
    rescaled = (bounded + 1) / 2
    return np.round(rescaled, 3)
Expand Down Expand Up @@ -295,7 +299,8 @@ def retrieve_string_queries(
output: RetrieverOutputType = [
RetrieverOutput(doc_indices=[], query=query) for query in queries
]
retrieved_output: RetrieverOutputType = self._to_retriever_output(Ind, D)
retrieved_output: RetrieverOutputType = self._to_retriever_output(
Ind, D)

# fill in the doc_indices and score for valid queries
for i, per_query_output in enumerate(retrieved_output):
Expand Down

0 comments on commit 45fa558

Please sign in to comment.