Skip to content
This repository has been archived by the owner on Nov 13, 2024. It is now read-only.

Commit

Permalink
fix metadata filtering
Browse files Browse the repository at this point in the history
  • Loading branch information
acatav committed Oct 26, 2023
1 parent d586a48 commit 28a5010
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 4 deletions.
2 changes: 1 addition & 1 deletion src/canopy/knowledge_base/knowledge_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -446,7 +446,7 @@ def _query_index(self,
sparse_vector=query.sparse_values,
top_k=top_k,
namespace=query.namespace,
metadata_filter=metadata_filter,
filter=metadata_filter,
include_metadata=True,
_check_return_type=_check_return_type,
**query_params)
Expand Down
22 changes: 19 additions & 3 deletions tests/system/knowledge_base/test_knowledge_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ def knowledge_base(index_full_name, index_name, chunker, encoder):
kb = KnowledgeBase(index_name=index_name,
record_encoder=encoder,
chunker=chunker)
kb.create_canopy_index()
kb.create_canopy_index(indexed_fields=["my-key"])

return kb

Expand Down Expand Up @@ -139,6 +139,18 @@ def execute_and_assert_queries(knowledge_base, chunks_to_query):
f"actual: {q_res.documents}"


def assert_query_metadata_filter(knowledge_base: KnowledgeBase,
                                 metadata_filter: dict,
                                 num_vectors_expected: int,
                                 top_k: int = 100):
    """Check that a metadata-filtered query returns the expected number of documents.

    Issues a single ``Query`` with the fixed text ``"test"`` through
    ``knowledge_base.query`` and asserts that exactly one query result comes
    back and that it holds ``num_vectors_expected`` documents.

    Args:
        knowledge_base: The knowledge base to query.
        metadata_filter: Filter passed to the ``Query`` as ``metadata_filter``.
        num_vectors_expected: Number of documents the single result must hold.
        top_k: ``top_k`` passed to the ``Query``; must be strictly greater
            than ``num_vectors_expected``.

    Raises:
        AssertionError: If ``top_k`` is not greater than
            ``num_vectors_expected``, or if either count check fails.
    """
    # With top_k <= num_vectors_expected the result could be capped at top_k,
    # so hitting the expected count would not show the filter was applied.
    assert top_k > num_vectors_expected, (
        "the test might return false positive if top_k is not > num_vectors_expected"
    )

    filtered_query = Query(text="test",
                           top_k=top_k,
                           metadata_filter=metadata_filter)
    results = knowledge_base.query([filtered_query])

    # One query in, one result out.
    assert len(results) == 1
    assert len(results[0].documents) == num_vectors_expected


@pytest.fixture(scope="module", autouse=True)
def teardown_knowledge_base(index_full_name, knowledge_base):
yield
Expand All @@ -162,15 +174,15 @@ def documents(random_texts):
return [Document(id=f"doc_{i}",
text=random_texts[i],
source=f"source_{i}",
metadata={"test": i})
metadata={"my-key": f"value-{i}"})
for i in range(5)]


@pytest.fixture
def documents_large():
return [Document(id=f"doc_{i}_large",
text=f"Sample document {i}",
metadata={"test": i})
metadata={"my-key-large": f"value-{i}"})
for i in range(1000)]


Expand Down Expand Up @@ -249,6 +261,10 @@ def test_query(knowledge_base, encoded_chunks):
execute_and_assert_queries(knowledge_base, encoded_chunks)


def test_query_with_metadata_filter(knowledge_base, encoded_chunks):
    """Query with the filter ``{"my-key": "value-1"}`` and expect 2 documents."""
    # NOTE(review): `encoded_chunks` is requested but not used here —
    # presumably only to pull in the fixture's setup; confirm against the
    # fixture definition.
    assert_query_metadata_filter(
        knowledge_base,
        metadata_filter={"my-key": "value-1"},
        num_vectors_expected=2,
    )


def test_delete_documents(knowledge_base, encoded_chunks):
chunk_ids = [chunk.id for chunk in encoded_chunks[-4:]]
doc_ids = set(doc.document_id for doc in encoded_chunks[-4:])
Expand Down

0 comments on commit 28a5010

Please sign in to comment.