diff --git a/agents-api/agents_api/models/docs/search_docs_by_embedding.py b/agents-api/agents_api/models/docs/search_docs_by_embedding.py
index 7d3bbbd2f..e346b6b69 100644
--- a/agents-api/agents_api/models/docs/search_docs_by_embedding.py
+++ b/agents-api/agents_api/models/docs/search_docs_by_embedding.py
@@ -48,8 +48,8 @@ def search_docs_by_embedding(
     query_embedding: list[float],
     k: int = 3,
     confidence: float = 0.5,
-    ef: int = 32,
-    mmr_lambda: float = 0.25,
+    ef: int = 50,
+    mmr_lambda: float = 0.5,
     embedding_size: int = 1024,
 ) -> tuple[list[str], dict]:
     """
diff --git a/agents-api/agents_api/web.py b/agents-api/agents_api/web.py
index 59d7ee429..037767b9f 100644
--- a/agents-api/agents_api/web.py
+++ b/agents-api/agents_api/web.py
@@ -6,7 +6,6 @@
 import logging
 from typing import Any, Callable
 
-import fire
 import sentry_sdk
 import uvicorn
 import uvloop
diff --git a/agents-api/migrations/migrate_1729114011_tweak_proximity_indices.py b/agents-api/migrations/migrate_1729114011_tweak_proximity_indices.py
index e8fbbaa58..4852f3603 100644
--- a/agents-api/migrations/migrate_1729114011_tweak_proximity_indices.py
+++ b/agents-api/migrations/migrate_1729114011_tweak_proximity_indices.py
@@ -12,6 +12,45 @@ def run(client, *queries):
     client.run(query)
 
 
+# See: https://github.com/nmslib/hnswlib/blob/master/ALGO_PARAMS.md
+drop_snippets_hnsw_index = dict(
+    down="""
+    ::hnsw create snippets:embedding_space {
+        fields: [embedding],
+        filter: !is_null(embedding),
+        dim: 1024,
+        distance: Cosine,
+        m: 64,
+        ef_construction: 256,
+        extend_candidates: true,
+        keep_pruned_connections: false,
+    }
+    """,
+    up="""
+    ::hnsw drop snippets:embedding_space
+    """,
+)
+
+
+# See: https://github.com/nmslib/hnswlib/blob/master/ALGO_PARAMS.md
+snippets_hnsw_index = dict(
+    up="""
+    ::hnsw create snippets:embedding_space {
+        fields: [embedding],
+        filter: !is_null(embedding),
+        dim: 1024,
+        distance: Cosine,
+        m: 64,
+        ef_construction: 800,
+        extend_candidates: false,
+        keep_pruned_connections: false,
+    }
+    """,
+    down="""
+    ::hnsw drop snippets:embedding_space
+    """,
+)
+
 drop_snippets_lsh_index = dict(
     up="""
     ::lsh drop snippets:lsh
@@ -77,8 +116,10 @@ def run(client, *queries):
 )
 
 queries_to_run = [
+    drop_snippets_hnsw_index,
     drop_snippets_lsh_index,
     drop_snippets_fts_index,
+    snippets_hnsw_index,
     snippets_lsh_index,
     snippets_fts_index,
 ]