From 68ce7f535132dea4ea39ddf5e3f4b0c65c23102b Mon Sep 17 00:00:00 2001 From: Diwank Singh Tomer Date: Fri, 3 May 2024 16:48:20 +0530 Subject: [PATCH] fix: Fix migration not working without correct order of index drops Signed-off-by: Diwank Singh Tomer --- ...1714566760_change_embeddings_dimensions.py | 59 ++++++++++++++----- 1 file changed, 45 insertions(+), 14 deletions(-) diff --git a/agents-api/migrations/migrate_1714566760_change_embeddings_dimensions.py b/agents-api/migrations/migrate_1714566760_change_embeddings_dimensions.py index 86f2c9543..dba657345 100644 --- a/agents-api/migrations/migrate_1714566760_change_embeddings_dimensions.py +++ b/agents-api/migrations/migrate_1714566760_change_embeddings_dimensions.py @@ -20,7 +20,7 @@ snippet, embed_instruction, embedding, - additional_info_id: doc_id, + doc_id, } :replace information_snippets { @@ -48,7 +48,7 @@ snippet, embed_instruction, embedding, - additional_info_id: doc_id, + doc_id, } :replace information_snippets { @@ -63,48 +63,79 @@ """, } -information_snippets_hnsw_index = dict( +snippets_hnsw_768_index = dict( up=""" ::hnsw create information_snippets:embedding_space { fields: [embedding], filter: !is_null(embedding), - dim: 1024, + dim: 768, distance: Cosine, m: 64, ef_construction: 256, - extend_candidates: false, + extend_candidates: true, keep_pruned_connections: false, } """, down=""" + ::hnsw drop information_snippets:embedding_space + """, +) + +drop_snippets_hnsw_768_index = { + "up": snippets_hnsw_768_index["down"], + "down": snippets_hnsw_768_index["up"], +} +snippets_hnsw_1024_index = dict( + up=""" ::hnsw create information_snippets:embedding_space { fields: [embedding], filter: !is_null(embedding), - dim: 768, + dim: 1024, distance: Cosine, m: 64, ef_construction: 256, - extend_candidates: false, + extend_candidates: true, keep_pruned_connections: false, } """, + down=""" + ::hnsw drop information_snippets:embedding_space + """, ) -drop_index = { - "up": """ - ::hnsw drop information_snippets:embedding_space +drop_snippets_hnsw_1024_index = { + "up": snippets_hnsw_1024_index["down"], + "down": snippets_hnsw_1024_index["up"], +} + + +# See: https://docs.cozodb.org/en/latest/vector.html#full-text-search-fts +information_snippets_fts_index = dict( + up=""" + ::fts create information_snippets:fts { + extractor: concat(title, ' ', snippet), + tokenizer: Simple, + filters: [Lowercase, Stemmer('english'), Stopwords('en')], + } """, - "down": """ - ::hnsw drop information_snippets:embedding_space + down=""" + ::fts drop information_snippets:fts """, +) + +drop_information_snippets_fts_index = { + "up": information_snippets_fts_index["down"], + "down": information_snippets_fts_index["up"], } queries_to_run = [ - drop_index, + drop_information_snippets_fts_index, + drop_snippets_hnsw_768_index, change_dimensions, - information_snippets_hnsw_index, + snippets_hnsw_1024_index, + information_snippets_fts_index, ]