From d0a6ffd9e19f86501261e23b43bda478cfab5210 Mon Sep 17 00:00:00 2001 From: Joan Antoni RE Date: Wed, 22 Jan 2025 10:03:43 +0100 Subject: [PATCH] Remove vectorsets gRPC API --- .../src/nucliadb/ingest/service/writer.py | 62 +------------------ nucliadb/src/nucliadb/writer/vectorsets.py | 1 + .../integration/test_purge_vectorsets.py | 26 ++------ .../integration/test_vectorsets_api.py | 27 +++++--- 4 files changed, 26 insertions(+), 90 deletions(-) diff --git a/nucliadb/src/nucliadb/ingest/service/writer.py b/nucliadb/src/nucliadb/ingest/service/writer.py index fe9def961b..945d5c8ed0 100644 --- a/nucliadb/src/nucliadb/ingest/service/writer.py +++ b/nucliadb/src/nucliadb/ingest/service/writer.py @@ -31,12 +31,12 @@ from nucliadb.ingest import SERVICE_NAME, logger from nucliadb.ingest.orm.broker_message import generate_broker_message from nucliadb.ingest.orm.entities import EntitiesManager -from nucliadb.ingest.orm.exceptions import KnowledgeBoxConflict, VectorSetConflict +from nucliadb.ingest.orm.exceptions import KnowledgeBoxConflict from nucliadb.ingest.orm.knowledgebox import KnowledgeBox as KnowledgeBoxORM from nucliadb.ingest.orm.processor import Processor, sequence_manager from nucliadb.ingest.orm.resource import Resource as ResourceORM from nucliadb.ingest.settings import settings -from nucliadb_protos import nodewriter_pb2, writer_pb2, writer_pb2_grpc +from nucliadb_protos import writer_pb2, writer_pb2_grpc from nucliadb_protos.knowledgebox_pb2 import ( DeleteKnowledgeBoxResponse, KnowledgeBoxID, @@ -44,13 +44,10 @@ KnowledgeBoxUpdate, SemanticModelMetadata, UpdateKnowledgeBoxResponse, - VectorSetConfig, ) from nucliadb_protos.writer_pb2 import ( BrokerMessage, DelEntitiesRequest, - DelVectorSetRequest, - DelVectorSetResponse, GetEntitiesGroupRequest, GetEntitiesGroupResponse, GetEntitiesRequest, @@ -63,8 +60,6 @@ ListMembersResponse, NewEntitiesGroupRequest, NewEntitiesGroupResponse, - NewVectorSetRequest, - NewVectorSetResponse, OpStatusWriter, 
SetEntitiesRequest, UpdateEntitiesGroupRequest, @@ -472,56 +467,3 @@ async def ReIndex(self, request: IndexResource, context=None) -> IndexStatus: # errors.capture_exception(e) logger.error("Error in ingest gRPC servicer", exc_info=True) raise - - async def NewVectorSet( # type: ignore - self, request: NewVectorSetRequest, context=None - ) -> NewVectorSetResponse: - config = VectorSetConfig( - vectorset_id=request.vectorset_id, - vectorset_index_config=nodewriter_pb2.VectorIndexConfig( - similarity=request.similarity, - normalize_vectors=request.normalize_vectors, - vector_type=request.vector_type, - vector_dimension=request.vector_dimension, - ), - matryoshka_dimensions=request.matryoshka_dimensions, - storage_key_kind=VectorSetConfig.StorageKeyKind.VECTORSET_PREFIX, - ) - response = NewVectorSetResponse() - try: - async with self.driver.transaction() as txn: - kbobj = KnowledgeBoxORM(txn, self.storage, request.kbid) - await kbobj.create_vectorset(config) - await txn.commit() - except VectorSetConflict as exc: - response.status = NewVectorSetResponse.Status.ERROR - response.details = str(exc) - except Exception as exc: - errors.capture_exception(exc) - logger.error("Error in ingest gRPC while creating a vectorset", exc_info=True) - response.status = NewVectorSetResponse.Status.ERROR - response.details = str(exc) - else: - response.status = NewVectorSetResponse.Status.OK - return response - - async def DelVectorSet( # type: ignore - self, request: DelVectorSetRequest, context=None - ) -> DelVectorSetResponse: - response = DelVectorSetResponse() - try: - async with self.driver.transaction() as txn: - kbobj = KnowledgeBoxORM(txn, self.storage, request.kbid) - await kbobj.delete_vectorset(request.vectorset_id) - await txn.commit() - except VectorSetConflict as exc: - response.status = DelVectorSetResponse.Status.ERROR - response.details = str(exc) - except Exception as exc: - errors.capture_exception(exc) - logger.error("Error in ingest gRPC while deleting a 
vectorset", exc_info=True) - response.status = DelVectorSetResponse.Status.ERROR - response.details = str(exc) - else: - response.status = DelVectorSetResponse.Status.OK - return response diff --git a/nucliadb/src/nucliadb/writer/vectorsets.py b/nucliadb/src/nucliadb/writer/vectorsets.py index a97ee60b35..4e984d9734 100644 --- a/nucliadb/src/nucliadb/writer/vectorsets.py +++ b/nucliadb/src/nucliadb/writer/vectorsets.py @@ -85,6 +85,7 @@ async def delete(kbid: str, vectorset_id: str) -> None: kbobj = KnowledgeBox(txn, storage, kbid) await kbobj.delete_vectorset(vectorset_id=vectorset_id) await txn.commit() + except VectorSetConflict: # caller should handle this error raise diff --git a/nucliadb/tests/nucliadb/integration/test_purge_vectorsets.py b/nucliadb/tests/nucliadb/integration/test_purge_vectorsets.py index 9ce345a641..7ff4ac427a 100644 --- a/nucliadb/tests/nucliadb/integration/test_purge_vectorsets.py +++ b/nucliadb/tests/nucliadb/integration/test_purge_vectorsets.py @@ -26,6 +26,7 @@ from nucliadb.common.maindb.driver import Driver from nucliadb.ingest.orm.knowledgebox import ( KB_VECTORSET_TO_DELETE, + KnowledgeBox, ) from nucliadb.purge import purge_kb_vectorsets from nucliadb_protos import resources_pb2, utils_pb2, writer_pb2 @@ -33,26 +34,9 @@ from nucliadb_protos.writer_pb2_grpc import WriterStub from nucliadb_utils.storages.storage import Storage from tests.ingest.fixtures import make_extracted_text -from tests.nucliadb.knowledgeboxes.vectorsets import KbSpecs from tests.utils import inject_message -async def test_purge_vectorsets__kb_with_single_vectorset( - maindb_driver: Driver, - storage: Storage, - nucliadb_grpc: WriterStub, - kb_with_vectorset: KbSpecs, -): - kbid = kb_with_vectorset.kbid - vectorset_id = kb_with_vectorset.vectorset_id - - response = await nucliadb_grpc.DelVectorSet( - writer_pb2.DelVectorSetRequest(kbid=kbid, vectorset_id=vectorset_id) - ) # type: ignore - assert response.status == response.Status.ERROR - assert "Deletion of 
your last vectorset is not allowed" in response.details - - async def test_purge_vectorsets__kb_with_vectorsets( maindb_driver: Driver, storage: Storage, nucliadb_grpc: WriterStub, knowledgebox_with_vectorsets: str ): @@ -67,10 +51,10 @@ async def test_purge_vectorsets__kb_with_vectorsets( with patch.object( storage, "delete_upload", new=AsyncMock(side_effect=storage.delete_upload) ) as mock: - response = await nucliadb_grpc.DelVectorSet( - writer_pb2.DelVectorSetRequest(kbid=kbid, vectorset_id=vectorset_id) - ) # type: ignore - assert response.status == response.Status.OK + async with maindb_driver.transaction() as txn: + kb_obj = KnowledgeBox(txn, storage, kbid) + await kb_obj.delete_vectorset(vectorset_id) + await txn.commit() async with maindb_driver.transaction(read_only=True) as txn: value = await txn.get(KB_VECTORSET_TO_DELETE.format(kbid=kbid, vectorset=vectorset_id)) diff --git a/nucliadb/tests/nucliadb/integration/test_vectorsets_api.py b/nucliadb/tests/nucliadb/integration/test_vectorsets_api.py index 197cb11196..db3bc83f81 100644 --- a/nucliadb/tests/nucliadb/integration/test_vectorsets_api.py +++ b/nucliadb/tests/nucliadb/integration/test_vectorsets_api.py @@ -38,13 +38,12 @@ from nucliadb_protos.writer_pb2 import ( BrokerMessage, FieldID, - NewVectorSetRequest, - NewVectorSetResponse, ) from nucliadb_protos.writer_pb2_grpc import WriterStub from tests.utils import inject_message from tests.utils.broker_messages import BrokerMessageBuilder, FieldBuilder from tests.utils.dirty_index import mark_dirty, wait_for_sync +from tests.utils.vectorsets import add_vectorset MODULE = "nucliadb.writer.vectorsets" @@ -94,6 +93,8 @@ async def test_add_delete_vectorsets( existing_lconfig, # Initial configuration updated_lconfig, # Configuration after adding the vectorset updated_lconfig, # Configuration right before deleting the vectorset + existing_lconfig, # Initial configuration + existing_lconfig, # Initial configuration ], ) as get_configuration: with 
patch(f"{MODULE}.learning_proxy.update_configuration", return_value=None): @@ -123,6 +124,17 @@ async def test_add_delete_vectorsets( vs = await datamanagers.vectorsets.get(txn, kbid=kbid, vectorset_id=vectorset_id) assert vs is None + # Deleting your last vectorset is not allowed + resp = await nucliadb_manager.delete(f"/kb/{kbid}/vectorsets/multilingual") + assert resp.status_code == 409, resp.text + assert "Deletion of your last vectorset is not allowed" in resp.json()["detail"] + + # But deleting twice is okay + resp = await nucliadb_manager.delete(f"/kb/{kbid}/vectorsets/{vectorset_id}") + # XXX: however, we get the same error as before due to our lazy + # check strategy. This should be a 200 + assert resp.status_code == 409, resp.text + async def test_learning_config_errors_are_proxied_correctly( nucliadb_manager: AsyncClient, @@ -234,14 +246,11 @@ async def test_vectorset_migration( await _check_search(nucliadb_reader, kbid) # Now add a new vectorset - request = NewVectorSetRequest( - kbid=kbid, - vectorset_id="en-2024-05-06", - similarity=utils_pb2.VectorSimilarity.COSINE, - vector_dimension=1024, + vectorset_id = "en-2024-05-06" + resp = await add_vectorset( + nucliadb_manager, kbid, vectorset_id, similarity=SimilarityFunction.COSINE, vector_dimension=1024 ) - resp: NewVectorSetResponse = await nucliadb_grpc.NewVectorSet(request) # type: ignore - assert resp.status == NewVectorSetResponse.Status.OK # type: ignore + assert resp.status_code == 200 # Ingest a new broker message as if it was coming from the migration bm2 = BrokerMessage(