Remove vectorsets gRPC API
jotare committed Jan 22, 2025
1 parent 540d9f2 commit d0a6ffd
Showing 4 changed files with 26 additions and 90 deletions.
62 changes: 2 additions & 60 deletions nucliadb/src/nucliadb/ingest/service/writer.py
@@ -31,26 +31,23 @@
from nucliadb.ingest import SERVICE_NAME, logger
from nucliadb.ingest.orm.broker_message import generate_broker_message
from nucliadb.ingest.orm.entities import EntitiesManager
from nucliadb.ingest.orm.exceptions import KnowledgeBoxConflict, VectorSetConflict
from nucliadb.ingest.orm.exceptions import KnowledgeBoxConflict
from nucliadb.ingest.orm.knowledgebox import KnowledgeBox as KnowledgeBoxORM
from nucliadb.ingest.orm.processor import Processor, sequence_manager
from nucliadb.ingest.orm.resource import Resource as ResourceORM
from nucliadb.ingest.settings import settings
from nucliadb_protos import nodewriter_pb2, writer_pb2, writer_pb2_grpc
from nucliadb_protos import writer_pb2, writer_pb2_grpc
from nucliadb_protos.knowledgebox_pb2 import (
DeleteKnowledgeBoxResponse,
KnowledgeBoxID,
KnowledgeBoxResponseStatus,
KnowledgeBoxUpdate,
SemanticModelMetadata,
UpdateKnowledgeBoxResponse,
VectorSetConfig,
)
from nucliadb_protos.writer_pb2 import (
BrokerMessage,
DelEntitiesRequest,
DelVectorSetRequest,
DelVectorSetResponse,
GetEntitiesGroupRequest,
GetEntitiesGroupResponse,
GetEntitiesRequest,
@@ -63,8 +60,6 @@
ListMembersResponse,
NewEntitiesGroupRequest,
NewEntitiesGroupResponse,
NewVectorSetRequest,
NewVectorSetResponse,
OpStatusWriter,
SetEntitiesRequest,
UpdateEntitiesGroupRequest,
@@ -472,56 +467,3 @@ async def ReIndex(self, request: IndexResource, context=None) -> IndexStatus: #
errors.capture_exception(e)
logger.error("Error in ingest gRPC servicer", exc_info=True)
raise

async def NewVectorSet( # type: ignore
self, request: NewVectorSetRequest, context=None
) -> NewVectorSetResponse:
config = VectorSetConfig(
vectorset_id=request.vectorset_id,
vectorset_index_config=nodewriter_pb2.VectorIndexConfig(
similarity=request.similarity,
normalize_vectors=request.normalize_vectors,
vector_type=request.vector_type,
vector_dimension=request.vector_dimension,
),
matryoshka_dimensions=request.matryoshka_dimensions,
storage_key_kind=VectorSetConfig.StorageKeyKind.VECTORSET_PREFIX,
)
response = NewVectorSetResponse()
try:
async with self.driver.transaction() as txn:
kbobj = KnowledgeBoxORM(txn, self.storage, request.kbid)
await kbobj.create_vectorset(config)
await txn.commit()
except VectorSetConflict as exc:
response.status = NewVectorSetResponse.Status.ERROR
response.details = str(exc)
except Exception as exc:
errors.capture_exception(exc)
logger.error("Error in ingest gRPC while creating a vectorset", exc_info=True)
response.status = NewVectorSetResponse.Status.ERROR
response.details = str(exc)
else:
response.status = NewVectorSetResponse.Status.OK
return response

async def DelVectorSet( # type: ignore
self, request: DelVectorSetRequest, context=None
) -> DelVectorSetResponse:
response = DelVectorSetResponse()
try:
async with self.driver.transaction() as txn:
kbobj = KnowledgeBoxORM(txn, self.storage, request.kbid)
await kbobj.delete_vectorset(request.vectorset_id)
await txn.commit()
except VectorSetConflict as exc:
response.status = DelVectorSetResponse.Status.ERROR
response.details = str(exc)
except Exception as exc:
errors.capture_exception(exc)
logger.error("Error in ingest gRPC while deleting a vectorset", exc_info=True)
response.status = DelVectorSetResponse.Status.ERROR
response.details = str(exc)
else:
response.status = DelVectorSetResponse.Status.OK
return response
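
For reference, the removed RPCs were thin wrappers around the ORM, which keeps its create_vectorset and delete_vectorset methods. A minimal sketch of calling the ORM directly, mirroring the handlers above; the driver/storage objects and the COSINE/1024 values are illustrative placeholders, not part of this change:

from nucliadb.ingest.orm.knowledgebox import KnowledgeBox as KnowledgeBoxORM
from nucliadb_protos import nodewriter_pb2, utils_pb2
from nucliadb_protos.knowledgebox_pb2 import VectorSetConfig


async def create_vectorset(driver, storage, kbid: str, vectorset_id: str) -> None:
    # Build the same config the removed NewVectorSet handler assembled
    config = VectorSetConfig(
        vectorset_id=vectorset_id,
        vectorset_index_config=nodewriter_pb2.VectorIndexConfig(
            similarity=utils_pb2.VectorSimilarity.COSINE,  # example value
            vector_dimension=1024,  # example value
        ),
        storage_key_kind=VectorSetConfig.StorageKeyKind.VECTORSET_PREFIX,
    )
    async with driver.transaction() as txn:
        kbobj = KnowledgeBoxORM(txn, storage, kbid)
        await kbobj.create_vectorset(config)
        await txn.commit()


async def delete_vectorset(driver, storage, kbid: str, vectorset_id: str) -> None:
    # May raise VectorSetConflict, e.g. when deleting the last vectorset
    async with driver.transaction() as txn:
        kbobj = KnowledgeBoxORM(txn, storage, kbid)
        await kbobj.delete_vectorset(vectorset_id)
        await txn.commit()
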
1 change: 1 addition & 0 deletions nucliadb/src/nucliadb/writer/vectorsets.py
@@ -85,6 +85,7 @@ async def delete(kbid: str, vectorset_id: str) -> None:
kbobj = KnowledgeBox(txn, storage, kbid)
await kbobj.delete_vectorset(vectorset_id=vectorset_id)
await txn.commit()

except VectorSetConflict:
# caller should handle this error
raise
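
The HTTP writer re-raises VectorSetConflict to its caller. A hypothetical sketch of how a caller could surface it, assuming a FastAPI-style handler (the actual endpoint wiring is not part of this diff); it matches the 409 with a detail message that the integration test below expects:

from fastapi import HTTPException

from nucliadb.ingest.orm.exceptions import VectorSetConflict
from nucliadb.writer import vectorsets


async def delete_vectorset_endpoint(kbid: str, vectorset_id: str) -> None:
    try:
        await vectorsets.delete(kbid, vectorset_id)
    except VectorSetConflict as exc:
        # e.g. "Deletion of your last vectorset is not allowed"
        raise HTTPException(status_code=409, detail=str(exc))
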
26 changes: 5 additions & 21 deletions nucliadb/tests/nucliadb/integration/test_purge_vectorsets.py
@@ -26,33 +26,17 @@
from nucliadb.common.maindb.driver import Driver
from nucliadb.ingest.orm.knowledgebox import (
KB_VECTORSET_TO_DELETE,
KnowledgeBox,
)
from nucliadb.purge import purge_kb_vectorsets
from nucliadb_protos import resources_pb2, utils_pb2, writer_pb2
from nucliadb_protos.knowledgebox_pb2 import VectorSetConfig, VectorSetPurge
from nucliadb_protos.writer_pb2_grpc import WriterStub
from nucliadb_utils.storages.storage import Storage
from tests.ingest.fixtures import make_extracted_text
from tests.nucliadb.knowledgeboxes.vectorsets import KbSpecs
from tests.utils import inject_message


async def test_purge_vectorsets__kb_with_single_vectorset(
maindb_driver: Driver,
storage: Storage,
nucliadb_grpc: WriterStub,
kb_with_vectorset: KbSpecs,
):
kbid = kb_with_vectorset.kbid
vectorset_id = kb_with_vectorset.vectorset_id

response = await nucliadb_grpc.DelVectorSet(
writer_pb2.DelVectorSetRequest(kbid=kbid, vectorset_id=vectorset_id)
) # type: ignore
assert response.status == response.Status.ERROR
assert "Deletion of your last vectorset is not allowed" in response.details


async def test_purge_vectorsets__kb_with_vectorsets(
maindb_driver: Driver, storage: Storage, nucliadb_grpc: WriterStub, knowledgebox_with_vectorsets: str
):
@@ -67,10 +51,10 @@ async def test_purge_vectorsets__kb_with_vectorsets(
with patch.object(
storage, "delete_upload", new=AsyncMock(side_effect=storage.delete_upload)
) as mock:
response = await nucliadb_grpc.DelVectorSet(
writer_pb2.DelVectorSetRequest(kbid=kbid, vectorset_id=vectorset_id)
) # type: ignore
assert response.status == response.Status.OK
async with maindb_driver.transaction() as txn:
kb_obj = KnowledgeBox(txn, storage, kbid)
await kb_obj.delete_vectorset(vectorset_id)
await txn.commit()

async with maindb_driver.transaction(read_only=True) as txn:
value = await txn.get(KB_VECTORSET_TO_DELETE.format(kbid=kbid, vectorset=vectorset_id))
27 changes: 18 additions & 9 deletions nucliadb/tests/nucliadb/integration/test_vectorsets_api.py
@@ -38,13 +38,12 @@
from nucliadb_protos.writer_pb2 import (
BrokerMessage,
FieldID,
NewVectorSetRequest,
NewVectorSetResponse,
)
from nucliadb_protos.writer_pb2_grpc import WriterStub
from tests.utils import inject_message
from tests.utils.broker_messages import BrokerMessageBuilder, FieldBuilder
from tests.utils.dirty_index import mark_dirty, wait_for_sync
from tests.utils.vectorsets import add_vectorset

MODULE = "nucliadb.writer.vectorsets"

@@ -94,6 +93,8 @@ async def test_add_delete_vectorsets(
existing_lconfig, # Initial configuration
updated_lconfig, # Configuration after adding the vectorset
updated_lconfig, # Configuration right before deleting the vectorset
existing_lconfig, # Configuration when deleting the last remaining vectorset
existing_lconfig, # Configuration when deleting the vectorset a second time
],
) as get_configuration:
with patch(f"{MODULE}.learning_proxy.update_configuration", return_value=None):
@@ -123,6 +124,17 @@ async def test_add_delete_vectorsets(
vs = await datamanagers.vectorsets.get(txn, kbid=kbid, vectorset_id=vectorset_id)
assert vs is None

# Deleting your last vectorset is not allowed
resp = await nucliadb_manager.delete(f"/kb/{kbid}/vectorsets/multilingual")
assert resp.status_code == 409, resp.text
assert "Deletion of your last vectorset is not allowed" in resp.json()["detail"]

# Deleting an already deleted vectorset should be okay
resp = await nucliadb_manager.delete(f"/kb/{kbid}/vectorsets/{vectorset_id}")
# XXX: however, we get the same error as before due to our lazy
# check strategy. This should be a 200
assert resp.status_code == 409, resp.text


async def test_learning_config_errors_are_proxied_correctly(
nucliadb_manager: AsyncClient,
@@ -234,14 +246,11 @@ async def test_vectorset_migration(
await _check_search(nucliadb_reader, kbid)

# Now add a new vectorset
request = NewVectorSetRequest(
kbid=kbid,
vectorset_id="en-2024-05-06",
similarity=utils_pb2.VectorSimilarity.COSINE,
vector_dimension=1024,
vectorset_id = "en-2024-05-06"
resp = await add_vectorset(
nucliadb_manager, kbid, vectorset_id, similarity=SimilarityFunction.COSINE, vector_dimension=1024
)
resp: NewVectorSetResponse = await nucliadb_grpc.NewVectorSet(request) # type: ignore
assert resp.status == NewVectorSetResponse.Status.OK # type: ignore
assert resp.status_code == 200

# Ingest a new broker message as if it was coming from the migration
bm2 = BrokerMessage(
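
For completeness, a sketch of the same flow exercised purely over HTTP with the fixtures these tests use; the SimilarityFunction import path is assumed, and the vectorset id and dimension are example values:

from nucliadb_models.vectorsets import SimilarityFunction  # assumed import path
from tests.utils.vectorsets import add_vectorset


async def roundtrip_vectorset(nucliadb_manager, kbid: str) -> None:
    vectorset_id = "en-2024-05-06"  # example id

    # Create a vectorset through the HTTP helper used above
    resp = await add_vectorset(
        nucliadb_manager,
        kbid,
        vectorset_id,
        similarity=SimilarityFunction.COSINE,
        vector_dimension=1024,
    )
    assert resp.status_code == 200

    # Delete it again; deleting the knowledge box's last remaining
    # vectorset is rejected with a 409 instead
    resp = await nucliadb_manager.delete(f"/kb/{kbid}/vectorsets/{vectorset_id}")
    assert resp.status_code == 200, resp.text
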
