From 2c7de49f0568910fdec2fc20b9994ffd9a4a2105 Mon Sep 17 00:00:00 2001 From: Paul Makles Date: Tue, 6 Feb 2024 15:31:14 +0000 Subject: [PATCH] feat(lasr_vector_database_weaviate): service for interacting with database --- .../lasr_vector_database_weaviate/README.md | 131 ++++++++++++++++++ .../launch/server.launch | 18 +++ .../lasr_vector_database_weaviate/package.xml | 1 + .../requirements.in | 1 + .../requirements.txt | 16 +-- .../scripts/server | 92 +++++++++++- 6 files changed, 246 insertions(+), 13 deletions(-) create mode 100644 common/helpers/lasr_vector_database_weaviate/README.md create mode 100644 common/helpers/lasr_vector_database_weaviate/launch/server.launch diff --git a/common/helpers/lasr_vector_database_weaviate/README.md b/common/helpers/lasr_vector_database_weaviate/README.md new file mode 100644 index 000000000..a09cce10c --- /dev/null +++ b/common/helpers/lasr_vector_database_weaviate/README.md @@ -0,0 +1,131 @@ +# lasr_vector_database_weaviate + +Integration of the Weaviate vector database into the LASR stack. + +This package is maintained by: +- [Paul Makles](mailto:me@insrt.uk) + +## Prerequisites + +This package depends on the following ROS packages: +- catkin (buildtool) +- catkin_virtualenv (build) +- lasr_vector_database_msgs + +This package requires Python 3.10 to be present. + +This package has 18 Python dependencies: +- [weaviate-client](https://pypi.org/project/weaviate-client)==v4.4b2 +- [requests](https://pypi.org/project/requests)==2.31.0 +- .. and 16 sub dependencies + +Currently this package will only work on Linux amd64. + +## Usage + +Ask the package maintainer to write a `doc/USAGE.md` for their package! 
+ +## Example + +Create a new collection called "Hello": + +```bash +rosservice call /database/vectors/weaviate/create_collection "name: 'Hello' +skip_if_exists: false +clear_if_exists: false" +``` + +Insert some vectors: + +```bash +rosservice call /database/vectors/weaviate/insert "name: 'Hello' +properties: +- key: 'thing' + value: 'true' +vector: +- 0 +- 1 +- 0" + +rosservice call /database/vectors/weaviate/insert "name: 'Hello' +properties: +- key: 'thing' + value: 'false' +vector: +- 1 +- 0 +- 1" +``` + +Now you can query the database: + +```bash +rosservice call /database/vectors/weaviate/query "name: 'Hello' +limit: 3 +vector: +- 1 +- 1 +- 1" +``` + +You should receive results like: + +```yaml +results: + - + certainty: 0.9082483053207397 + properties: + - + key: "thing" + value: "false" + - + certainty: 0.7886751294136047 + properties: + - + key: "thing" + value: "true" +``` + +## Technical Overview + +Ask the package maintainer to write a `doc/TECHNICAL.md` for their package! + +## ROS Definitions + +### Launch Files + +#### `server` + +Start the Weaviate vector database + +```bash +# Default directory and ports +roslaunch lasr_vector_database_weaviate server.launch + +# Customise the ports used +roslaunch lasr_vector_database_weaviate server.launch http_port:=40050 grpc_port:=40051 + +# Create a separate database instance +roslaunch lasr_vector_database_weaviate server.launch db_name:=your_database_name +``` + +| Argument | Default | Description | +|:-:|:-:|---| +| version | 1.22.11 | Version of Weaviate to download and run | +| http_port | 50050 | Port to bind HTTP service to | +| grpc_port | 50051 | Port to bind GRPC service to | +| db_name | weaviate | Custom database name | + + + +### Messages + +This package has no messages. + +### Services + +This package has no services. + +### Actions + +This package has no actions. 
diff --git a/common/helpers/lasr_vector_database_weaviate/launch/server.launch b/common/helpers/lasr_vector_database_weaviate/launch/server.launch new file mode 100644 index 000000000..d473aad64 --- /dev/null +++ b/common/helpers/lasr_vector_database_weaviate/launch/server.launch @@ -0,0 +1,18 @@ + + Start the Weaviate vector database + + http_port:=40050 grpc_port:=40051 + db_name:=your_database_name + + + + + + + + + + + + + \ No newline at end of file diff --git a/common/helpers/lasr_vector_database_weaviate/package.xml b/common/helpers/lasr_vector_database_weaviate/package.xml index bcd26f468..c4aeb947f 100644 --- a/common/helpers/lasr_vector_database_weaviate/package.xml +++ b/common/helpers/lasr_vector_database_weaviate/package.xml @@ -50,6 +50,7 @@ catkin catkin_virtualenv + lasr_vector_database_msgs diff --git a/common/helpers/lasr_vector_database_weaviate/requirements.in b/common/helpers/lasr_vector_database_weaviate/requirements.in index 2f82b7a9c..d6a337c75 100644 --- a/common/helpers/lasr_vector_database_weaviate/requirements.in +++ b/common/helpers/lasr_vector_database_weaviate/requirements.in @@ -1 +1,2 @@ weaviate-client==v4.4b2 +requests==2.31.0 \ No newline at end of file diff --git a/common/helpers/lasr_vector_database_weaviate/requirements.txt b/common/helpers/lasr_vector_database_weaviate/requirements.txt index 0e1cdb9ab..dd2387340 100644 --- a/common/helpers/lasr_vector_database_weaviate/requirements.txt +++ b/common/helpers/lasr_vector_database_weaviate/requirements.txt @@ -1,19 +1,19 @@ annotated-types==0.6.0 # via pydantic authlib==1.3.0 # via weaviate-client -certifi==2023.11.17 # via requests +certifi==2024.2.2 # via requests cffi==1.16.0 # via cryptography charset-normalizer==3.3.2 # via requests -cryptography==42.0.1 # via authlib -grpcio==1.60.0 # via grpcio-tools, weaviate-client -grpcio-tools==1.60.0 # via weaviate-client +cryptography==42.0.2 # via authlib +grpcio==1.60.1 # via grpcio-tools, weaviate-client +grpcio-tools==1.60.1 # via 
weaviate-client idna==3.6 # via requests protobuf==4.25.2 # via grpcio-tools pycparser==2.21 # via cffi -pydantic==2.5.3 # via weaviate-client -pydantic-core==2.14.6 # via pydantic -requests==2.31.0 # via weaviate-client +pydantic==2.6.1 # via weaviate-client +pydantic-core==2.16.2 # via pydantic +requests==2.31.0 # via -r requirements.in, weaviate-client typing-extensions==4.9.0 # via pydantic, pydantic-core -urllib3==2.1.0 # via requests +urllib3==2.2.0 # via requests validators==0.22.0 # via weaviate-client weaviate-client==v4.4b2 # via -r requirements.in diff --git a/common/helpers/lasr_vector_database_weaviate/scripts/server b/common/helpers/lasr_vector_database_weaviate/scripts/server index e0e65375f..159288843 100644 --- a/common/helpers/lasr_vector_database_weaviate/scripts/server +++ b/common/helpers/lasr_vector_database_weaviate/scripts/server @@ -1,11 +1,18 @@ #!/usr/bin/env python3 import os import tarfile +import subprocess import urllib.request +import weaviate +import weaviate.classes as wvc + import rospy import rospkg +from lasr_vector_database_msgs.msg import Property, SearchResult +from lasr_vector_database_msgs.srv import CreateCollection, CreateCollectionRequest, CreateCollectionResponse, InsertVector, InsertVectorRequest, InsertVectorResponse, QueryVector, QueryVectorRequest, QueryVectorResponse + # Initialise rospy (required for params) rospy.init_node('weaviate_vector_database', anonymous=True) @@ -13,6 +20,7 @@ rospy.init_node('weaviate_vector_database', anonymous=True) VERSION = rospy.get_param('~version', "1.22.11") HTTP_PORT = rospy.get_param('~http_port', "50050") GRPC_PORT = rospy.get_param('~grpc_port', "50051") +DB_NAME = rospy.get_param('~db_name', "weaviate") # Hardcode platform & arch, we never change these for our purposes PLATFORM="linux" @@ -26,8 +34,7 @@ package_path = rp.get_path("lasr_vector_database_weaviate") os.chdir(os.path.abspath(os.path.join(package_path, 'bin'))) # Also select database folder -DB_PATH = 
os.path.abspath(os.path.join(package_path, 'data', 'weaviate')) -# TODO: allow multiple vector databases +DB_PATH = os.path.abspath(os.path.join(package_path, 'data', DB_NAME)) # Determine constants ARCHIVE_FORMAT = "tar.gz" @@ -63,6 +70,81 @@ os.environ["ENABLE_MODULES"] = "" os.environ["CLUSTER_HOSTNAME"] = "node1" os.environ["GRPC_PORT"] = GRPC_PORT -# Hand over to weaviate -print(f"Starting Weaviate v{VERSION}") -os.execlp(os.path.abspath(FILENAME), '--host', '0.0.0.0', '--port', HTTP_PORT, '--scheme', 'http') +# Boot up Weaviate +rospy.loginfo(f"Starting Weaviate v{VERSION}") +subprocess.Popen([os.path.abspath(FILENAME), '--host', '0.0.0.0', '--port', HTTP_PORT, '--scheme', 'http']) + +# Wait for it to be ready +import requests +import time + +while True: + try: + requests.head(f"http://localhost:{HTTP_PORT}") + break + except requests.exceptions.ConnectionError: + time.sleep(0.5) + rospy.logdebug("Waiting for server to go up...") + +client = weaviate.connect_to_local( + port=int(HTTP_PORT), + grpc_port=int(GRPC_PORT), +) + +# Now we can initialise the service +def create_collection(request: CreateCollectionRequest) -> CreateCollectionResponse: + if client.collections.exists(request.name): + if request.skip_if_exists: + return CreateCollectionResponse() + + if request.clear_if_exists: + client.collections.delete(request.name) + + client.collections.create( + request.name, + vectorizer_config=wvc.Configure.Vectorizer.none(), + ) + + return CreateCollectionResponse() + +def insert_vector(request: InsertVectorRequest) -> InsertVectorResponse: + properties = {} + for property in request.properties: + properties[property.key] = property.value + + col = client.collections.get(request.name) + col.data.insert( + properties=properties, + vector=list(request.vector) + ) + + return InsertVectorResponse() + +def query_vector(request: QueryVectorRequest) -> QueryVectorResponse: + col = client.collections.get(request.name) + result = col.query.near_vector( + 
near_vector=request.vector, + limit=request.limit, + return_metadata=wvc.MetadataQuery(certainty=True) + ) + + results = [] + for object in result.objects: + results.append( + SearchResult( + certainty=object.metadata.certainty, + properties=[ + Property(key=key, value=object.properties[key]) + for key + in object.properties.keys() + ] + ) + ) + + return QueryVectorResponse(results) + +rospy.Service(f'/database/vectors/{DB_NAME}/create_collection', CreateCollection, create_collection) +rospy.Service(f'/database/vectors/{DB_NAME}/insert', InsertVector, insert_vector) +rospy.Service(f'/database/vectors/{DB_NAME}/query', QueryVector, query_vector) +rospy.loginfo('Weaviate database service ready!') +rospy.spin()