feat: text Services for the vector database package, and enhanced back-end #137

Closed
wants to merge 11 commits into from
57 changes: 25 additions & 32 deletions common/vector_databases/lasr_vector_databases_faiss/CMakeLists.txt
@@ -7,7 +7,11 @@ project(lasr_vector_databases_faiss)
## Find catkin macros and libraries
## if COMPONENTS list like find_package(catkin REQUIRED COMPONENTS xyz)
## is used, also find other catkin packages
find_package(catkin REQUIRED catkin_virtualenv)
find_package(catkin REQUIRED catkin_virtualenv COMPONENTS
  rospy
  std_msgs
  message_generation
)

## System dependencies are found with CMake's conventions
# find_package(Boost REQUIRED COMPONENTS system)
@@ -53,11 +57,11 @@ catkin_generate_virtualenv(
# )

## Generate services in the 'srv' folder
# add_service_files(
# FILES
# Service1.srv
# Service2.srv
# )
add_service_files(
  FILES
  TxtIndex.srv
  TxtQuery.srv
)

# Generate actions in the 'action' folder
# add_action_files(
@@ -66,11 +70,10 @@
# )

# Generate added messages and services with any dependencies listed here
# generate_messages(
# DEPENDENCIES
# actionlib_msgs
# geometry_msgs
# )
generate_messages(
  DEPENDENCIES
  std_msgs
)

################################################
## Declare ROS dynamic reconfigure parameters ##
@@ -157,22 +160,13 @@ include_directories(

## Mark executable scripts (Python etc.) for installation
## in contrast to setup.py, you can choose the destination
# catkin_install_python(PROGRAMS
# nodes/qualification
# nodes/actions/wait_greet
# nodes/actions/identify
# nodes/actions/greet
# nodes/actions/get_name
# nodes/actions/learn_face
# nodes/actions/get_command
# nodes/actions/guide
# nodes/actions/find_person
# nodes/actions/detect_people
# nodes/actions/receive_object
# nodes/actions/handover_object
# nodes/better_qualification
# DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION}
# )
catkin_install_python(PROGRAMS
  nodes/txt_index_service
  nodes/txt_query_service
  scripts/test_index_service.py
  scripts/test_query_service.py
  DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION}
)

## Mark executables for installation
## See http://docs.ros.org/melodic/api/catkin/html/howto/format1/building_executables.html
@@ -196,11 +190,10 @@
# )

## Mark other files for installation (e.g. launch and bag files, etc.)
# install(FILES
# # myfile1
# # myfile2
# DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION}
# )
install(FILES
  requirements.txt
  DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION}
)

#############
## Testing ##
Empty file.
33 changes: 33 additions & 0 deletions common/vector_databases/lasr_vector_databases_faiss/doc/USAGE.md
@@ -0,0 +1,33 @@
This package currently contains two services, `txt_index_service` and `txt_query_service`, which are used to create and search (respectively) a vector database of natural-language sentence embeddings.

# Index Service
The Index Service creates a [FAISS](https://github.com/facebookresearch/faiss) index object containing a set of sentence embeddings, where each sentence is assumed to be a line in a given `.txt` file. The index object is saved to disk at a specified location and can be thought of as a vector database.

## Request
The request takes two string parameters: `txt_path`, the path to the `.txt` file to create sentence embeddings for (each line in the file is treated as a sentence), and `index_path`, the path at which the service will create the `.index` file.

## Response
This service returns an empty response.

## Example Usage
Please see the `scripts/test_index_service.py` script for a simple example of sending a request to the service.
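
A minimal request from Python, sketched here with placeholder file paths and a hypothetical client node name (adapt both to your setup), might look like:

```python
#!/usr/bin/env python3
import rospy
from lasr_vector_databases_faiss.srv import TxtIndex, TxtIndexRequest

rospy.init_node("txt_index_client")  # hypothetical client node name
rospy.wait_for_service("lasr_faiss/txt_index")

request = TxtIndexRequest()
request.txt_path = "/path/to/sentences.txt"      # one sentence per line
request.index_path = "/path/to/sentences.index"  # created by the service

rospy.ServiceProxy("lasr_faiss/txt_index", TxtIndex)(request)
```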

# Query Service
The Query Service searches the `.index` file created by the Index Service for the sentences most similar to an input query sentence.

## Request
The request requires four fields:

1. `txt_path` -- a `string` giving the path to the `.txt` file containing the original sentences that the `.index` file was populated with.
2. `index_path` -- a `string` giving the path to the `.index` file created by the Index Service from the same `.txt` file as `txt_path`.
3. `query_sentence` -- a `string` containing the sentence to query the index with.
4. `k` -- a `uint8` giving the number of closest sentences to return.

## Response
The response contains two fields:

1. `closest_sentences` -- an ordered list of `string`s containing the sentences closest to the given query sentence.
2. `cosine_similarities` -- an ordered list of `float32`s containing the cosine similarity scores of the closest sentences.

## Example Usage
Please see the `scripts/test_query_service.py` script for a simple example of sending a request to the service.
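
A minimal request from Python, again with placeholder paths and a placeholder query sentence, might look like:

```python
#!/usr/bin/env python3
import rospy
from lasr_vector_databases_faiss.srv import TxtQuery, TxtQueryRequest

rospy.init_node("txt_query_client")  # hypothetical client node name
rospy.wait_for_service("lasr_faiss/txt_query")

request = TxtQueryRequest()
request.txt_path = "/path/to/sentences.txt"      # same file the index was built from
request.index_path = "/path/to/sentences.index"
request.query_sentence = "Where is the kitchen?"  # placeholder query
request.k = 3                                     # number of closest sentences

response = rospy.ServiceProxy("lasr_faiss/txt_query", TxtQuery)(request)
print(response.closest_sentences)
print(response.cosine_similarities)
```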
common/vector_databases/lasr_vector_databases_faiss/nodes/txt_index_service
@@ -0,0 +1,33 @@
#!/usr/bin/env python3
import rospy
import numpy as np
from lasr_vector_databases_faiss.srv import TxtIndexRequest, TxtIndexResponse, TxtIndex
from lasr_vector_databases_faiss import (
load_model,
parse_txt_file,
get_sentence_embeddings,
create_vector_database,
)


class TxtIndexService:
def __init__(self):
rospy.init_node("txt_index_service")
rospy.Service("lasr_faiss/txt_index", TxtIndex, self.execute_cb)
self._sentence_embedding_model = load_model()
rospy.loginfo("Text index service started")

def execute_cb(self, req: TxtIndexRequest):
txt_fp: str = req.txt_path
sentences_to_embed: list[str] = parse_txt_file(txt_fp)
sentence_embeddings: np.ndarray = get_sentence_embeddings(
sentences_to_embed, self._sentence_embedding_model
)
index_path: str = req.index_path
create_vector_database(sentence_embeddings, index_path)
return TxtIndexResponse()


if __name__ == "__main__":
TxtIndexService()
rospy.spin()
common/vector_databases/lasr_vector_databases_faiss/nodes/txt_query_service
@@ -0,0 +1,40 @@
#!/usr/bin/env python3
import rospy
import numpy as np
from lasr_vector_databases_faiss.srv import TxtQueryRequest, TxtQueryResponse, TxtQuery
from lasr_vector_databases_faiss import (
load_model,
parse_txt_file,
get_sentence_embeddings,
load_vector_database,
query_database,
)


class TxtQueryService:
def __init__(self):
rospy.init_node("txt_query_service")
rospy.Service("lasr_faiss/txt_query", TxtQuery, self.execute_cb)
self._sentence_embedding_model = load_model()
rospy.loginfo("Text Query service started")

def execute_cb(self, req: TxtQueryRequest) -> TxtQueryResponse:
txt_fp: str = req.txt_path
index_path: str = req.index_path
query_sentence: str = req.query_sentence
possible_matches: list[str] = parse_txt_file(txt_fp)
query_embedding: np.ndarray = get_sentence_embeddings(
[query_sentence], self._sentence_embedding_model # requires list of strings
)
distances, indices = query_database(index_path, query_embedding, k=req.k)
nearest_matches = [possible_matches[i] for i in indices[0]]

return TxtQueryResponse(
closest_sentences=nearest_matches,
cosine_similarities=distances[0].tolist(),
)


if __name__ == "__main__":
TxtQueryService()
rospy.spin()
common/vector_databases/lasr_vector_databases_faiss/package.xml
@@ -50,6 +50,8 @@
<!-- <doc_depend>doxygen</doc_depend> -->
<buildtool_depend>catkin</buildtool_depend>
<build_depend>catkin_virtualenv</build_depend>
<build_depend>message_generation</build_depend>
<exec_depend>message_runtime</exec_depend>
<!-- The export tag contains other, unspecified, tags -->
<export>
<!-- Other tools can request additional information be placed here -->
common/vector_databases/lasr_vector_databases_faiss/requirements.txt
@@ -1 +1,3 @@
faiss-cpu
faiss-cpu
sentence-transformers
torch
common/vector_databases/lasr_vector_databases_faiss/scripts/test_index_service.py
@@ -0,0 +1,14 @@
#!/usr/bin/env python3
import rospy
from lasr_vector_databases_faiss.srv import TxtIndex, TxtIndexRequest

request = TxtIndexRequest()

request.txt_path = (
"/home/mattbarker/LASR/lasr_ws/src/lasr-base/tasks/gpsr/data/questions.txt"
)

request.index_path = (
"/home/mattbarker/LASR/lasr_ws/src/lasr-base/tasks/gpsr/data/questions.index"
)
rospy.ServiceProxy("lasr_faiss/txt_index", TxtIndex)(request)
common/vector_databases/lasr_vector_databases_faiss/scripts/test_query_service.py
@@ -0,0 +1,22 @@
#!/usr/bin/env python3
import rospy
from lasr_vector_databases_faiss.srv import TxtQuery, TxtQueryRequest

request = TxtQueryRequest()

request.txt_path = (
"/home/mattbarker/LASR/lasr_ws/src/lasr-base/tasks/gpsr/data/questions.txt"
)

request.index_path = (
"/home/mattbarker/LASR/lasr_ws/src/lasr-base/tasks/gpsr/data/questions.index"
)

request.query_sentence = "Do French like snails?"

request.k = 3

response = rospy.ServiceProxy("lasr_faiss/txt_query", TxtQuery)(request)

print(response.closest_sentences)
print(response.cosine_similarities)
common/vector_databases/lasr_vector_databases_faiss/src/lasr_vector_databases_faiss/__init__.py
@@ -0,0 +1,2 @@
from .database_utils import create_vector_database, load_vector_database, query_database
from .get_sentence_embeddings import get_sentence_embeddings, load_model, parse_txt_file
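
For reference, these exported helpers are the same functions the two service nodes above call. A rough offline sketch of that flow, with function signatures inferred from the node code and placeholder paths and query text, could look like:

```python
#!/usr/bin/env python3
# Offline sketch using the package helpers directly, mirroring the two nodes above.
# Signatures inferred from the node code; paths and query text are placeholders.
import numpy as np
from lasr_vector_databases_faiss import (
    load_model,
    parse_txt_file,
    get_sentence_embeddings,
    create_vector_database,
    query_database,
)

model = load_model()

# Index: embed every line of the txt file and persist a FAISS index.
sentences: list[str] = parse_txt_file("/path/to/sentences.txt")
embeddings: np.ndarray = get_sentence_embeddings(sentences, model)
create_vector_database(embeddings, "/path/to/sentences.index")

# Query: embed the query sentence and retrieve the k nearest lines.
query_embedding: np.ndarray = get_sentence_embeddings(["example query"], model)
distances, indices = query_database("/path/to/sentences.index", query_embedding, k=3)
print([sentences[i] for i in indices[0]], distances[0].tolist())
```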