feat: text Services for the vector database package, and enhanced back-end #137

Closed
wants to merge 11 commits into from
57 changes: 25 additions & 32 deletions common/vector_databases/lasr_vector_databases_faiss/CMakeLists.txt
@@ -7,7 +7,11 @@ project(lasr_vector_databases_faiss)
## Find catkin macros and libraries
## if COMPONENTS list like find_package(catkin REQUIRED COMPONENTS xyz)
## is used, also find other catkin packages
find_package(catkin REQUIRED catkin_virtualenv)
find_package(catkin REQUIRED catkin_virtualenv COMPONENTS
  rospy
  std_msgs
  message_generation
)

## System dependencies are found with CMake's conventions
# find_package(Boost REQUIRED COMPONENTS system)
@@ -53,11 +57,11 @@ catkin_generate_virtualenv(
# )

## Generate services in the 'srv' folder
# add_service_files(
# FILES
# Service1.srv
# Service2.srv
# )
add_service_files(
  FILES
  TxtIndex.srv
  TxtQuery.srv
)

# Generate actions in the 'action' folder
# add_action_files(
@@ -66,11 +70,10 @@
# )

# Generate added messages and services with any dependencies listed here
# generate_messages(
# DEPENDENCIES
# actionlib_msgs
# geometry_msgs
# )
generate_messages(
  DEPENDENCIES
  std_msgs
)

################################################
## Declare ROS dynamic reconfigure parameters ##
@@ -157,22 +160,13 @@ include_directories(

## Mark executable scripts (Python etc.) for installation
## in contrast to setup.py, you can choose the destination
# catkin_install_python(PROGRAMS
# nodes/qualification
# nodes/actions/wait_greet
# nodes/actions/identify
# nodes/actions/greet
# nodes/actions/get_name
# nodes/actions/learn_face
# nodes/actions/get_command
# nodes/actions/guide
# nodes/actions/find_person
# nodes/actions/detect_people
# nodes/actions/receive_object
# nodes/actions/handover_object
# nodes/better_qualification
# DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION}
# )
catkin_install_python(PROGRAMS
  nodes/txt_index_service
  nodes/txt_query_service
  scripts/test_index_service.py
  scripts/test_query_service.py
  DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION}
)

## Mark executables for installation
## See http://docs.ros.org/melodic/api/catkin/html/howto/format1/building_executables.html
@@ -196,11 +190,10 @@
# )

## Mark other files for installation (e.g. launch and bag files, etc.)
# install(FILES
# # myfile1
# # myfile2
# DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION}
# )
install(FILES
  requirements.txt
  DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION}
)

#############
## Testing ##
Empty file.
33 changes: 33 additions & 0 deletions common/vector_databases/lasr_vector_databases_faiss/doc/USAGE.md
@@ -0,0 +1,33 @@
This package currently contains two services, `txt_index_service` and `txt_query_service`, which are used to create and search (respectively) a vector database of natural-language sentence embeddings.

# Index Service
The Index Service creates a [FAISS](https://github.com/facebookresearch/faiss) index object containing a set of sentence embeddings, where each sentence is assumed to be a line in a given `.txt` file. The index object is saved to disk at a specified location and can be thought of as a vector database.

## Request
The request takes two string parameters: `txt_path`, the path to the `.txt` file to create sentence embeddings for (each line in the file is treated as a sentence), and `index_path`, the path at which the service will create the `.index` file.

## Response
This service returns an empty response.

## Example Usage
Please see the `scripts/test_index_service.py` script for a simple example of sending a request to the service.
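
A minimal request from Python, sketched here with placeholder file paths and a hypothetical client node name (adapt both to your setup), might look like:

```python
#!/usr/bin/env python3
import rospy
from lasr_vector_databases_faiss.srv import TxtIndex, TxtIndexRequest

rospy.init_node("txt_index_client")  # hypothetical client node name
rospy.wait_for_service("lasr_faiss/txt_index")

request = TxtIndexRequest()
request.txt_path = "/path/to/sentences.txt"      # one sentence per line
request.index_path = "/path/to/sentences.index"  # created by the service

rospy.ServiceProxy("lasr_faiss/txt_index", TxtIndex)(request)
```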

# Query Service
The Query Service searches the `.index` file created by the Index Service for the sentences most similar to an input query sentence.

## Request
The request requires four fields:

1. `txt_path` -- a `string` giving the path to the `.txt` file containing the original sentences that the `.index` file was populated with.
2. `index_path` -- a `string` giving the path to the `.index` file created by the Index Service from the same `.txt` file as `txt_path`.
3. `query_sentence` -- a `string` containing the sentence to query the index with.
4. `k` -- a `uint8` giving the number of closest sentences to return.

## Response
The response contains two fields:

1. `closest_sentences` -- an ordered list of `string`s containing the sentences closest to the given query sentence.
2. `cosine_similarities` -- an ordered list of `float32`s containing the cosine similarity scores of the closest sentences.

## Example Usage
Please see the `scripts/test_query_service.py` script for a simple example of sending a request to the service.
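
A minimal request from Python, again with placeholder paths and a placeholder query sentence, might look like:

```python
#!/usr/bin/env python3
import rospy
from lasr_vector_databases_faiss.srv import TxtQuery, TxtQueryRequest

rospy.init_node("txt_query_client")  # hypothetical client node name
rospy.wait_for_service("lasr_faiss/txt_query")

request = TxtQueryRequest()
request.txt_path = "/path/to/sentences.txt"      # same file the index was built from
request.index_path = "/path/to/sentences.index"
request.query_sentence = "Where is the kitchen?"  # placeholder query
request.k = 3                                     # number of closest sentences

response = rospy.ServiceProxy("lasr_faiss/txt_query", TxtQuery)(request)
print(response.closest_sentences)
print(response.cosine_similarities)
```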
common/vector_databases/lasr_vector_databases_faiss/nodes/txt_index_service
@@ -0,0 +1,33 @@
#!/usr/bin/env python3
import rospy
import numpy as np
from lasr_vector_databases_faiss.srv import TxtIndexRequest, TxtIndexResponse, TxtIndex
from lasr_vector_databases_faiss import (
load_model,
parse_txt_file,
get_sentence_embeddings,
create_vector_database,
)


class TxtIndexService:
def __init__(self):
rospy.init_node("txt_index_service")
rospy.Service("lasr_faiss/txt_index", TxtIndex, self.execute_cb)
self._sentence_embedding_model = load_model()
rospy.loginfo("Text index service started")

def execute_cb(self, req: TxtIndexRequest):
txt_fp: str = req.txt_path
sentences_to_embed: list[str] = parse_txt_file(txt_fp)
sentence_embeddings: np.ndarray = get_sentence_embeddings(
sentences_to_embed, self._sentence_embedding_model
)
index_path: str = req.index_path
create_vector_database(sentence_embeddings, index_path)
return TxtIndexResponse()


if __name__ == "__main__":
TxtIndexService()
rospy.spin()
common/vector_databases/lasr_vector_databases_faiss/nodes/txt_query_service
@@ -0,0 +1,40 @@
#!/usr/bin/env python3
import rospy
import numpy as np
from lasr_vector_databases_faiss.srv import TxtQueryRequest, TxtQueryResponse, TxtQuery
from lasr_vector_databases_faiss import (
load_model,
parse_txt_file,
get_sentence_embeddings,
load_vector_database,
query_database,
)


class TxtQueryService:
def __init__(self):
rospy.init_node("txt_query_service")
rospy.Service("lasr_faiss/txt_query", TxtQuery, self.execute_cb)
self._sentence_embedding_model = load_model()
rospy.loginfo("Text Query service started")

def execute_cb(self, req: TxtQueryRequest) -> TxtQueryResponse:
txt_fp: str = req.txt_path
index_path: str = req.index_path
query_sentence: str = req.query_sentence
possible_matches: list[str] = parse_txt_file(txt_fp)
query_embedding: np.ndarray = get_sentence_embeddings(
[query_sentence], self._sentence_embedding_model # requires list of strings
)
distances, indices = query_database(index_path, query_embedding, k=req.k)
nearest_matches = [possible_matches[i] for i in indices[0]]

return TxtQueryResponse(
closest_sentences=nearest_matches,
cosine_similarities=distances[0].tolist(),
)


if __name__ == "__main__":
TxtQueryService()
rospy.spin()
common/vector_databases/lasr_vector_databases_faiss/package.xml
@@ -50,6 +50,8 @@
<!-- <doc_depend>doxygen</doc_depend> -->
<buildtool_depend>catkin</buildtool_depend>
<build_depend>catkin_virtualenv</build_depend>
<build_depend>message_generation</build_depend>
<exec_depend>message_runtime</exec_depend>
<!-- The export tag contains other, unspecified, tags -->
<export>
<!-- Other tools can request additional information be placed here -->
common/vector_databases/lasr_vector_databases_faiss/requirements.txt
@@ -1 +1,3 @@
faiss-cpu
faiss-cpu
sentence-transformers
torch
common/vector_databases/lasr_vector_databases_faiss/scripts/test_index_service.py
@@ -0,0 +1,14 @@
#!/usr/bin/env python3
import rospy
from lasr_vector_databases_faiss.srv import TxtIndex, TxtIndexRequest

request = TxtIndexRequest()

request.txt_path = (
"/home/mattbarker/LASR/lasr_ws/src/lasr-base/tasks/gpsr/data/questions.txt"
)

request.index_path = (
"/home/mattbarker/LASR/lasr_ws/src/lasr-base/tasks/gpsr/data/questions.index"
)
rospy.ServiceProxy("lasr_faiss/txt_index", TxtIndex)(request)
common/vector_databases/lasr_vector_databases_faiss/scripts/test_query_service.py
@@ -0,0 +1,22 @@
#!/usr/bin/env python3
import rospy
from lasr_vector_databases_faiss.srv import TxtQuery, TxtQueryRequest

request = TxtQueryRequest()

request.txt_path = (
"/home/mattbarker/LASR/lasr_ws/src/lasr-base/tasks/gpsr/data/questions.txt"
)

request.index_path = (
"/home/mattbarker/LASR/lasr_ws/src/lasr-base/tasks/gpsr/data/questions.index"
)

request.query_sentence = "Do French like snails?"

request.k = 3

response = rospy.ServiceProxy("lasr_faiss/txt_query", TxtQuery)(request)

print(response.closest_sentences)
print(response.cosine_similarities)
common/vector_databases/lasr_vector_databases_faiss/src/lasr_vector_databases_faiss/__init__.py
@@ -0,0 +1,2 @@
from .database_utils import create_vector_database, load_vector_database, query_database
from .get_sentence_embeddings import get_sentence_embeddings, load_model, parse_txt_file
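
For reference, these exported helpers are the same functions the two service nodes above call. A rough offline sketch of that flow, with function signatures inferred from the node code and placeholder paths and query text, could look like:

```python
#!/usr/bin/env python3
# Offline sketch using the package helpers directly, mirroring the two nodes above.
# Signatures inferred from the node code; paths and query text are placeholders.
import numpy as np
from lasr_vector_databases_faiss import (
    load_model,
    parse_txt_file,
    get_sentence_embeddings,
    create_vector_database,
    query_database,
)

model = load_model()

# Index: embed every line of the txt file and persist a FAISS index.
sentences: list[str] = parse_txt_file("/path/to/sentences.txt")
embeddings: np.ndarray = get_sentence_embeddings(sentences, model)
create_vector_database(embeddings, "/path/to/sentences.index")

# Query: embed the query sentence and retrieve the k nearest lines.
query_embedding: np.ndarray = get_sentence_embeddings(["example query"], model)
distances, indices = query_database("/path/to/sentences.index", query_embedding, k=3)
print([sentences[i] for i in indices[0]], distances[0].tolist())
```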