diff --git a/common/vector_databases/lasr_vector_databases_faiss/CMakeLists.txt b/common/vector_databases/lasr_vector_databases_faiss/CMakeLists.txt index 1d550fbad..12fe169e8 100644 --- a/common/vector_databases/lasr_vector_databases_faiss/CMakeLists.txt +++ b/common/vector_databases/lasr_vector_databases_faiss/CMakeLists.txt @@ -7,7 +7,11 @@ project(lasr_vector_databases_faiss) ## Find catkin macros and libraries ## if COMPONENTS list like find_package(catkin REQUIRED COMPONENTS xyz) ## is used, also find other catkin packages -find_package(catkin REQUIRED catkin_virtualenv) +find_package(catkin REQUIRED catkin_virtualenv COMPONENTS +rospy +std_msgs +message_generation +) ## System dependencies are found with CMake's conventions # find_package(Boost REQUIRED COMPONENTS system) @@ -53,11 +57,11 @@ catkin_generate_virtualenv( # ) ## Generate services in the 'srv' folder -# add_service_files( -# FILES -# Service1.srv -# Service2.srv -# ) +add_service_files( + FILES + TxtIndex.srv + TxtQuery.srv +) # Generate actions in the 'action' folder # add_action_files( @@ -66,11 +70,10 @@ catkin_generate_virtualenv( # ) # Generate added messages and services with any dependencies listed here -# generate_messages( -# DEPENDENCIES -# actionlib_msgs -# geometry_msgs -# ) +generate_messages( + DEPENDENCIES + std_msgs +) ################################################ ## Declare ROS dynamic reconfigure parameters ## @@ -157,22 +160,13 @@ include_directories( ## Mark executable scripts (Python etc.) for installation ## in contrast to setup.py, you can choose the destination -# catkin_install_python(PROGRAMS -# nodes/qualification -# nodes/actions/wait_greet -# nodes/actions/identify -# nodes/actions/greet -# nodes/actions/get_name -# nodes/actions/learn_face -# nodes/actions/get_command -# nodes/actions/guide -# nodes/actions/find_person -# nodes/actions/detect_people -# nodes/actions/receive_object -# nodes/actions/handover_object -# nodes/better_qualification -# DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION} -# ) +catkin_install_python(PROGRAMS + nodes/txt_index_service + nodes/txt_query_service + scripts/test_index_service.py + scripts/test_query_service.py + DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION} +) ## Mark executables for installation ## See http://docs.ros.org/melodic/api/catkin/html/howto/format1/building_executables.html @@ -196,11 +190,10 @@ include_directories( # ) ## Mark other files for installation (e.g. launch and bag files, etc.) -# install(FILES -# # myfile1 -# # myfile2 -# DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION} -# ) +install(FILES + requirements.txt + DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION} +) ############# ## Testing ## diff --git a/common/vector_databases/lasr_vector_databases_faiss/doc/TECHNICAL.md b/common/vector_databases/lasr_vector_databases_faiss/doc/TECHNICAL.md new file mode 100644 index 000000000..e69de29bb diff --git a/common/vector_databases/lasr_vector_databases_faiss/doc/USAGE.md b/common/vector_databases/lasr_vector_databases_faiss/doc/USAGE.md new file mode 100644 index 000000000..d17914026 --- /dev/null +++ b/common/vector_databases/lasr_vector_databases_faiss/doc/USAGE.md @@ -0,0 +1,33 @@ +This package currently contains two services `txt_index_service` and `txt_query_service`. These services are used to create and search (respectively) a vector database of natural language sentence embeddings. 
+
+# Index Service
+The index service is used to create a [FAISS](https://github.com/facebookresearch/faiss) index object containing a set of sentence embeddings, where each sentence is assumed to be a line in a given `.txt` file. The index object is saved to disk at a specified location and can be thought of as a vector database.
+
+## Request
+The request takes two string parameters: `txt_path`, the path to the `.txt` file to create sentence embeddings for, where each line in the file is treated as a sentence; and `index_path`, the path to the `.index` file that the service will create.
+
+## Response
+This service returns an empty response.
+
+## Example Usage
+Please see the `scripts/test_index_service.py` script for a simple example of sending a request to the service.
+
+# Query Service
+The query service searches the `.index` file created by the index service to find the sentences most similar to an input query sentence.
+
+## Request
+The request requires four fields:
+
+1. `txt_path` -- a `string` giving the path to the txt file containing the original sentences that the `.index` file was populated with.
+2. `index_path` -- a `string` giving the path to the `.index` file that was created by the index service from the same txt file as `txt_path`.
+3. `query_sentence` -- a `string` containing the sentence to query the index with.
+4. `k` -- a `uint8` giving the number of closest sentences to return.
+
+## Response
+The response contains two fields:
+
+1. `closest_sentences` -- an ordered list of `string`s containing the sentences closest to the given query sentence.
+2. `cosine_similarities` -- an ordered list of `float32`s containing the cosine similarity scores of the closest sentences.
+
+## Example Usage
+Please see the `scripts/test_query_service.py` script for a simple example of sending a request to the service; a combined sketch of calling both services is shown below.
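+
+The following is a minimal sketch of calling both services from Python; the file paths and the query sentence are placeholders and should be replaced with your own. The service names, request fields, and response fields are taken from the `.srv` definitions in this package.
+
+```python
+import rospy
+from lasr_vector_databases_faiss.srv import (
+    TxtIndex,
+    TxtIndexRequest,
+    TxtQuery,
+    TxtQueryRequest,
+)
+
+rospy.init_node("faiss_usage_example", anonymous=True)
+
+# Placeholder paths -- point these at your own sentence file and index location.
+txt_path = "/path/to/sentences.txt"
+index_path = "/path/to/sentences.index"
+
+# Build the index (the response carries no fields).
+rospy.wait_for_service("lasr_faiss/txt_index")
+index_srv = rospy.ServiceProxy("lasr_faiss/txt_index", TxtIndex)
+index_srv(TxtIndexRequest(txt_path=txt_path, index_path=index_path))
+
+# Query for the three sentences closest to the query sentence.
+rospy.wait_for_service("lasr_faiss/txt_query")
+query_srv = rospy.ServiceProxy("lasr_faiss/txt_query", TxtQuery)
+response = query_srv(
+    TxtQueryRequest(
+        txt_path=txt_path,
+        index_path=index_path,
+        query_sentence="Do French like snails?",
+        k=3,
+    )
+)
+print(response.closest_sentences)
+print(response.cosine_similarities)
+```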
\ No newline at end of file diff --git a/common/vector_databases/lasr_vector_databases_faiss/nodes/txt_index_service b/common/vector_databases/lasr_vector_databases_faiss/nodes/txt_index_service new file mode 100644 index 000000000..2d7ce7949 --- /dev/null +++ b/common/vector_databases/lasr_vector_databases_faiss/nodes/txt_index_service @@ -0,0 +1,33 @@ +#!/usr/bin/env python3 +import rospy +import numpy as np +from lasr_vector_databases_faiss.srv import TxtIndexRequest, TxtIndexResponse, TxtIndex +from lasr_vector_databases_faiss import ( + load_model, + parse_txt_file, + get_sentence_embeddings, + create_vector_database, +) + + +class TxtIndexService: + def __init__(self): + rospy.init_node("txt_index_service") + rospy.Service("lasr_faiss/txt_index", TxtIndex, self.execute_cb) + self._sentence_embedding_model = load_model() + rospy.loginfo("Text index service started") + + def execute_cb(self, req: TxtIndexRequest): + txt_fp: str = req.txt_path + sentences_to_embed: list[str] = parse_txt_file(txt_fp) + sentence_embeddings: np.ndarray = get_sentence_embeddings( + sentences_to_embed, self._sentence_embedding_model + ) + index_path: str = req.index_path + create_vector_database(sentence_embeddings, index_path) + return TxtIndexResponse() + + +if __name__ == "__main__": + TxtIndexService() + rospy.spin() diff --git a/common/vector_databases/lasr_vector_databases_faiss/nodes/txt_query_service b/common/vector_databases/lasr_vector_databases_faiss/nodes/txt_query_service new file mode 100644 index 000000000..dae0970a2 --- /dev/null +++ b/common/vector_databases/lasr_vector_databases_faiss/nodes/txt_query_service @@ -0,0 +1,40 @@ +#!/usr/bin/env python3 +import rospy +import numpy as np +from lasr_vector_databases_faiss.srv import TxtQueryRequest, TxtQueryResponse, TxtQuery +from lasr_vector_databases_faiss import ( + load_model, + parse_txt_file, + get_sentence_embeddings, + load_vector_database, + query_database, +) + + +class TxtQueryService: + def __init__(self): + rospy.init_node("txt_query_service") + rospy.Service("lasr_faiss/txt_query", TxtQuery, self.execute_cb) + self._sentence_embedding_model = load_model() + rospy.loginfo("Text Query service started") + + def execute_cb(self, req: TxtQueryRequest) -> TxtQueryResponse: + txt_fp: str = req.txt_path + index_path: str = req.index_path + query_sentence: str = req.query_sentence + possible_matches: list[str] = parse_txt_file(txt_fp) + query_embedding: np.ndarray = get_sentence_embeddings( + [query_sentence], self._sentence_embedding_model # requires list of strings + ) + distances, indices = query_database(index_path, query_embedding, k=req.k) + nearest_matches = [possible_matches[i] for i in indices[0]] + + return TxtQueryResponse( + closest_sentences=nearest_matches, + cosine_similarities=distances[0].tolist(), + ) + + +if __name__ == "__main__": + TxtQueryService() + rospy.spin() diff --git a/common/vector_databases/lasr_vector_databases_faiss/package.xml b/common/vector_databases/lasr_vector_databases_faiss/package.xml index f8128ea56..55594d8d3 100644 --- a/common/vector_databases/lasr_vector_databases_faiss/package.xml +++ b/common/vector_databases/lasr_vector_databases_faiss/package.xml @@ -50,6 +50,8 @@ catkin catkin_virtualenv + message_generation + message_runtime diff --git a/common/vector_databases/lasr_vector_databases_faiss/requirements.in b/common/vector_databases/lasr_vector_databases_faiss/requirements.in index 14955d38d..3259a62c9 100644 --- a/common/vector_databases/lasr_vector_databases_faiss/requirements.in +++ 
b/common/vector_databases/lasr_vector_databases_faiss/requirements.in @@ -1 +1,3 @@ -faiss-cpu \ No newline at end of file +faiss-cpu +sentence-transformers +torch \ No newline at end of file diff --git a/common/vector_databases/lasr_vector_databases_faiss/scripts/test_index_service.py b/common/vector_databases/lasr_vector_databases_faiss/scripts/test_index_service.py new file mode 100644 index 000000000..cc7f12f3c --- /dev/null +++ b/common/vector_databases/lasr_vector_databases_faiss/scripts/test_index_service.py @@ -0,0 +1,14 @@ +#!/usr/bin/env python3 +import rospy +from lasr_vector_databases_faiss.srv import TxtIndex, TxtIndexRequest + +request = TxtIndexRequest() + +request.txt_path = ( + "/home/mattbarker/LASR/lasr_ws/src/lasr-base/tasks/gpsr/data/questions.txt" +) + +request.index_path = ( + "/home/mattbarker/LASR/lasr_ws/src/lasr-base/tasks/gpsr/data/questions.index" +) +rospy.ServiceProxy("lasr_faiss/txt_index", TxtIndex)(request) diff --git a/common/vector_databases/lasr_vector_databases_faiss/scripts/test_query_service.py b/common/vector_databases/lasr_vector_databases_faiss/scripts/test_query_service.py new file mode 100644 index 000000000..4ae89e530 --- /dev/null +++ b/common/vector_databases/lasr_vector_databases_faiss/scripts/test_query_service.py @@ -0,0 +1,22 @@ +#!/usr/bin/env python3 +import rospy +from lasr_vector_databases_faiss.srv import TxtQuery, TxtQueryRequest + +request = TxtQueryRequest() + +request.txt_path = ( + "/home/mattbarker/LASR/lasr_ws/src/lasr-base/tasks/gpsr/data/questions.txt" +) + +request.index_path = ( + "/home/mattbarker/LASR/lasr_ws/src/lasr-base/tasks/gpsr/data/questions.index" +) + +request.query_sentence = "Do French like snails?" + +request.k = 3 + +response = rospy.ServiceProxy("lasr_faiss/txt_query", TxtQuery)(request) + +print(response.closest_sentences) +print(response.cosine_similarities) diff --git a/common/vector_databases/lasr_vector_databases_faiss/src/lasr_vector_databases_faiss/__init__.py b/common/vector_databases/lasr_vector_databases_faiss/src/lasr_vector_databases_faiss/__init__.py index e69de29bb..698927d9c 100644 --- a/common/vector_databases/lasr_vector_databases_faiss/src/lasr_vector_databases_faiss/__init__.py +++ b/common/vector_databases/lasr_vector_databases_faiss/src/lasr_vector_databases_faiss/__init__.py @@ -0,0 +1,2 @@ +from .database_utils import create_vector_database, load_vector_database, query_database +from .get_sentence_embeddings import get_sentence_embeddings, load_model, parse_txt_file diff --git a/common/vector_databases/lasr_vector_databases_faiss/src/lasr_vector_databases_faiss/command_similarity.py b/common/vector_databases/lasr_vector_databases_faiss/src/lasr_vector_databases_faiss/command_similarity.py deleted file mode 100755 index f7dbcbe24..000000000 --- a/common/vector_databases/lasr_vector_databases_faiss/src/lasr_vector_databases_faiss/command_similarity.py +++ /dev/null @@ -1,157 +0,0 @@ -#!/usr/bin/env python3 -import os -import torch -import numpy as np - -# import rospy -import faiss # type: ignore -from sentence_transformers import SentenceTransformer # type: ignore -from typing import Optional - -DEVICE = "cuda" if torch.cuda.is_available() else "cpu" - - -def load_commands(command_path: str) -> list[str]: - """Loads the commands stored in the given txt file - into a list of string commands - Args: - command_path (str): path to the txt file containing - the commands -- assumes one command per line. 
- Returns: - list[str]: list of string commands where each entry in the - list is a command - """ - command_list = [] - with open(command_path, "r", encoding="utf8") as src: - for command in src: - # Strip newline char. - command_list.append(command[:-1]) - return command_list - - -def get_sentence_embeddings( - command_list: list[str], model: SentenceTransformer -) -> np.ndarray: - """Converts the list of command strings into an array of sentence - embeddings (where each command is a sentence and each sentence - is converted to a vector) - Args: - command_list (list[str]): list of string commands, where each - entry in the list is assumed to be a separate command - model (SentenceTransformer): model used to perform the embedding. - Assumes a method called encode that takes a list of strings - as input. - Returns: - np.ndarray: array of shape (n_commands, embedding_dim) - """ - - return model.encode( - command_list, - convert_to_numpy=True, - show_progress_bar=True, - batch_size=256, - device=DEVICE, - ) - - -def create_vector_database(vectors: np.ndarray) -> faiss.IndexFlatIP: - """Creates a vector database from an array of vectors of the same dimensionality - Args: - vectors (np.ndarray): shape (n_vectors, vector_dim) - - Returns: - faiss.IndexFlatIP: Flat index containing the vectors - """ - print("Creating vector database") - index_flat = faiss.IndexFlatIP(vectors.shape[1]) - faiss.normalize_L2(vectors) - index_flat.add(vectors) - print("Finished creating vector database") - return index_flat - - -def get_command_database( - index_path: str, command_path: Optional[str] = None -) -> faiss.IndexFlatL2: - """Gets a vector database containing a list of embedded commands. Creates the database - if the path does not exist, else, loads it into memory. - - Args: - index_path (str): Path to an existing faiss Index, or where to save a new one. - command_path (str, optional): Path of text file containing commands. - Only required if creating a new database. Defaults to None. - - Returns: - faiss.IndexFlatL2: faiss Index object containing the embedded commands. - """ - - if not os.path.exists(f"{index_path}.index"): - # rospy.loginfo("Creating new command vector database") - assert command_path is not None - command_list = load_commands(command_path) - model = SentenceTransformer("all-MiniLM-L6-v2") - command_embeddings = get_sentence_embeddings(command_list, model) - print(command_embeddings.shape) - command_database = create_vector_database(command_embeddings) - faiss.write_index(command_database, f"{index_path}.index") - # rospy.loginfo("Finished creating vector database") - else: - command_database = faiss.read_index(f"{index_path}.index") - - return command_database - - -def get_similar_commands( - command: str, - index_path: str, - command_path: str, - n_similar_commands: int = 100, - return_embeddings: bool = False, -) -> tuple[list[str], list[float]]: - """Gets the most similar commands to the given command string - Args: - command (str): command to compare against the database - index_path (str): path to the location to create or retrieve - the faiss index containing the embedded commands. - command_path (str): path to the txt file containing the commands - n_similar_commands (int, optional): number of similar commands to - return. Defaults to 100. 
- Returns: - list[str]: list of string commands, where each entry in the - list is a similar command - """ - command_database = get_command_database(index_path, command_path) - command_list = load_commands(command_path) - model = SentenceTransformer("all-MiniLM-L6-v2") - command_embedding = get_sentence_embeddings([command], model) - faiss.normalize_L2(command_embedding) - command_distances, command_indices = command_database.search( - command_embedding, n_similar_commands - ) - nearest_commands = [command_list[i] for i in command_indices[0]] - - if return_embeddings: - all_command_embeddings = get_sentence_embeddings(command_list, model) - # filter for only nererst commands - all_command_embeddings = all_command_embeddings[command_indices[0]] - return ( - nearest_commands, - list(command_distances[0]), - all_command_embeddings, - command_embedding, - ) - - return nearest_commands, list(command_distances[0]) - - -if __name__ == "__main__": - """Example usage of using this to find similar commands""" - command = "find Jared and asks if he needs help" - result, distances, command_embeddings, query_embedding = get_similar_commands( - command, - "/home/mattbarker/LASR/lasr_ws/src/lasr-base/tasks/qualification/data/command_index", - "/home/mattbarker/LASR/lasr_ws/src/lasr-base/tasks/qualification/data/command_list.txt", - n_similar_commands=1000, - return_embeddings=True, - ) - print(result) diff --git a/common/vector_databases/lasr_vector_databases_faiss/src/lasr_vector_databases_faiss/database_utils.py b/common/vector_databases/lasr_vector_databases_faiss/src/lasr_vector_databases_faiss/database_utils.py new file mode 100644 index 000000000..7a901ff3f --- /dev/null +++ b/common/vector_databases/lasr_vector_databases_faiss/src/lasr_vector_databases_faiss/database_utils.py @@ -0,0 +1,85 @@ +#!/usr/bin/env python3 +import os +import numpy as np +import faiss + + +def create_vector_database( + vectors: np.ndarray, + index_path: str, + overwrite: bool = False, + index_type: str = "Flat", + normalise_vecs: bool = True, +) -> None: + """Creates a FAISS Index using the factor constructor and the given + index type, and adds the given vector to the index, and then saves + it to disk using the given path. + + Args: + vectors (np.ndarray): vector of shape (n_vectors, vector_dim) + index_path (str): path to save the index + overwrite (bool, optional): Whether to replace an existing index + at the same filepath if it exists. Defaults to False. + index_type (str, optional): FAISS Index Factory string. Defaults to "IndexFlatIP". + normalise_vecs (bool, optional): Whether to normalise the vectors before + adding them to the Index. This converts the IP metric to Cosine Similarity. + Defaults to True. + """ + + if os.path.exists(index_path) and not overwrite: + raise FileExistsError( + f"Index already exists at {index_path}. Set overwrite=True to replace it." + ) + + index = faiss.index_factory( + vectors.shape[1], index_type, faiss.METRIC_INNER_PRODUCT + ) + if normalise_vecs: + faiss.normalize_L2(vectors) + index.add(vectors) + faiss.write_index(index, index_path) + + +def load_vector_database(index_path: str, use_gpu: bool = False) -> faiss.Index: + """Loads a FAISS Index from the given filepath + + Args: + index_path (str): path to the index file + use_gpu (bool, optional): Whether to load the index onto the GPU. + Defaults to False. 
+ + Returns: + faiss.Index: FAISS Index object + """ + print("Loading index from", index_path) + index = faiss.read_index(index_path) + print("Loaded index with ntotal:", index.ntotal) + if use_gpu: + index = faiss.index_cpu_to_all_gpus(index) + return index + + +def query_database( + index_path: str, + query_vectors: np.ndarray, + normalise: bool = True, + k: int = 1, +) -> tuple[np.ndarray, np.ndarray]: + """Queries the given index with the given query vectors + + Args: + index_path (str): path to the index file + query_vectors (np.ndarray): query vectors of shape (n_queries, vector_dim) + normalise (bool, optional): Whether to normalise the query vectors. + Defaults to True. + k (int, optional): Number of nearest neighbours to return. Defaults to 1. + + Returns: + tuple[np.ndarray, np.ndarray]: (distances, indices) of the nearest neighbours + each of shape (n_queries, n_neighbours) + """ + index = load_vector_database(index_path) + if normalise: + faiss.normalize_L2(query_vectors) + distances, indices = index.search(query_vectors, k) + return distances, indices diff --git a/common/vector_databases/lasr_vector_databases_faiss/src/lasr_vector_databases_faiss/get_sentence_embeddings.py b/common/vector_databases/lasr_vector_databases_faiss/src/lasr_vector_databases_faiss/get_sentence_embeddings.py new file mode 100644 index 000000000..e28189e5c --- /dev/null +++ b/common/vector_databases/lasr_vector_databases_faiss/src/lasr_vector_databases_faiss/get_sentence_embeddings.py @@ -0,0 +1,57 @@ +#!/usr/bin/env python3 +import torch +import numpy as np +from sentence_transformers import SentenceTransformer + +DEVICE = "cuda" if torch.cuda.is_available() else "cpu" + + +def load_model(model_name: str = "all-MiniLM-L6-v2") -> SentenceTransformer: + """Loads the sentence transformer model + Args: + model_name (str): name of the model to load + Returns: + sentence_transformers.SentenceTransformer: the loaded model + """ + return SentenceTransformer(model_name, device=DEVICE) + + +def parse_txt_file(fp: str) -> list[str]: + """Parses a txt file into a list of strings, + where each element is a line in the txt file with the + newline char stripped. + Args: + fp (str): path to the txt file to load + Returns: + list[str]: list of strings where each element is a line in the txt file + """ + sentences = [] + with open(fp, "r", encoding="utf8") as src: + for line in src: + # Strip newline char. + sentences.append(line[:-1]) + return sentences + + +def get_sentence_embeddings( + sentence_list: list[str], model: SentenceTransformer +) -> np.ndarray: + """Converts the list of string sentences into an array of sentence + embeddings + Args: + sentece_list (list[str]): list of string sentences, where each + entry in the list is assumed to be a separate sentence + model (SentenceTransformer): model used to perform the embedding. + Assumes a method called encode that takes a list of strings + as input. 
+ Returns: + np.ndarray: array of shape (n_commands, embedding_dim) + """ + + return model.encode( + sentence_list, + convert_to_numpy=True, + show_progress_bar=True, + batch_size=256, + device=DEVICE, + ) diff --git a/common/vector_databases/lasr_vector_databases_faiss/srv/TxtIndex.srv b/common/vector_databases/lasr_vector_databases_faiss/srv/TxtIndex.srv new file mode 100644 index 000000000..79ac01654 --- /dev/null +++ b/common/vector_databases/lasr_vector_databases_faiss/srv/TxtIndex.srv @@ -0,0 +1,7 @@ +# Path to input text file +string txt_path + +# Output path to save index +string index_path + +--- diff --git a/common/vector_databases/lasr_vector_databases_faiss/srv/TxtQuery.srv b/common/vector_databases/lasr_vector_databases_faiss/srv/TxtQuery.srv new file mode 100644 index 000000000..bbcb04613 --- /dev/null +++ b/common/vector_databases/lasr_vector_databases_faiss/srv/TxtQuery.srv @@ -0,0 +1,19 @@ +# Path to input text file +string txt_path + +# Path to index file to load +string index_path + +# Sentence to query index with +string query_sentence + +# Number of nearest sentences to return +uint8 k + +--- +# Nearest sentence +string[] closest_sentences + +# Cosine similarity of distances +float32[] cosine_similarities + diff --git a/common/vision/lasr_vision_clip/CMakeLists.txt b/common/vision/lasr_vision_clip/CMakeLists.txt index a13eb6f2a..c2ce23209 100644 --- a/common/vision/lasr_vision_clip/CMakeLists.txt +++ b/common/vision/lasr_vision_clip/CMakeLists.txt @@ -196,11 +196,10 @@ include_directories( # ) ## Mark other files for installation (e.g. launch and bag files, etc.) -# install(FILES -# # myfile1 -# # myfile2 -# DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION} -# ) +install(FILES + requirements.txt + DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION} +) ############# ## Testing ## diff --git a/tasks/gpsr/data/Gestures.xml b/tasks/gpsr/data/Gestures.xml new file mode 100644 index 000000000..59617e994 --- /dev/null +++ b/tasks/gpsr/data/Gestures.xml @@ -0,0 +1,11 @@ + + + + + + + + + diff --git a/tasks/gpsr/data/Locations.xml b/tasks/gpsr/data/Locations.xml new file mode 100644 index 000000000..aafef1b99 --- /dev/null +++ b/tasks/gpsr/data/Locations.xml @@ -0,0 +1,44 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tasks/gpsr/data/Names.xml b/tasks/gpsr/data/Names.xml new file mode 100644 index 000000000..cc53d5ea4 --- /dev/null +++ b/tasks/gpsr/data/Names.xml @@ -0,0 +1,58 @@ + + + + Adel + Angel + Axel + Charlie + Jane + Jules + Morgan + Paris + Robin + Simone + Adel + Angel + Axel + Charlie + James + Jules + Morgan + Paris + Robin + Simone + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tasks/gpsr/data/Objects.xml b/tasks/gpsr/data/Objects.xml new file mode 100644 index 000000000..aba1235bd --- /dev/null +++ b/tasks/gpsr/data/Objects.xml @@ -0,0 +1,71 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tasks/gpsr/data/Questions.xml b/tasks/gpsr/data/Questions.xml new file mode 100644 index 000000000..1d5d42c83 --- /dev/null +++ b/tasks/gpsr/data/Questions.xml @@ -0,0 +1,160 @@ + + + + + + Do french like snails? + The French eat around 30,000 tons of snails a year. + + + + Would you mind me kissing you on a train? + I would. French law forbids couples from kissing on train platforms. + + + + Which French king ruled the least? 
+ Louis XIX was the king of France for just 20 minutes, the shortest ever reign. + + + + What's the busiest train station in Europe? + Paris Gare du Nord is Europe's busiest railway station. + + + + Which is the highest mountain in Europe? + The highest mountain in Europe is Mont Blank in the French Alps. + + + + Which bread is most french, a croissant or a baguette? + The croissant was actually invented in Austria in the 13th century. + + + + Which is the most visited museum in the wrorld? + The Louvre is the most visited museum in the world. + + + + What's France's cheese production? + France produces around 1.7 million tons of cheese a year in around 1,600 varieties + + + + Which 21-stage, 23-day, 2,200-mile men's bike race is held each summer and ends at the Champs-Élysées? + That would be the Tour de France. + + + + France shares a land border with what country that also immediately follows it on an alphabetical list of the English names of E.U. nations? + I'm sure you're talking about Germany. + + + + What colour features in the national flags of all the countries that border mainland France? + Belgium, Luxemburg, Germany, Switzerland, Italy, and Spain, all have the red color in their flags. + + + + What is Vincenzo Peruggia famous for? + Vincenzo Peruggia is infamous for stealing the Mona Lisa in 1911. + + + + Which airport is the biggest and busiest in France? + The Charles de Gaulle Airport. + + + + Lyon, France is home to what border-spanning law enforcement agency? + Lyon, France is home to the Interpol. + + + + What metallic element gets its name from France's old Latin name? + The gallium element got its name from France's old Latin name + + + + Which major public square is located at the eastern end of the Champs-Elysees + The Place De La Concorde + + + + Which are the five countries that are represented at every modern Olympics since its beginning. + Australia, France, Great Britain, Greece, and Switzerland. + + + + What did Napoleon said in the Waterloo battle? + I surrender. + + + + In what city is the European Disney theme park located? + The European Disney theme park is located in Paris. + + + + How big is a nanobot? + A nanobot is 50-100nm wide. + + + + Why most computerized robot voices tend to be female? + One of the reasons is that females traditionally are lovely and caretaking. + + + + Who is the world's most expensive robot? + Honda's Asimo is the most expensive robot, costing circa $2.5 million USD. + + + + What is the main source of inspiration in robotics. + Nature, contributing to the field of bio-inspired robotics. + + + + Who crafted the word Robot? + The czech writer Karel Čapek in his 1920's play Rossum's Universal Robots + + + + What does the word Robot mean? + Labor or work. That would make me a servant. + + + + Who formulated the principles of Cybernetics in 1948. + Norbert Wiener formulated the principles of Cybernetics in 1948. + + + + Do you like super-hero movies? + Yes, I do. Zack Snyder's are the best and my favorite character is Cyborg. + + + + What did Nikola Tesla demonstrate in 1898? + In 1898, Nikola Tesla demonstrated the first radio-controlled vessel. + + + + What was developed in 1978? + The first object-level robot programming language. + + + + What is the shortest path to the Dark Side? + My A-star algorithm indicates the answer is Fear. Fear leads to anger, anger leads to hate, and hate leads to suffering. 
+ + + + diff --git a/tasks/gpsr/scripts/parse_gpsr_xmls.py b/tasks/gpsr/scripts/parse_gpsr_xmls.py new file mode 100644 index 000000000..e85fdf706 --- /dev/null +++ b/tasks/gpsr/scripts/parse_gpsr_xmls.py @@ -0,0 +1,27 @@ +import xml.etree.ElementTree as ET + + +def parse_question_xml(xml_file_path: str) -> dict: + """Parses the GPSR Q/A xml file and returns a dictionary + consisting of two lists, one for questions and one for answers, + where the index of each question corresponds to the index of its + corresponding answer. + + Args: + xml_file_path (str): full path to xml file to parse + + Returns: + dict: dictionary with keys "questions" and "answers" + each of which is a list of strings. + """ + tree = ET.parse(xml_file_path) + root = tree.getroot() + parsed_questions = [] + parsed_answers = [] + for q_a in root: + question = q_a.find("q").text + answer = q_a.find("a").text + parsed_questions.append(question) + parsed_answers.append(answer) + + return {"questions": parsed_questions, "answers": parsed_answers}
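The parser above dovetails with the FAISS services added earlier in this diff: writing the parsed questions out one per line yields the kind of `.txt` file that `lasr_faiss/txt_index` consumes, and the test scripts already reference `tasks/gpsr/data/questions.txt` and `questions.index`. A rough sketch of that pipeline is shown below; it assumes `parse_question_xml` is importable from `parse_gpsr_xmls.py` and uses illustrative paths.

```python
import rospy
from lasr_vector_databases_faiss.srv import TxtIndex, TxtIndexRequest
from parse_gpsr_xmls import parse_question_xml  # assumes the script is on the Python path

# Illustrative paths -- adjust to your workspace layout.
xml_path = "tasks/gpsr/data/Questions.xml"
txt_path = "tasks/gpsr/data/questions.txt"
index_path = "tasks/gpsr/data/questions.index"

# Flatten the Q/A XML into one question per line, the format the index service expects.
parsed = parse_question_xml(xml_path)
with open(txt_path, "w", encoding="utf8") as dst:
    for question in parsed["questions"]:
        dst.write(question + "\n")

# Embed the questions and write the FAISS index to disk via the index service.
rospy.init_node("gpsr_question_indexer", anonymous=True)
rospy.wait_for_service("lasr_faiss/txt_index")
rospy.ServiceProxy("lasr_faiss/txt_index", TxtIndex)(
    TxtIndexRequest(txt_path=txt_path, index_path=index_path)
)
```

At question-answering time, `lasr_faiss/txt_query` can then return the closest stored question, and its position in `parsed["questions"]` gives the index of the corresponding answer in `parsed["answers"]`.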