Gpsr state machine factory (#247)
Co-authored-by: Siyao <[email protected]>
Co-authored-by: m-barker <[email protected]>
Co-authored-by: Haiwei L <[email protected]>
Co-authored-by: fireblonde <[email protected]>
Co-authored-by: Benteng Ma <[email protected]>
6 people authored Jul 23, 2024
1 parent 64a7495 commit b55959e
Showing 74 changed files with 6,492 additions and 1,255 deletions.
Empty file added (mode 100644).
1 change: 1 addition & 0 deletions common/custom_worlds
Submodule custom_worlds added at 315d43
@@ -1,9 +1,18 @@
import rospy


from geometry_msgs.msg import (
Point,
Pose,
PoseStamped,
Quaternion,
)
from nav_msgs.srv import GetPlan
from nav_msgs.msg import Path

import numpy as np
import math
from scipy.spatial.transform import Rotation as R
from itertools import permutations

from typing import Union, List
@@ -27,3 +36,49 @@ def min_hamiltonian_path(start: Pose, poses: List[Pose]) -> Union[None, List[Pose]]:
            best_order = list(perm)

    return best_order
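
A quick illustration of the helper above (hypothetical poses, not part of this commit; the permutation search is factorial in the number of poses, so keep the list short):

visit_order = min_hamiltonian_path(robot_pose, [table_pose, shelf_pose, door_pose])
if visit_order is not None:
    for target in visit_order:
        pass  # navigate to each Pose in turn, e.g. by sending move_base goals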


def get_pose_on_path(
    p1: PoseStamped, p2: PoseStamped, dist_to_goal: float = 1.0, tolerance: float = 0.5
) -> Union[None, PoseStamped]:
    make_plan: rospy.ServiceProxy = rospy.ServiceProxy("/move_base/make_plan", GetPlan)

    chosen_pose: Union[None, PoseStamped] = None

    rospy.loginfo(f"Getting plan from {p1} to {p2}.")

    if p1.header.frame_id != "map" or p2.header.frame_id != "map":
        rospy.loginfo(
            f"Frames of reference are not 'map' ({p1.header.frame_id} and {p2.header.frame_id})."
        )
        return chosen_pose

    try:
        make_plan.wait_for_service(timeout=rospy.Duration.from_sec(10.0))
    except rospy.ROSException:
        rospy.loginfo("Service /move_base/make_plan not available.")
        return chosen_pose

    try:
        plan: Path = make_plan(p1, p2, tolerance).plan
    except rospy.ServiceException as e:
        rospy.loginfo(e)
        return chosen_pose

    rospy.loginfo(f"Got plan with {len(plan.poses)} poses.")

    if len(plan.poses) > 0:
        for pose in reversed(plan.poses):
            if euclidian_distance(pose.pose.position, p2.pose.position) >= dist_to_goal:
                chosen_pose = pose
                break

    return chosen_pose


def compute_face_quat(p1: Pose, p2: Pose) -> Quaternion:
    dx: float = p2.position.x - p1.position.x
    dy: float = p2.position.y - p1.position.y
    theta_deg = np.degrees(math.atan2(dy, dx))
    x, y, z, w = R.from_euler("z", theta_deg, degrees=True).as_quat()
    return Quaternion(x, y, z, w)
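
A sketch of how these two helpers might compose (hypothetical usage, not part of this commit; assumes robot_pose and goal_pose are PoseStamped in the "map" frame and a move_base node is running):

approach = get_pose_on_path(robot_pose, goal_pose, dist_to_goal=1.0)
if approach is not None:
    # Stand roughly 1 m short of the goal, rotated to face it.
    approach.pose.orientation = compute_face_quat(approach.pose, goal_pose.pose)
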
18 changes: 18 additions & 0 deletions common/helpers/numpy2message/src/__init__.py
@@ -0,0 +1,18 @@
import numpy as np


def numpy2message(np_array: np.ndarray) -> tuple[bytes, list[int], str]:
    """Serialise a numpy array into raw bytes plus the shape and dtype needed to rebuild it."""
    data = np_array.tobytes()
    shape = list(np_array.shape)
    dtype = str(np_array.dtype)
    return data, shape, dtype


def message2numpy(data: bytes, shape: list[int], dtype: str) -> np.ndarray:
    """Rebuild a numpy array from the raw bytes, shape, and dtype produced by numpy2message."""
    array_shape = tuple(shape)
    array_dtype = np.dtype(dtype)

    deserialized_array = np.frombuffer(data, dtype=array_dtype)
    deserialized_array = deserialized_array.reshape(array_shape)

    return deserialized_array
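
A minimal round-trip check of the two helpers above (hypothetical usage, not part of this commit):

import numpy as np

original = np.random.rand(4, 3).astype(np.float32)
data, shape, dtype = numpy2message(original)  # bytes, [4, 3], "float32"
restored = message2numpy(data, shape, dtype)
assert np.array_equal(original, restored)  # the round trip is lossless
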
@@ -87,6 +87,7 @@ class TranscribeSpeechAction(object):
        self._listening = False

        self._action_server.start()
        rospy.loginfo(f"Speech Action server {self._action_name} started")

    def _configure_microphone(self) -> sr.Microphone:
        """Configures the microphone for listening to speech based on the
@@ -332,8 +333,8 @@ def configure_model_params(config: dict) -> speech_model_params:
        model_params.mic_device = config["mic_device"]
    if config["no_warmup"]:
        model_params.warmup = False
    if config["energy_threshold"]:
        model_params.energy_threshold = config["energy_threshold"]
    # if config["energy_threshold"]:
    #     model_params.energy_threshold = config["energy_threshold"]
    if config["pause_threshold"]:
        model_params.pause_threshold = config["pause_threshold"]

@@ -28,6 +28,7 @@ def main():
    args = parse_args()

    recognizer = sr.Recognizer()
    recognizer.pause_threshold = 2
    microphone = sr.Microphone(device_index=args["device_index"], sample_rate=16000)
    threshold = 100
    recognizer.dynamic_energy_threshold = False
@@ -39,7 +40,9 @@ def main():
    while transcription_result != "":
        print("Listening...")
        with microphone as source:
            wav_data = recognizer.listen(
                source, phrase_time_limit=10, timeout=5
            ).get_wav_data()
        print("Processing...")
        # Magic number 32768.0 is the maximum value of a 16-bit signed integer
        float_data = (
@@ -18,6 +18,7 @@

if USE_ACTIONLIB:
    client = actionlib.SimpleActionClient("transcribe_speech", TranscribeSpeechAction)
    rospy.loginfo("Waiting for server...")
    client.wait_for_server()
    repeating = False
    rospy.loginfo("Done waiting")
@@ -34,9 +34,10 @@ def main(args: dict) -> None:
    output_dir = args["output_dir"]

    r = sr.Recognizer()
    with sr.Microphone(device_index=13, sample_rate=16000) as source:
    r.pause_threshold = 2
    with sr.Microphone(device_index=9, sample_rate=16000) as source:
        print("Say something!")
        audio = r.listen(source, timeout=5, phrase_time_limit=5)
        audio = r.listen(source, timeout=5, phrase_time_limit=10)
        print("Finished listening")

    with open(os.path.join(output_dir, "microphone.raw"), "wb") as f:
@@ -31,7 +31,7 @@ class TxtIndexService:
            "/tmp/xn.dat",
            dtype="float32",
            mode="w+",
            shape=(11779430, 384),
            shape=(8403420, 384),
        )
        for i, txt_fp in enumerate(txt_fps):
            sentences_to_embed: List[str] = parse_txt_file(txt_fp)
@@ -13,6 +13,7 @@ def load_model(model_name: str = "all-MiniLM-L6-v2") -> SentenceTransformer:
    Returns:
        sentence_transformers.SentenceTransformer: the loaded model
    """
    print("Loading model...")
    return SentenceTransformer(model_name, device=DEVICE)


@@ -171,10 +171,10 @@ def _3d_bbox_crop(
        )
        for det in detections
    ]

    if crop_method == "closest":
        detections = [
            det for _, det in sorted(zip(distances, detections), key=lambda t: t[0])
        ]
        distances.sort()

    elif crop_method == "furthest":
        detections = [
            det for _, det in sorted(
                zip(distances, detections), key=lambda t: t[0], reverse=True
            )
@@ -1,6 +1,5 @@
import json
from os import path

import cv2
import numpy as np
import rospkg
@@ -20,6 +19,25 @@
from lasr_vision_msgs.srv import Vqa, VqaRequest


def gaussian_blur(image, kernel_size, rep=3):
    """
    Apply Gaussian blur to an RGB image.

    Parameters:
        image (numpy.ndarray): The input RGB image.
        kernel_size (int): The size of the Gaussian kernel. If an even number is
            provided, it will be incremented to the next odd number.
        rep (int): The number of times the blur is applied.

    Returns:
        numpy.ndarray: The blurred RGB image.
    """
    if kernel_size % 2 == 0:
        kernel_size += 1  # Increment kernel size to make it odd if it's even

    for _ in range(rep):
        image = cv2.GaussianBlur(image, (kernel_size, kernel_size), 0)
    return image
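
A small usage sketch for the blur helper above (hypothetical input, not part of this commit; relies on the cv2 and numpy imports at the top of the file):

frame = np.random.randint(0, 255, (480, 640, 3), dtype=np.uint8)
smoothed = gaussian_blur(frame, kernel_size=12, rep=3)  # 12 is promoted to 13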


def X2conv(in_channels, out_channels, inner_channels=None):
    inner_channels = out_channels // 2 if inner_channels is None else inner_channels
    down_conv = nn.Sequential(
@@ -3,6 +3,120 @@
    CategoriesAndAttributes,
)

reference_colours = {
    "blue_very_light": np.array([240, 248, 255]),  # Alice blue
    "blue_light": np.array([173, 216, 230]),  # Light blue
    "blue_sky": np.array([135, 206, 235]),  # Sky blue
    "blue_powder": np.array([176, 224, 230]),  # Powder blue
    "blue_celeste": np.array([178, 255, 255]),  # Celeste, very pale blue shade
    "blue_periwinkle": np.array(
        [204, 204, 255]
    ),  # Periwinkle, a mix of light blue and lavender
    "blue_cadet": np.array([95, 158, 160]),  # Cadet blue, a muted blue-green
    "blue": np.array([0, 0, 255]),  # Standard blue
    "blue_royal": np.array([65, 105, 225]),  # Royal blue
    "blue_deep": np.array([0, 0, 139]),  # Deep blue
    "blue_dark": np.array([0, 0, 128]),  # Dark blue
    # "blue_navy": np.array([0, 0, 80]),  # Navy blue
    "yellow_very_light": np.array([255, 255, 204]),  # Very light yellow
    "yellow_light": np.array([255, 255, 224]),  # Light yellow
    "yellow": np.array([255, 255, 0]),  # Standard yellow
    "yellow_gold": np.array([255, 215, 0]),  # Gold yellow
    "yellow_dark": np.array([204, 204, 0]),  # Dark yellow
    "yellow_mustard": np.array([255, 219, 88]),  # Mustard yellow
    "red_very_light": np.array([255, 204, 204]),  # Very light red
    "red_light": np.array([255, 102, 102]),  # Light red
    "red": np.array([255, 0, 0]),  # Standard red
    "red_dark": np.array([139, 0, 0]),  # Dark red
    "red_maroon": np.array([128, 0, 0]),  # Maroon
    "orange_very_light": np.array([255, 229, 180]),  # Very light orange
    "orange_light": np.array([255, 179, 71]),  # Light orange
    "orange": np.array([255, 165, 0]),  # Standard orange
    "orange_dark": np.array([255, 140, 0]),  # Dark orange
    "orange_burnt": np.array([204, 85, 0]),  # Burnt orange
    "black": np.array([30, 30, 30]),  # Black
    "white": np.array([255, 255, 255]),  # White
    "gray": np.array([160, 160, 160]),  # Gray
}

colour_group_map = {
    "blue_very_light": "blue",
    "blue_light": "blue",
    "blue_sky": "blue",
    "blue_powder": "blue",
    "blue_celeste": "blue",
    "blue_periwinkle": "blue",
    "blue_cadet": "blue",
    "blue": "blue",
    "blue_royal": "blue",
    "blue_deep": "blue",
    "blue_dark": "blue",
    "yellow_very_light": "yellow",
    "yellow_light": "yellow",
    "yellow": "yellow",
    "yellow_gold": "yellow",
    "yellow_dark": "yellow",
    "yellow_mustard": "yellow",
    "red_very_light": "red",
    "red_light": "red",
    "red": "red",
    "red_dark": "red",
    "red_maroon": "red",
    "orange_very_light": "orange",
    "orange_light": "orange",
    "orange": "orange",
    "orange_dark": "orange",
    "orange_burnt": "orange",
    "black": "black",
    "white": "white",
    "gray": "gray",
}

possible_colours = ["blue", "yellow", "red", "orange", "black", "white", "gray"]


def estimate_colour(rgb_array):
    # Calculate distances to each reference colour
    # distances = {colour: cie76_distance(rgb_array, ref_rgb) for colour, ref_rgb in reference_colours.items()}
    distances = {
        colour: np.linalg.norm(rgb_array - ref_rgb)
        for colour, ref_rgb in reference_colours.items()
    }

    # Find the colour with the smallest distance
    estimated_colour = min(distances, key=distances.get)

    return estimated_colour


def split_and_sample_colours(image, mask, square_size):
    height, width, _ = image.shape
    squares_colours = {}
    valid_squares = set()

    square_index = 0

    for y in range(0, height, square_size):
        for x in range(0, width, square_size):
            square = image[y : y + square_size, x : x + square_size]
            mask_square = mask[y : y + square_size, x : x + square_size]

            # Calculate the average colour of the square
            average_colour = square.mean(axis=(0, 1))

            # Save the estimated colour name in the dictionary
            squares_colours[square_index] = estimate_colour(average_colour)
            # squares_colours[square_index] = average_colour  # estimate_colour(average_colour)

            # Keep the square only if the mask covers more than half of it
            if np.sum(mask_square) > 0.5 * square_size * square_size:
                valid_squares.add(square_index)

            square_index += 1

    return squares_colours, valid_squares
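
As a quick sanity check of the two functions above (hypothetical values, not part of this commit): a pixel near gold snaps to the closest reference colour and then groups to its base name:

name = estimate_colour(np.array([250, 210, 10]))  # nearest reference is "yellow_gold"
assert colour_group_map[name] == "yellow"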


def _softmax(x: list[float]) -> list[float]:
    """Compute softmax values for each set of scores in x without using NumPy."""
Expand Down Expand Up @@ -91,6 +205,8 @@ def from_parent_instance(
)

    def describe(self) -> dict:
        # Maybe drop "sleeveless" at some point, but this might be done in the actual environment.

        result = {
            "top": self.attributes["top"] / 2,
            "outwear": self.attributes["outwear"] / 2,
@@ -133,4 +249,38 @@ def describe(self) -> dict:
            max_attribute = "sleeveless dress"
        result["max_dress"] = max_attribute

        # QUICK FIX: keep the key so callers don't break, but no longer report a dress score.
        result["dress"] = 0.0

        # ========= colour estimation below: =========
        # blurred_image kept here so that blurring can be re-enabled quickly if needed.
        # blurred_image = gaussian_blur(self.image, kernel_size=13, rep=3)
        blurred_image = self.image
        for cloth in ["top", "outwear", "dress"]:  # "down",
            mask = self.masks[cloth]
            squares_colours, valid_squares = split_and_sample_colours(
                blurred_image, mask, 20
            )
            _squares_colours = {}
            for k in squares_colours.keys():
                if k in valid_squares:
                    _squares_colours[k] = squares_colours[k]
            squares_colours = {
                k: colour_group_map[colour] for k, colour in _squares_colours.items()
            }
            squares_colours_count = {}
            for k, colour in squares_colours.items():
                if colour not in squares_colours_count.keys():
                    squares_colours_count[colour] = 1
                else:
                    squares_colours_count[colour] += 1
            print(squares_colours_count)
            tag = cloth + " colour"
            result[tag] = {}
            for k in possible_colours:
                if k in squares_colours_count.keys():
                    result[tag][k] = squares_colours_count[k] / len(squares_colours)
                else:
                    result[tag][k] = 0.0

        return result
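
For reference, each garment's colour entry produced above is a distribution over possible_colours; illustrative values only, not real output:

result["top colour"] = {
    "blue": 0.6, "yellow": 0.0, "red": 0.1, "orange": 0.0,
    "black": 0.2, "white": 0.1, "gray": 0.0,
}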