From 23e004faa0a07326faddeb34e5cd673aec63f60c Mon Sep 17 00:00:00 2001 From: m-barker Date: Tue, 23 Apr 2024 11:02:08 +0100 Subject: [PATCH 1/5] fix: incorrect folder for command similarity states --- tasks/gpsr/{ => src/gpsr}/states/command_similarity_matcher.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tasks/gpsr/{ => src/gpsr}/states/command_similarity_matcher.py (100%) diff --git a/tasks/gpsr/states/command_similarity_matcher.py b/tasks/gpsr/src/gpsr/states/command_similarity_matcher.py similarity index 100% rename from tasks/gpsr/states/command_similarity_matcher.py rename to tasks/gpsr/src/gpsr/states/command_similarity_matcher.py From 874067339f0bb4dd1b19dc1d0ed2fdf18b074caf Mon Sep 17 00:00:00 2001 From: m-barker Date: Tue, 23 Apr 2024 11:23:50 +0100 Subject: [PATCH 2/5] feat: add command parser state machine --- tasks/gpsr/scripts/main.py | 0 tasks/gpsr/src/gpsr/states/__init__.py | 2 + tasks/gpsr/src/gpsr/states/command_parser.py | 84 ++++++++++++++++++++ 3 files changed, 86 insertions(+) create mode 100644 tasks/gpsr/scripts/main.py create mode 100644 tasks/gpsr/src/gpsr/states/command_parser.py diff --git a/tasks/gpsr/scripts/main.py b/tasks/gpsr/scripts/main.py new file mode 100644 index 000000000..e69de29bb diff --git a/tasks/gpsr/src/gpsr/states/__init__.py b/tasks/gpsr/src/gpsr/states/__init__.py index 74a382e32..fcf034f64 100644 --- a/tasks/gpsr/src/gpsr/states/__init__.py +++ b/tasks/gpsr/src/gpsr/states/__init__.py @@ -1 +1,3 @@ from .talk import Talk +from .command_parser import ParseCommand, CommandParserStateMachine +from .command_similarity_matcher import CommandSimilarityMatcher diff --git a/tasks/gpsr/src/gpsr/states/command_parser.py b/tasks/gpsr/src/gpsr/states/command_parser.py new file mode 100644 index 000000000..60dfb510d --- /dev/null +++ b/tasks/gpsr/src/gpsr/states/command_parser.py @@ -0,0 +1,84 @@ +#!/usr/bin/env python3 +import argparse +import smach +import rospy + +from gpsr.load_known_data import GPSRDataLoader +from gpsr.regex_command_parser import Configuration, gpsr_compile_and_parse +from gpsr.states import CommandSimilarityMatcher +from lasr_skills import AskAndListen, Say + + +class ParseCommand(smach.State): + def __init__(self, data_config: Configuration): + """Takes in a string containing the command and runs the command parser + that outputs a dictionary of parameters for the command. + + Args: + data_config (Configuration): Configuration object containing the regex patterns + """ + smach.State.__init__( + self, + outcomes=["succeeded", "failed"], + input_keys=["raw_command"], + output_keys=["parsed_command"], + ) + self.data_config = data_config + + def execute(self, userdata): + rospy.loginfo(f"Received command : {userdata.raw_command.lower()}") + try: + userdata.parsed_command = gpsr_compile_and_parse( + self.data_config, userdata.transcribed_speech.lower() + ) + except Exception as e: + rospy.logerr(e) + return "failed" + return "succeeded" + + +class CommandParserStateMachine(smach.StateMachine): + def __init__( + self, + data_config: Configuration, + n_vecs_per_txt_file: int = 1177943, + total_txt_files: int = 10, + ): + """State machine that takes in a command, matches it to a known command, and + outputs the parsed command. + + Args: + data_config (Configuration): Configuration object containing the regex patterns + n_vecs_per_txt_file (int, optional): number of vectors in each gpsr txt + file. Defaults to 100. + total_txt_files (int, optional): total number of gpsr txt files. Defaults to 10. 
+ """ + smach.StateMachine.__init__(self, outcomes=["succeeded", "failed"]) + + with self: + smach.StateMachine.add( + "ASK_FOR_COMMAND", + AskAndListen(), + transitions={"succeeded": "PARSE_COMMAND", "failed": "failed"}, + remapping={"transcribed_speech": "raw_command"}, + ) + + smach.StateMachine.add( + "PARSE_COMMAND", + ParseCommand(data_config), + transitions={ + "succeeded": "succeeded", + "failed": "COMMAND_SIMILARITY_MATCHER", + }, + remapping={"parsed_command": "parsed_command"}, + ) + + smach.StateMachine.add( + "COMMAND_SIMILARITY_MATCHER", + CommandSimilarityMatcher([n_vecs_per_txt_file] * total_txt_files), + transitions={"succeeded": "PARSE_COMMAND", "failed": "failed"}, + remapping={ + "command": "parsed_command", + "matched_command": "matched_command", + }, + ) From 6f822bf159fc58617f29633e334f3da5ef962605 Mon Sep 17 00:00:00 2001 From: m-barker Date: Tue, 23 Apr 2024 11:48:08 +0100 Subject: [PATCH 3/5] feat: working command parser sm --- tasks/gpsr/CMakeLists.txt | 1 + tasks/gpsr/scripts/main.py | 41 ++++++++++++++++++++ tasks/gpsr/src/gpsr/states/__init__.py | 2 +- tasks/gpsr/src/gpsr/states/command_parser.py | 12 +++--- 4 files changed, 48 insertions(+), 8 deletions(-) diff --git a/tasks/gpsr/CMakeLists.txt b/tasks/gpsr/CMakeLists.txt index af1f6c908..a1c9b8bc6 100644 --- a/tasks/gpsr/CMakeLists.txt +++ b/tasks/gpsr/CMakeLists.txt @@ -155,6 +155,7 @@ include_directories( ## in contrast to setup.py, you can choose the destination catkin_install_python(PROGRAMS scripts/parse_gpsr_xmls.py + scripts/main.py nodes/commands/question_answer nodes/command_parser DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION} diff --git a/tasks/gpsr/scripts/main.py b/tasks/gpsr/scripts/main.py index e69de29bb..d4d44bda3 100644 --- a/tasks/gpsr/scripts/main.py +++ b/tasks/gpsr/scripts/main.py @@ -0,0 +1,41 @@ +#!/usr/bin/env python3 +import smach +import rospy +import sys +from typing import Dict +from gpsr.load_known_data import GPSRDataLoader +from gpsr.regex_command_parser import Configuration +from gpsr.states import CommandParserStateMachine + + +def load_gpsr_configuration() -> Configuration: + gpsr_data_dir = sys.argv[1] + """Loads the configuration for the GPSR command parser""" + data_loader = GPSRDataLoader(data_dir=gpsr_data_dir) + gpsr_known_data: Dict = data_loader.load_data() + config = Configuration( + { + "person_names": gpsr_known_data["names"], + "location_names": gpsr_known_data["non_placeable_locations"], + "placement_location_names": gpsr_known_data["placeable_locations"], + "room_names": gpsr_known_data["rooms"], + "object_names": gpsr_known_data["objects"], + "object_categories_plural": gpsr_known_data["categories_plural"], + "object_categories_singular": gpsr_known_data["categories_singular"], + } + ) + return config + + +def main(): + config = load_gpsr_configuration() + command_parser_sm = CommandParserStateMachine(data_config=config) + command_parser_sm.execute() + parsed_command: Dict = command_parser_sm.userdata.parsed_command + rospy.loginfo(f"Parsed command: {parsed_command}") + + +if __name__ == "__main__": + rospy.init_node("gpsr_main") + main() + rospy.spin() diff --git a/tasks/gpsr/src/gpsr/states/__init__.py b/tasks/gpsr/src/gpsr/states/__init__.py index fcf034f64..84504f968 100644 --- a/tasks/gpsr/src/gpsr/states/__init__.py +++ b/tasks/gpsr/src/gpsr/states/__init__.py @@ -1,3 +1,3 @@ from .talk import Talk -from .command_parser import ParseCommand, CommandParserStateMachine from .command_similarity_matcher import CommandSimilarityMatcher +from .command_parser 
import ParseCommand, CommandParserStateMachine diff --git a/tasks/gpsr/src/gpsr/states/command_parser.py b/tasks/gpsr/src/gpsr/states/command_parser.py index 60dfb510d..f0345bdcb 100644 --- a/tasks/gpsr/src/gpsr/states/command_parser.py +++ b/tasks/gpsr/src/gpsr/states/command_parser.py @@ -1,12 +1,10 @@ #!/usr/bin/env python3 -import argparse import smach import rospy -from gpsr.load_known_data import GPSRDataLoader from gpsr.regex_command_parser import Configuration, gpsr_compile_and_parse from gpsr.states import CommandSimilarityMatcher -from lasr_skills import AskAndListen, Say +from lasr_skills import AskAndListen class ParseCommand(smach.State): @@ -29,7 +27,7 @@ def execute(self, userdata): rospy.loginfo(f"Received command : {userdata.raw_command.lower()}") try: userdata.parsed_command = gpsr_compile_and_parse( - self.data_config, userdata.transcribed_speech.lower() + self.data_config, userdata.raw_command.lower() ) except Exception as e: rospy.logerr(e) @@ -58,7 +56,7 @@ def __init__( with self: smach.StateMachine.add( "ASK_FOR_COMMAND", - AskAndListen(), + AskAndListen(tts_phrase="Hello, please tell me your command."), transitions={"succeeded": "PARSE_COMMAND", "failed": "failed"}, remapping={"transcribed_speech": "raw_command"}, ) @@ -78,7 +76,7 @@ def __init__( CommandSimilarityMatcher([n_vecs_per_txt_file] * total_txt_files), transitions={"succeeded": "PARSE_COMMAND", "failed": "failed"}, remapping={ - "command": "parsed_command", - "matched_command": "matched_command", + "command": "raw_command", + "matched_command": "raw_command", }, ) From 64a1b4bbe7d67e11b832409f32e4898b1e140221 Mon Sep 17 00:00:00 2001 From: m-barker Date: Tue, 23 Apr 2024 13:54:38 +0100 Subject: [PATCH 4/5] feat: initial state machine factory --- tasks/gpsr/data/mock_data/names.json | 2 +- tasks/gpsr/scripts/main.py | 3 + tasks/gpsr/src/gpsr/state_machine_factory.py | 76 ++++++++++++++++++++ tasks/gpsr/src/gpsr/states/talk.py | 14 ++-- 4 files changed, 87 insertions(+), 8 deletions(-) create mode 100644 tasks/gpsr/src/gpsr/state_machine_factory.py diff --git a/tasks/gpsr/data/mock_data/names.json b/tasks/gpsr/data/mock_data/names.json index 7dbbe563f..0882a43c4 100644 --- a/tasks/gpsr/data/mock_data/names.json +++ b/tasks/gpsr/data/mock_data/names.json @@ -4,7 +4,7 @@ "angel", "axel", "charlie", - "janes", + "jane", "jules", "morgan", "paris", diff --git a/tasks/gpsr/scripts/main.py b/tasks/gpsr/scripts/main.py index d4d44bda3..4c0e15f39 100644 --- a/tasks/gpsr/scripts/main.py +++ b/tasks/gpsr/scripts/main.py @@ -4,6 +4,7 @@ import sys from typing import Dict from gpsr.load_known_data import GPSRDataLoader +from gpsr.state_machine_factory import build_state_machine from gpsr.regex_command_parser import Configuration from gpsr.states import CommandParserStateMachine @@ -33,6 +34,8 @@ def main(): command_parser_sm.execute() parsed_command: Dict = command_parser_sm.userdata.parsed_command rospy.loginfo(f"Parsed command: {parsed_command}") + sm = build_state_machine(parsed_command) + sm.execute() if __name__ == "__main__": diff --git a/tasks/gpsr/src/gpsr/state_machine_factory.py b/tasks/gpsr/src/gpsr/state_machine_factory.py new file mode 100644 index 000000000..1618aa415 --- /dev/null +++ b/tasks/gpsr/src/gpsr/state_machine_factory.py @@ -0,0 +1,76 @@ +#!/usr/bin/env python3 +import rospy +import smach +from smach_ros import ServiceState +from typing import Dict, List +from lasr_skills import GoToLocation, FindNamedPerson +from gpsr.states import Talk + +STATE_COUNT = 0 + + +def increment_state_count() -> 
int:
+    global STATE_COUNT
+    STATE_COUNT += 1
+    return STATE_COUNT
+
+
+def build_state_machine(parsed_command: Dict) -> smach.StateMachine:
+    """Constructs the parameterized state machine for the GPSR task,
+    given the parsed command.
+
+    Args:
+        parsed_command (Dict): parsed command.
+
+    Returns:
+        smach.StateMachine: parameterized state machine ready to be executed.
+    """
+    command_verbs: List[str] = parsed_command["commands"]
+    command_params: List[Dict] = parsed_command["params"]
+    sm = smach.StateMachine(outcomes=["succeeded", "failed"])
+    with sm:
+        for command_verb, command_param in zip(command_verbs, command_params):
+            if command_verb == "greet":
+                if "name" in command_param:
+                    location_param = (
+                        f"/gpsr/arena/rooms/{command_param['location']}/pose"
+                    )
+                    sm.add(
+                        f"STATE_{increment_state_count()}",
+                        GoToLocation(location_param=location_param),
+                        transitions={
+                            "succeeded": f"STATE_{STATE_COUNT + 1}",
+                            "failed": "failed",
+                        },
+                    )
+                    sm.add(
+                        f"STATE_{increment_state_count()}",
+                        FindNamedPerson(
+                            name=command_param["name"], location_param=location_param
+                        ),
+                        transitions={
+                            "succeeded": f"STATE_{STATE_COUNT + 1}",
+                            "failed": "failed",
+                        },
+                    )
+                elif "clothes" in command_param:
+                    pass
+                else:
+                    raise ValueError(
+                        "Greet command received with no name or clothes in command parameters"
+                    )
+            elif command_verb == "talk":
+                if "gesture" in command_param:
+                    pass
+                elif "talk" in command_param:
+                    sm.add(
+                        f"STATE_{increment_state_count()}",
+                        Talk(command_param["talk"]),
+                        transitions={"succeeded": "succeeded", "failed": "failed"},
+                    )
+                else:
+                    raise ValueError(
+                        "Talk command received with no gesture or talk in command parameters"
+                    )
+
+    return sm
diff --git a/tasks/gpsr/src/gpsr/states/talk.py b/tasks/gpsr/src/gpsr/states/talk.py
index ea22ff951..29204c573 100644
--- a/tasks/gpsr/src/gpsr/states/talk.py
+++ b/tasks/gpsr/src/gpsr/states/talk.py
@@ -8,13 +8,13 @@
 # In future we might want to add looking at person talking to the state machine.
 class Talk(smach.StateMachine):
     class GenerateResponse(smach.State):
-        def __init__(self):
+        def __init__(self, talk_phrase: str):
             smach.State.__init__(
                 self,
                 outcomes=["succeeded", "failed"],
-                input_keys=["talk_phrase"],
                 output_keys=["response"],
             )
+            self._talk_phrase = talk_phrase
 
         def _create_responses(self) -> Dict[str, str]:
             response = {}
@@ -43,23 +43,23 @@ def _create_responses(self) -> Dict[str, str]:
 
         def execute(self, userdata):
             try:
-                userdata.response = self._create_responses()[userdata.talk_phrase]
+                userdata.response = self._create_responses()[self._talk_phrase]
             except KeyError:
                 rospy.loginfo(
-                    f"Failed to generate response for {userdata.talk_phrase} as it is not in the list of possible questions."
+                    f"Failed to generate response for {self._talk_phrase} as it is not in the list of possible questions."
) return "failed" return "succeeded" - def __init__(self): + def __init__(self, talk_phrase: str): smach.StateMachine.__init__(self, outcomes=["succeeded", "failed"]) with self: smach.StateMachine.add( "GENERATE_RESPONSE", - self.GenerateResponse(), + self.GenerateResponse(talk_phrase), transitions={"succeeded": "SAY_RESPONSE", "failed": "failed"}, - remapping={"talk_phrase": "talk_phrase", "response": "response"}, + remapping={"response": "response"}, ) smach.StateMachine.add( From aa5dc3ed8a2da35e6a13bee4c4de00295ede6355 Mon Sep 17 00:00:00 2001 From: Jared Swift Date: Fri, 5 Jul 2024 03:06:49 +0100 Subject: [PATCH 5/5] Receptionist polishing (#236) Co-authored-by: Matt Co-authored-by: fireblonde Co-authored-by: Haiwei L --- .../src/numpy2message/__init__.py | 3 +- common/helpers/tf_pcl/CMakeLists.txt | 202 ++++ common/helpers/tf_pcl/package.xml | 65 ++ common/helpers/tf_pcl/setup.py | 8 + common/helpers/tf_pcl/src/tf_pcl/__init__.py | 59 ++ .../scripts/microphone_tuning_test.py | 0 .../scripts/test_microphones.py | 2 +- .../src/lasr_vision_bodypix/bodypix.py | 31 +- common/vision/lasr_vision_clip/CMakeLists.txt | 5 +- .../examples/encode_image_example.py | 25 + .../examples/test_person_detector.py | 107 +++ .../lasr_vision_clip/nodes/img_encoder.py | 47 + .../lasr_vision_clip/nodes/learn_face.py | 8 + .../vision/lasr_vision_clip/requirements.in | 4 +- .../vision/lasr_vision_clip/requirements.txt | 51 +- .../src/lasr_vision_clip/__init__.py | 3 +- .../src/lasr_vision_clip/clip_utils.py | 34 +- .../src/lasr_vision_clip/learn_face.py | 112 +++ .../examples/request.py | 4 +- ...{setup.launch => cropped_detection.launch} | 4 +- .../cropped_detection.py | 92 +- .../lasr_vision_deepface/launch/camera.launch | 4 +- .../launch/service.launch | 4 +- .../vision/lasr_vision_deepface/nodes/service | 85 +- .../src/lasr_vision_deepface/deepface.py | 71 +- common/vision/lasr_vision_msgs/CMakeLists.txt | 4 + .../msg/BodyPixKeypointNormalized.msg | 8 + .../lasr_vision_msgs/msg/CDResponse.msg | 2 +- .../srv/BodyPixKeypointDetection.srv | 6 + .../lasr_vision_msgs/srv/ClipImageEncoder.srv | 8 + .../lasr_vision_msgs/srv/ClipLearnFace.srv | 7 + .../srv/ClipRecogniseFace.srv | 13 + .../vision/lasr_vision_msgs/srv/LearnFace.srv | 4 +- .../src/lasr_vision_yolov8/yolo.py | 3 + skills/CMakeLists.txt | 1 + skills/config/motions.yaml | 64 +- .../launch/unit_test_describe_people.launch | 6 +- skills/scripts/test_learn_face.py | 22 - skills/scripts/unit_test_adjust_camera.py | 18 + skills/src/lasr_skills/__init__.py | 3 +- skills/src/lasr_skills/adjust_camera.py | 445 +++++++++ skills/src/lasr_skills/describe_people.py | 62 +- skills/src/lasr_skills/detect_3d_in_area.py | 9 +- skills/src/lasr_skills/detect_gesture.py | 1 + skills/src/lasr_skills/learn_face.py | 26 - skills/src/lasr_skills/look_at_person.py | 20 +- skills/src/lasr_skills/look_to_given_point.py | 58 -- skills/src/lasr_skills/look_to_point.py | 26 +- skills/src/lasr_skills/validate_keypoints.py | 1 - .../lasr_skills/vision/get_cropped_image.py | 382 +------- tasks/receptionist/config/lab.yaml | 64 +- tasks/receptionist/config/motions.yaml | 10 + tasks/receptionist/launch/setup.launch | 1 + tasks/receptionist/scripts/main.py | 79 +- tasks/receptionist/scripts/test_seat_guest.py | 36 + .../src/receptionist/state_machine.py | 868 +++--------------- .../src/receptionist/states/__init__.py | 8 +- .../src/receptionist/states/check_sofa.py | 75 ++ .../receptionist/states/find_and_look_at.py | 493 +++++----- .../receptionist/states/get_name_or_drink.py | 1 - 
.../src/receptionist/states/handle_guest.py | 367 ++++++++ .../src/receptionist/states/introduce.py | 5 +- .../states/introduce_and_seat_guest.py | 402 ++++++++ .../receptionist/states/pointcloud_sweep.py | 145 +++ .../states/receptionist_learn_face.py | 76 +- .../receptionist/states/recognise_people.py | 36 + .../states/run_and_process_detections.py | 348 +++++++ .../src/receptionist/states/seat_guest.py | 110 +-- 68 files changed, 3593 insertions(+), 1760 deletions(-) create mode 100644 common/helpers/tf_pcl/CMakeLists.txt create mode 100644 common/helpers/tf_pcl/package.xml create mode 100644 common/helpers/tf_pcl/setup.py create mode 100644 common/helpers/tf_pcl/src/tf_pcl/__init__.py mode change 100755 => 100644 common/speech/lasr_speech_recognition_whisper/scripts/microphone_tuning_test.py create mode 100644 common/vision/lasr_vision_clip/examples/encode_image_example.py create mode 100755 common/vision/lasr_vision_clip/examples/test_person_detector.py create mode 100644 common/vision/lasr_vision_clip/nodes/img_encoder.py create mode 100644 common/vision/lasr_vision_clip/nodes/learn_face.py create mode 100644 common/vision/lasr_vision_clip/src/lasr_vision_clip/learn_face.py rename common/vision/lasr_vision_cropped_detection/launch/{setup.launch => cropped_detection.launch} (66%) create mode 100644 common/vision/lasr_vision_msgs/msg/BodyPixKeypointNormalized.msg create mode 100644 common/vision/lasr_vision_msgs/srv/ClipImageEncoder.srv create mode 100644 common/vision/lasr_vision_msgs/srv/ClipLearnFace.srv create mode 100644 common/vision/lasr_vision_msgs/srv/ClipRecogniseFace.srv delete mode 100755 skills/scripts/test_learn_face.py create mode 100644 skills/scripts/unit_test_adjust_camera.py create mode 100644 skills/src/lasr_skills/adjust_camera.py delete mode 100755 skills/src/lasr_skills/learn_face.py delete mode 100755 skills/src/lasr_skills/look_to_given_point.py create mode 100755 tasks/receptionist/scripts/test_seat_guest.py create mode 100644 tasks/receptionist/src/receptionist/states/check_sofa.py create mode 100644 tasks/receptionist/src/receptionist/states/handle_guest.py create mode 100644 tasks/receptionist/src/receptionist/states/introduce_and_seat_guest.py create mode 100755 tasks/receptionist/src/receptionist/states/pointcloud_sweep.py create mode 100644 tasks/receptionist/src/receptionist/states/recognise_people.py create mode 100755 tasks/receptionist/src/receptionist/states/run_and_process_detections.py diff --git a/common/helpers/numpy2message/src/numpy2message/__init__.py b/common/helpers/numpy2message/src/numpy2message/__init__.py index 328194092..8696e7247 100644 --- a/common/helpers/numpy2message/src/numpy2message/__init__.py +++ b/common/helpers/numpy2message/src/numpy2message/__init__.py @@ -1,7 +1,8 @@ +from typing import Tuple import numpy as np -def numpy2message(np_array: np.ndarray) -> list: +def numpy2message(np_array: np.ndarray) -> Tuple: data = np_array.tobytes() shape = list(np_array.shape) dtype = str(np_array.dtype) diff --git a/common/helpers/tf_pcl/CMakeLists.txt b/common/helpers/tf_pcl/CMakeLists.txt new file mode 100644 index 000000000..bc77c47f1 --- /dev/null +++ b/common/helpers/tf_pcl/CMakeLists.txt @@ -0,0 +1,202 @@ +cmake_minimum_required(VERSION 3.0.2) +project(tf_pcl) + +## Compile as C++11, supported in ROS Kinetic and newer +# add_compile_options(-std=c++11) + +## Find catkin macros and libraries +## if COMPONENTS list like find_package(catkin REQUIRED COMPONENTS xyz) +## is used, also find other catkin packages +find_package(catkin 
REQUIRED) + +## System dependencies are found with CMake's conventions +# find_package(Boost REQUIRED COMPONENTS system) + + +## Uncomment this if the package has a setup.py. This macro ensures +## modules and global scripts declared therein get installed +## See http://ros.org/doc/api/catkin/html/user_guide/setup_dot_py.html +catkin_python_setup() + +################################################ +## Declare ROS messages, services and actions ## +################################################ + +## To declare and build messages, services or actions from within this +## package, follow these steps: +## * Let MSG_DEP_SET be the set of packages whose message types you use in +## your messages/services/actions (e.g. std_msgs, actionlib_msgs, ...). +## * In the file package.xml: +## * add a build_depend tag for "message_generation" +## * add a build_depend and a exec_depend tag for each package in MSG_DEP_SET +## * If MSG_DEP_SET isn't empty the following dependency has been pulled in +## but can be declared for certainty nonetheless: +## * add a exec_depend tag for "message_runtime" +## * In this file (CMakeLists.txt): +## * add "message_generation" and every package in MSG_DEP_SET to +## find_package(catkin REQUIRED COMPONENTS ...) +## * add "message_runtime" and every package in MSG_DEP_SET to +## catkin_package(CATKIN_DEPENDS ...) +## * uncomment the add_*_files sections below as needed +## and list every .msg/.srv/.action file to be processed +## * uncomment the generate_messages entry below +## * add every package in MSG_DEP_SET to generate_messages(DEPENDENCIES ...) + +## Generate messages in the 'msg' folder +# add_message_files( +# FILES +# Message1.msg +# Message2.msg +# ) + +## Generate services in the 'srv' folder +# add_service_files( +# FILES +# Service1.srv +# Service2.srv +# ) + +## Generate actions in the 'action' folder +# add_action_files( +# FILES +# Action1.action +# Action2.action +# ) + +## Generate added messages and services with any dependencies listed here +# generate_messages( +# DEPENDENCIES +# std_msgs # Or other packages containing msgs +# ) + +################################################ +## Declare ROS dynamic reconfigure parameters ## +################################################ + +## To declare and build dynamic reconfigure parameters within this +## package, follow these steps: +## * In the file package.xml: +## * add a build_depend and a exec_depend tag for "dynamic_reconfigure" +## * In this file (CMakeLists.txt): +## * add "dynamic_reconfigure" to +## find_package(catkin REQUIRED COMPONENTS ...) 
+## * uncomment the "generate_dynamic_reconfigure_options" section below +## and list every .cfg file to be processed + +## Generate dynamic reconfigure parameters in the 'cfg' folder +# generate_dynamic_reconfigure_options( +# cfg/DynReconf1.cfg +# cfg/DynReconf2.cfg +# ) + +################################### +## catkin specific configuration ## +################################### +## The catkin_package macro generates cmake config files for your package +## Declare things to be passed to dependent projects +## INCLUDE_DIRS: uncomment this if your package contains header files +## LIBRARIES: libraries you create in this project that dependent projects also need +## CATKIN_DEPENDS: catkin_packages dependent projects also need +## DEPENDS: system dependencies of this project that dependent projects also need +catkin_package( +# INCLUDE_DIRS include +# LIBRARIES tf_pcl +# CATKIN_DEPENDS other_catkin_pkg +# DEPENDS system_lib +) + +########### +## Build ## +########### + +## Specify additional locations of header files +## Your package locations should be listed before other locations +include_directories( +# include +# ${catkin_INCLUDE_DIRS} +) + +## Declare a C++ library +# add_library(${PROJECT_NAME} +# src/${PROJECT_NAME}/tf_pcl.cpp +# ) + +## Add cmake target dependencies of the library +## as an example, code may need to be generated before libraries +## either from message generation or dynamic reconfigure +# add_dependencies(${PROJECT_NAME} ${${PROJECT_NAME}_EXPORTED_TARGETS} ${catkin_EXPORTED_TARGETS}) + +## Declare a C++ executable +## With catkin_make all packages are built within a single CMake context +## The recommended prefix ensures that target names across packages don't collide +# add_executable(${PROJECT_NAME}_node src/tf_pcl_node.cpp) + +## Rename C++ executable without prefix +## The above recommended prefix causes long target names, the following renames the +## target back to the shorter version for ease of user use +## e.g. "rosrun someones_pkg node" instead of "rosrun someones_pkg someones_pkg_node" +# set_target_properties(${PROJECT_NAME}_node PROPERTIES OUTPUT_NAME node PREFIX "") + +## Add cmake target dependencies of the executable +## same as for the library above +# add_dependencies(${PROJECT_NAME}_node ${${PROJECT_NAME}_EXPORTED_TARGETS} ${catkin_EXPORTED_TARGETS}) + +## Specify libraries to link a library or executable target against +# target_link_libraries(${PROJECT_NAME}_node +# ${catkin_LIBRARIES} +# ) + +############# +## Install ## +############# + +# all install targets should use catkin DESTINATION variables +# See http://ros.org/doc/api/catkin/html/adv_user_guide/variables.html + +## Mark executable scripts (Python etc.) 
for installation +## in contrast to setup.py, you can choose the destination +# catkin_install_python(PROGRAMS +# scripts/my_python_script +# DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION} +# ) + +## Mark executables for installation +## See http://docs.ros.org/melodic/api/catkin/html/howto/format1/building_executables.html +# install(TARGETS ${PROJECT_NAME}_node +# RUNTIME DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION} +# ) + +## Mark libraries for installation +## See http://docs.ros.org/melodic/api/catkin/html/howto/format1/building_libraries.html +# install(TARGETS ${PROJECT_NAME} +# ARCHIVE DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION} +# LIBRARY DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION} +# RUNTIME DESTINATION ${CATKIN_GLOBAL_BIN_DESTINATION} +# ) + +## Mark cpp header files for installation +# install(DIRECTORY include/${PROJECT_NAME}/ +# DESTINATION ${CATKIN_PACKAGE_INCLUDE_DESTINATION} +# FILES_MATCHING PATTERN "*.h" +# PATTERN ".svn" EXCLUDE +# ) + +## Mark other files for installation (e.g. launch and bag files, etc.) +# install(FILES +# # myfile1 +# # myfile2 +# DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION} +# ) + +############# +## Testing ## +############# + +## Add gtest based cpp test target and link libraries +# catkin_add_gtest(${PROJECT_NAME}-test test/test_tf_pcl.cpp) +# if(TARGET ${PROJECT_NAME}-test) +# target_link_libraries(${PROJECT_NAME}-test ${PROJECT_NAME}) +# endif() + +## Add folders to be run by python nosetests +# catkin_add_nosetests(test) diff --git a/common/helpers/tf_pcl/package.xml b/common/helpers/tf_pcl/package.xml new file mode 100644 index 000000000..9397064b6 --- /dev/null +++ b/common/helpers/tf_pcl/package.xml @@ -0,0 +1,65 @@ + + + tf_pcl + 0.0.0 + The tf_pcl package + + + + + mattbarker + + + + + + TODO + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + catkin + sensor_msgs + geometry_msgs + scipy + tf2_ros + numpy + ros_numpy + + + + + + + + diff --git a/common/helpers/tf_pcl/setup.py b/common/helpers/tf_pcl/setup.py new file mode 100644 index 000000000..3d7d01673 --- /dev/null +++ b/common/helpers/tf_pcl/setup.py @@ -0,0 +1,8 @@ +#!/usr/bin/env python3 + +from distutils.core import setup +from catkin_pkg.python_setup import generate_distutils_setup + +setup_args = generate_distutils_setup(packages=["tf_pcl"], package_dir={"": "src"}) + +setup(**setup_args) diff --git a/common/helpers/tf_pcl/src/tf_pcl/__init__.py b/common/helpers/tf_pcl/src/tf_pcl/__init__.py new file mode 100644 index 000000000..cc2cd2035 --- /dev/null +++ b/common/helpers/tf_pcl/src/tf_pcl/__init__.py @@ -0,0 +1,59 @@ +#!/usr/bin/env python3 +from copy import deepcopy +import numpy as np +import tf2_ros as tf +import ros_numpy as rnp + +from scipy.spatial.transform import Rotation as R +from sensor_msgs.msg import PointCloud2 +from geometry_msgs.msg import TransformStamped + + +def pcl_transform( + pcl: PointCloud2, transform: TransformStamped, target_frame: str = "map" +) -> PointCloud2: + """Transforms a pointclound using a given transform message. + Needed as the tf2 transform function returns an un-orderded pcl. + Whilst we want an ordered pcl. + + Args: + pcl (PointCloud2): source pointcloud to transform. 
+ transform (TransformStamped): transform to apply + + Returns: + PointCloud2: transformed pointcloud + """ + + pcl_arr = deepcopy(rnp.point_cloud2.pointcloud2_to_array(pcl)) + + translation = transform.transform.translation + rotation_q = transform.transform.rotation + + rotation_matrix = R.from_quat( + [rotation_q.x, rotation_q.y, rotation_q.z, rotation_q.w] + ) + + pcl_x = pcl_arr["x"] + pcl_y = pcl_arr["y"] + pcl_z = pcl_arr["z"] + + pcl_x_y_z_arr = np.array([pcl_x, pcl_y, pcl_z]) + + C, H, W = pcl_x_y_z_arr.shape + + pcl_x_y_z_arr = pcl_x_y_z_arr.reshape(-1, H * W).T + + transformed_pcl = rotation_matrix.apply(pcl_x_y_z_arr) + np.array( + [translation.x, translation.y, translation.z] + ) + + transformed_pcl = transformed_pcl.T.reshape(C, H, W) + + pcl_arr["x"] = transformed_pcl[0] + pcl_arr["y"] = transformed_pcl[1] + pcl_arr["z"] = transformed_pcl[2] + + transformed_pcl = rnp.point_cloud2.array_to_pointcloud2( + pcl_arr, stamp=pcl.header.stamp, frame_id=target_frame + ) + return transformed_pcl diff --git a/common/speech/lasr_speech_recognition_whisper/scripts/microphone_tuning_test.py b/common/speech/lasr_speech_recognition_whisper/scripts/microphone_tuning_test.py old mode 100755 new mode 100644 diff --git a/common/speech/lasr_speech_recognition_whisper/scripts/test_microphones.py b/common/speech/lasr_speech_recognition_whisper/scripts/test_microphones.py index 418f9bb78..921097691 100644 --- a/common/speech/lasr_speech_recognition_whisper/scripts/test_microphones.py +++ b/common/speech/lasr_speech_recognition_whisper/scripts/test_microphones.py @@ -34,7 +34,7 @@ def main(args: dict) -> None: output_dir = args["output_dir"] r = sr.Recognizer() - with sr.Microphone(device_index=mic_index) as source: + with sr.Microphone(device_index=13, sample_rate=16000) as source: print("Say something!") audio = r.listen(source, timeout=5, phrase_time_limit=5) print("Finished listening") diff --git a/common/vision/lasr_vision_bodypix/src/lasr_vision_bodypix/bodypix.py b/common/vision/lasr_vision_bodypix/src/lasr_vision_bodypix/bodypix.py index db4c1cd60..1e34fc738 100644 --- a/common/vision/lasr_vision_bodypix/src/lasr_vision_bodypix/bodypix.py +++ b/common/vision/lasr_vision_bodypix/src/lasr_vision_bodypix/bodypix.py @@ -12,7 +12,7 @@ from sensor_msgs.msg import Image as SensorImage -from lasr_vision_msgs.msg import BodyPixMask, BodyPixKeypoint +from lasr_vision_msgs.msg import BodyPixMask, BodyPixKeypoint, BodyPixKeypointNormalized from lasr_vision_msgs.srv import ( BodyPixMaskDetectionRequest, BodyPixMaskDetectionResponse, @@ -23,7 +23,11 @@ import rospkg # model cache -loaded_models = {} +# preload resnet 50 model so that it won't waste the time +# doing that in the middle of the task. 
+loaded_models = { + "resnet50": load_model(download_model(BodyPixModelPaths.RESNET50_FLOAT_STRIDE_16)) +} r = rospkg.RosPack() @@ -143,6 +147,7 @@ def detect_keypoints( poses = result.get_poses() detected_keypoints: List[BodyPixKeypoint] = [] + detected_keypoints_normalized: List[BodyPixKeypointNormalized] = [] for pose in poses: for keypoint in pose.keypoints.values(): @@ -150,8 +155,13 @@ def detect_keypoints( x = int(keypoint.position.x) y = int(keypoint.position.y) try: - if mask[y, x] == 0: - continue + # if mask[y, x] == 0: + # continue + if not request.keep_out_of_bounds: + if x < 0.0 or y < 0.0: + continue + if x >= mask.shape[1] or y >= mask.shape[0]: + continue # Throws an error if the keypoint is out of bounds # but not clear what type (some TF stuff) except: @@ -160,6 +170,13 @@ def detect_keypoints( detected_keypoints.append( BodyPixKeypoint(keypoint_name=keypoint.part, x=x, y=y) ) + detected_keypoints_normalized.append( + BodyPixKeypointNormalized( + keypoint_name=keypoint.part, + x=float(x) / mask.shape[1], + y=float(y) / mask.shape[0], + ) + ) # publish to debug topic if debug_publisher is not None: @@ -179,7 +196,7 @@ def detect_keypoints( cv2.putText( coloured_mask, f"{keypoint.keypoint_name}", - (keypoint.x, keypoint.y), + (int(keypoint.x), int(keypoint.y)), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), @@ -188,4 +205,6 @@ def detect_keypoints( ) debug_publisher.publish(cv2_img.cv2_img_to_msg(coloured_mask)) - return BodyPixKeypointDetectionResponse(keypoints=detected_keypoints) + return BodyPixKeypointDetectionResponse( + keypoints=detected_keypoints, normalized_keypoints=detected_keypoints_normalized + ) diff --git a/common/vision/lasr_vision_clip/CMakeLists.txt b/common/vision/lasr_vision_clip/CMakeLists.txt index 739bfda15..1b6de6c4a 100644 --- a/common/vision/lasr_vision_clip/CMakeLists.txt +++ b/common/vision/lasr_vision_clip/CMakeLists.txt @@ -19,7 +19,7 @@ find_package(catkin REQUIRED catkin_virtualenv) catkin_python_setup() catkin_generate_virtualenv( INPUT_REQUIREMENTS requirements.in - PYTHON_INTERPRETER python3.10 + PYTHON_INTERPRETER python3.9 ) ################################################ ## Declare ROS messages, services and actions ## @@ -157,6 +157,9 @@ include_directories( ## in contrast to setup.py, you can choose the destination catkin_install_python(PROGRAMS nodes/vqa + nodes/img_encoder.py + nodes/learn_face.py + examples/encode_image_example.py DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION} ) diff --git a/common/vision/lasr_vision_clip/examples/encode_image_example.py b/common/vision/lasr_vision_clip/examples/encode_image_example.py new file mode 100644 index 000000000..3e45e8329 --- /dev/null +++ b/common/vision/lasr_vision_clip/examples/encode_image_example.py @@ -0,0 +1,25 @@ +#!/usr/bin/env python3 +import rospy +from typing import List +from lasr_vision_clip.clip_utils import load_model, encode_img +from lasr_vision_msgs.srv import ( + ClipImageEncoder, + ClipImageEncoderResponse, + ClipImageEncoderRequest, +) +from sensor_msgs.msg import Image +from cv2_img import msg_to_cv2_img + + +if __name__ == "__main__": + rospy.init_node("clip_encoder_test") + img_topic = "/usb_cam/image_raw" + rospy.wait_for_service("/clip/img_encoder") + clip_encoder = rospy.ServiceProxy("/clip/img_encoder", ClipImageEncoder) + while not rospy.is_shutdown(): + img_msg = rospy.wait_for_message(img_topic, Image) + request = ClipImageEncoderRequest(image_raw=img_msg) + response = clip_encoder(request) + rospy.loginfo(f"Received response: {response}") + + 
rospy.spin() diff --git a/common/vision/lasr_vision_clip/examples/test_person_detector.py b/common/vision/lasr_vision_clip/examples/test_person_detector.py new file mode 100755 index 000000000..a9d2e00d9 --- /dev/null +++ b/common/vision/lasr_vision_clip/examples/test_person_detector.py @@ -0,0 +1,107 @@ +#!/usr/bin/env python3 +from lasr_vision_msgs.srv import ( + ClipLearnFaceRequest, + ClipLearnFace, + ClipLearnFaceResponse, + CroppedDetection, + CroppedDetectionRequest, + CroppedDetectionResponse, + ClipRecogniseFaceRequest, + ClipRecogniseFace, + ClipRecogniseFaceResponse, +) +from lasr_vision_msgs.msg import CDRequest +from sensor_msgs.msg import Image +import cv2 +from cv2_img import msg_to_cv2_img, cv2_img_to_msg +import rospy +from typing import List +import numpy as np + + +if __name__ == "__main__": + rospy.init_node("clip_encoder_test") + cropped_detector = rospy.ServiceProxy("/vision/cropped_detection", CroppedDetection) + learn_face_service = rospy.ServiceProxy("/vision/learn_face", ClipLearnFace) + detect_face_service = rospy.ServiceProxy( + "/vision/face_detection", ClipRecogniseFace + ) + debug_pub = rospy.Publisher("/clip/recognise/debug", Image, queue_size=1) + input_str = "" + while True: + input_str = input("Please enter your name and hit enter to learn your face: ") + if input_str == "done": + break + person_1_imgs = [] + for i in range(10): + cropped_response = cropped_detector( + CroppedDetectionRequest( + [ + CDRequest( + method="centered", + use_mask=True, + object_names=["person"], + yolo_model="yolov8x-seg.pt", + yolo_model_confidence=0.8, + yolo_nms_threshold=0.4, + ) + ] + ) + ) + rospy.sleep(0.1) + try: + person_1_imgs.append(cropped_response.responses[0].cropped_imgs[0]) + except: + continue + + learn_face_service(ClipLearnFaceRequest(raw_imgs=person_1_imgs, name=input_str)) + + # Run inference + while not rospy.is_shutdown(): + cropped_response = cropped_detector( + CroppedDetectionRequest( + [ + CDRequest( + method="centered", + use_mask=True, + object_names=["person"], + yolo_model="yolov8x-seg.pt", + yolo_model_confidence=0.8, + yolo_nms_threshold=0.4, + ) + ] + ) + ) + + try: + names = [] + xywhs = [] + for cropped_img in cropped_response.responses[0].cropped_imgs: + response = detect_face_service( + ClipRecogniseFaceRequest(image_raw=cropped_img) + ) + names.append(response.name) + xywhs.append(response.xywh) + rospy.loginfo(f"Recognised face: {response.name}") + + # Add names to image + cv2_img = msg_to_cv2_img(cropped_response.responses[0].masked_img) + for name, xywh in zip(names, xywhs): + x, y, w, h = xywh[0], xywh[1], xywh[2], xywh[3] + cv2.rectangle(cv2_img, (x, y), (x + w, y + h), (0, 255, 0), 2) + cv2.putText( + cv2_img, + name, + (x, y), + cv2.FONT_HERSHEY_SIMPLEX, + 0.5, + (0, 255, 0), + 2, + cv2.LINE_AA, + ) + debug_pub.publish(cv2_img_to_msg(cv2_img)) + except Exception as e: + rospy.loginfo(e) + continue + + rospy.spin() diff --git a/common/vision/lasr_vision_clip/nodes/img_encoder.py b/common/vision/lasr_vision_clip/nodes/img_encoder.py new file mode 100644 index 000000000..958d329ba --- /dev/null +++ b/common/vision/lasr_vision_clip/nodes/img_encoder.py @@ -0,0 +1,47 @@ +#!/usr/bin/env python3 +import rospy +from typing import List +from lasr_vision_clip.clip_utils import load_model, encode_img +from lasr_vision_msgs.srv import ( + ClipImageEncoder, + ClipImageEncoderResponse, + ClipImageEncoderRequest, +) +from sensor_msgs.msg import Image +from cv2_img import msg_to_cv2_img + + +class EncoderService: + def __init__(self, model_device: 
str = "cuda") -> None: + """Caches the clip model. + + Args: + model_device (str, optional): device to load model onto. Defaults to "cuda". + + """ + + self._model = load_model(model_device) + self._debug_pub = rospy.Publisher( + "/clip/img_encoder/debug", Image, queue_size=1 + ) + rospy.loginfo("Clip encoder service started") + + def encode_image( + self, request: ClipImageEncoderRequest + ) -> ClipImageEncoderResponse: + """Encodes a given image to a vector. + + Returns: + ClipImageEncoderResponse: the encoded vector + """ + raw_image = request.image_raw + encoded_vector = encode_img(self._model, raw_image) + encoded_vector = encoded_vector.flatten() + return ClipImageEncoderResponse(encoded_vector=encoded_vector.tolist()) + + +if __name__ == "__main__": + rospy.init_node("clip_vqa_service") + service = EncoderService() + rospy.Service("/clip/img_encoder", ClipImageEncoder, service.encode_image) + rospy.spin() diff --git a/common/vision/lasr_vision_clip/nodes/learn_face.py b/common/vision/lasr_vision_clip/nodes/learn_face.py new file mode 100644 index 000000000..2d9185e46 --- /dev/null +++ b/common/vision/lasr_vision_clip/nodes/learn_face.py @@ -0,0 +1,8 @@ +import rospy +from lasr_vision_clip import FaceService + + +if __name__ == "__main__": + rospy.init_node("clip_vqa_service") + face_service = FaceService() + rospy.spin() diff --git a/common/vision/lasr_vision_clip/requirements.in b/common/vision/lasr_vision_clip/requirements.in index b03f1e5cc..7ee0832cf 100644 --- a/common/vision/lasr_vision_clip/requirements.in +++ b/common/vision/lasr_vision_clip/requirements.in @@ -1,2 +1,4 @@ +facenet-pytorch sentence-transformers -opencv-python \ No newline at end of file +opencv-python +opencv-contrib-python \ No newline at end of file diff --git a/common/vision/lasr_vision_clip/requirements.txt b/common/vision/lasr_vision_clip/requirements.txt index 7c61ba101..264d9aefb 100644 --- a/common/vision/lasr_vision_clip/requirements.txt +++ b/common/vision/lasr_vision_clip/requirements.txt @@ -1,15 +1,16 @@ -certifi==2024.2.2 # via requests +certifi==2024.7.4 # via requests charset-normalizer==3.3.2 # via requests -filelock==3.13.4 # via huggingface-hub, torch, transformers, triton -fsspec==2024.3.1 # via huggingface-hub, torch -huggingface-hub==0.22.2 # via sentence-transformers, tokenizers, transformers +facenet-pytorch==2.6.0 # via -r requirements.in +filelock==3.15.4 # via huggingface-hub, torch, transformers, triton +fsspec==2024.6.1 # via huggingface-hub, torch +huggingface-hub==0.23.4 # via sentence-transformers, tokenizers, transformers idna==3.7 # via requests -jinja2==3.1.3 # via torch -joblib==1.4.0 # via scikit-learn +jinja2==3.1.4 # via torch +joblib==1.4.2 # via scikit-learn markupsafe==2.1.5 # via jinja2 mpmath==1.3.0 # via sympy networkx==3.2.1 # via torch -numpy==1.26.4 # via opencv-python, scikit-learn, scipy, sentence-transformers, transformers +numpy==1.26.4 # via facenet-pytorch, opencv-contrib-python, opencv-python, scikit-learn, scipy, sentence-transformers, torchvision, transformers nvidia-cublas-cu12==12.1.3.1 # via nvidia-cudnn-cu12, nvidia-cusolver-cu12, torch nvidia-cuda-cupti-cu12==12.1.105 # via torch nvidia-cuda-nvrtc-cu12==12.1.105 # via torch @@ -20,24 +21,26 @@ nvidia-curand-cu12==10.3.2.106 # via torch nvidia-cusolver-cu12==11.4.5.107 # via torch nvidia-cusparse-cu12==12.1.0.106 # via nvidia-cusolver-cu12, torch nvidia-nccl-cu12==2.19.3 # via torch -nvidia-nvjitlink-cu12==12.4.127 # via nvidia-cusolver-cu12, nvidia-cusparse-cu12 +nvidia-nvjitlink-cu12==12.5.82 # 
via nvidia-cusolver-cu12, nvidia-cusparse-cu12 nvidia-nvtx-cu12==12.1.105 # via torch -opencv-python==4.9.0.80 # via -r requirements.in -packaging==24.0 # via huggingface-hub, transformers -pillow==10.3.0 # via sentence-transformers +opencv-contrib-python==4.10.0.84 # via -r requirements.in +opencv-python==4.10.0.84 # via -r requirements.in +packaging==24.1 # via huggingface-hub, transformers +pillow==10.2.0 # via facenet-pytorch, sentence-transformers, torchvision pyyaml==6.0.1 # via huggingface-hub, transformers -regex==2024.4.16 # via transformers -requests==2.31.0 # via huggingface-hub, transformers +regex==2024.5.15 # via transformers +requests==2.32.3 # via facenet-pytorch, huggingface-hub, transformers safetensors==0.4.3 # via transformers -scikit-learn==1.4.2 # via sentence-transformers -scipy==1.13.0 # via scikit-learn, sentence-transformers -sentence-transformers==2.7.0 # via -r requirements.in -sympy==1.12 # via torch -threadpoolctl==3.4.0 # via scikit-learn -tokenizers==0.15.2 # via transformers -torch==2.2.2 # via sentence-transformers -tqdm==4.66.2 # via huggingface-hub, sentence-transformers, transformers -transformers==4.39.3 # via sentence-transformers +scikit-learn==1.5.1 # via sentence-transformers +scipy==1.13.1 # via scikit-learn, sentence-transformers +sentence-transformers==3.0.1 # via -r requirements.in +sympy==1.12.1 # via torch +threadpoolctl==3.5.0 # via scikit-learn +tokenizers==0.19.1 # via transformers +torch==2.2.2 # via facenet-pytorch, sentence-transformers, torchvision +torchvision==0.17.2 # via facenet-pytorch +tqdm==4.66.4 # via facenet-pytorch, huggingface-hub, sentence-transformers, transformers +transformers==4.42.3 # via sentence-transformers triton==2.2.0 # via torch -typing-extensions==4.11.0 # via huggingface-hub, torch -urllib3==2.2.1 # via requests +typing-extensions==4.12.2 # via huggingface-hub, torch +urllib3==2.2.2 # via requests diff --git a/common/vision/lasr_vision_clip/src/lasr_vision_clip/__init__.py b/common/vision/lasr_vision_clip/src/lasr_vision_clip/__init__.py index 8b1378917..b783e5137 100644 --- a/common/vision/lasr_vision_clip/src/lasr_vision_clip/__init__.py +++ b/common/vision/lasr_vision_clip/src/lasr_vision_clip/__init__.py @@ -1 +1,2 @@ - +from .clip_utils import load_model, encode_img, load_face_model, infer +from .learn_face import FaceService diff --git a/common/vision/lasr_vision_clip/src/lasr_vision_clip/clip_utils.py b/common/vision/lasr_vision_clip/src/lasr_vision_clip/clip_utils.py index 2d98a2169..ddf9ffd74 100644 --- a/common/vision/lasr_vision_clip/src/lasr_vision_clip/clip_utils.py +++ b/common/vision/lasr_vision_clip/src/lasr_vision_clip/clip_utils.py @@ -6,7 +6,6 @@ import numpy as np from copy import deepcopy from sentence_transformers import SentenceTransformer, util - from sensor_msgs.msg import Image @@ -41,11 +40,42 @@ def run_clip( txt = model.encode(labels) img = model.encode(img) with torch.no_grad(): - torch cos_scores = util.cos_sim(img, txt) return cos_scores +def load_face_model(): + from transformers import AutoImageProcessor, AutoModel + + processor = AutoImageProcessor.from_pretrained("google/vit-base-patch16-224") + model = AutoModel.from_pretrained("google/vit-base-patch16-224").to("cuda") + + return processor, model + + +def infer(image, processor, model): + image = cv2_img.msg_to_cv2_img(image) + inputs = processor(image, return_tensors="pt").to("cuda") + outputs = model(**inputs) + # squeeze and flatten + outputs.pooler_output = outputs.pooler_output.squeeze(0).flatten() + return 
outputs.pooler_output.detach().cpu().numpy() + + +def encode_img(model, img_msg: Image) -> np.ndarray: + """Run the CLIP model. + + Args: + model (Any): clip model loaded into memory + img (np.ndarray): the image to query + + Returns: + np.ndarray: the image embedding + """ + img = cv2_img.msg_to_cv2_img(img_msg) + return model(img.unsqueeze(0)).detach().numpy() + + def query_image_stream( model: SentenceTransformer, answers: list[str], diff --git a/common/vision/lasr_vision_clip/src/lasr_vision_clip/learn_face.py b/common/vision/lasr_vision_clip/src/lasr_vision_clip/learn_face.py new file mode 100644 index 000000000..90945fb53 --- /dev/null +++ b/common/vision/lasr_vision_clip/src/lasr_vision_clip/learn_face.py @@ -0,0 +1,112 @@ +#!/usr/bin/env python3 +import os +import cv2 +import rospy +from typing import Dict +import numpy as np +import rospkg +from lasr_vision_msgs.srv import ( + ClipRecogniseFaceRequest, + ClipRecogniseFaceResponse, + ClipLearnFace, + ClipRecogniseFace, + ClipLearnFaceRequest, + ClipLearnFaceResponse, +) +from sensor_msgs.msg import Image +from cv2_img import msg_to_cv2_img, cv2_img_to_msg +from lasr_vision_clip import load_face_model, encode_img, infer + + +class FaceService: + def __init__(self, similarity_threshold: float = 6.0) -> None: + self._face_classifier = cv2.CascadeClassifier( + os.path.join( + rospkg.RosPack().get_path("lasr_vision_clip"), + "data", + "haarcascade_frontalface_default.xml", + ) + ) + self.learned_faces: Dict[str, np.ndarray] = {} + self._similarity_threshold = similarity_threshold + self.processor, self.model = load_face_model() + self._face_pub = rospy.Publisher("/clip/face_detection", Image, queue_size=1) + + rospy.Service("/vision/face_detection", ClipRecogniseFace, self.face_detection) + rospy.Service("/vision/learn_face", ClipLearnFace, self.learn_face) + + rospy.loginfo("Face detector service started") + + def _detect_faces(self, img: np.ndarray): + faces = self._face_classifier.detectMultiScale( + img, 1.1, minNeighbors=5, minSize=(10, 10) + ) + return faces + + def face_detection( + self, req: ClipRecogniseFaceRequest + ) -> ClipRecogniseFaceResponse: + img = req.image_raw + cv2_img = msg_to_cv2_img(img) + # cv2_img = cv2.cvtColor(cv2_img, cv2.COLOR_BGR2GRAY) + try: + faces = self._detect_faces(cv2_img) + + # Assume only one face in image + encoded_face = None + closest_name = "Unknown" + min_dist = float("inf") + min_xywh = None + for x, y, w, h in faces: + cv2_face = cv2_img[y : y + h, x : x + w] + # cv2_face = cv2.cvtColor(cv2_face, cv2.COLOR_GRAY2BGR) + face_msg = cv2_img_to_msg(cv2_face) + self._face_pub.publish(face_msg) + encoded_face = infer( + cv2_img_to_msg(cv2_img), self.processor, self.model + ) + encoded_face = encoded_face.flatten() + for name, face in self.learned_faces.items(): + distance = np.linalg.norm(encoded_face - face) + rospy.loginfo(f"Distance to {name} : {distance}") + if distance < min_dist: + min_dist = distance + min_xywh = [x, y, w, h] + closest_name = name + return ClipRecogniseFaceResponse( + name=closest_name, distance=min_dist, xywh=min_xywh + ) + except Exception as e: + rospy.loginfo(e) + return ClipRecogniseFaceResponse(name="Unknown", distance=None, xywh=None) + + def learn_face(self, request: ClipLearnFaceRequest) -> ClipLearnFaceResponse: + imgs = request.raw_imgs + + embedding_vectors = [] + for img in imgs: + cv2_img = msg_to_cv2_img(img) + # cv2_img = cv2.cvtColor(cv2_img, cv2.COLOR_BGR2GRAY) + rospy.loginfo(f"Image shape: {cv2_img.shape}") + try: + faces = self._detect_faces(cv2_img) + 
except Exception as e: # No face detected + rospy.loginfo(e) + continue + for x, y, w, h in faces: + cv2_face = cv2_img[y : y + h, x : x + w] + # cv2_face = cv2.cvtColor(cv2_face, cv2.COLOR_GRAY2BGR) + face_msg = cv2_img_to_msg(cv2_face) + self._face_pub.publish(face_msg) + encoded_face = infer( + cv2_img_to_msg(cv2_img), self.processor, self.model + ) + encoded_face = encoded_face.flatten() + embedding_vectors.append(encoded_face) + + embedding_vectors = np.array(embedding_vectors) + embedding_vector = np.mean(embedding_vectors, axis=0) + self.learned_faces[request.name] = embedding_vector + rospy.loginfo(f"Learned {request.name}") + + return ClipLearnFaceResponse() diff --git a/common/vision/lasr_vision_cropped_detection/examples/request.py b/common/vision/lasr_vision_cropped_detection/examples/request.py index 368c2c658..7dbb0fc8a 100644 --- a/common/vision/lasr_vision_cropped_detection/examples/request.py +++ b/common/vision/lasr_vision_cropped_detection/examples/request.py @@ -10,11 +10,11 @@ while not rospy.is_shutdown(): request_srv = CroppedDetectionRequest() request = CDRequest() - request.method = "centered" + request.method = "closest" request.use_mask = True request.yolo_model = "yolov8x-seg.pt" request.yolo_model_confidence = 0.5 request.yolo_nms_threshold = 0.3 - request.object_names = ["person", "bottle", "chair"] + request.object_names = ["person"] request_srv.requests = [request] response = service(request_srv) diff --git a/common/vision/lasr_vision_cropped_detection/launch/setup.launch b/common/vision/lasr_vision_cropped_detection/launch/cropped_detection.launch similarity index 66% rename from common/vision/lasr_vision_cropped_detection/launch/setup.launch rename to common/vision/lasr_vision_cropped_detection/launch/cropped_detection.launch index bdacd9b0a..05db9d185 100644 --- a/common/vision/lasr_vision_cropped_detection/launch/setup.launch +++ b/common/vision/lasr_vision_cropped_detection/launch/cropped_detection.launch @@ -1,4 +1,4 @@ - - a + + \ No newline at end of file diff --git a/common/vision/lasr_vision_cropped_detection/src/lasr_vision_cropped_detection/cropped_detection.py b/common/vision/lasr_vision_cropped_detection/src/lasr_vision_cropped_detection/cropped_detection.py index ad282452a..e9360cc57 100644 --- a/common/vision/lasr_vision_cropped_detection/src/lasr_vision_cropped_detection/cropped_detection.py +++ b/common/vision/lasr_vision_cropped_detection/src/lasr_vision_cropped_detection/cropped_detection.py @@ -3,10 +3,11 @@ import numpy as np import cv2 import rospy - +from shapely.validation import explain_validity from sensor_msgs.msg import Image, PointCloud2 from geometry_msgs.msg import Point, PoseWithCovarianceStamped, Polygon from shapely.geometry.polygon import Polygon as ShapelyPolygon +from shapely.geometry.point import Point as ShapelyPoint from lasr_vision_msgs.msg import ( Detection, @@ -58,9 +59,6 @@ def _2d_bbox_crop( else: raise ValueError(f"Invalid 2D crop_method: {crop_method}") - if len(detections) == 0: - raise ValueError("No detections found") - distances = [ np.sqrt((x_to_compare - det.xywh[0]) ** 2 + (y_to_compare - det.xywh[1]) ** 2) for det in detections @@ -120,12 +118,6 @@ def _2d_mask_crop( else: raise ValueError(f"Invalid 2D crop_method: {crop_method}") - if len(detections) == 0: - raise ValueError("No detections found") - - if len(detections[0].xyseg) == 0: - raise ValueError("No segmentation found") - distances = [ np.sqrt((x_to_compare - det.xywh[0]) ** 2 + (y_to_compare - det.xywh[1]) ** 2) for det in detections @@ 
-171,9 +163,6 @@ def _3d_bbox_crop( List[np.ndarray]: List of cropped images. """ - if len(detections) == 0: - raise ValueError("No detections found") - distances = [ np.sqrt( (robot_location.x - det.point.x) ** 2 @@ -228,8 +217,6 @@ def _3d_mask_crop( Tuple[List[np.ndarray], np.ndarray, List[Detection3D]]: Tuple of cropped images, the combined mask, and the detections. """ - if len(detections) == 0: - raise ValueError("No detections found") distances = [ np.sqrt( (robot_location.x - det.point.x) ** 2 @@ -288,10 +275,20 @@ def filter_detections_by_polygon( filtered_detections: List[Detection3D] = [] for index, polygon in enumerate(polygons): area_polygon = ShapelyPolygon([(point.x, point.y) for point in polygon.points]) + print(f"Area polygon: {area_polygon}") + print(f"Polygon Area: {area_polygon.area}") + print(f"Polygon is valid: {area_polygon.is_valid}") + print(explain_validity(area_polygon)) for detection in detections: - if area_polygon.contains(Point(detection.point.x, detection.point.y)): + print(f"Point: {detection.point}") + if area_polygon.contains( + ShapelyPoint(detection.point.x, detection.point.y) + ): + print(f"Detection {detection} is within polygon {index}") detection_polygon_ids.append(index) filtered_detections.append(detection) + else: + print(f"Detection {detection} is not within polygon {index}") return filtered_detections, detection_polygon_ids @@ -391,7 +388,7 @@ def process_single_detection_request( cropped_images, detections, distances = _3d_bbox_crop( pointcloud_rgb, request.method, - request.robot_location, + robot_location, detections, ) response.detections_3d = detections @@ -406,6 +403,7 @@ def process_single_detection_request( ] debug_publisher = rospy.Publisher(debug_topic, Image, queue_size=10) + closest_pub = rospy.Publisher(debug_topic + "_closest", Image, queue_size=10) combined_mask_debug_publisher = rospy.Publisher( debug_topic + "_mask", Image, queue_size=10 ) @@ -414,36 +412,44 @@ def process_single_detection_request( if combined_mask is not None: # Add distances to the image for i, (dist, detect) in enumerate(zip(distances, detections)): - cv2.putText( - combined_mask, - f"Dist: {round(dist, 2)}", - (detect.xywh[0], detect.xywh[1]), - cv2.FONT_HERSHEY_SIMPLEX, - 1, - (0, 255, 0), - 2, - cv2.LINE_AA, - ) + continue + # cv2.putText( + # combined_mask, + # f"Dist: {round(dist, 2)}", + # (detect.xywh[0], detect.xywh[1]), + # cv2.FONT_HERSHEY_SIMPLEX, + # 1, + # (0, 255, 0), + # 2, + # cv2.LINE_AA, + # ) combined_mask_debug_publisher.publish(cv2_img_to_msg(combined_mask)) response.masked_img = cv2_img_to_msg(combined_mask) + try: + closest_pub.publish(cv2_img_to_msg(cropped_images[0])) + except IndexError: + rospy.logwarn("No detections found") response.distances = distances - - debug_image = np.hstack(cropped_images) - # Add distances to the image - for i, dist in enumerate(distances): - cv2.putText( - debug_image, - f"Dist: {round(dist, 2)}", - (i * cropped_images[0].shape[0] + 150, 50), - cv2.FONT_HERSHEY_SIMPLEX, - 1, - (0, 255, 0), - 2, - cv2.LINE_AA, - ) - - debug_publisher.publish(cv2_img_to_msg(debug_image)) + try: + print("...") + # debug_image = np.hstack(cropped_images) + # # Add distances to the image + # for i, dist in enumerate(distances): + # cv2.putText( + # debug_image, + # f"Dist: {round(dist, 2)}", + # (i * cropped_images[0].shape[0] + 150, 50), + # cv2.FONT_HERSHEY_SIMPLEX, + # 1, + # (0, 255, 0), + # 2, + # cv2.LINE_AA, + # ) + + # debug_publisher.publish(cv2_img_to_msg(debug_image)) + except ValueError: + rospy.logwarn("No 
detections found") return response diff --git a/common/vision/lasr_vision_deepface/launch/camera.launch b/common/vision/lasr_vision_deepface/launch/camera.launch index a9c85fbb1..47b66da94 100644 --- a/common/vision/lasr_vision_deepface/launch/camera.launch +++ b/common/vision/lasr_vision_deepface/launch/camera.launch @@ -6,9 +6,7 @@ - - - + diff --git a/common/vision/lasr_vision_deepface/launch/service.launch b/common/vision/lasr_vision_deepface/launch/service.launch index 337c5ae88..f749d479f 100644 --- a/common/vision/lasr_vision_deepface/launch/service.launch +++ b/common/vision/lasr_vision_deepface/launch/service.launch @@ -5,7 +5,5 @@ - - - + \ No newline at end of file diff --git a/common/vision/lasr_vision_deepface/nodes/service b/common/vision/lasr_vision_deepface/nodes/service index 0bffa407e..ccc966370 100644 --- a/common/vision/lasr_vision_deepface/nodes/service +++ b/common/vision/lasr_vision_deepface/nodes/service @@ -18,71 +18,58 @@ from lasr_vision_msgs.srv import ( rospy.init_node("recognise_service") -# Determine variables -DEBUG = rospy.get_param("~debug", False) - recognise_debug_publishers = {} learn_face_debug_publishers = {} detect_faces_debug_publisher = None -if DEBUG: - recognise_debug_publisher = rospy.Publisher("/recognise/debug", Image, queue_size=1) - learn_face_debug_publisher = rospy.Publisher( - "/learn_face/debug", Image, queue_size=1 - ) - cropped_face_publisher = rospy.Publisher( - "/learn_face/debug/cropped_query_face", Image, queue_size=1 - ) - detect_faces_debug_publisher = rospy.Publisher( - "/detect_faces/debug", Image, queue_size=1 - ) +recognise_debug_publisher = rospy.Publisher("/recognise/debug", Image, queue_size=1) +learn_face_debug_publisher = rospy.Publisher("/learn_face/debug", Image, queue_size=1) +cropped_face_publisher = rospy.Publisher( + "/learn_face/debug/cropped_query_face", Image, queue_size=1 +) +detect_faces_debug_publisher = rospy.Publisher( + "/detect_faces/debug", Image, queue_size=1 +) def recognise(request: RecogniseRequest) -> RecogniseResponse: - debug_publisher = None - similar_face_debug_publisher = None - cropped_face_publisher = None - if DEBUG: - if request.dataset in recognise_debug_publishers: - debug_publisher, similar_face_debug_publisher, cropped_face_publisher = ( - recognise_debug_publishers[request.dataset] - ) - else: - topic_name = re.sub(r"[\W_]+", "", request.dataset) - debug_publisher = rospy.Publisher( - f"/recognise/debug/{topic_name}", Image, queue_size=1 - ) - similar_face_debug_publisher = rospy.Publisher( - f"/recognise/debug/{topic_name}/similar_face", Image, queue_size=1 - ) - cropped_face_publisher = rospy.Publisher( - "/recognise/debug/cropped_query_face", Image, queue_size=1 - ) - recognise_debug_publishers[request.dataset] = ( - debug_publisher, - similar_face_debug_publisher, - cropped_face_publisher, - ) + if request.dataset in recognise_debug_publishers: + debug_publisher, similar_face_debug_publisher, cropped_face_publisher = ( + recognise_debug_publishers[request.dataset] + ) + else: + topic_name = re.sub(r"[\W_]+", "", request.dataset) + debug_publisher = rospy.Publisher( + f"/recognise/debug/{topic_name}", Image, queue_size=1 + ) + similar_face_debug_publisher = rospy.Publisher( + f"/recognise/debug/{topic_name}/similar_face", Image, queue_size=1 + ) + cropped_face_publisher = rospy.Publisher( + "/recognise/debug/cropped_query_face", Image, queue_size=1 + ) + recognise_debug_publishers[request.dataset] = ( + debug_publisher, + similar_face_debug_publisher, + cropped_face_publisher, + ) 
return face_recognition.recognise( request, debug_publisher, similar_face_debug_publisher, cropped_face_publisher ) def learn_face(request: LearnFaceRequest) -> LearnFaceResponse: - debug_publisher = None - if DEBUG: - if request.dataset in learn_face_debug_publishers: - debug_publisher = learn_face_debug_publishers[request.dataset] - else: - topic_name = re.sub(r"[\W_]+", "", request.dataset) - debug_publisher = rospy.Publisher( - f"/learn_face/debug/{topic_name}", Image, queue_size=1 - ) + if request.dataset in learn_face_debug_publishers: + debug_publisher = learn_face_debug_publishers[request.dataset] + else: + topic_name = re.sub(r"[\W_]+", "", request.dataset) + debug_publisher = rospy.Publisher( + f"/learn_face/debug/{topic_name}", Image, queue_size=1 + ) face_recognition.create_dataset( - "/xtion/rgb/image_raw", request.dataset, request.name, - request.n_images, + request.images, debug_publisher, ) return LearnFaceResponse() diff --git a/common/vision/lasr_vision_deepface/src/lasr_vision_deepface/deepface.py b/common/vision/lasr_vision_deepface/src/lasr_vision_deepface/deepface.py index d96fe664e..2a888636c 100644 --- a/common/vision/lasr_vision_deepface/src/lasr_vision_deepface/deepface.py +++ b/common/vision/lasr_vision_deepface/src/lasr_vision_deepface/deepface.py @@ -18,7 +18,7 @@ from sensor_msgs.msg import Image -from typing import Union +from typing import Union, List DATASET_ROOT = os.path.join( rospkg.RosPack().get_path("lasr_vision_deepface"), "datasets" @@ -80,31 +80,26 @@ def _extract_face(cv_im: Mat) -> Union[Mat, None]: def create_dataset( - topic: str, dataset: str, name: str, - size: int, - debug_publisher: Union[rospy.Publisher, None], + images: List[Image], + debug_publisher: rospy.Publisher, ) -> None: dataset_path = os.path.join(DATASET_ROOT, dataset, name) if not os.path.exists(dataset_path): os.makedirs(dataset_path) - rospy.loginfo(f"Taking {size} pictures of {name} and saving to {dataset_path}") - - images = [] - for i in range(size): - img_msg = rospy.wait_for_message(topic, Image) - cv_im = cv2_img.msg_to_cv2_img(img_msg) + rospy.loginfo( + f"Received {len(images)} pictures of {name} and saving to {dataset_path}" + ) + cv_images: List[Mat] = [cv2_img.msg_to_cv2_img(img) for img in images] + for i, cv_im in enumerate(cv_images): face_cropped_cv_im = _extract_face(cv_im) if face_cropped_cv_im is None: continue - cv2.imwrite(os.path.join(dataset_path, f"{name}_{i + 1}.png"), face_cropped_cv_im) # type: ignore - rospy.loginfo(f"Took picture {i + 1}") - images.append(face_cropped_cv_im) - if debug_publisher is not None: - debug_publisher.publish( - cv2_img.cv2_img_to_msg(create_image_collage(images)) - ) + cv2.imwrite( + os.path.join(dataset_path, f"{name}_{i + 1}.png"), face_cropped_cv_im + ) + debug_publisher.publish(cv2_img.cv2_img_to_msg(create_image_collage(cv_images))) # Force retraining DeepFace.find( @@ -118,9 +113,9 @@ def create_dataset( def recognise( request: RecogniseRequest, - debug_publisher: Union[rospy.Publisher, None], - debug_inference_pub: Union[rospy.Publisher, None], - cropped_detect_pub: Union[rospy.Publisher, None], + debug_publisher: rospy.Publisher, + debug_inference_pub: rospy.Publisher, + cropped_detect_pub: rospy.Publisher, ) -> RecogniseResponse: # Decode the image rospy.loginfo("Decoding") @@ -159,8 +154,7 @@ def recognise( cropped_image = cv_im[:][y : y + h, x : x + w] - if cropped_detect_pub is not None: - cropped_detect_pub.publish(cv2_img.cv2_img_to_msg(cropped_image)) + 
cropped_detect_pub.publish(cv2_img.cv2_img_to_msg(cropped_image)) # Draw bounding boxes and labels for debugging cv2.rectangle(cv_im, (x, y), (x + w, y + h), (0, 0, 255), 2) @@ -175,26 +169,24 @@ def recognise( ) # publish to debug topic - if debug_publisher is not None: - debug_publisher.publish(cv2_img.cv2_img_to_msg(cv_im)) - if debug_inference_pub is not None: - result = pd.concat(result) - # check for empty result - if not result.empty: - result_paths = list(result["identity"]) - if len(result_paths) > 5: - result_paths = result_paths[:5] - result_images = [cv2.imread(path) for path in result_paths] - debug_inference_pub.publish( - cv2_img.cv2_img_to_msg(create_image_collage(result_images)) - ) + debug_publisher.publish(cv2_img.cv2_img_to_msg(cv_im)) + result = pd.concat(result) + # check for empty result + if not result.empty: + result_paths = list(result["identity"]) + if len(result_paths) > 5: + result_paths = result_paths[:5] + result_images = [cv2.imread(path) for path in result_paths] + debug_inference_pub.publish( + cv2_img.cv2_img_to_msg(create_image_collage(result_images)) + ) return response def detect_faces( request: DetectFacesRequest, - debug_publisher: Union[rospy.Publisher, None], + debug_publisher: rospy.Publisher, ) -> DetectFacesResponse: cv_im = cv2_img.msg_to_cv2_img(request.image_raw) @@ -204,7 +196,9 @@ def detect_faces( faces = DeepFace.extract_faces( cv_im, detector_backend="mtcnn", enforce_detection=True ) - except ValueError: + except ValueError as e: + rospy.loginfo(f"Error: {e}") + debug_publisher.publish(cv2_img.cv2_img_to_msg(cv_im)) return response for i, face in enumerate(faces): @@ -233,7 +227,6 @@ def detect_faces( ) # publish to debug topic - if debug_publisher is not None: - debug_publisher.publish(cv2_img.cv2_img_to_msg(cv_im)) + debug_publisher.publish(cv2_img.cv2_img_to_msg(cv_im)) return response diff --git a/common/vision/lasr_vision_msgs/CMakeLists.txt b/common/vision/lasr_vision_msgs/CMakeLists.txt index 19c162e93..4b71ef21d 100644 --- a/common/vision/lasr_vision_msgs/CMakeLists.txt +++ b/common/vision/lasr_vision_msgs/CMakeLists.txt @@ -48,6 +48,7 @@ add_message_files( Detection.msg Detection3D.msg BodyPixKeypoint.msg + BodyPixKeypointNormalized.msg BodyPixMask.msg CDRequest.msg CDResponse.msg @@ -68,6 +69,9 @@ add_service_files( DetectFaces.srv CheckKnownPeople.srv CroppedDetection.srv + ClipImageEncoder.srv + ClipLearnFace.srv + ClipRecogniseFace.srv ) # Generate actions in the 'action' folder diff --git a/common/vision/lasr_vision_msgs/msg/BodyPixKeypointNormalized.msg b/common/vision/lasr_vision_msgs/msg/BodyPixKeypointNormalized.msg new file mode 100644 index 000000000..81faad1a1 --- /dev/null +++ b/common/vision/lasr_vision_msgs/msg/BodyPixKeypointNormalized.msg @@ -0,0 +1,8 @@ +# Keypoint.msg + +# name of the keypoint +string keypoint_name + +# the x and y coordinates of the body part +float32 x +float32 y diff --git a/common/vision/lasr_vision_msgs/msg/CDResponse.msg b/common/vision/lasr_vision_msgs/msg/CDResponse.msg index 03091fcfa..95911c281 100644 --- a/common/vision/lasr_vision_msgs/msg/CDResponse.msg +++ b/common/vision/lasr_vision_msgs/msg/CDResponse.msg @@ -22,4 +22,4 @@ uint8[] polygon_ids sensor_msgs/Image rgb_image # The pointcloud used for the 3D crop -sensor_msgs/PointCloud2 pointcloud \ No newline at end of file +sensor_msgs/PointCloud2 pointcloud \ No newline at end of file diff --git a/common/vision/lasr_vision_msgs/srv/BodyPixKeypointDetection.srv b/common/vision/lasr_vision_msgs/srv/BodyPixKeypointDetection.srv 
index ca056b268..04500b732 100644 --- a/common/vision/lasr_vision_msgs/srv/BodyPixKeypointDetection.srv +++ b/common/vision/lasr_vision_msgs/srv/BodyPixKeypointDetection.srv @@ -7,6 +7,12 @@ string dataset # How certain the detection should be to include float32 confidence +# Whether to return keypoints that are out of bound +bool keep_out_of_bounds + --- # keypoints lasr_vision_msgs/BodyPixKeypoint[] keypoints + +# keypoints +lasr_vision_msgs/BodyPixKeypointNormalized[] normalized_keypoints diff --git a/common/vision/lasr_vision_msgs/srv/ClipImageEncoder.srv b/common/vision/lasr_vision_msgs/srv/ClipImageEncoder.srv new file mode 100644 index 000000000..dae50b645 --- /dev/null +++ b/common/vision/lasr_vision_msgs/srv/ClipImageEncoder.srv @@ -0,0 +1,8 @@ +# Image to encode +sensor_msgs/Image image_raw + +--- + +# Encoded image vector +float32[] encoded_vector + diff --git a/common/vision/lasr_vision_msgs/srv/ClipLearnFace.srv b/common/vision/lasr_vision_msgs/srv/ClipLearnFace.srv new file mode 100644 index 000000000..a41a904c2 --- /dev/null +++ b/common/vision/lasr_vision_msgs/srv/ClipLearnFace.srv @@ -0,0 +1,7 @@ +# Images to use to learn face +sensor_msgs/Image[] raw_imgs + +# Name of person to be learned +string name + +--- \ No newline at end of file diff --git a/common/vision/lasr_vision_msgs/srv/ClipRecogniseFace.srv b/common/vision/lasr_vision_msgs/srv/ClipRecogniseFace.srv new file mode 100644 index 000000000..a191f2dc3 --- /dev/null +++ b/common/vision/lasr_vision_msgs/srv/ClipRecogniseFace.srv @@ -0,0 +1,13 @@ +# Raw image to run inference on +sensor_msgs/Image image_raw + +--- + +# Name of face +string name + +# Distance from detected face +float32 distance + +# Face bounding box +int32[] xywh \ No newline at end of file diff --git a/common/vision/lasr_vision_msgs/srv/LearnFace.srv b/common/vision/lasr_vision_msgs/srv/LearnFace.srv index 9410424fa..146f4aa90 100644 --- a/common/vision/lasr_vision_msgs/srv/LearnFace.srv +++ b/common/vision/lasr_vision_msgs/srv/LearnFace.srv @@ -4,7 +4,7 @@ string dataset # Name to associate string name -# Number of images to take -int32 n_images +# Images +sensor_msgs/Image[] images --- diff --git a/common/vision/lasr_vision_yolov8/src/lasr_vision_yolov8/yolo.py b/common/vision/lasr_vision_yolov8/src/lasr_vision_yolov8/yolo.py index 4171de2de..d1e7065b7 100644 --- a/common/vision/lasr_vision_yolov8/src/lasr_vision_yolov8/yolo.py +++ b/common/vision/lasr_vision_yolov8/src/lasr_vision_yolov8/yolo.py @@ -146,6 +146,9 @@ def detect_3d( width=request.pcl.width, ) detection.point = Point(*centroid) + rospy.loginfo( + f"Detected point: {detection.point} of object {detection.name}" + ) if debug_point_publisher is not None: markers.create_and_publish_marker( diff --git a/skills/CMakeLists.txt b/skills/CMakeLists.txt index 6a5978ed6..1b97b6944 100644 --- a/skills/CMakeLists.txt +++ b/skills/CMakeLists.txt @@ -161,6 +161,7 @@ include_directories( ## in contrast to setup.py, you can choose the destination catkin_install_python(PROGRAMS scripts/unit_test_describe_people.py + scripts/unit_test_adjust_camera.py DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION} ) diff --git a/skills/config/motions.yaml b/skills/config/motions.yaml index f85a155d1..9409d156b 100644 --- a/skills/config/motions.yaml +++ b/skills/config/motions.yaml @@ -79,4 +79,66 @@ play_motion: joints: [arm_1_joint, arm_2_joint, arm_3_joint, arm_4_joint, arm_5_joint, arm_6_joint, arm_7_joint] points: - positions: [2.63, 0.10, -3.21, 1.61, 1.53, 0.00, 0.13] - time_from_start: 0.0 \ No newline at end of 
file + time_from_start: 0.0 + + u3l: + joints: [torso_lift_joint, head_1_joint, head_2_joint] + points: + - positions: [0.4, 0.35, 0.15] + time_from_start: 0.0 + u3m: + joints: [torso_lift_joint, head_1_joint, head_2_joint] + points: + - positions: [0.4, 0.0, 0.15] + time_from_start: 0.0 + u3r: + joints: [torso_lift_joint, head_1_joint, head_2_joint] + points: + - positions: [0.4, -0.35, 0.15] + time_from_start: 0.0 + + u2l: + joints: [torso_lift_joint, head_1_joint, head_2_joint] + points: + - positions: [0.4, 0.35, 0.05] + time_from_start: 0.0 + u2m: + joints: [torso_lift_joint, head_1_joint, head_2_joint] + points: + - positions: [0.4, 0.0, 0.05] + time_from_start: 0.0 + u2r: + joints: [torso_lift_joint, head_1_joint, head_2_joint] + points: + - positions: [0.4, -0.35, 0.05] + time_from_start: 0.0 + u1l: + joints: [torso_lift_joint, head_1_joint, head_2_joint] + points: + - positions: [0.3, 0.35, 0.05] + time_from_start: 0.0 + u1m: + joints: [torso_lift_joint, head_1_joint, head_2_joint] + points: + - positions: [0.3, 0.0, 0.05] + time_from_start: 0.0 + u1r: + joints: [torso_lift_joint, head_1_joint, head_2_joint] + points: + - positions: [0.3, -0.35, 0.05] + time_from_start: 0.0 + ml: + joints: [torso_lift_joint, head_1_joint, head_2_joint] + points: + - positions: [0.2, 0.35, 0.0] + time_from_start: 0.0 + mm: + joints: [torso_lift_joint, head_1_joint, head_2_joint] + points: + - positions: [0.2, 0.0, 0.0] + time_from_start: 0.0 + mr: + joints: [torso_lift_joint, head_1_joint, head_2_joint] + points: + - positions: [0.2, -0.35, 0.0] + time_from_start: 0.0 diff --git a/skills/launch/unit_test_describe_people.launch b/skills/launch/unit_test_describe_people.launch index 649279233..81d43f9b4 100644 --- a/skills/launch/unit_test_describe_people.launch +++ b/skills/launch/unit_test_describe_people.launch @@ -16,10 +16,12 @@ + + - +
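The reworked LearnFace interface above replaces the n_images count with a sensor_msgs/Image[] images field, and create_dataset now receives those images directly instead of subscribing to a camera topic inside the service. A minimal client sketch under these assumptions (the /learn_face service name and the ten-frame count are illustrative; only the request fields and the camera topic, previously hard-coded in create_dataset, come from this patch):

#!/usr/bin/env python3
# Sketch of a caller for the updated LearnFace service: frame capture has moved
# out of the service, so the client collects images itself and sends them in the
# request. The "/learn_face" service name and the frame count are assumptions.
import rospy
from sensor_msgs.msg import Image
from lasr_vision_msgs.srv import LearnFace, LearnFaceRequest

rospy.init_node("learn_face_client_example")

# Collect the frames the old service used to grab via rospy.wait_for_message
images = [rospy.wait_for_message("/xtion/rgb/image_raw", Image) for _ in range(10)]

rospy.wait_for_service("/learn_face")
learn_face = rospy.ServiceProxy("/learn_face", LearnFace)
learn_face(LearnFaceRequest(dataset="my_dataset", name="alice", images=images))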
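The new ClipLearnFace and ClipRecogniseFace definitions only describe the service interface; no server node is included in this patch. A hypothetical client, assuming /clip/learn_face and /clip/recognise_face service names and using only the request/response fields from the .srv files, might look like:

#!/usr/bin/env python3
# Hypothetical client for the new CLIP face services. Field names come from the
# .srv files in this patch; the service names and the existence of a server
# node are assumptions.
import rospy
from sensor_msgs.msg import Image
from lasr_vision_msgs.srv import ClipLearnFace, ClipRecogniseFace

rospy.init_node("clip_face_client_example")

faces = [rospy.wait_for_message("/xtion/rgb/image_raw", Image) for _ in range(5)]

rospy.wait_for_service("/clip/learn_face")
learn = rospy.ServiceProxy("/clip/learn_face", ClipLearnFace)
learn(raw_imgs=faces, name="alice")

rospy.wait_for_service("/clip/recognise_face")
recognise = rospy.ServiceProxy("/clip/recognise_face", ClipRecogniseFace)
result = recognise(image_raw=rospy.wait_for_message("/xtion/rgb/image_raw", Image))
rospy.loginfo(f"Matched {result.name} at distance {result.distance}, bbox {result.xywh}")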
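BodyPixKeypointDetection now also returns BodyPixKeypointNormalized[] normalized_keypoints and accepts a keep_out_of_bounds flag. Assuming the normalized coordinates are fractions of the image width and height, and using an illustrative service name and request image field, a caller could map them back to pixel coordinates like this:

#!/usr/bin/env python3
# Sketch of consuming the new normalized keypoints. The service name, the
# image_raw request field, the dataset value, and the fraction-of-image
# interpretation of x/y are assumptions; keypoint_name, x, y and
# normalized_keypoints come from the new message and service definitions.
import rospy
from sensor_msgs.msg import Image
from lasr_vision_msgs.srv import (
    BodyPixKeypointDetection,
    BodyPixKeypointDetectionRequest,
)

rospy.init_node("bodypix_keypoint_client_example")

img = rospy.wait_for_message("/xtion/rgb/image_raw", Image)
rospy.wait_for_service("/bodypix/keypoint_detection")
detect = rospy.ServiceProxy("/bodypix/keypoint_detection", BodyPixKeypointDetection)
resp = detect(
    BodyPixKeypointDetectionRequest(
        image_raw=img,  # field name assumed, not shown in this hunk
        dataset="resnet50",  # illustrative model/dataset name
        confidence=0.7,
        keep_out_of_bounds=False,
    )
)
for kp in resp.normalized_keypoints:
    px, py = kp.x * img.width, kp.y * img.height
    rospy.loginfo(f"{kp.keypoint_name}: ({px:.0f}, {py:.0f})")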
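The new u*/m* entries in motions.yaml define torso-lift and head poses forming a coarse scan grid (left, middle, right at three heights). Assuming the standard play_motion action server is running, a sweep over the top row could be executed as below; only the motion names come from this patch.

#!/usr/bin/env python3
# Sketch of running the new head/torso scan motions via play_motion. The action
# interface is the standard play_motion one and is assumed to be available.
import rospy
import actionlib
from play_motion_msgs.msg import PlayMotionAction, PlayMotionGoal

rospy.init_node("scan_motion_example")

client = actionlib.SimpleActionClient("play_motion", PlayMotionAction)
client.wait_for_server()

for motion in ["u3l", "u3m", "u3r"]:  # top row: look left, centre, right
    goal = PlayMotionGoal(motion_name=motion, skip_planning=True)
    client.send_goal(goal)
    client.wait_for_result()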