merge: pull request #122 from m-barker/main

feat: actionlib server for microphone transcription
LASR-at-Home · Jan 29, 2024 · df3848f · df3848f
2 parents cb7e730 + 1b41363
commit df3848f
Show file tree

Hide file tree

Showing 10 changed files with 521 additions and 12 deletions.
diff --git a/common/speech/lasr_speech_recognition_msgs/CMakeLists.txt b/common/speech/lasr_speech_recognition_msgs/CMakeLists.txt
@@ -7,7 +7,11 @@ project(lasr_speech_recognition_msgs)
 ## Find catkin macros and libraries
 ## if COMPONENTS list like find_package(catkin REQUIRED COMPONENTS xyz)
 ## is used, also find other catkin packages
-find_package(catkin REQUIRED COMPONENTS message_generation)
+find_package(catkin REQUIRED COMPONENTS message_generation genmsg actionlib_msgs actionlib std_msgs)
+add_action_files(
+  DIRECTORY action
+  FILES TranscribeSpeech.action
+)
 
 ## System dependencies are found with CMake's conventions
 # find_package(Boost REQUIRED COMPONENTS system)
@@ -63,8 +67,9 @@ add_service_files(
 
 ## Generate added messages and services with any dependencies listed here
 generate_messages(
-#   DEPENDENCIES
-#   std_msgs  # Or other packages containing msgs
+  DEPENDENCIES
+  std_msgs  # Or other packages containing msgs
+  actionlib_msgs
 )
 
 ################################################

diff --git a/common/speech/lasr_speech_recognition_msgs/action/TranscribeSpeech.action b/common/speech/lasr_speech_recognition_msgs/action/TranscribeSpeech.action
@@ -0,0 +1,6 @@
+---
+#result definition
+string sequence
+---
+#feedback
+string sequence
diff --git a/common/speech/lasr_speech_recognition_msgs/package.xml b/common/speech/lasr_speech_recognition_msgs/package.xml
@@ -51,6 +51,8 @@
   <buildtool_depend>catkin</buildtool_depend>
   <build_depend>message_generation</build_depend>
   <exec_depend>message_runtime</exec_depend>
+  <build_depend>actionlib_msgs</build_depend>
+  <exec_depend>actionlib_msgs</exec_depend>
 
 
   <!-- The export tag contains other, unspecified, tags -->

diff --git a/common/speech/lasr_speech_recognition_whisper/CMakeLists.txt b/common/speech/lasr_speech_recognition_whisper/CMakeLists.txt
@@ -7,7 +7,11 @@ project(lasr_speech_recognition_whisper)
 ## Find catkin macros and libraries
 ## if COMPONENTS list like find_package(catkin REQUIRED COMPONENTS xyz)
 ## is used, also find other catkin packages
-find_package(catkin REQUIRED catkin_virtualenv)
+find_package(catkin REQUIRED catkin_virtualenv genmsg actionlib_msgs actionlib std_msgs)
+# add_action_files(
+#   DIRECTORY action
+#   FILES TranscribeSpeech.action
+# )
 
 ## System dependencies are found with CMake's conventions
 # find_package(Boost REQUIRED COMPONENTS system)
@@ -70,7 +74,8 @@ catkin_generate_virtualenv(
 ## Generate added messages and services with any dependencies listed here
 # generate_messages(
 #   DEPENDENCIES
-#   std_msgs  # Or other packages containing msgs
+#   std_msgs
+#   actionlib_msgs  # Or other packages containing msgs
 # )
 
 ################################################
@@ -162,8 +167,11 @@ include_directories(
 catkin_install_python(PROGRAMS
   nodes/simple_transcribe_microphone
   nodes/transcribe_microphone
+  nodes/transcribe_microphone_server
   scripts/list_microphones.py
   scripts/test_microphones.py
+  scripts/repeat_after_me.py
+  scripts/test_speech_server.py
   DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION}
 )
 

diff --git a/common/speech/lasr_speech_recognition_whisper/doc/USAGE.md b/common/speech/lasr_speech_recognition_whisper/doc/USAGE.md
@@ -26,3 +26,31 @@ Stop listening whenever:
 ```bash
 rosservice call /whisper/stop_listening "{}"
 ```
+
+Run an actionlib server to transcribe the microphone:
+
+```bash
+rosrun lasr_speech_recognition_whisper transcribe_microphone_server
+```
+
+The action result contains a `string` field, `sequence`, holding the transcribed text.
+
+Several command line configuration options exist, which can be viewed with:
+
+```bash
+rosrun lasr_speech_recognition_whisper transcribe_microphone_server --help
+```
+
+Get Tiago to repeat the transcribed speech aloud using TTS; he will begin repeating after hearing "tiago, repeat ..." and stop once he hears "tiago, stop ...":
+
+```bash
+rosrun lasr_speech_recognition_whisper repeat_after_me.py
+```
+
+To constantly listen and view transcribed speech output in the command line (by constantly sending requests to the actionlib server), run the following script:
+
+```bash
+rosrun lasr_speech_recognition_whisper test_speech_server.py
+```
+
+