Skip to content

Commit

Permalink
merge: pull request #122 from m-barker/main
Browse files Browse the repository at this point in the history
feat: actionlib server for microphone transcription
  • Loading branch information
jws-1 authored Jan 29, 2024
2 parents cb7e730 + 1b41363 commit df3848f
Show file tree
Hide file tree
Showing 10 changed files with 521 additions and 12 deletions.
11 changes: 8 additions & 3 deletions common/speech/lasr_speech_recognition_msgs/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,11 @@ project(lasr_speech_recognition_msgs)
## Find catkin macros and libraries
## if COMPONENTS list like find_package(catkin REQUIRED COMPONENTS xyz)
## is used, also find other catkin packages
find_package(catkin REQUIRED COMPONENTS message_generation)
find_package(catkin REQUIRED COMPONENTS message_generation genmsg actionlib_msgs actionlib std_msgs)
## Register the action definitions in the action/ directory for message generation
add_action_files(
  DIRECTORY action
  FILES
    TranscribeSpeech.action
)

## System dependencies are found with CMake's conventions
# find_package(Boost REQUIRED COMPONENTS system)
Expand Down Expand Up @@ -63,8 +67,9 @@ add_service_files(

## Generate added messages and services with any dependencies listed here
generate_messages(
# DEPENDENCIES
# std_msgs # Or other packages containing msgs
# Message packages that the generated messages in this package may reference.
DEPENDENCIES
std_msgs # Or other packages containing msgs
# Listed alongside the action file registered via add_action_files() in this
# file; presumably needed by the generated action messages -- confirm against
# the catkin/actionlib_msgs documentation.
actionlib_msgs
)

################################################
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#goal definition (empty: no fields are declared before the first separator)
---
#result definition
# presumably the transcribed text -- confirm against the action server node
string sequence
---
#feedback
# same field name and type as the result; intended content not shown here -- TODO confirm
string sequence
2 changes: 2 additions & 0 deletions common/speech/lasr_speech_recognition_msgs/package.xml
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@
<buildtool_depend>catkin</buildtool_depend>
<build_depend>message_generation</build_depend>
<exec_depend>message_runtime</exec_depend>
<build_depend>actionlib_msgs</build_depend>
<exec_depend>actionlib_msgs</exec_depend>


<!-- The export tag contains other, unspecified, tags -->
Expand Down
12 changes: 10 additions & 2 deletions common/speech/lasr_speech_recognition_whisper/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,11 @@ project(lasr_speech_recognition_whisper)
## Find catkin macros and libraries
## if COMPONENTS list like find_package(catkin REQUIRED COMPONENTS xyz)
## is used, also find other catkin packages
find_package(catkin REQUIRED catkin_virtualenv)
find_package(catkin REQUIRED catkin_virtualenv genmsg actionlib_msgs actionlib std_msgs)
# add_action_files(
# DIRECTORY action
# FILES TranscribeSpeech.action
# )

## System dependencies are found with CMake's conventions
# find_package(Boost REQUIRED COMPONENTS system)
Expand Down Expand Up @@ -70,7 +74,8 @@ catkin_generate_virtualenv(
## Generate added messages and services with any dependencies listed here
# generate_messages(
# DEPENDENCIES
# std_msgs # Or other packages containing msgs
# std_msgs
# actionlib_msgs # Or other packages containing msgs
# )

################################################
Expand Down Expand Up @@ -162,8 +167,11 @@ include_directories(
## Install the package's Python nodes and helper scripts into
## CATKIN_PACKAGE_BIN_DESTINATION.
catkin_install_python(
  PROGRAMS
    nodes/simple_transcribe_microphone
    nodes/transcribe_microphone
    nodes/transcribe_microphone_server
    scripts/list_microphones.py
    scripts/test_microphones.py
    scripts/repeat_after_me.py
    scripts/test_speech_server.py
  DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION}
)

Expand Down
28 changes: 28 additions & 0 deletions common/speech/lasr_speech_recognition_whisper/doc/USAGE.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,31 @@ Stop listening whenever:
```bash
rosservice call /whisper/stop_listening "{}"
```

Run an actionlib server to transcribe the microphone:

```bash
rosrun lasr_speech_recognition_whisper transcribe_microphone_server
```

The result returned by the action server is a `string` containing the transcribed text.

Several command line configuration options exist, which can be viewed with:

```bash
rosrun lasr_speech_recognition_whisper transcribe_microphone_server --help
```

Get Tiago to repeat the transcribed speech output using TTS; he will begin repeating after hearing "tiago, repeat ..." and stop once he hears "tiago, stop ...":

```bash
rosrun lasr_speech_recognition_whisper repeat_after_me.py
```

To constantly listen and view transcribed speech output in the command line (by constantly sending requests to the actionlib server), run the following script:

```bash
rosrun lasr_speech_recognition_whisper test_speech_server.py
```


Loading

0 comments on commit df3848f

Please sign in to comment.