Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add-energy-thresh-to-speech #216

Merged
13 commits merged on
Jun 11, 2024
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ class speech_model_params:
mic_device (Optional[str]): Microphone device index or name. Defaults to None.
timer_duration (Optional[int]): Duration of the timer for adjusting the microphone for ambient noise. Defaults to 20 seconds.
warmup (bool): Whether to warmup the model by running inference on a test file. Defaults to True.
energy_threshold (Optional[int]): Energy threshold for silence detection. Using this disables automatic adjustment. Defaults to None.
"""

model_name: str = "medium.en"
Expand All @@ -41,6 +42,7 @@ class speech_model_params:
mic_device: Optional[str] = None
timer_duration: Optional[int] = 20
warmup: bool = True
energy_threshold: Optional[int] = None


class TranscribeSpeechAction(object):
Expand Down Expand Up @@ -71,7 +73,7 @@ class TranscribeSpeechAction(object):
self._model_params.warmup,
)
# Configure the speech recogniser object and adjust for ambient noise
self.recogniser = self._configure_recogniser(ambient_adj=True)
self.recogniser = self._configure_recogniser()
# Setup the action server and register execution callback
self._action_server = actionlib.SimpleActionServer(
self._action_name,
Expand All @@ -84,11 +86,6 @@ class TranscribeSpeechAction(object):

self._action_server.start()

def _reset_timer(self) -> None:
    """Restart the ambient-noise adjustment timer.

    Shuts down the current timer and replaces it with a new one that
    uses the stored timer duration and the same timer callback.
    """
    # Stop the running timer before creating its replacement.
    self._timer.shutdown()
    period = rospy.Duration(self._timer_duration)
    self._timer = rospy.Timer(period, self._timer_cb)

def _configure_microphone(self) -> sr.Microphone:
"""Configures the microphone for listening to speech based on the
microphone device index or name.
Expand Down Expand Up @@ -116,20 +113,22 @@ class TranscribeSpeechAction(object):
f"Could not find microphone with name: {self._model_params.mic_device}"
)

def _configure_recogniser(self) -> sr.Recognizer:
    """Configures the speech recogniser object.

    When an energy threshold is set in the model parameters, it is applied
    as a fixed threshold and dynamic energy adjustment is switched off.
    Otherwise the recogniser is calibrated against ambient noise using the
    configured microphone.

    Returns:
        sr.Recognizer: speech recogniser object.
    """
    # The flag is raised for the duration of the configuration and is
    # always cleared afterwards: previously the fixed-threshold path
    # returned early and left it stuck at True.
    self._listening = True
    try:
        recogniser = sr.Recognizer()
        threshold = self._model_params.energy_threshold

        # Compare against None explicitly so that an explicit threshold
        # of 0 is honoured instead of silently falling back to
        # automatic ambient-noise adjustment.
        if threshold is not None:
            recogniser.dynamic_energy_threshold = False
            recogniser.energy_threshold = threshold
        else:
            with self._configure_microphone() as source:
                recogniser.adjust_for_ambient_noise(source)
    finally:
        # Also reached when microphone configuration raises, so the
        # flag never outlives this call.
        self._listening = False
    return recogniser

Expand Down Expand Up @@ -257,7 +256,14 @@ def parse_args() -> dict:
help="Disable warming up the model by running inference on a test file.",
)

args,unknown = parser.parse_known_args()
parser.add_argument(
"--energy_threshold",
type=int,
default=None,
help="Energy threshold for silence detection. Using this disables automatic adjustment",
)

args, unknown = parser.parse_known_args()
return vars(args)


Expand Down