Skip to content

Commit

Permalink
#53, #80
Browse files Browse the repository at this point in the history
  • Loading branch information
KoljaB committed May 6, 2024
1 parent db79a17 commit f69a16f
Show file tree
Hide file tree
Showing 4 changed files with 58 additions and 29 deletions.
8 changes: 7 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ Check the [FAQ page](./FAQ.md) for answers to a lot of questions around the usag

## Updates

Latest Version: v0.3.45
Latest Version: v0.3.46

See [release history](https://github.com/KoljaB/RealtimeTTS/releases).

Expand Down Expand Up @@ -308,6 +308,12 @@ When you initialize the `TextToAudioStream` class, you have various options to c
- **Default**: en
- **Description**: Language to use for sentence splitting.

#### `muted` (bool)
- **Type**: Boolean
- **Required**: No
- **Default**: False
- **Description**: Global mute switch. If True, no PyAudio stream is opened, which disables audio playback via the local speakers (useful when you want to synthesize to a file or process the audio chunks yourself). When set to True, it overrides the `muted` parameter of the `play` method.

#### `level` (int)
- **Type**: Integer
- **Required**: No
Expand Down
49 changes: 28 additions & 21 deletions RealtimeTTS/stream_player.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@ def __init__(
format: int = pyaudio.paInt16,
channels: int = 1,
rate: int = 16000,
output_device_index=None):
output_device_index=None,
muted: bool = False):
"""
Args:
format (int): Audio format, defaults to pyaudio.paInt16
Expand All @@ -33,6 +34,7 @@ def __init__(
self.channels = channels
self.rate = rate
self.output_device_index = output_device_index
self.muted = muted


class AudioStream:
Expand All @@ -58,27 +60,33 @@ def open_stream(self):
pySampleRate = self.config.rate
pyOutput_device_index = self.config.output_device_index

if self.config.format == pyaudio.paCustomFormat:
pyFormat = self.pyaudio_instance.get_format_from_width(2)
logging.debug("Opening stream for mpeg audio chunks, "
f"pyFormat: {pyFormat}, pyChannels: {pyChannels}, "
f"pySampleRate: {pySampleRate}")
if self.config.muted:
print("Muted mode, no opening stream")
logging.debug("Muted mode, no opening stream")

else:
pyFormat = self.config.format
logging.debug("Opening stream for wave audio chunks, "
f"pyFormat: {pyFormat}, pyChannels: {pyChannels}, "
f"pySampleRate: {pySampleRate}")
print("Opening stream")
if self.config.format == pyaudio.paCustomFormat:
pyFormat = self.pyaudio_instance.get_format_from_width(2)
logging.debug("Opening stream for mpeg audio chunks, "
f"pyFormat: {pyFormat}, pyChannels: {pyChannels}, "
f"pySampleRate: {pySampleRate}")
else:
pyFormat = self.config.format
logging.debug("Opening stream for wave audio chunks, "
f"pyFormat: {pyFormat}, pyChannels: {pyChannels}, "
f"pySampleRate: {pySampleRate}")

try:
self.stream = self.pyaudio_instance.open(
format=pyFormat,
channels=pyChannels,
rate=pySampleRate,
output_device_index=pyOutput_device_index,
output=True)
except Exception as e:
print(f"Error opening stream: {e}")
exit(0)
try:
self.stream = self.pyaudio_instance.open(
format=pyFormat,
channels=pyChannels,
rate=pySampleRate,
output_device_index=pyOutput_device_index,
output=True)
except Exception as e:
print(f"Error opening stream: {e}")
exit(0)

def start_stream(self):
"""Starts the audio stream."""
Expand Down Expand Up @@ -224,7 +232,6 @@ def _play_chunk(self, chunk):
for i in range(0, len(chunk), sub_chunk_size):
sub_chunk = chunk[i:i + sub_chunk_size]

# print("Playing/yielding chunk")
if not self.first_chunk_played and self.on_playback_start:
self.on_playback_start()
self.first_chunk_played = True
Expand Down
28 changes: 22 additions & 6 deletions RealtimeTTS/text_to_stream.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ def __init__(self,
output_device_index=None,
tokenizer: str = "nltk",
language: str = "en",
muted: bool = False,
level=logging.WARNING,
):
"""
Expand All @@ -53,6 +54,11 @@ def __init__(self,
tokenizer (str, optional): Tokenizer to use for sentence splitting
(currently "nltk" and "stanza" are supported).
language (str, optional): Language to use for sentence splitting.
muted (bool, optional): If True, disables audio playback via local
speakers (in case you want to synthesize to file or process
audio chunks). Default is False.
If set to True it will override the play parameters muted
setting.
level (int, optional): Logging level. Defaults to logging.WARNING.
"""
self.log_characters = log_characters
Expand All @@ -67,6 +73,7 @@ def __init__(self,
self.abort_events = []
self.tokenizer = tokenizer
self.language = language
self.global_muted = muted
self.player = None
self.play_lock = threading.Lock()
self.is_playing_flag = False
Expand Down Expand Up @@ -118,13 +125,16 @@ def load_engine(

# Check if the engine doesn't support consuming generators directly
if not self.engine.can_consume_generators:
config = AudioConfiguration(
format,
channels,
rate,
self.output_device_index,
muted=self.global_muted)

self.player = StreamPlayer(
self.engine.queue,
AudioConfiguration(
format,
channels,
rate,
self.output_device_index),
config,
on_playback_start=self._on_audio_stream_start)
else:
self.engine.on_playback_start = self._on_audio_stream_start
Expand Down Expand Up @@ -231,6 +241,8 @@ def play(
which the first sentence fragment is forced to be yielded.
Default is 15 words.
"""
if self.global_muted:
muted = True

if is_external_call:
if not self.play_lock.acquire(blocking=False):
Expand Down Expand Up @@ -295,7 +307,11 @@ def play(
self.generated_text += self.char_iter.iterated_text

self._create_iterators()
self.is_playing_flag = False

if is_external_call:

self.is_playing_flag = False
self.play_lock.release()
else:
try:
# Start the audio player to handle playback
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

setuptools.setup(
name="RealTimeTTS",
version="0.3.44",
version="0.3.46",
author="Kolja Beigel",
author_email="[email protected]",
description="*Stream text into audio with an easy-to-use, highly configurable library delivering voice output with minimal latency.",
Expand Down

0 comments on commit f69a16f

Please sign in to comment.