From 69ffc72afd7ab78ef9cc92f6864386a6da732d97 Mon Sep 17 00:00:00 2001 From: Kolja Beigel Date: Thu, 7 Dec 2023 11:51:02 +0100 Subject: [PATCH] fix for Mac system voices --- README.md | 2 +- RealtimeTTS/engines/system_engine.py | 21 ++++++++++++++++----- RealtimeTTS/text_to_stream.py | 6 +++++- setup.py | 2 +- tests/coqui_test.py | 10 +++++----- 5 files changed, 28 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index 871f65f..2918a92 100644 --- a/README.md +++ b/README.md @@ -26,7 +26,7 @@ https://github.com/KoljaB/RealtimeTTS/assets/7604638/87dcd9a5-3a4e-4f57-be45-837 ## Updates -Latest Version: v0.3.32 +Latest Version: v0.3.34 #### New Features: - new Engine: OpenAI TTS diff --git a/RealtimeTTS/engines/system_engine.py b/RealtimeTTS/engines/system_engine.py index d7148f5..4ee9382 100644 --- a/RealtimeTTS/engines/system_engine.py +++ b/RealtimeTTS/engines/system_engine.py @@ -1,4 +1,6 @@ from .base_engine import BaseEngine +from pydub.utils import mediainfo +from pydub import AudioSegment from typing import Union import tempfile import pyaudio @@ -54,9 +56,8 @@ def get_stream_info(self): - Sample Rate (int): The sample rate of the audio in Hz. 16000 represents 16kHz sample rate. """ return pyaudio.paInt16, 1, 22050 - - def synthesize(self, - text: str) -> bool: + + def synthesize(self, text: str) -> bool: """ Synthesizes text to audio stream. @@ -67,12 +68,22 @@ def synthesize(self, self.engine.save_to_file(text, self.file_path) self.engine.runAndWait() - # Open the saved WAV file + # Get media info of the file + info = mediainfo(self.file_path) + + # Check if the file format is AIFF and convert to WAV if necessary + if info['format_name'] == 'aiff': + audio = AudioSegment.from_file(self.file_path, format="aiff") + audio.export(self.file_path, format="wav") + + # Now open the WAV file with wave.open(self.file_path, 'rb') as wf: audio_data = wf.readframes(wf.getnframes()) self.queue.put(audio_data) + return True - return True + # Return False if the process failed + return False def get_voices(self): """ diff --git a/RealtimeTTS/text_to_stream.py b/RealtimeTTS/text_to_stream.py index 89d6e86..069782b 100644 --- a/RealtimeTTS/text_to_stream.py +++ b/RealtimeTTS/text_to_stream.py @@ -56,6 +56,7 @@ def __init__(self, self.abort_events = [] self.tokenizer = tokenizer self.language = language + self.player = None self._create_iterators() @@ -105,6 +106,7 @@ def load_engine(self, self.player = StreamPlayer(self.engine.queue, AudioConfiguration(format, channels, rate), on_playback_start=self._on_audio_stream_start) else: self.engine.on_playback_start = self._on_audio_stream_start + self.player = None logging.info(f"loaded engine {self.engine.engine_name}") @@ -197,7 +199,9 @@ def play(self, self.stream_running = True abort_event = threading.Event() self.abort_events.append(abort_event) - self.player.mute(muted) + + if self.player: + self.player.mute(muted) self.output_wavfile = output_wavfile self.chunk_callback = on_audio_chunk diff --git a/setup.py b/setup.py index 5e9f267..53d1274 100644 --- a/setup.py +++ b/setup.py @@ -9,7 +9,7 @@ setuptools.setup( name="RealTimeTTS", - version="0.3.32", + version="0.3.34", author="Kolja Beigel", author_email="kolja.beigel@web.de", description="*Stream text into audio with an easy-to-use, highly configurable library delivering voice output with minimal latency.", diff --git a/tests/coqui_test.py b/tests/coqui_test.py index dc2652f..fa17d56 100644 --- a/tests/coqui_test.py +++ b/tests/coqui_test.py @@ -7,17 +7,17 @@ def dummy_generator(): # for normal use with minimal logging: - engine = CoquiEngine() + # engine = CoquiEngine() # test with extended logging: - # import logging - # logging.basicConfig(level=logging.DEBUG) - # engine = CoquiEngine(level=logging.DEBUG) + import logging + logging.basicConfig(level=logging.INFO) + engine = CoquiEngine(level=logging.INFO) stream = TextToAudioStream(engine) print ("Starting to play stream") - stream.feed(dummy_generator()).play() + stream.feed(dummy_generator()).play(log_synthesized_text=True) engine.shutdown() \ No newline at end of file