Skip to content

Commit

Permalink
fix for Mac system voices
Browse files Browse the repository at this point in the history
  • Loading branch information
KoljaB committed Dec 7, 2023
1 parent 0c90812 commit 69ffc72
Show file tree
Hide file tree
Showing 5 changed files with 28 additions and 13 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ https://github.com/KoljaB/RealtimeTTS/assets/7604638/87dcd9a5-3a4e-4f57-be45-837
## Updates

Latest Version: v0.3.32
Latest Version: v0.3.34

#### New Features:
- new Engine: OpenAI TTS
Expand Down
21 changes: 16 additions & 5 deletions RealtimeTTS/engines/system_engine.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
from .base_engine import BaseEngine
from pydub.utils import mediainfo
from pydub import AudioSegment
from typing import Union
import tempfile
import pyaudio
Expand Down Expand Up @@ -54,9 +56,8 @@ def get_stream_info(self):
- Sample Rate (int): The sample rate of the audio in Hz. 22050 represents a 22.05kHz sample rate.
"""
return pyaudio.paInt16, 1, 22050

def synthesize(self,
text: str) -> bool:

def synthesize(self, text: str) -> bool:
"""
Synthesizes text to audio stream.
Expand All @@ -67,12 +68,22 @@ def synthesize(self,
self.engine.save_to_file(text, self.file_path)
self.engine.runAndWait()

# Open the saved WAV file
# Get media info of the file
info = mediainfo(self.file_path)

# Check if the file format is AIFF and convert to WAV if necessary
if info['format_name'] == 'aiff':
audio = AudioSegment.from_file(self.file_path, format="aiff")
audio.export(self.file_path, format="wav")

# Now open the WAV file
with wave.open(self.file_path, 'rb') as wf:
audio_data = wf.readframes(wf.getnframes())
self.queue.put(audio_data)
return True

return True
# Return False if the process failed
return False

def get_voices(self):
"""
Expand Down
6 changes: 5 additions & 1 deletion RealtimeTTS/text_to_stream.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ def __init__(self,
self.abort_events = []
self.tokenizer = tokenizer
self.language = language
self.player = None

self._create_iterators()

Expand Down Expand Up @@ -105,6 +106,7 @@ def load_engine(self,
self.player = StreamPlayer(self.engine.queue, AudioConfiguration(format, channels, rate), on_playback_start=self._on_audio_stream_start)
else:
self.engine.on_playback_start = self._on_audio_stream_start
self.player = None

logging.info(f"loaded engine {self.engine.engine_name}")

Expand Down Expand Up @@ -197,7 +199,9 @@ def play(self,
self.stream_running = True
abort_event = threading.Event()
self.abort_events.append(abort_event)
self.player.mute(muted)

if self.player:
self.player.mute(muted)

self.output_wavfile = output_wavfile
self.chunk_callback = on_audio_chunk
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

setuptools.setup(
name="RealTimeTTS",
version="0.3.32",
version="0.3.34",
author="Kolja Beigel",
author_email="[email protected]",
description="*Stream text into audio with an easy-to-use, highly configurable library delivering voice output with minimal latency.",
Expand Down
10 changes: 5 additions & 5 deletions tests/coqui_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,17 +7,17 @@ def dummy_generator():


# for normal use with minimal logging:
engine = CoquiEngine()
# engine = CoquiEngine()

# test with extended logging:
# import logging
# logging.basicConfig(level=logging.DEBUG)
# engine = CoquiEngine(level=logging.DEBUG)
import logging
logging.basicConfig(level=logging.INFO)
engine = CoquiEngine(level=logging.INFO)


stream = TextToAudioStream(engine)

print ("Starting to play stream")
stream.feed(dummy_generator()).play()
stream.feed(dummy_generator()).play(log_synthesized_text=True)

engine.shutdown()

0 comments on commit 69ffc72

Please sign in to comment.