Skip to content

Commit

Permalink
bugfix stream2sentence
Browse files Browse the repository at this point in the history
  • Loading branch information
KoljaB committed Nov 29, 2024
1 parent b7d01dc commit 578b3b9
Show file tree
Hide file tree
Showing 5 changed files with 54 additions and 34 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ Let me know if you need any adjustments or additional languages!

## Updates

Latest Version: v0.4.13
Latest Version: v0.4.14

See [release history](https://github.com/KoljaB/RealtimeTTS/releases).

Expand Down
18 changes: 10 additions & 8 deletions RealtimeTTS/text_to_stream.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,14 +170,14 @@ def play_async(
context_size_look_overhead: int = 12,
muted: bool = False,
sentence_fragment_delimiters: str = ".?!;:,\n…)]}。-",
force_first_fragment_after_words=15,
force_first_fragment_after_words=30,
debug=False,
):
"""
Async handling of text to audio synthesis, see play() method.
"""
if not self.is_playing_flag:
self.is_playing_flag = True
# Pass additional parameter to differentiate external call
args = (
fast_sentence_fragment,
fast_sentence_fragment_allsentences,
Expand All @@ -200,16 +200,13 @@ def play_async(
sentence_fragment_delimiters,
force_first_fragment_after_words,
True,
debug,
)
self.play_thread = threading.Thread(target=self.play, args=args)
self.play_thread.start()
else:
logging.warning("play_async() called while already playing audio, skipping")

# self.play_thread = threading.Thread(target=self.play, args=(fast_sentence_fragment, buffer_threshold_seconds, minimum_sentence_length, minimum_first_fragment_length, log_synthesized_text, reset_generated_text, output_wavfile, on_sentence_synthesized, on_audio_chunk, tokenizer, language, context_size, muted, sentence_fragment_delimiters, force_first_fragment_after_words))
# self.play_thread.daemon = True
# self.play_thread.start()

def play(
self,
fast_sentence_fragment: bool = True,
Expand All @@ -231,8 +228,9 @@ def play(
context_size_look_overhead: int = 12,
muted: bool = False,
sentence_fragment_delimiters: str = ".?!;:,\n…)]}。-",
force_first_fragment_after_words=15,
force_first_fragment_after_words=30,
is_external_call=True,
debug=False,
):
"""
Handles the synthesis of text to audio.
Expand Down Expand Up @@ -263,7 +261,9 @@ def play(
considered sentence delimiters. Default is ".?!;:,\n…)]}。-".
- force_first_fragment_after_words (int): The number of words after
which the first sentence fragment is forced to be yielded.
Default is 15 words.
Default is 30 words.
- is_external_call (bool): If True, the method is being called from an
external source. Default is True.
- debug (bool): If True, enables debug mode. Default is False.
"""
if self.global_muted:
muted = True
Expand Down Expand Up @@ -359,6 +359,7 @@ def play(
log_characters=self.log_characters,
sentence_fragment_delimiters=sentence_fragment_delimiters,
force_first_fragment_after_words=force_first_fragment_after_words,
debug=debug,
)

# Create the synthesis chunk generator with the given sentences
Expand Down Expand Up @@ -490,6 +491,7 @@ def synthesize_worker():
sentence_fragment_delimiters=sentence_fragment_delimiters,
force_first_fragment_after_words=force_first_fragment_after_words,
is_external_call=False,
debug=debug,
)

if is_external_call:
Expand Down
8 changes: 4 additions & 4 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# stream2sentence is the core of RealtimeTTS - it quickly converts streamed text into sentences for real-time synthesis
stream2sentence==0.2.9

# azure-cognitiveservices-speech is for AzureEngine
azure-cognitiveservices-speech==1.41.1

Expand Down Expand Up @@ -26,7 +29,4 @@ pyaudio==0.2.14
pydub==0.25.1

# resampy is used to resample from the tts to the target device sample rate
resampy==0.4.3

# stream2sentence is to quickly convert streamed text into sentences for real-time synthesis
stream2sentence==0.2.7
resampy==0.4.3
8 changes: 4 additions & 4 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
current_version = "0.4.13"
current_version = "0.4.14"

import setuptools

Expand All @@ -7,7 +7,7 @@
long_description = fh.read()

long_description = """
To install RealTimeTTS, you need to specify the TTS engine(s) you wish to use.
To install realtimetts, you need to specify the TTS engine(s) you wish to use.
For example, to install all supported engines:
Expand Down Expand Up @@ -85,14 +85,14 @@ def parse_requirements(filename):
}

setuptools.setup(
name="RealTimeTTS",
name="realtimetts",
version=current_version,
author="Kolja Beigel",
author_email="[email protected]",
description="Stream text into audio with an easy-to-use, highly configurable library delivering voice output with minimal latency.",
long_description=long_description,
long_description_content_type="text/markdown",
url="https://github.com/KoljaB/RealTimeTTS",
url="https://github.com/KoljaB/realtimetts",
packages=setuptools.find_packages(),
classifiers=[
"Programming Language :: Python :: 3",
Expand Down
52 changes: 35 additions & 17 deletions tests/edge_test.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,41 @@
if __name__ == "__main__":
from RealtimeTTS import TextToAudioStream, EdgeEngine, EdgeVoice
from RealtimeTTS import EdgeEngine, TextToAudioStream

def dummy_generator():
yield "Hey guys! These here are realtime spoken sentences based on edge text synthesis. "
yield "With a voice based on microsoft azure tts technology. "
text = """\
No, the way it "cuts midway" is NOT like the audio is cut abruptly (like when you pause a video). You can check below the audio (sorry for not doing that earlier)\
"""

#voice = GTTSVoice(s1peed=1.3)
engine = EdgeEngine(rate=5, pitch=10)
stream = TextToAudioStream(engine, output_device_index=0)
voice_engine = EdgeEngine()
#voice_engine.set_voice(2)
voice_stream = TextToAudioStream(voice_engine, language="en")

print("Getting voices")
voices = engine.get_voices()
print(voices)
#engine.set_voice("RyanNeural")
#voice_stream.feed(text).play_async(force_first_fragment_after_words=12)
voice_stream.feed(text).play_async()

import time
time.sleep(30)



# if __name__ == "__main__":
# from RealtimeTTS import TextToAudioStream, EdgeEngine, EdgeVoice

# def dummy_generator():
# yield "Hey guys! These here are realtime spoken sentences based on edge text synthesis. "
# yield "With a voice based on microsoft azure tts technology. "

# #voice = GTTSVoice(s1peed=1.3)
# engine = EdgeEngine(rate=5, pitch=10)
# stream = TextToAudioStream(engine, output_device_index=0)

# print("Getting voices")
# voices = engine.get_voices()
# print(voices)
# #engine.set_voice("RyanNeural")


#voice = EdgeVoice("en-GB-RyanNeural", rate="+5%", volume="+0%", pitch="+10Hz")
voice = EdgeVoice("en-GB-RyanNeural")
engine.set_voice(voice)
# #voice = EdgeVoice("en-GB-RyanNeural", rate="+5%", volume="+0%", pitch="+10Hz")
# voice = EdgeVoice("en-GB-RyanNeural")
# engine.set_voice(voice)

print("Starting to play stream")
stream.feed(dummy_generator()).play(log_synthesized_text=True)
# print("Starting to play stream")
# stream.feed(dummy_generator()).play(log_synthesized_text=True)

0 comments on commit 578b3b9

Please sign in to comment.