bugfix stream2sentence

KoljaB · Nov 29, 2024 · 578b3b9 · 578b3b9
1 parent b7d01dc
commit 578b3b9
Show file tree

Hide file tree

Showing 5 changed files with 54 additions and 34 deletions.
diff --git a/README.md b/README.md
@@ -58,7 +58,7 @@ Let me know if you need any adjustments or additional languages!
 
 ## Updates
 
-Latest Version: v0.4.13
+Latest Version: v0.4.14
 
 See [release history](https://github.com/KoljaB/RealtimeTTS/releases).
 

diff --git a/RealtimeTTS/text_to_stream.py b/RealtimeTTS/text_to_stream.py
@@ -170,14 +170,14 @@ def play_async(
         context_size_look_overhead: int = 12,
         muted: bool = False,
         sentence_fragment_delimiters: str = ".?!;:,\n…)]}。-",
-        force_first_fragment_after_words=15,
+        force_first_fragment_after_words=30,
+        debug=False,
     ):
         """
         Async handling of text to audio synthesis, see play() method.
         """
         if not self.is_playing_flag:
             self.is_playing_flag = True
-            # Pass additional parameter to differentiate external call
             args = (
                 fast_sentence_fragment,
                 fast_sentence_fragment_allsentences,
@@ -200,16 +200,13 @@ def play_async(
                 sentence_fragment_delimiters,
                 force_first_fragment_after_words,
                 True,
+                debug,
             )
             self.play_thread = threading.Thread(target=self.play, args=args)
             self.play_thread.start()
         else:
             logging.warning("play_async() called while already playing audio, skipping")
 
-        # self.play_thread = threading.Thread(target=self.play, args=(fast_sentence_fragment, buffer_threshold_seconds, minimum_sentence_length, minimum_first_fragment_length, log_synthesized_text, reset_generated_text, output_wavfile, on_sentence_synthesized, on_audio_chunk, tokenizer, language, context_size, muted, sentence_fragment_delimiters, force_first_fragment_after_words))
-        # self.play_thread.daemon = True
-        # self.play_thread.start()
-
     def play(
         self,
         fast_sentence_fragment: bool = True,
@@ -231,8 +228,9 @@ def play(
         context_size_look_overhead: int = 12,
         muted: bool = False,
         sentence_fragment_delimiters: str = ".?!;:,\n…)]}。-",
-        force_first_fragment_after_words=15,
+        force_first_fragment_after_words=30,
         is_external_call=True,
+        debug=False,
     ):
         """
         Handles the synthesis of text to audio.
@@ -263,7 +261,9 @@ def play(
             considered sentence delimiters. Default is ".?!;:,\n…)]}。-".
         - force_first_fragment_after_words (int): The number of words after
             which the first sentence fragment is forced to be yielded.
-            Default is 15 words.
+            Default is 30 words.
+        - is_external_call (bool): If True, the method is called from an external source. Default is True.
+        - debug (bool): If True, enables debug mode. Default is False.
         """
         if self.global_muted:
             muted = True
@@ -359,6 +359,7 @@ def play(
                     log_characters=self.log_characters,
                     sentence_fragment_delimiters=sentence_fragment_delimiters,
                     force_first_fragment_after_words=force_first_fragment_after_words,
+                    debug=debug,
                 )
 
                 # Create the synthesis chunk generator with the given sentences
@@ -490,6 +491,7 @@ def synthesize_worker():
                     sentence_fragment_delimiters=sentence_fragment_delimiters,
                     force_first_fragment_after_words=force_first_fragment_after_words,
                     is_external_call=False,
+                    debug=debug,
                 )
 
             if is_external_call:

diff --git a/requirements.txt b/requirements.txt
@@ -1,3 +1,6 @@
+# stream2sentence is the core of RealtimeTTS - it quickly converts streamed text into sentences for real-time synthesis
+stream2sentence==0.2.9
+
 # azure-cognitiveservices-speech is for AzureEngine
 azure-cognitiveservices-speech==1.41.1
 
@@ -26,7 +29,4 @@ pyaudio==0.2.14
 pydub==0.25.1
 
 # resampy is used to resample from the tts to the target device sample rate 
-resampy==0.4.3
-
-# stream2sentence is to quickly convert streamed text into sentences for real-time synthesis
-stream2sentence==0.2.7
+resampy==0.4.3
diff --git a/setup.py b/setup.py
@@ -1,4 +1,4 @@
-current_version = "0.4.13"
+current_version = "0.4.14"
 
 import setuptools
 
@@ -7,7 +7,7 @@
     long_description = fh.read()
 
 long_description = """
-To install RealTimeTTS, you need to specify the TTS engine(s) you wish to use. 
+To install realtimetts, you need to specify the TTS engine(s) you wish to use. 
 
 For example, to install all supported engines:
 
@@ -85,14 +85,14 @@ def parse_requirements(filename):
 }
 
 setuptools.setup(
-    name="RealTimeTTS",
+    name="realtimetts",
     version=current_version,
     author="Kolja Beigel",
     author_email="[email protected]",
     description="Stream text into audio with an easy-to-use, highly configurable library delivering voice output with minimal latency.",
     long_description=long_description,
     long_description_content_type="text/markdown",
-    url="https://github.com/KoljaB/RealTimeTTS",
+    url="https://github.com/KoljaB/realtimetts",
     packages=setuptools.find_packages(),
     classifiers=[
         "Programming Language :: Python :: 3",

diff --git a/tests/edge_test.py b/tests/edge_test.py
@@ -1,23 +1,41 @@
-if __name__ == "__main__":
-    from RealtimeTTS import TextToAudioStream, EdgeEngine, EdgeVoice
+from RealtimeTTS import EdgeEngine, TextToAudioStream
 
-    def dummy_generator():
-        yield "Hey guys! These here are realtime spoken sentences based on edge text synthesis. "
-        yield "With a voice based on microsoft azure tts technology. "
+text = """\
+No, the way it "cuts midway" is NOT like the audio is cut abruptly (like when you pause a video). You can check below the audio (sorry for not doing that earlier)\
+"""
 
-    #voice = GTTSVoice(s1peed=1.3)
-    engine = EdgeEngine(rate=5, pitch=10)
-    stream = TextToAudioStream(engine, output_device_index=0)
+voice_engine = EdgeEngine()
+#voice_engine.set_voice(2)
+voice_stream = TextToAudioStream(voice_engine, language="en")
 
-    print("Getting voices")
-    voices = engine.get_voices()
-    print(voices)
-    #engine.set_voice("RyanNeural")  
+#voice_stream.feed(text).play_async(force_first_fragment_after_words=12)
+voice_stream.feed(text).play_async()
+
+import time
+time.sleep(30)
+
+
+
+# if __name__ == "__main__":
+#     from RealtimeTTS import TextToAudioStream, EdgeEngine, EdgeVoice
+
+#     def dummy_generator():
+#         yield "Hey guys! These here are realtime spoken sentences based on edge text synthesis. "
+#         yield "With a voice based on microsoft azure tts technology. "
+
+#     #voice = GTTSVoice(s1peed=1.3)
+#     engine = EdgeEngine(rate=5, pitch=10)
+#     stream = TextToAudioStream(engine, output_device_index=0)
+
+#     print("Getting voices")
+#     voices = engine.get_voices()
+#     print(voices)
+#     #engine.set_voice("RyanNeural")  
 
 
-    #voice = EdgeVoice("en-GB-RyanNeural", rate="+5%", volume="+0%", pitch="+10Hz")
-    voice = EdgeVoice("en-GB-RyanNeural")
-    engine.set_voice(voice)
+#     #voice = EdgeVoice("en-GB-RyanNeural", rate="+5%", volume="+0%", pitch="+10Hz")
+#     voice = EdgeVoice("en-GB-RyanNeural")
+#     engine.set_voice(voice)
 
-    print("Starting to play stream")
-    stream.feed(dummy_generator()).play(log_synthesized_text=True)
+#     print("Starting to play stream")
+#     stream.feed(dummy_generator()).play(log_synthesized_text=True)