From cb8fba703df4a731f941687387d8b2842025d128 Mon Sep 17 00:00:00 2001
From: Jarod Mica
Date: Sun, 9 Jun 2024 22:44:59 -0700
Subject: [PATCH] update

---
 .gitignore                                 |   3 +
 README.md                                  |   5 +-
 audio_book_app_2_0.py => audio_book_app.py |   9 +-
 changelog.md                               |   4 +
 text_test1.txt                             |  63 +------
 tortoise_api.py                            | 208 ---------------------
 6 files changed, 18 insertions(+), 274 deletions(-)
 rename audio_book_app_2_0.py => audio_book_app.py (99%)
 delete mode 100644 tortoise_api.py

diff --git a/.gitignore b/.gitignore
index 4046235..416d783 100644
--- a/.gitignore
+++ b/.gitignore
@@ -12,4 +12,7 @@
 __pycache__/
 venv/
 audiobooks/
 output/
+.vscode/
+tortoise_api/
+
diff --git a/README.md b/README.md
index 387386e..f304e99 100644
--- a/README.md
+++ b/README.md
@@ -31,6 +31,7 @@ There are two ways to install this, via Package or Manually. If you don't have
 - [ ] Highlight sentences for generation later (will need to do some type of edit to the json structure so that even if you close out, they are still highlighted)
 - [ ] Find a way to do "multiple speakers" for dialogue in the book (might involve a new tab where users can select sentences to regenerate)
 - [ ] Auto sentence regeneration and comparison using whisper (https://github.com/maxbachmann/RapidFuzz/)
+ - [ ] Add a toggleable option for using rvc conversion

 ## Prerequisites:

@@ -83,7 +84,7 @@ venv\Scripts\activate
 ```

 4. Install pytorch using command below (recommended) or get from https://pytorch.org/get-started/locally/:

-```pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu117```
+```pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121```

 5. Install requirements:
@@ -95,6 +96,8 @@ venv\Scripts\activate

 ```pip install git+https://github.com/JarodMica/rvc-tts-pipeline.git@lightweight#egg=rvc_tts_pipe```

+```pip install git+https://github.com/JarodMica/tortoise_api.git```
+
 6. Download and install ffmpeg: https://ffmpeg.org/download.html
 - Place ffmpeg.exe and ffprobe.exe inside of audiobook_maker OR make sure they are in your environment path variable

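Steps 4 and 5 above pull in the CUDA 12.1 PyTorch wheels plus the rvc_tts_pipe and tortoise_api packages. A quick, illustrative sanity check (not part of the patch; it only exercises names taken from the commands and imports in this commit):

```
# Illustrative sanity check for the install steps above (not part of this patch).
import torch
from tortoise_api.tortoise_api import load_sentences, load_config, call_api  # JarodMica/tortoise_api
from rvc_pipe.rvc_infer import rvc_convert                                    # rvc-tts-pipeline

# These imports only resolve if the two git-based pip installs above succeeded.
print("CUDA available:", torch.cuda.is_available())  # should be True with the cu121 wheel
```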
diff --git a/audio_book_app_2_0.py b/audio_book_app.py
similarity index 99%
rename from audio_book_app_2_0.py
rename to audio_book_app.py
index fbf7ffc..aefa42a 100644
--- a/audio_book_app_2_0.py
+++ b/audio_book_app.py
@@ -30,8 +30,8 @@
 script_directory = os.path.dirname(os.path.realpath(__file__))
 sys.path.append(script_directory)

-from tortoise_api import Tortoise_API
-from tortoise_api import load_sentences
+from tortoise_api.tortoise_api import load_sentences, load_config, call_api
+
 from rvc_pipe.rvc_infer import rvc_convert

 class AudioGenerationWorker(QThread):
@@ -72,7 +72,6 @@ def __init__(self):

         self.init_ui()

-        self.tortoise = Tortoise_API()

     def init_ui(self):
         # Main Layout
@@ -785,7 +784,9 @@ def generate_audio_for_sentence_threaded(self, directory_path, progress_callback
             progress_callback(progress_percentage)\

     def generate_audio(self, sentence):
-        audio_path = self.tortoise.call_api(sentence)
+        tort_setup = os.path.join(script_dir, "tort.yaml")
+        parameters = load_config(tort_setup)
+        audio_path = call_api(sentence, **parameters)
         selected_voice = self.voice_models_combo.currentText()
         selected_index = self.voice_index_combo.currentText()
         voice_model_path = os.path.join(self.voice_folder_path, selected_voice)
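The generate_audio hunk above replaces the removed in-repo Tortoise_API class with the external tortoise_api package: generation settings are loaded from tort.yaml and passed straight into call_api. A minimal sketch of that call pattern (the sample sentence and the assumption that tort.yaml sits next to the script are illustrative):

```
# Sketch of the new call pattern introduced above; sentence and paths are illustrative.
import os
from tortoise_api.tortoise_api import load_config, call_api

script_directory = os.path.dirname(os.path.realpath(__file__))
parameters = load_config(os.path.join(script_directory, "tort.yaml"))  # dict of generation settings
audio_path = call_api("This is a simple test.", **parameters)          # path of the generated audio
print(audio_path)
```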
diff --git a/changelog.md b/changelog.md
index 7330e9b..928fe85 100644
--- a/changelog.md
+++ b/changelog.md
@@ -1,5 +1,9 @@
 # Changelog & thoughts

+# 6/9/2024
+Implemented a bug fix for the Tortoise TTS API call; lots of things in the pipeline still need a little refreshing.
+- The package version is not done yet.
+
 # 10/17/2023
 Bug fixes for next patch
 - Fixed hardcoded path in lightweight rvc package under configs.py for nvidia cards under 4GB
diff --git a/text_test1.txt b/text_test1.txt
index 66579c4..a8f4a5c 100644
--- a/text_test1.txt
+++ b/text_test1.txt
@@ -1,61 +1,2 @@
----- Test 1 ----
-This is a simple test. It should work without any issues.
--- Expected Output --
-["This is a simple test.", "It should work without any issues."]
-
----- Test 2 ----
-Although I went to the store, I forgot to buy milk. Next time, I’ll make a list.
--- Expected Output --
-["Although I went to the store, I forgot to buy milk.", "Next time, I’ll make a list."]
-
----- Test 3 ----
-Hello World!! What's happening?? #excited.
--- Expected Output --
-["Hello World!!", "What's happening??", "#excited."]
-
----- Test 4 ----
-This is a weird case.. It happens sometimes..
--- Expected Output --
-["This is a weird case.", "It happens sometimes."]
-
----- Test 5 ----
-I went to the store, bought milk. Then, went to the park, enjoyed the day.
--- Expected Output --
-["I went to the store, bought milk.", "Then, went to the park, enjoyed the day."]
-
----- Test 6 ----
-
--- Expected Output --
-[]
-
----- Test 7 ----
-###!!!
--- Expected Output --
-[]
-
----- Test 8 ----
- This is a test.
-
-....?????##
-$$%^#$@
-!@#$!@%%
-@@@
-!!
-...
-....////\\][[]]
-
-It should return two sentences.
--- Expected Output --
-["This is a test.", "It should return two sentences."]
-
----- Test 9 ----
-Although I went to the store,
-I forgot to buy milk.
-Next time, I’ll make a list.
--- Expected Output --
-["Although I went to the store, I forgot to buy milk.", "Next time, I’ll make a list."]
-
----- Test 10 ----
-Is this real?? Or #fantasy... Caught in a landslide, no escape...
--- Expected Output --
-["Is this real??", "Or #fantasy.", "Caught in a landslide, no escape..."]
\ No newline at end of file
+These are the 5 BEST open source text to speech softwares that I've come across over the past year.
+This here is just a quick sample of my voice with a british accent, and this is how I actually sound.
\ No newline at end of file
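The deleted test file exercised the sentence splitting that used to live in tortoise_api.py (filter_paragraph, removed below): tokenize each line with nltk and keep only chunks containing at least one letter. A simplified sketch of that behavior, omitting the bracket stripping and abbreviation merging the full function performed; the sample input and the expected list come from the old Test 3:

```
# Simplified sketch of the removed filter_paragraph behavior (see the deletion below).
import nltk
nltk.download('punkt', quiet=True)

def split_keep_alpha(paragraph):
    kept = []
    for line in paragraph.split("\n"):
        for sentence in nltk.sent_tokenize(line.strip()):
            # Keep only chunks that contain at least one alphabetic character.
            if any(c.isalpha() for c in sentence):
                kept.append(sentence)
    return kept

print(split_keep_alpha("Hello World!! What's happening?? #excited."))
# Old Test 3 expected: ["Hello World!!", "What's happening??", "#excited."]
```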
diff --git a/tortoise_api.py b/tortoise_api.py
deleted file mode 100644
index 98aff78..0000000
--- a/tortoise_api.py
+++ /dev/null
@@ -1,208 +0,0 @@
-import requests
-import concurrent.futures
-from queue import Queue
-import threading
-import os
-import sounddevice as sd
-import soundfile as sf
-import yaml
-import re
-
-class Tortoise_API:
-    '''
-    API calls to the tortoise GUI using requests. Must have an open instance of
-    tortoise TTS GUI running or else nothing will happen. For most cases, to use this
-    you need to use filter_paragraph() to splice text into a list of sentences, then
-    feed that list 1-by-1 into call_api. The idea is to speed up the process so that you can
-    generate audio while audio is being spoken
-    '''
-    def __init__(self):
-        # Actually only necessary if you're using run(), could clean up code later
-        self.audio_queue = Queue()
-        self.free_slots = Queue()
-        self.semaphore = threading.Semaphore(1)
-
-    def call_api(self, sentence, is_queue=False):
-        '''
-        Makes a request to the Tortoise TTS GUI. Relies on tort.yaml, so make sure it's set-up
-
-        Args:
-            sentence (str) : Text to be converted to speech
-            is_queue (bool) : Only set to True if using as standalone script. Uses built in queue
-                              system to queue up 6 samples of audio to be read out loud.
-
-        Returns:
-            audio_path (str) : Path of the audio to be played
-        '''
-        tort_conf = load_config()
-        max_retries = 5
-
-        for attempt in range(max_retries):
-            for port in range(7860, 7866):
-                try:
-                    url = f"http://127.0.0.1:{port}/run/generate"
-                    print(f"Calling API with sentence: <{sentence}>")
-                    response = requests.post(url, json={
-                        "data": [
-                            f"{sentence}", #prompt
-                            tort_conf['delimiter'], #delimter
-                            tort_conf['emotion'], #emotion
-                            tort_conf['custom_emotion'], #custom emotion
-                            tort_conf['voice_name'], #voice name
-                            {"name": tort_conf['audio_file'],"data":"data:audio/wav;base64,UklGRiQAAABXQVZFZm10IBAAAAABAAEARKwAAIhYAQACABAAZGF0YQAAAAA="},
-                            tort_conf['voice_chunks'], #voice chunks
-                            tort_conf['candidates'], #candidates
-                            tort_conf['seed'], #seed
-                            tort_conf['samples'], #samples
-                            tort_conf['iterations'], #iterations
-                            tort_conf['temperature'], #temp
-                            tort_conf['diffusion_sampler'],
-                            tort_conf['pause_size'],
-                            tort_conf['cvvp_weight'],
-                            tort_conf['top_p'],
-                            tort_conf['diffusion_temp'],
-                            tort_conf['length_penalty'],
-                            tort_conf['repetition_penalty'],
-                            tort_conf['conditioning_free_k'],
-                            tort_conf['experimental_flags'],
-                            False,
-                            False,
-                        ]
-                    }).json()
-
-                    audio_path = response['data'][2]['choices'][0]
-                    print(f"API response received with audio path: {audio_path}")
-
-                    if is_queue:
-                        slot = self.free_slots.get()
-                        self.audio_queue.put((audio_path, slot))
-                    else:
-                        return audio_path
-
-                except requests.ConnectionError:
-                    print(f"Failed to connect to port {port}, trying next port")
-                except requests.Timeout:
-                    print(f"Request timed out on port {port}, trying next port")
-                except requests.RequestException as e: # Catch any other requests exceptions
-                    print(f"An error occurred on port {port}: {e}")
-                except Exception as e: # Catch non-requests exceptions
-                    print(f"An unexpected error occurred: {e}")
-
-            print(f"Attempt {attempt + 1} failed, retrying...") # Log the retry attempt
-            import time
-            # time.sleep(1) # Optional: add a delay between retries
-
-        print(f"Failed to connect after {max_retries} attempts")
-        return None
-
-
-
-    def play_audio_from_queue(self):
-        while True:
-            audio_file, slot = self.audio_queue.get()
-            if audio_file == "stop":
-                self.audio_queue.task_done()
-                break
-            data, sample_rate = sf.read(audio_file)
-            sd.play(data, sample_rate)
-            sd.wait()
-            os.remove(audio_file)
-            self.audio_queue.task_done()
-            self.free_slots.put(slot)
-
-    # Usually only ran if using this as a standalone script, most likely you won't be
-    def run(self, sentences):
-        with concurrent.futures.ThreadPoolExecutor() as executor:
-            for i in range(1, 6):
-                self.free_slots.put(i)
-
-            audio_thread = threading.Thread(target=self.play_audio_from_queue)
-            audio_thread.start()
-
-            # Wait for each API call to complete before starting the next one
-            for sentence in sentences:
-                future = executor.submit(self.call_api, sentence)
-                concurrent.futures.wait([future])
-
-            self.audio_queue.join()
-            self.audio_queue.put(("stop", None))
-
-def load_config():
-    current_dir = os.path.dirname(os.path.abspath(__file__))
-    yaml_file = os.path.join(current_dir, "tort.yaml")
-
-    with open(yaml_file, "r") as file:
-        tort_conf = yaml.safe_load(file)
-
-    return tort_conf
-
-import re
-
-def filter_paragraph(paragraph):
-
-    import nltk
-    if not os.path.exists('./assets'):
-        os.makedirs('./assets')
-    nltk.download('punkt', download_dir='./assets')
-    nltk.data.path.append('./assets')
-
-    # Split the paragraph into lines and process each line separately
-    lines = paragraph.split("\n")
-
-    filtered_list = []
-    for line in lines:
-        # Tokenize sentences in the current line using nltk
-        sentences = nltk.sent_tokenize(line.strip())
-
-        # Helper function to check if a sentence ends with abbreviation followed by lowercase word
-        def ends_with_abbreviation(sentence):
-            return re.search(r'\b[A-Z](?:\.[A-Z])+[\.]?$', sentence)
-
-        i = 0
-        while i < len(sentences):
-            # Remove square brackets and strip the sentence
-            line_content = re.sub(r'\[|\]', '', sentences[i]).strip()
-
-            # Check for abbreviation and merge with the next sentence if required
-            if i < len(sentences) - 1 and ends_with_abbreviation(line_content) and sentences[i+1][0].islower():
-                line_content += " " + sentences[i+1]
-                i += 1 # Skip next sentence
-
-            # Only append lines that contain at least one alphabetic character
-            if line_content and any(c.isalpha() for c in line_content):
-                filtered_list.append(line_content)
-
-            i += 1
-
-    return filtered_list
-
-
-
-def load_sentences(file_path) -> list:
-    '''
-    Utility function for toroise to load sentences from a text file path
-
-    Args:
-        file_path(str) : path to some text file
-
-    '''
-    with open(file_path, 'r', encoding='utf-8') as file:
-        content = file.read()
-    paragraphs = content.split('\n\n') # Split content into paragraphs
-    filtered_sentences = []
-    for paragraph in paragraphs:
-        filtered_list = filter_paragraph(paragraph)
-        filtered_sentences.extend(filtered_list)
-    return filtered_sentences
-
-def read_paragraph_from_file(file_path):
-    with open(file_path, 'r') as file:
-        paragraph = file.read()
-    return paragraph
-
-if __name__ == "__main__":
-    file_path = "story.txt"
-    paragraph = read_paragraph_from_file(file_path)
-    filtered_paragraph = filter_paragraph(paragraph)
-    player = Tortoise_API()
-    player.run(filtered_paragraph)
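For reference, the removed call_api pulled every generation setting from tort.yaml; the keys it read are exactly those indexed from tort_conf in the request body above. A sketch of a matching settings dict, with placeholder values for illustration only (not recommended defaults):

```
# Keys the removed call_api read from tort.yaml; the values below are placeholders only.
tort_conf = {
    "delimiter": "",
    "emotion": "None",
    "custom_emotion": "",
    "voice_name": "random",
    "audio_file": "reference.wav",
    "voice_chunks": 0,
    "candidates": 1,
    "seed": 0,
    "samples": 16,
    "iterations": 30,
    "temperature": 0.8,
    "diffusion_sampler": "DDIM",
    "pause_size": 8,
    "cvvp_weight": 0.0,
    "top_p": 0.8,
    "diffusion_temp": 1.0,
    "length_penalty": 1.0,
    "repetition_penalty": 2.0,
    "conditioning_free_k": 2.0,
    "experimental_flags": [],
}
```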