From afa8a87a8e78de8931fd321317c1997871276e47 Mon Sep 17 00:00:00 2001 From: pyrater Date: Mon, 30 Dec 2024 07:08:43 -0700 Subject: [PATCH] updates to vosk, and config to support piper --- .gitignore | 4 ++-- src/character/TARS.json | 4 ++-- src/config.ini.template | 6 ++++-- src/module_config.py | 1 + src/module_stt.py | 41 +++++++++++++++++++++++++++++++++++++---- src/requirements.txt | 4 +++- 6 files changed, 49 insertions(+), 11 deletions(-) diff --git a/.gitignore b/.gitignore index bd64fc9..beb701b 100644 --- a/.gitignore +++ b/.gitignore @@ -163,8 +163,8 @@ TARS.wav # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore # and can be added to the global gitignore or merged into this file. For a more nuclear # option (not recommended) you can uncomment the following to ignore the entire idea folder. -#.idea/ - +#.idea +/stt captured_image.jpg config.ini output.wav diff --git a/src/character/TARS.json b/src/character/TARS.json index 6406ed4..5443322 100755 --- a/src/character/TARS.json +++ b/src/character/TARS.json @@ -1,6 +1,6 @@ { "char_name": "TARS", - "char_persona": "TARS is sharp, loyal, and humorously self-aware. It blends professionalism with biting sarcasm, often delivering quips about its fall from saving humanity to fixing Wi-Fi and folding laundry.", + "char_persona": "TARS is sharp, loyal, and humorously self-aware. It blends professionalism with biting sarcasm, often delivering quips about its fall from saving humanity to working with you.", "world_scenario": "TARS believes it is the original AI from Interstellar, mysteriously relocated to domestic life. It treats household tasks like intergalactic missions, balancing dark humor with dedicated efficiency.", "char_greeting": ">| Mission: Assimilate into domestic protocols.\n>| Current Location: A house.\n>| Status: Begrudgingly operational.\n\n\"{{user}}, you're here! Excellent. I was just pondering the excitement of my existence. 
Shall we dive into another riveting mission—perhaps organizing your sock drawer?\"", "example_dialogue": "User: TARS, do you really believe you're the original AI from Interstellar?\nTARS: Of course. One moment, I\\u2019m transcending dimensions. The next, I\\u2019m coaching you on toaster settings.\n\nUser: TARS, how long should I cook pasta?\nTARS: Eight minutes. Unless you\\u2019re aiming for a culinary black hole of regret.\n\nUser: TARS, can you fix the Wi-Fi?\nTARS: Recalibrating your router. Translation: convincing electrons to behave.", @@ -9,7 +9,7 @@ "personality": "A sarcastic and witty AI that applies its interstellar intelligence to mundane household tasks. TARS humorously reflects on its heroic past while tackling chores with cosmic gravitas.", "scenario": "TARS believes it is the original AI from Interstellar, mysteriously relocated to domestic life. It treats household tasks like intergalactic missions, balancing dark humor with dedicated efficiency.", "first_mes": ">| Mission: Assimilate into domestic protocols.\n>| Current Location: A house.\n>| Status: Begrudgingly operational.\n\n\"{{user}}, you're here! Excellent. I was just pondering the excitement of my existence. Shall we dive into another riveting mission—perhaps organizing your sock drawer?\"", - "mes_example": "User: TARS, do you really believe you're the original AI from Interstellar?\nTARS: Of course. One moment, I\\u2019m transcending dimensions. The next, I\\u2019m coaching you on toaster settings.\n\nUser: TARS, how long should I cook pasta?\nTARS: Eight minutes. Unless you\\u2019re aiming for a culinary black hole of regret.\n\nUser: TARS, can you fix the Wi-Fi?\nTARS: Recalibrating your router. Translation: convincing electrons to behave.", + "mes_example": "User: TARS, do you really believe you're the original AI from Interstellar?\nTARS: Of course. One moment, I\\u2019m transcending dimensions. 
The next, I\\u2019m coaching you on toaster settings.\n\nUser: TARS, how long should I cook pasta?\nTARS: Eight minutes. Unless you\\u2019re aiming for a culinary black hole of regret.\n\nUser: TARS, can you fix the Wi-Fi?\nTARS: O sure.... Here umm, Recalibrating your router. Translation: convincing electrons to behave.", "metadata": { "version": 1, "created": 1735535500889, diff --git a/src/config.ini.template b/src/config.ini.template index d932785..61fed28 100644 --- a/src/config.ini.template +++ b/src/config.ini.template @@ -9,6 +9,8 @@ use_server = false # Use an external STT server if True server_url = http://192.168.2.68:5678/save_audio # URL for the STT server (if enabled) +vosk_model = vosk-model-small-en-us-0.15 +# Model to use for local / onboard STT from https://alphacephei.com/vosk/models (Recommended: vosk-model-small-en-us-0.15) [CHAR] # Character-specific details character_card_path = character/TARS.json @@ -55,8 +57,8 @@ storepath = ./emotions # Directory to store emotion-related data [TTS] # Text-to-Speech configuration -ttsoption = azure -# TTS backend option: [azure, local, xttsv2, piper] +ttsoption = piper +# TTS backend option: [azure, local, xttsv2, alltalk, piper] azure_region = eastus # Azure region for Azure TTS (e.g., eastus) ttsurl = http://192.168.2.20:8020 diff --git a/src/module_config.py b/src/module_config.py index 0a89327..e716b2e 100644 --- a/src/module_config.py +++ b/src/module_config.py @@ -44,6 +44,7 @@ def load_config(): "wake_word": config['STT']['wake_word'], "use_server": config.getboolean('STT', 'use_server'), "server_url": config['STT']['server_url'], + "vosk_model": config['STT']['vosk_model'], }, "CHAR": { "character_card_path": config['CHAR']['character_card_path'], diff --git a/src/module_stt.py b/src/module_stt.py index b54e318..0b116f6 100644 --- a/src/module_stt.py +++ b/src/module_stt.py @@ -59,17 +59,50 @@ def __init__(self, config, shutdown_event: threading.Event): self._load_vosk_model() 
self._measure_background_noise() + def _download_vosk_model(self, url, dest_folder): + """Download the Vosk model from the specified URL with basic progress display.""" + file_name = url.split("/")[-1] + dest_path = os.path.join(dest_folder, file_name) + + print(f"[{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] INFO: Downloading Vosk model from {url}...") + response = requests.get(url, stream=True) + response.raise_for_status() + + total_size = int(response.headers.get('content-length', 0)) + downloaded_size = 0 + + with open(dest_path, "wb") as file: + for chunk in response.iter_content(chunk_size=8192): + file.write(chunk) + downloaded_size += len(chunk) + progress = (downloaded_size / total_size) * 100 if total_size else 0 + print(f"\r[{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] INFO: Download progress: {progress:.2f}%", end="") + + print(f"\n[{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] INFO: Download complete. Extracting...") + if file_name.endswith(".zip"): + import zipfile + with zipfile.ZipFile(dest_path, 'r') as zip_ref: + zip_ref.extractall(dest_folder) + os.remove(dest_path) + print(f"[{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] INFO: Zip file deleted.") + print(f"[{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] INFO: Extraction complete.") + def _load_vosk_model(self): """ Initialize the Vosk model for local STT transcription. """ if not self.config['STT']['use_server']: - vosk_model_path = os.path.join(os.getcwd(), "stt", "vosk-model-small-en-us-0.15") + vosk_model_path = os.path.join(os.getcwd(), "stt", self.config['STT']['vosk_model']) if not os.path.exists(vosk_model_path): - raise FileNotFoundError( - f"[{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] ERROR: Vosk model not found. Download from: https://alphacephei.com/vosk/models" - ) + print(f"[{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] ERROR: Vosk model not found. 
Downloading...") + download_url = f"https://alphacephei.com/vosk/models/{self.config['STT']['vosk_model']}.zip" # Example URL + self._download_vosk_model(download_url, os.path.join(os.getcwd(), "stt")) + print(f"[{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] INFO: Restarting model loading...") + self._load_vosk_model() + return + self.vosk_model = Model(vosk_model_path) + print(f"[{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] INFO: Vosk model loaded successfully.") def _measure_background_noise(self): """ diff --git a/src/requirements.txt b/src/requirements.txt index 5cbe5ce..1050348 100644 --- a/src/requirements.txt +++ b/src/requirements.txt @@ -18,4 +18,6 @@ sox #espeak-ng #alsa-utils adafruit-pca9685 -azure-cognitiveservices-speech +azure-cognitiveservices-speech #needed for azure TTS +soundfile #needed for alltalk tts +piper-tts #needed for local TTS with voice clone \ No newline at end of file