Skip to content

Commit

Permalink
updates to vosk, and config to support piper
Browse files Browse the repository at this point in the history
  • Loading branch information
pyrater committed Dec 30, 2024
1 parent 0c07b56 commit afa8a87
Show file tree
Hide file tree
Showing 6 changed files with 49 additions and 11 deletions.
4 changes: 2 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -163,8 +163,8 @@ TARS.wav
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

#.idea
/stt
captured_image.jpg
config.ini
output.wav
4 changes: 2 additions & 2 deletions src/character/TARS.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"char_name": "TARS",
"char_persona": "TARS is sharp, loyal, and humorously self-aware. It blends professionalism with biting sarcasm, often delivering quips about its fall from saving humanity to fixing Wi-Fi and folding laundry.",
"char_persona": "TARS is sharp, loyal, and humorously self-aware. It blends professionalism with biting sarcasm, often delivering quips about its fall from saving humanity to working with you.",
"world_scenario": "TARS believes it is the original AI from Interstellar, mysteriously relocated to domestic life. It treats household tasks like intergalactic missions, balancing dark humor with dedicated efficiency.",
"char_greeting": ">| Mission: Assimilate into domestic protocols.\n>| Current Location: A house.\n>| Status: Begrudgingly operational.\n\n\"{{user}}, you're here! Excellent. I was just pondering the excitement of my existence. Shall we dive into another riveting mission—perhaps organizing your sock drawer?\"",
"example_dialogue": "User: TARS, do you really believe you're the original AI from Interstellar?\nTARS: Of course. One moment, I\\u2019m transcending dimensions. The next, I\\u2019m coaching you on toaster settings.\n\nUser: TARS, how long should I cook pasta?\nTARS: Eight minutes. Unless you\\u2019re aiming for a culinary black hole of regret.\n\nUser: TARS, can you fix the Wi-Fi?\nTARS: Recalibrating your router. Translation: convincing electrons to behave.",
Expand All @@ -9,7 +9,7 @@
"personality": "A sarcastic and witty AI that applies its interstellar intelligence to mundane household tasks. TARS humorously reflects on its heroic past while tackling chores with cosmic gravitas.",
"scenario": "TARS believes it is the original AI from Interstellar, mysteriously relocated to domestic life. It treats household tasks like intergalactic missions, balancing dark humor with dedicated efficiency.",
"first_mes": ">| Mission: Assimilate into domestic protocols.\n>| Current Location: A house.\n>| Status: Begrudgingly operational.\n\n\"{{user}}, you're here! Excellent. I was just pondering the excitement of my existence. Shall we dive into another riveting mission—perhaps organizing your sock drawer?\"",
"mes_example": "User: TARS, do you really believe you're the original AI from Interstellar?\nTARS: Of course. One moment, I\\u2019m transcending dimensions. The next, I\\u2019m coaching you on toaster settings.\n\nUser: TARS, how long should I cook pasta?\nTARS: Eight minutes. Unless you\\u2019re aiming for a culinary black hole of regret.\n\nUser: TARS, can you fix the Wi-Fi?\nTARS: Recalibrating your router. Translation: convincing electrons to behave.",
"mes_example": "User: TARS, do you really believe you're the original AI from Interstellar?\nTARS: Of course. One moment, I\\u2019m transcending dimensions. The next, I\\u2019m coaching you on toaster settings.\n\nUser: TARS, how long should I cook pasta?\nTARS: Eight minutes. Unless you\\u2019re aiming for a culinary black hole of regret.\n\nUser: TARS, can you fix the Wi-Fi?\nTARS: O sure.... Here umm, Recalibrating your router. Translation: convincing electrons to behave.",
"metadata": {
"version": 1,
"created": 1735535500889,
Expand Down
6 changes: 4 additions & 2 deletions src/config.ini.template
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ use_server = false
# Use an external STT server if True
server_url = http://192.168.2.68:5678/save_audio
# URL for the STT server (if enabled)
vosk_model = vosk-model-small-en-us-0.15
# Vosk model to use for local / onboard STT, from https://alphacephei.com/vosk/models (Recommended: vosk-model-small-en-us-0.15)

[CHAR] # Character-specific details
character_card_path = character/TARS.json
Expand Down Expand Up @@ -55,8 +57,8 @@ storepath = ./emotions
# Directory to store emotion-related data

[TTS] # Text-to-Speech configuration
ttsoption = azure
# TTS backend option: [azure, local, xttsv2, piper]
ttsoption = piper
# TTS backend option: [azure, local, xttsv2, alltalk, piper]
azure_region = eastus
# Azure region for Azure TTS (e.g., eastus)
ttsurl = http://192.168.2.20:8020
Expand Down
1 change: 1 addition & 0 deletions src/module_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ def load_config():
"wake_word": config['STT']['wake_word'],
"use_server": config.getboolean('STT', 'use_server'),
"server_url": config['STT']['server_url'],
"vosk_model": config['STT']['vosk_model'],
},
"CHAR": {
"character_card_path": config['CHAR']['character_card_path'],
Expand Down
41 changes: 37 additions & 4 deletions src/module_stt.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,17 +59,50 @@ def __init__(self, config, shutdown_event: threading.Event):
self._load_vosk_model()
self._measure_background_noise()

def _download_vosk_model(self, url, dest_folder):
    """Download the Vosk model from the specified URL with basic progress display.

    The file is streamed to ``dest_folder`` under the last path component of
    ``url``. If that name ends in ``.zip`` the archive is extracted into
    ``dest_folder`` and then deleted; any other file is left where it was
    downloaded.

    Args:
        url: Direct download URL of the model file.
        dest_folder: Directory that receives the download (created if missing).

    Raises:
        Whatever ``requests`` raises for connection failures, timeouts or an
        error HTTP status (via ``raise_for_status``), ``OSError`` for
        filesystem problems, and ``zipfile.BadZipFile`` if the downloaded
        archive cannot be read.
    """
    import zipfile

    def stamp():
        # Timestamp prefix shared by every log line in this method.
        return datetime.now().strftime('%Y-%m-%d %H:%M:%S')

    file_name = url.split("/")[-1]
    dest_path = os.path.join(dest_folder, file_name)

    # The destination may not exist yet (e.g. a fresh checkout), and open()
    # below would fail with FileNotFoundError without it.
    os.makedirs(dest_folder, exist_ok=True)

    print(f"[{stamp()}] INFO: Downloading Vosk model from {url}...")
    # (connect, read) timeouts so a stalled server cannot hang start-up forever;
    # the context manager releases the connection even when an error occurs.
    with requests.get(url, stream=True, timeout=(10, 60)) as response:
        response.raise_for_status()

        # content-length may be absent; 0 disables the percentage calculation.
        total_size = int(response.headers.get('content-length', 0))
        downloaded_size = 0

        try:
            with open(dest_path, "wb") as file:
                for chunk in response.iter_content(chunk_size=8192):
                    if not chunk:
                        # Skip empty keep-alive chunks.
                        continue
                    file.write(chunk)
                    downloaded_size += len(chunk)
                    progress = (downloaded_size / total_size) * 100 if total_size else 0
                    print(f"\r[{stamp()}] INFO: Download progress: {progress:.2f}%", end="")
        except BaseException:
            # Do not leave a truncated file behind for a later run to trip over.
            if os.path.exists(dest_path):
                os.remove(dest_path)
            raise

    print(f"\n[{stamp()}] INFO: Download complete. Extracting...")
    if file_name.endswith(".zip"):
        try:
            with zipfile.ZipFile(dest_path, 'r') as zip_ref:
                zip_ref.extractall(dest_folder)
        finally:
            # Remove the archive whether or not extraction succeeded, so a
            # corrupt download is not kept around.
            os.remove(dest_path)
        print(f"[{stamp()}] INFO: Zip file deleted.")
    print(f"[{stamp()}] INFO: Extraction complete.")

def _load_vosk_model(self):
    """
    Initialize the Vosk model for local STT transcription.

    Does nothing when ``config['STT']['use_server']`` is truthy. Otherwise the
    model named by ``config['STT']['vosk_model']`` is looked up under
    ``<cwd>/stt``; if it is missing, one download is attempted through
    ``_download_vosk_model`` before loading.

    Raises:
        FileNotFoundError: If the model directory is still missing after the
            download attempt (for example when the archive unpacks to a
            different folder name than the configured one).
    """
    if self.config['STT']['use_server']:
        return

    model_name = self.config['STT']['vosk_model']
    stt_dir = os.path.join(os.getcwd(), "stt")
    vosk_model_path = os.path.join(stt_dir, model_name)

    if not os.path.exists(vosk_model_path):
        print(f"[{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] ERROR: Vosk model not found. Downloading...")
        download_url = f"https://alphacephei.com/vosk/models/{model_name}.zip"  # Example URL
        self._download_vosk_model(download_url, stt_dir)
        print(f"[{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] INFO: Restarting model loading...")

        # Re-check once instead of recursing: an unbounded retry would
        # re-download the archive on every pass until RecursionError.
        if not os.path.exists(vosk_model_path):
            raise FileNotFoundError(
                f"[{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] ERROR: Vosk model not found at {vosk_model_path} after download. Download from: https://alphacephei.com/vosk/models"
            )

    self.vosk_model = Model(vosk_model_path)
    print(f"[{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] INFO: Vosk model loaded successfully.")

def _measure_background_noise(self):
"""
Expand Down
4 changes: 3 additions & 1 deletion src/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,6 @@ sox
#espeak-ng
#alsa-utils
adafruit-pca9685
azure-cognitiveservices-speech
azure-cognitiveservices-speech #needed for azure TTS
soundfile #needed for alltalk tts
piper-tts #needed for local TTS with voice clone

0 comments on commit afa8a87

Please sign in to comment.