refactor: update bark voice

gpustack · Nov 28, 2024 · f90b099 · f90b099
1 parent bd91172
commit f90b099
Show file tree

Hide file tree

Showing 2 changed files with 15 additions and 5 deletions.
diff --git a/vox_box/backends/tts/bark.py b/vox_box/backends/tts/bark.py
@@ -73,7 +73,7 @@ def speech(
             raise ValueError(f"Voice {voice} not supported")
 
         inputs = self._processor(input, voice_preset=voice).to(self._cfg.device)
-        audio_array = self._model.generate(**inputs)
+        audio_array = self._model.generate(**inputs, history_prompt=voice)
         audio_array = audio_array.cpu().numpy().squeeze()
         sample_rate = self._model.generation_config.sample_rate
 
@@ -85,11 +85,16 @@ def speech(
             return output_file_path
 
     def _get_voices(self) -> List[str]:
-        voices = []
+        voices_v1 = []
+        voices_v2 = []
         if self._speaker_json is not None:
             for key in self._speaker_json.keys():
                 if key == "repo_or_path":
                     continue
-                voices.append(key)
+                if "v2" in key:
+                    voices_v2.append(key)
+                else:
+                    voices_v1.append(key)
 
-            return voices
+        voices = voices_v2 or voices_v1
+        return sorted(voices)
diff --git a/vox_box/utils/audio.py b/vox_box/utils/audio.py
@@ -1,3 +1,4 @@
+import shutil
 import tempfile
 import av
 
@@ -30,10 +31,14 @@ def convert(
     with tempfile.NamedTemporaryFile(
         suffix=f"{suffix}", delete=False
     ) as output_temp_file:
+
         output_file_path = output_temp_file.name
+        if response_format == "wav" and speed == 1:
+            shutil.copy(input_file_path, output_file_path)
+            return output_file_path
+
         input_container = av.open(input_file_path)
         input_stream = input_container.streams.audio[0]
-
         if response_format == "pcm":
             convert_to_pcm(input_stream, output_file_path, speed)
         else: