Skip to content

Commit

Permalink
refactor: update bark voice
Browse files Browse the repository at this point in the history
  • Loading branch information
aiwantaozi committed Nov 28, 2024
1 parent bd91172 commit f90b099
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 5 deletions.
13 changes: 9 additions & 4 deletions vox_box/backends/tts/bark.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ def speech(
raise ValueError(f"Voice {voice} not supported")

         inputs = self._processor(input, voice_preset=voice).to(self._cfg.device)
-        audio_array = self._model.generate(**inputs)
+        audio_array = self._model.generate(**inputs, history_prompt=voice)
         audio_array = audio_array.cpu().numpy().squeeze()
         sample_rate = self._model.generation_config.sample_rate

Expand All @@ -85,11 +85,16 @@ def speech(
return output_file_path

     def _get_voices(self) -> List[str]:
-        voices = []
+        voices_v1 = []
+        voices_v2 = []
         if self._speaker_json is not None:
             for key in self._speaker_json.keys():
                 if key == "repo_or_path":
                     continue
-                voices.append(key)
+                if "v2" in key:
+                    voices_v2.append(key)
+                else:
+                    voices_v1.append(key)

-        return voices
+        voices = voices_v2 or voices_v1
+        return sorted(voices)
7 changes: 6 additions & 1 deletion vox_box/utils/audio.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import shutil
import tempfile
import av

Expand Down Expand Up @@ -30,10 +31,14 @@ def convert(
with tempfile.NamedTemporaryFile(
suffix=f"{suffix}", delete=False
) as output_temp_file:

output_file_path = output_temp_file.name
if response_format == "wav" and speed == 1:
shutil.copy(input_file_path, output_file_path)
return output_file_path

input_container = av.open(input_file_path)
input_stream = input_container.streams.audio[0]

if response_format == "pcm":
convert_to_pcm(input_stream, output_file_path, speed)
else:
Expand Down

0 comments on commit f90b099

Please sign in to comment.