Skip to content

Commit

Permalink
feat: support get model languages
Browse files Browse the repository at this point in the history
  • Loading branch information
aiwantaozi committed Nov 28, 2024
1 parent 9337460 commit ec57b83
Show file tree
Hide file tree
Showing 3 changed files with 127 additions and 0 deletions.
108 changes: 108 additions & 0 deletions vox_box/backends/stt/faster_whisper.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,10 +61,13 @@ def load(self):
compute_type=compute_type,
)

self._languages = self._get_languages()

self._model_dict = create_model_dict(
self._cfg.model,
task_type=TaskTypeEnum.STT,
backend_framework=BackendEnum.FASTER_WHISPER,
languages=self._languages,
)
self.model_load = True
return self
Expand Down Expand Up @@ -146,3 +149,108 @@ def transcribe(
response["segments"] = timestamps

return response

def _get_languages(self) -> List[Dict]:
    """Return the language entries exposed for this backend.

    The result is a list of single-item dicts, each mapping a short
    language code to its lowercase English name. The first entry is
    the "auto" placeholder; the order of the table below is preserved.
    """
    code_name_pairs = (
        ("auto", "auto"),
        ("en", "english"),
        ("zh", "chinese"),
        ("de", "german"),
        ("es", "spanish"),
        ("ru", "russian"),
        ("ko", "korean"),
        ("fr", "french"),
        ("ja", "japanese"),
        ("pt", "portuguese"),
        ("pl", "polish"),
        ("ca", "catalan"),
        ("nl", "dutch"),
        ("it", "italian"),
        ("th", "thai"),
        ("tr", "turkish"),
        ("ar", "arabic"),
        ("sv", "swedish"),
        ("id", "indonesian"),
        ("hi", "hindi"),
        ("fi", "finnish"),
        ("vi", "vietnamese"),
        ("he", "hebrew"),
        ("uk", "ukrainian"),
        ("el", "greek"),
        ("ms", "malay"),
        ("cs", "czech"),
        ("ro", "romanian"),
        ("da", "danish"),
        ("hu", "hungarian"),
        ("ta", "tamil"),
        ("no", "norwegian"),
        ("ur", "urdu"),
        ("hr", "croatian"),
        ("bg", "bulgarian"),
        ("lt", "lithuanian"),
        ("la", "latin"),
        ("mi", "maori"),
        ("ml", "malayalam"),
        ("cy", "welsh"),
        ("sk", "slovak"),
        ("te", "telugu"),
        ("fa", "persian"),
        ("lv", "latvian"),
        ("bn", "bengali"),
        ("sr", "serbian"),
        ("az", "azerbaijani"),
        ("sl", "slovenian"),
        ("kn", "kannada"),
        ("et", "estonian"),
        ("mk", "macedonian"),
        ("br", "breton"),
        ("eu", "basque"),
        ("is", "icelandic"),
        ("hy", "armenian"),
        ("ne", "nepali"),
        ("mn", "mongolian"),
        ("bs", "bosnian"),
        ("kk", "kazakh"),
        ("sq", "albanian"),
        ("sw", "swahili"),
        ("gl", "galician"),
        ("mr", "marathi"),
        ("pa", "punjabi"),
        ("si", "sinhala"),
        ("km", "khmer"),
        ("sn", "shona"),
        ("yo", "yoruba"),
        ("so", "somali"),
        ("af", "afrikaans"),
        ("oc", "occitan"),
        ("ka", "georgian"),
        ("be", "belarusian"),
        ("tg", "tajik"),
        ("sd", "sindhi"),
        ("gu", "gujarati"),
        ("am", "amharic"),
        ("yi", "yiddish"),
        ("lo", "lao"),
        ("uz", "uzbek"),
        ("fo", "faroese"),
        ("ht", "haitian creole"),
        ("ps", "pashto"),
        ("tk", "turkmen"),
        ("nn", "nynorsk"),
        ("mt", "maltese"),
        ("sa", "sanskrit"),
        ("lb", "luxembourgish"),
        ("my", "myanmar"),
        ("bo", "tibetan"),
        ("tl", "tagalog"),
        ("mg", "malagasy"),
        ("as", "assamese"),
        ("tt", "tatar"),
        ("haw", "hawaiian"),
        ("ln", "lingala"),
        ("ha", "hausa"),
        ("ba", "bashkir"),
        ("jw", "javanese"),
        ("su", "sundanese"),
        ("yue", "cantonese"),
    )
    # One fresh dict per entry, matching the original list-of-dicts shape.
    return [{code: name} for code, name in code_name_pairs]
9 changes: 9 additions & 0 deletions vox_box/backends/stt/funasr.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,14 @@ def load(self):
log_level=self._log_level,
disable_update=True,
)

self._languages = self._get_languages()

self._model_dict = create_model_dict(
self._cfg.model,
task_type=TaskTypeEnum.STT,
backend_framework=BackendEnum.FUN_ASR,
languages=self._languages,
)
self._model_load = True
return self
Expand Down Expand Up @@ -106,3 +110,8 @@ def transcribe(

text = rich_transcription_postprocess(res[0]["text"])
return text

def _get_languages(self) -> List[Dict]:
    """Return the language entries exposed for this backend.

    Only the "auto" placeholder is listed, as a single-item dict inside
    a one-element list.
    """
    auto_entry = {"auto": "auto"}
    return [auto_entry]
10 changes: 10 additions & 0 deletions vox_box/server/routers.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,16 @@ async def get_model_info(model_id: str):
return model_instance.model_info()


@router.get("/v1/languages")
async def get_languages():
    """Return the languages listed in the current model's info.

    Responds with an empty dict when no model instance is available.
    Otherwise responds with a dict holding a single "languages" key,
    which falls back to an empty list if the model info has no such
    entry.
    """
    instance = get_model_instance()
    if instance is not None:
        info = instance.model_info()
        return {"languages": info.get("languages", [])}
    return {}


@router.get("/v1/voices")
async def get_voice():
model_instance = get_model_instance()
Expand Down

0 comments on commit ec57b83

Please sign in to comment.