diff --git a/vox_box/backends/stt/faster_whisper.py b/vox_box/backends/stt/faster_whisper.py index 2354397..c4fdedd 100644 --- a/vox_box/backends/stt/faster_whisper.py +++ b/vox_box/backends/stt/faster_whisper.py @@ -61,10 +61,13 @@ def load(self): compute_type=compute_type, ) + self._languages = self._get_languages() + self._model_dict = create_model_dict( self._cfg.model, task_type=TaskTypeEnum.STT, backend_framework=BackendEnum.FASTER_WHISPER, + languages=self._languages, ) self.model_load = True return self @@ -146,3 +149,108 @@ def transcribe( response["segments"] = timestamps return response + + def _get_languages(self) -> List[Dict]: + return [ + {"auto": "auto"}, + {"en": "english"}, + {"zh": "chinese"}, + {"de": "german"}, + {"es": "spanish"}, + {"ru": "russian"}, + {"ko": "korean"}, + {"fr": "french"}, + {"ja": "japanese"}, + {"pt": "portuguese"}, + {"pl": "polish"}, + {"ca": "catalan"}, + {"nl": "dutch"}, + {"it": "italian"}, + {"th": "thai"}, + {"tr": "turkish"}, + {"ar": "arabic"}, + {"sv": "swedish"}, + {"id": "indonesian"}, + {"hi": "hindi"}, + {"fi": "finnish"}, + {"vi": "vietnamese"}, + {"he": "hebrew"}, + {"uk": "ukrainian"}, + {"el": "greek"}, + {"ms": "malay"}, + {"cs": "czech"}, + {"ro": "romanian"}, + {"da": "danish"}, + {"hu": "hungarian"}, + {"ta": "tamil"}, + {"no": "norwegian"}, + {"ur": "urdu"}, + {"hr": "croatian"}, + {"bg": "bulgarian"}, + {"lt": "lithuanian"}, + {"la": "latin"}, + {"mi": "maori"}, + {"ml": "malayalam"}, + {"cy": "welsh"}, + {"sk": "slovak"}, + {"te": "telugu"}, + {"fa": "persian"}, + {"lv": "latvian"}, + {"bn": "bengali"}, + {"sr": "serbian"}, + {"az": "azerbaijani"}, + {"sl": "slovenian"}, + {"kn": "kannada"}, + {"et": "estonian"}, + {"mk": "macedonian"}, + {"br": "breton"}, + {"eu": "basque"}, + {"is": "icelandic"}, + {"hy": "armenian"}, + {"ne": "nepali"}, + {"mn": "mongolian"}, + {"bs": "bosnian"}, + {"kk": "kazakh"}, + {"sq": "albanian"}, + {"sw": "swahili"}, + {"gl": "galician"}, + {"mr": "marathi"}, + {"pa": "punjabi"}, + {"si": "sinhala"}, + {"km": "khmer"}, + {"sn": "shona"}, + {"yo": "yoruba"}, + {"so": "somali"}, + {"af": "afrikaans"}, + {"oc": "occitan"}, + {"ka": "georgian"}, + {"be": "belarusian"}, + {"tg": "tajik"}, + {"sd": "sindhi"}, + {"gu": "gujarati"}, + {"am": "amharic"}, + {"yi": "yiddish"}, + {"lo": "lao"}, + {"uz": "uzbek"}, + {"fo": "faroese"}, + {"ht": "haitian creole"}, + {"ps": "pashto"}, + {"tk": "turkmen"}, + {"nn": "nynorsk"}, + {"mt": "maltese"}, + {"sa": "sanskrit"}, + {"lb": "luxembourgish"}, + {"my": "myanmar"}, + {"bo": "tibetan"}, + {"tl": "tagalog"}, + {"mg": "malagasy"}, + {"as": "assamese"}, + {"tt": "tatar"}, + {"haw": "hawaiian"}, + {"ln": "lingala"}, + {"ha": "hausa"}, + {"ba": "bashkir"}, + {"jw": "javanese"}, + {"su": "sundanese"}, + {"yue": "cantonese"}, + ] diff --git a/vox_box/backends/stt/funasr.py b/vox_box/backends/stt/funasr.py index 1bf246e..d38b5b5 100644 --- a/vox_box/backends/stt/funasr.py +++ b/vox_box/backends/stt/funasr.py @@ -50,10 +50,14 @@ def load(self): log_level=self._log_level, disable_update=True, ) + + self._languages = self._get_languages() + self._model_dict = create_model_dict( self._cfg.model, task_type=TaskTypeEnum.STT, backend_framework=BackendEnum.FUN_ASR, + languages=self._languages, ) self._model_load = True return self @@ -106,3 +110,8 @@ def transcribe( text = rich_transcription_postprocess(res[0]["text"]) return text + + def _get_languages(self) -> List[Dict]: + return [ + {"auto": "auto"}, + ] diff --git a/vox_box/server/routers.py b/vox_box/server/routers.py index c8d40a6..1354320 100644 --- a/vox_box/server/routers.py +++ b/vox_box/server/routers.py @@ -188,6 +188,16 @@ async def get_model_info(model_id: str): return model_instance.model_info() +@router.get("/v1/languages") +async def get_languages(): + model_instance = get_model_instance() + if model_instance is None: + return {} + return { + "languages": model_instance.model_info().get("languages", []), + } + + @router.get("/v1/voices") async def get_voice(): model_instance = get_model_instance()