From f035f4a1321c3b459787ce1729e90f89e55dc1d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Via=C4=8Das=C5=82a=C5=AD=20Chalikin?= Date: Mon, 29 Jan 2024 11:50:45 +0300 Subject: [PATCH 1/2] Fix narrator --- accessibility.h | 7 + frontend/drivers/platform_darwin.m | 3 +- frontend/drivers/platform_unix.c | 239 ++++++++++------------- frontend/drivers/platform_win32.c | 33 +++- frontend/frontend_driver.h | 2 +- retroarch.c | 24 +-- tasks/task_translation.c | 296 +++++++++++++++-------------- 7 files changed, 291 insertions(+), 313 deletions(-) diff --git a/accessibility.h b/accessibility.h index 1d1c2a7adfe..0787225cdb4 100644 --- a/accessibility.h +++ b/accessibility.h @@ -143,6 +143,13 @@ bool accessibility_speak_priority( unsigned accessibility_narrator_speech_speed, const char* speak_text, int priority); +bool narrator_speak_priority( + bool accessibility_enable, + const char *voice, + unsigned accessibility_narrator_speech_speed, + const char *speak_text, + int priority); + access_state_t *access_state_get_ptr(void); #endif diff --git a/frontend/drivers/platform_darwin.m b/frontend/drivers/platform_darwin.m index 98167d82d9c..272dd1cd3f9 100644 --- a/frontend/drivers/platform_darwin.m +++ b/frontend/drivers/platform_darwin.m @@ -884,11 +884,10 @@ static bool is_narrator_running_macos(void) return (kill(speak_pid, 0) == 0); } -static bool accessibility_speak_macos(int speed, +static bool accessibility_speak_macos(const char* voice, int speed, const char* speak_text, int priority) { int pid; - const char *voice = get_user_language_iso639_1(false); char* language_speaker = accessibility_mac_language_code(voice); char* speeds[10] = {"80", "100", "125", "150", "170", "210", "260", "310", "380", "450"}; diff --git a/frontend/drivers/platform_unix.c b/frontend/drivers/platform_unix.c index 4f2390ee5d2..b0c82d30ce0 100644 --- a/frontend/drivers/platform_unix.c +++ b/frontend/drivers/platform_unix.c @@ -82,7 +82,6 @@ #include "../../msg_hash.h" #include "../../paths.h" #include 
"../../retroarch.h" -#include "../../translation_defines.h" #include "../../verbosity.h" #ifdef HAVE_MENU @@ -798,7 +797,7 @@ static void check_proc_acpi_sysfs_battery(const char *node, } fill_pathname_join_special(path, basenode, "type", sizeof(path)); - + if (!filestream_exists(path)) goto status; @@ -2789,152 +2788,106 @@ static bool is_narrator_running_unix(void) return (kill(speak_pid, 0) == 0); } -/** - * Returns the espeak-compatible string representation of the translation language enum value. - */ -static const char* espeak_get_str(enum translation_lang id) -{ - switch (id) - { - case TRANSLATION_LANG_EN: - return "en"; - case TRANSLATION_LANG_ES: - return "es"; - case TRANSLATION_LANG_FR: - return "fr"; - case TRANSLATION_LANG_IT: - return "it"; - case TRANSLATION_LANG_DE: - return "de"; - case TRANSLATION_LANG_JP: - return "ja"; - case TRANSLATION_LANG_NL: - return "nl"; - case TRANSLATION_LANG_CS: - return "cs"; - case TRANSLATION_LANG_DA: - return "da"; - case TRANSLATION_LANG_SV: - return "sv"; - case TRANSLATION_LANG_HR: - return "hr"; - case TRANSLATION_LANG_KO: - return "ko"; - case TRANSLATION_LANG_ZH_CN: - case TRANSLATION_LANG_ZH_TW: - return "cmn"; - case TRANSLATION_LANG_CA: - return "ca"; - case TRANSLATION_LANG_BG: - return "bg"; - case TRANSLATION_LANG_BN: - return "bn"; - case TRANSLATION_LANG_EU: - return "eu"; - case TRANSLATION_LANG_AZ: - return "az"; - case TRANSLATION_LANG_AR: - return "ar"; - case TRANSLATION_LANG_SQ: - return "sq"; - case TRANSLATION_LANG_AF: - return "af"; - case TRANSLATION_LANG_EO: - return "eo"; - case TRANSLATION_LANG_ET: - return "et"; - case TRANSLATION_LANG_FI: - return "fi"; - case TRANSLATION_LANG_KA: - return "ka"; - case TRANSLATION_LANG_EL: - return "el"; - case TRANSLATION_LANG_GU: - return "gu"; - case TRANSLATION_LANG_HT: - return "ht"; - case TRANSLATION_LANG_HE: - return "he"; - case TRANSLATION_LANG_HI: - return "hi"; - case TRANSLATION_LANG_HU: - return "hu"; - case TRANSLATION_LANG_IS: - return 
"is"; - case TRANSLATION_LANG_ID: - return "id"; - case TRANSLATION_LANG_GA: - return "ga"; - case TRANSLATION_LANG_KN: - return "kn"; - case TRANSLATION_LANG_LA: - return "la"; - case TRANSLATION_LANG_LV: - return "lv"; - case TRANSLATION_LANG_LT: - return "lt"; - case TRANSLATION_LANG_MK: - return "mk"; - case TRANSLATION_LANG_MS: - return "ms"; - case TRANSLATION_LANG_MT: - return "mt"; - case TRANSLATION_LANG_NO: - return "nb"; - case TRANSLATION_LANG_FA: - return "fa"; - case TRANSLATION_LANG_PL: - return "pl"; - case TRANSLATION_LANG_PT: - return "pt"; - case TRANSLATION_LANG_RO: - return "ro"; - case TRANSLATION_LANG_RU: - return "ru"; - case TRANSLATION_LANG_SR: - return "sr"; - case TRANSLATION_LANG_SK: - return "sk"; - case TRANSLATION_LANG_SL: - return "sl"; - case TRANSLATION_LANG_SW: - return "sw"; - case TRANSLATION_LANG_TA: - return "ta"; - case TRANSLATION_LANG_TE: - return "te"; - case TRANSLATION_LANG_TH: - return "th"; - case TRANSLATION_LANG_TR: - return "tr"; - case TRANSLATION_LANG_UK: - return "uk"; - case TRANSLATION_LANG_BE: - return "be"; - case TRANSLATION_LANG_UR: - return "ur"; - case TRANSLATION_LANG_VI: - return "vi"; - case TRANSLATION_LANG_CY: - return "cy"; - case TRANSLATION_LANG_AST: - case TRANSLATION_LANG_TL: - case TRANSLATION_LANG_GL: - case TRANSLATION_LANG_YI: - case TRANSLATION_LANG_DONT_CARE: - case TRANSLATION_LANG_LAST: - break; - } +static const char* accessibility_unix_language_code(const char* language) +{ + if ( + string_is_equal(language, "en") || + string_is_equal(language, "it") || + string_is_equal(language, "sv") || + string_is_equal(language, "fr") || + string_is_equal(language, "de") || + string_is_equal(language, "he") || + string_is_equal(language, "id") || + string_is_equal(language, "es") || + string_is_equal(language, "nl") || + string_is_equal(language, "ro") || + string_is_equal(language, "th") || + string_is_equal(language, "ja") || + string_is_equal(language, "sk") || + string_is_equal(language, 
"hi") || + string_is_equal(language, "ar") || + string_is_equal(language, "hu") || + string_is_equal(language, "el") || + string_is_equal(language, "ru") || + string_is_equal(language, "nb") || + string_is_equal(language, "da") || + string_is_equal(language, "fi") || + string_is_equal(language, "tr") || + string_is_equal(language, "ko") || + string_is_equal(language, "pl") || + string_is_equal(language, "cs") || + string_is_equal(language, "eo") || + string_is_equal(language, "vi") || + string_is_equal(language, "fa") || + string_is_equal(language, "uk") || + string_is_equal(language, "be") || + string_is_equal(language, "hr") || + string_is_equal(language, "bg") || + string_is_equal(language, "bn") || + string_is_equal(language, "eu") || + string_is_equal(language, "az") || + string_is_equal(language, "sq") || + string_is_equal(language, "af") || + string_is_equal(language, "et") || + string_is_equal(language, "ka") || + string_is_equal(language, "gu") || + string_is_equal(language, "ht") || + string_is_equal(language, "is") || + string_is_equal(language, "ga") || + string_is_equal(language, "kn") || + string_is_equal(language, "la") || + string_is_equal(language, "lv") || + string_is_equal(language, "lt") || + string_is_equal(language, "mk") || + string_is_equal(language, "ms") || + string_is_equal(language, "mt") || + string_is_equal(language, "sr") || + string_is_equal(language, "sl") || + string_is_equal(language, "sw") || + string_is_equal(language, "ta") || + string_is_equal(language, "te") || + string_is_equal(language, "ur") || + string_is_equal(language, "cy") + ) + return language; + else if ( + string_is_equal(language, "no") || + string_is_equal(language, "nb") + ) + return "nb"; + else if (string_is_equal(language, "en_gb")) + return "en-gb"; + else if ( + string_is_equal(language, "ca") || + string_is_equal(language, "ca_ES@valencia") + ) + return "ca"; + else if ( + string_is_equal(language, "pt_pt") || + string_is_equal(language, "pt") + ) + return 
"pt"; + else if (string_is_equal(language, "pt_br")) + return "pt-br"; /* Brazilian Portuguese */ + else if ( + string_is_equal(language, "zh") || + string_is_equal(language, "zh_cn") || + string_is_equal(language, "zh_tw") || + string_is_equal(language, "zh-CN") || + string_is_equal(language, "zh-TW") + ) + return "cmn"; + else if (string_is_equal(language, "zh_hk")) + return "yue"; + /* default voice as fallback */ return "en"; } -static bool accessibility_speak_unix(int speed, +static bool accessibility_speak_unix(const char* voice, int speed, const char* speak_text, int priority) { int pid; - settings_t *settings = config_get_ptr(); - unsigned target_lang = settings->uints.ai_service_target_lang; - const char *language = espeak_get_str((enum translation_lang)target_lang); + const char* language = accessibility_unix_language_code(voice); char* voice_out = (char*)malloc(3 + strlen(language)); char* speed_out = (char*)malloc(3 + 3); const char* speeds[10] = {"80", "100", "125", "150", "170", "210", "260", "310", "380", "450"}; @@ -2994,7 +2947,7 @@ static bool accessibility_speak_unix(int speed, /* Tell the system that we'll ignore the exit status of the child * process. This prevents zombie processes. 
*/ signal(SIGCHLD, SIG_IGN); - } + } } end: diff --git a/frontend/drivers/platform_win32.c b/frontend/drivers/platform_win32.c index ffccd7c8c28..2208748e643 100644 --- a/frontend/drivers/platform_win32.c +++ b/frontend/drivers/platform_win32.c @@ -831,7 +831,7 @@ static const char *accessibility_win_language_id(const char* language) return "401"; else if (string_is_equal(language,"hu")) return "040e"; - else if (string_is_equal(language,"zh_tw") || string_is_equal(language,"zh")) + else if (string_is_equal(language, "zh_tw") || string_is_equal(language,"zh")) return "804"; else if (string_is_equal(language,"el")) return "408"; @@ -896,13 +896,15 @@ static const char *accessibility_win_language_code(const char* language) return "Microsoft Naayf Desktop"; else if (string_is_equal(language,"hu")) return "Microsoft Szabolcs Desktop"; - else if (string_is_equal(language,"zh_tw") || string_is_equal(language,"zh")) + else if (string_is_equal(language, "zh_tw") + || string_is_equal(language,"zh-TW") + || string_is_equal(language,"zh")) return "Microsoft Zhiwei Desktop"; else if (string_is_equal(language,"el")) return "Microsoft Stefanos Desktop"; else if (string_is_equal(language,"ru")) return "Microsoft Pavel Desktop"; - else if (string_is_equal(language,"nb")) + else if (string_is_equal(language,"no") || string_is_equal(language,"nb")) return "Microsoft Jon Desktop"; else if (string_is_equal(language,"da")) return "Microsoft Helle Desktop"; @@ -910,7 +912,7 @@ static const char *accessibility_win_language_code(const char* language) return "Microsoft Heidi Desktop"; else if (string_is_equal(language,"zh_hk")) return "Microsoft Danny Desktop"; - else if (string_is_equal(language,"zh_cn")) + else if (string_is_equal(language,"zh_cn") || string_is_equal(language,"zh-CN")) return "Microsoft Kangkang Desktop"; else if (string_is_equal(language,"tr")) return "Microsoft Tolga Desktop"; @@ -918,8 +920,24 @@ static const char *accessibility_win_language_code(const char* language) 
return "Microsoft Heami Desktop"; else if (string_is_equal(language,"pl")) return "Microsoft Adam Desktop"; - else if (string_is_equal(language,"cs")) + else if (string_is_equal(language,"cs")) return "Microsoft Jakub Desktop"; + else if (string_is_equal(language,"vi")) + return "Microsoft An Desktop"; + else if (string_is_equal(language,"hr")) + return "Microsoft Matej Desktop"; + else if (string_is_equal(language,"bg")) + return "Microsoft Ivan Desktop"; + else if (string_is_equal(language,"ms")) + return "Microsoft Rizwan Desktop"; + else if (string_is_equal(language,"sl")) + return "Microsoft Lado Desktop"; + else if (string_is_equal(language,"ta")) + return "Microsoft Valluvar Desktop"; + else if (string_is_equal(language,"en_gb")) + return "Microsoft George Desktop"; + else if (string_is_equal(language,"ca") || string_is_equal(language,"ca_ES@valencia")) + return "Microsoft Herena Desktop"; return ""; } @@ -1016,11 +1034,10 @@ static bool is_narrator_running_windows(void) return false; } -static bool accessibility_speak_windows(int speed, +static bool accessibility_speak_windows(const char* voice, int speed, const char* speak_text, int priority) { char cmd[512]; - const char *voice = get_user_language_iso639_1(true); const char *language = accessibility_win_language_code(voice); const char *langid = accessibility_win_language_id(voice); bool res = false; @@ -1069,7 +1086,7 @@ static bool accessibility_speak_windows(int speed, g_plat_win32_flags |= PLAT_WIN32_FLAG_USE_POWERSHELL; if (wc) free(wc); - return accessibility_speak_windows(speed, speak_text, priority); + return accessibility_speak_windows(voice, speed, speak_text, priority); } nvdaController_cancelSpeech_func(); diff --git a/frontend/frontend_driver.h b/frontend/frontend_driver.h index 3278fedefa5..95192094398 100644 --- a/frontend/frontend_driver.h +++ b/frontend/frontend_driver.h @@ -111,7 +111,7 @@ typedef struct frontend_ctx_driver const char* (*get_cpu_model_name)(void); enum retro_language 
(*get_user_language)(void); bool (*is_narrator_running)(void); - bool (*accessibility_speak)(int speed, + bool (*accessibility_speak)(const char* voice, int speed, const char* speak_text, int priority); bool (*set_gamemode)(bool on); diff --git a/retroarch.c b/retroarch.c index 3a264a0b8a2..678f8f7224b 100644 --- a/retroarch.c +++ b/retroarch.c @@ -8341,6 +8341,17 @@ bool accessibility_speak_priority( bool accessibility_enable, unsigned accessibility_narrator_speech_speed, const char* speak_text, int priority) +{ + const char *voice = get_user_language_iso639_1(false); + return narrator_speak_priority(accessibility_enable, voice, accessibility_narrator_speech_speed, speak_text, priority); +} + +bool narrator_speak_priority( + bool accessibility_enable, + const char *voice, + unsigned accessibility_narrator_speech_speed, + const char *speak_text, + int priority) { access_state_t *access_st = access_state_get_ptr(); if (is_accessibility_enabled( @@ -8353,21 +8364,10 @@ bool accessibility_speak_priority( RARCH_LOG("Spoke: %s\n", speak_text); if (frontend && frontend->accessibility_speak) - return frontend->accessibility_speak(accessibility_narrator_speech_speed, speak_text, + return frontend->accessibility_speak(voice, accessibility_narrator_speech_speed, speak_text, priority); RARCH_LOG("Platform not supported for accessibility.\n"); - /* The following method is a fallback for other platforms to use the - AI Service url to do the TTS. However, since the playback is done - via the audio mixer, which only processes the audio while the - core is running, this playback method won't work. When the audio - mixer can handle playing streams while the core is paused, then - we can use this. 
*/ -#if 0 -#if defined(HAVE_NETWORKING) - return accessibility_speak_ai_service(speak_text, voice, priority); -#endif -#endif } return true; diff --git a/tasks/task_translation.c b/tasks/task_translation.c index 5a9b37352f0..be24d278c40 100644 --- a/tasks/task_translation.c +++ b/tasks/task_translation.c @@ -110,6 +110,151 @@ typedef struct /* UTILITIES ---------------------------------------------------------------- */ /* -------------------------------------------------------------------------- */ +/** + * Returns the string representation of the translation language enum value. + */ +static const char* ai_service_get_str(enum translation_lang id) +{ + switch (id) + { + case TRANSLATION_LANG_EN: + return "en"; + case TRANSLATION_LANG_ES: + return "es"; + case TRANSLATION_LANG_FR: + return "fr"; + case TRANSLATION_LANG_IT: + return "it"; + case TRANSLATION_LANG_DE: + return "de"; + case TRANSLATION_LANG_JP: + return "ja"; + case TRANSLATION_LANG_NL: + return "nl"; + case TRANSLATION_LANG_CS: + return "cs"; + case TRANSLATION_LANG_DA: + return "da"; + case TRANSLATION_LANG_SV: + return "sv"; + case TRANSLATION_LANG_HR: + return "hr"; + case TRANSLATION_LANG_KO: + return "ko"; + case TRANSLATION_LANG_ZH_CN: + return "zh-CN"; + case TRANSLATION_LANG_ZH_TW: + return "zh-TW"; + case TRANSLATION_LANG_CA: + return "ca"; + case TRANSLATION_LANG_BG: + return "bg"; + case TRANSLATION_LANG_BN: + return "bn"; + case TRANSLATION_LANG_EU: + return "eu"; + case TRANSLATION_LANG_AZ: + return "az"; + case TRANSLATION_LANG_AR: + return "ar"; + case TRANSLATION_LANG_AST: + return "ast"; + case TRANSLATION_LANG_SQ: + return "sq"; + case TRANSLATION_LANG_AF: + return "af"; + case TRANSLATION_LANG_EO: + return "eo"; + case TRANSLATION_LANG_ET: + return "et"; + case TRANSLATION_LANG_TL: + return "tl"; + case TRANSLATION_LANG_FI: + return "fi"; + case TRANSLATION_LANG_GL: + return "gl"; + case TRANSLATION_LANG_KA: + return "ka"; + case TRANSLATION_LANG_EL: + return "el"; + case 
TRANSLATION_LANG_GU: + return "gu"; + case TRANSLATION_LANG_HT: + return "ht"; + case TRANSLATION_LANG_HE: + return "he"; + case TRANSLATION_LANG_HI: + return "hi"; + case TRANSLATION_LANG_HU: + return "hu"; + case TRANSLATION_LANG_IS: + return "is"; + case TRANSLATION_LANG_ID: + return "id"; + case TRANSLATION_LANG_GA: + return "ga"; + case TRANSLATION_LANG_KN: + return "kn"; + case TRANSLATION_LANG_LA: + return "la"; + case TRANSLATION_LANG_LV: + return "lv"; + case TRANSLATION_LANG_LT: + return "lt"; + case TRANSLATION_LANG_MK: + return "mk"; + case TRANSLATION_LANG_MS: + return "ms"; + case TRANSLATION_LANG_MT: + return "mt"; + case TRANSLATION_LANG_NO: + return "no"; + case TRANSLATION_LANG_FA: + return "fa"; + case TRANSLATION_LANG_PL: + return "pl"; + case TRANSLATION_LANG_PT: + return "pt"; + case TRANSLATION_LANG_RO: + return "ro"; + case TRANSLATION_LANG_RU: + return "ru"; + case TRANSLATION_LANG_SR: + return "sr"; + case TRANSLATION_LANG_SK: + return "sk"; + case TRANSLATION_LANG_SL: + return "sl"; + case TRANSLATION_LANG_SW: + return "sw"; + case TRANSLATION_LANG_TA: + return "ta"; + case TRANSLATION_LANG_TE: + return "te"; + case TRANSLATION_LANG_TH: + return "th"; + case TRANSLATION_LANG_TR: + return "tr"; + case TRANSLATION_LANG_UK: + return "uk"; + case TRANSLATION_LANG_BE: + return "be"; + case TRANSLATION_LANG_UR: + return "ur"; + case TRANSLATION_LANG_VI: + return "vi"; + case TRANSLATION_LANG_CY: + return "cy"; + case TRANSLATION_LANG_YI: + return "yi"; + case TRANSLATION_LANG_DONT_CARE: + case TRANSLATION_LANG_LAST: + break; + } + + return ""; +} + /** * Returns true if the accessibility narrator is currently playing audio. 
*/ @@ -156,8 +301,9 @@ static void accessibility_speak(const char *text) settings_t *settings = config_get_ptr(); unsigned speed = settings->uints.accessibility_narrator_speech_speed; bool narrator_on = settings->bools.accessibility_enable; + const char* voice = ai_service_get_str(settings->uints.ai_service_target_lang); - accessibility_speak_priority(narrator_on, speed, text, 10); + narrator_speak_priority(narrator_on, voice, speed, text, 10); #endif } @@ -175,10 +321,11 @@ static void translation_speak(const char *text) unsigned mode = settings->uints.ai_service_mode; unsigned speed = settings->uints.accessibility_narrator_speech_speed; bool narrator_on = settings->bools.accessibility_enable; + const char* voice = ai_service_get_str(settings->uints.ai_service_target_lang); /* Force the use of the narrator in Narrator modes (TTS) */ if (mode == 2 || mode == 4 || mode == 5 || narrator_on || access_st->enabled) - accessibility_speak_priority(true, speed, text, 10); + narrator_speak_priority(true, voice, speed, text, 10); #endif } @@ -329,151 +476,6 @@ void translation_release(bool inform) translation_hash_info(MSG_AI_AUTO_MODE_DISABLED); } -/** - * Returns the string representation of the translation language enum value. 
- */ -static const char* ai_service_get_str(enum translation_lang id) -{ - switch (id) - { - case TRANSLATION_LANG_EN: - return "en"; - case TRANSLATION_LANG_ES: - return "es"; - case TRANSLATION_LANG_FR: - return "fr"; - case TRANSLATION_LANG_IT: - return "it"; - case TRANSLATION_LANG_DE: - return "de"; - case TRANSLATION_LANG_JP: - return "ja"; - case TRANSLATION_LANG_NL: - return "nl"; - case TRANSLATION_LANG_CS: - return "cs"; - case TRANSLATION_LANG_DA: - return "da"; - case TRANSLATION_LANG_SV: - return "sv"; - case TRANSLATION_LANG_HR: - return "hr"; - case TRANSLATION_LANG_KO: - return "ko"; - case TRANSLATION_LANG_ZH_CN: - return "zh-CN"; - case TRANSLATION_LANG_ZH_TW: - return "zh-TW"; - case TRANSLATION_LANG_CA: - return "ca"; - case TRANSLATION_LANG_BG: - return "bg"; - case TRANSLATION_LANG_BN: - return "bn"; - case TRANSLATION_LANG_EU: - return "eu"; - case TRANSLATION_LANG_AZ: - return "az"; - case TRANSLATION_LANG_AR: - return "ar"; - case TRANSLATION_LANG_AST: - return "ast"; - case TRANSLATION_LANG_SQ: - return "sq"; - case TRANSLATION_LANG_AF: - return "af"; - case TRANSLATION_LANG_EO: - return "eo"; - case TRANSLATION_LANG_ET: - return "et"; - case TRANSLATION_LANG_TL: - return "tl"; - case TRANSLATION_LANG_FI: - return "fi"; - case TRANSLATION_LANG_GL: - return "gl"; - case TRANSLATION_LANG_KA: - return "ka"; - case TRANSLATION_LANG_EL: - return "el"; - case TRANSLATION_LANG_GU: - return "gu"; - case TRANSLATION_LANG_HT: - return "ht"; - case TRANSLATION_LANG_HE: - return "he"; - case TRANSLATION_LANG_HI: - return "hi"; - case TRANSLATION_LANG_HU: - return "hu"; - case TRANSLATION_LANG_IS: - return "is"; - case TRANSLATION_LANG_ID: - return "id"; - case TRANSLATION_LANG_GA: - return "ga"; - case TRANSLATION_LANG_KN: - return "kn"; - case TRANSLATION_LANG_LA: - return "la"; - case TRANSLATION_LANG_LV: - return "lv"; - case TRANSLATION_LANG_LT: - return "lt"; - case TRANSLATION_LANG_MK: - return "mk"; - case TRANSLATION_LANG_MS: - return "ms"; - 
case TRANSLATION_LANG_MT: - return "mt"; - case TRANSLATION_LANG_NO: - return "no"; - case TRANSLATION_LANG_FA: - return "fa"; - case TRANSLATION_LANG_PL: - return "pl"; - case TRANSLATION_LANG_PT: - return "pt"; - case TRANSLATION_LANG_RO: - return "ro"; - case TRANSLATION_LANG_RU: - return "ru"; - case TRANSLATION_LANG_SR: - return "sr"; - case TRANSLATION_LANG_SK: - return "sk"; - case TRANSLATION_LANG_SL: - return "sl"; - case TRANSLATION_LANG_SW: - return "sw"; - case TRANSLATION_LANG_TA: - return "ta"; - case TRANSLATION_LANG_TE: - return "te"; - case TRANSLATION_LANG_TH: - return "th"; - case TRANSLATION_LANG_TR: - return "tr"; - case TRANSLATION_LANG_UK: - return "uk"; - case TRANSLATION_LANG_BE: - return "be"; - case TRANSLATION_LANG_UR: - return "ur"; - case TRANSLATION_LANG_VI: - return "vi"; - case TRANSLATION_LANG_CY: - return "cy"; - case TRANSLATION_LANG_YI: - return "yi"; - case TRANSLATION_LANG_DONT_CARE: - case TRANSLATION_LANG_LAST: - break; - } - - return ""; -} - /* AUTOMATION --------------------------------------------------------------- */ /* -------------------------------------------------------------------------- */ From 726693fd2eacd10be8d5882ee74dd1872b859632 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Via=C4=8Das=C5=82a=C5=AD=20Chalikin?= Date: Tue, 6 Feb 2024 14:22:50 +0300 Subject: [PATCH 2/2] Refactor accessibility --- accessibility.h | 52 +++++++++++---------- frontend/drivers/platform_darwin.m | 4 +- frontend/drivers/platform_unix.c | 6 +-- frontend/drivers/platform_win32.c | 6 +-- frontend/frontend_driver.h | 4 +- input/input_driver.c | 8 ++-- menu/menu_driver.c | 16 +++---- retroarch.c | 73 +++++++++++++++++++----------- runloop.c | 4 +- tasks/task_translation.c | 4 +- 10 files changed, 99 insertions(+), 78 deletions(-) diff --git a/accessibility.h b/accessibility.h index 0787225cdb4..dadf33fbc14 100644 --- a/accessibility.h +++ b/accessibility.h @@ -41,10 +41,10 @@ typedef struct { /* The last request task, used to prepare and 
send the translation */ retro_task_t *request_task; - + /* The last response task, used to parse costly translation data */ retro_task_t *response_task; - + /* Timestamp of the last translation request */ retro_time_t last_call; @@ -59,7 +59,7 @@ typedef struct /* 1 if the automatic mode has been enabled, 0 otherwise */ int ai_service_auto; - + /* Text-to-speech narrator override flag */ bool enabled; } access_state_t; @@ -71,26 +71,26 @@ bool is_narrator_running(bool accessibility_enable); #endif /* - Invoke this method to send a request to the AI service. + Invoke this method to send a request to the AI service. It makes the following POST request using URL params: – source_lang (optional): language code of the content currently running. – target_lang (optional): language of the content to return. – output: comma-separated list of formats that must be provided by the service. Also lists supported sub-formats. - + The currently supported formats are: – sound: raw audio to playback. (wav) – text: text to be read through internal text-to-speech capabilities. 'subs' can be specified on top of that to explain that we are looking for short text response in the manner of subtitles. – image: image to display on top of the video feed. Widgets will be used - first if possible, otherwise we'll try to draw it directly on the + first if possible, otherwise we'll try to draw it directly on the video buffer. (bmp, png, png-a) [All in 24-bits BGR formats] - + In addition, the request contains a JSON payload, formatted as such: – image: captured frame from the currently running content (in base64). – format: format of the captured frame ("png", or "bmp"). - – coords: array describing the coordinates of the image within the + – coords: array describing the coordinates of the image within the viewport space (x, y, width, height). – viewport: array describing the size of the viewport (width, height). – label: a text string describing the content (__). 
@@ -99,7 +99,7 @@ bool is_narrator_running(bool accessibility_enable); – : the name of a retropad input, valued 1 if pressed. (a, b, x, y, l, r, l2, r2, l3, r3) (up, down, left, right, start, select) - + The translation component then expects a response from the AI service in the form of a JSON payload, formatted as such: – image: base64 representation of an image in a supported format. @@ -108,47 +108,49 @@ bool is_narrator_running(bool accessibility_enable); – text_position: hint for the position of the text when the service is running in text mode (ie subtitles). Position is a number, 1 for Bottom or 2 for Top (defaults to bottom). - – press: a list of retropad input to forcibly press. On top of the + – press: a list of retropad input to forcibly press. On top of the expected keys (cf. 'state' above) values 'pause' and 'unpause' can be specified to control the flow of the content. – error: any error encountered with the request. – auto: either 'auto' or 'continue' to control automatic requests. - + All fields are optional, but at least one of them must be present. If 'error' is set, the error is shown to the user and everything else is ignored, even 'auto' settings. - + With 'auto' on 'auto', RetroArch will automatically send a new request (with a minimum delay enforced by uints.ai_service_poll_delay), with a value - of 'continue', RetroArch will ignore the returned content and skip to the + of 'continue', RetroArch will ignore the returned content and skip to the next automatic request. This allows the service to specify that the returned content is the same as the one previously sent, so RetroArch does not need to - update its display unless necessary. With 'continue' the service *must* - still send the content, as we may need to display it if the user paused the + update its display unless necessary. With 'continue' the service *must* + still send the content, as we may need to display it if the user paused the AI service for instance. 
- {paused} boolean is passed in to indicate if the current call was made - during a paused frame. Due to how the menu widgets work, if the AI service - is called in 'auto' mode, then this call will be made while the menu widgets + {paused} boolean is passed in to indicate if the current call was made + during a paused frame. Due to how the menu widgets work, if the AI service + is called in 'auto' mode, then this call will be made while the menu widgets unpause the core for a frame to update the on-screen widgets. To tell the AI - service what the pause mode is honestly, we store the runloop_paused + service what the pause mode is honestly, we store the runloop_paused variable from before the service wipes the widgets, and pass that in here. */ bool run_translation_service(settings_t *settings, bool paused); void translation_release(bool inform); -bool accessibility_speak_priority( +/* Proxy for calls related to menu navigation */ +bool navigation_say( bool accessibility_enable, unsigned accessibility_narrator_speech_speed, - const char* speak_text, int priority); + const char* speak_text, + int priority); -bool narrator_speak_priority( - bool accessibility_enable, - const char *voice, +/* Local platform-specific TTS */ +bool accessibility_speak_priority( unsigned accessibility_narrator_speech_speed, const char *speak_text, - int priority); + int priority, + const char* voice); access_state_t *access_state_get_ptr(void); diff --git a/frontend/drivers/platform_darwin.m b/frontend/drivers/platform_darwin.m index 272dd1cd3f9..1248d25806e 100644 --- a/frontend/drivers/platform_darwin.m +++ b/frontend/drivers/platform_darwin.m @@ -884,8 +884,8 @@ static bool is_narrator_running_macos(void) return (kill(speak_pid, 0) == 0); } -static bool accessibility_speak_macos(const char* voice, int speed, - const char* speak_text, int priority) +static bool accessibility_speak_macos(int speed, + const char* speak_text, int priority, const char* voice) { int pid; char* 
language_speaker = accessibility_mac_language_code(voice); diff --git a/frontend/drivers/platform_unix.c b/frontend/drivers/platform_unix.c index b0c82d30ce0..7206bb0cf7c 100644 --- a/frontend/drivers/platform_unix.c +++ b/frontend/drivers/platform_unix.c @@ -812,7 +812,7 @@ static void check_proc_acpi_sysfs_battery(const char *node, buf = NULL; } -status: +status: fill_pathname_join_special(path, basenode, "status", sizeof(path)); if (!filestream_exists(path)) @@ -2883,8 +2883,8 @@ static const char* accessibility_unix_language_code(const char* language) return "en"; } -static bool accessibility_speak_unix(const char* voice, int speed, - const char* speak_text, int priority) +static bool accessibility_speak_unix(int speed, + const char* speak_text, int priority, const char* voice) { int pid; const char* language = accessibility_unix_language_code(voice); diff --git a/frontend/drivers/platform_win32.c b/frontend/drivers/platform_win32.c index 2208748e643..a24a5fea6db 100644 --- a/frontend/drivers/platform_win32.c +++ b/frontend/drivers/platform_win32.c @@ -1034,8 +1034,8 @@ static bool is_narrator_running_windows(void) return false; } -static bool accessibility_speak_windows(const char* voice, int speed, - const char* speak_text, int priority) +static bool accessibility_speak_windows(int speed, + const char* speak_text, int priority, const char* voice) { char cmd[512]; const char *language = accessibility_win_language_code(voice); @@ -1086,7 +1086,7 @@ static bool accessibility_speak_windows(const char* voice, int speed, g_plat_win32_flags |= PLAT_WIN32_FLAG_USE_POWERSHELL; if (wc) free(wc); - return accessibility_speak_windows(voice, speed, speak_text, priority); + return accessibility_speak_windows(speed, speak_text, priority, voice); } nvdaController_cancelSpeech_func(); diff --git a/frontend/frontend_driver.h b/frontend/frontend_driver.h index 95192094398..a9634214dde 100644 --- a/frontend/frontend_driver.h +++ b/frontend/frontend_driver.h @@ -111,8 +111,8 @@ 
typedef struct frontend_ctx_driver const char* (*get_cpu_model_name)(void); enum retro_language (*get_user_language)(void); bool (*is_narrator_running)(void); - bool (*accessibility_speak)(const char* voice, int speed, - const char* speak_text, int priority); + bool (*accessibility_speak)(int speed, + const char* speak_text, int priority, const char* voice); bool (*set_gamemode)(bool on); const char *ident; diff --git a/input/input_driver.c b/input/input_driver.c index 74d9e74bb82..d89077187f6 100644 --- a/input/input_driver.c +++ b/input/input_driver.c @@ -5083,7 +5083,7 @@ static void input_keys_pressed( void bsv_movie_free(bsv_movie_t*); void bsv_movie_enqueue(input_driver_state_t *input_st, bsv_movie_t * state, enum bsv_flags flags) -{ +{ if (input_st->bsv_movie_state_next_handle) bsv_movie_free(input_st->bsv_movie_state_next_handle); input_st->bsv_movie_state_next_handle = state; @@ -6614,7 +6614,7 @@ void input_keyboard_event(bool down, unsigned code, say_char[1] = '\0'; if (character == 127 || character == 8) - accessibility_speak_priority( + navigation_say( accessibility_enable, accessibility_narrator_speech_speed, "backspace", 10); @@ -6622,12 +6622,12 @@ void input_keyboard_event(bool down, unsigned code, { const char *lut_name = accessibility_lut_name(c); if (lut_name) - accessibility_speak_priority( + navigation_say( accessibility_enable, accessibility_narrator_speech_speed, lut_name, 10); else if (character != 0) - accessibility_speak_priority( + navigation_say( accessibility_enable, accessibility_narrator_speech_speed, say_char, 10); diff --git a/menu/menu_driver.c b/menu/menu_driver.c index af4d8ee4a6c..171c0de8ea6 100644 --- a/menu/menu_driver.c +++ b/menu/menu_driver.c @@ -7016,7 +7016,7 @@ static int generic_menu_iterate( && is_accessibility_enabled( accessibility_enable, access_st->enabled)) - accessibility_speak_priority( + navigation_say( accessibility_enable, accessibility_narrator_speech_speed, menu->menu_state_msg, 10); @@ -7148,18 +7148,18 
@@ static int generic_menu_iterate( menu_st, current_sublabel, sizeof(current_sublabel)); if (string_is_equal(current_sublabel, "")) - accessibility_speak_priority( + navigation_say( accessibility_enable, accessibility_narrator_speech_speed, menu->menu_state_msg, 10); else - accessibility_speak_priority( + navigation_say( accessibility_enable, accessibility_narrator_speech_speed, current_sublabel, 10); } else - accessibility_speak_priority( + navigation_say( accessibility_enable, accessibility_narrator_speech_speed, menu->menu_state_msg, 10); @@ -7321,7 +7321,7 @@ static int generic_menu_iterate( && is_accessibility_enabled( accessibility_enable, access_st->enabled)) - accessibility_speak_priority( + navigation_say( accessibility_enable, accessibility_narrator_speech_speed, "Closed dialog.", 10); @@ -7759,7 +7759,7 @@ int generic_menu_entry_action( } if (!string_is_empty(speak_string)) - accessibility_speak_priority( + navigation_say( accessibility_enable, accessibility_narrator_speech_speed, speak_string, 10); @@ -7892,7 +7892,7 @@ bool menu_input_dialog_start_search(void) if (is_accessibility_enabled( accessibility_enable, access_st->enabled)) - accessibility_speak_priority( + navigation_say( accessibility_enable, accessibility_narrator_speech_speed, (char*)msg_hash_to_str(MENU_ENUM_LABEL_VALUE_SEARCH), 10); @@ -7946,7 +7946,7 @@ bool menu_input_dialog_start(menu_input_ctx_line_t *line) if (is_accessibility_enabled( accessibility_enable, access_st->enabled)) - accessibility_speak_priority( + navigation_say( accessibility_enable, accessibility_narrator_speech_speed, "Keyboard input:", 10); diff --git a/retroarch.c b/retroarch.c index 678f8f7224b..c76b426ff65 100644 --- a/retroarch.c +++ b/retroarch.c @@ -3169,7 +3169,7 @@ bool command_event(enum event_command cmd, void *data) if (is_accessibility_enabled( accessibility_enable, access_st->enabled)) - accessibility_speak_priority( + navigation_say( accessibility_enable, accessibility_narrator_speech_speed, 
(char*)msg_hash_to_str(MSG_UNPAUSED), 10); @@ -4559,12 +4559,12 @@ bool command_event(enum event_command cmd, void *data) access_st->enabled)) { if (paused) - accessibility_speak_priority( + navigation_say( accessibility_enable, accessibility_narrator_speech_speed, (char*)msg_hash_to_str(MSG_PAUSED), 10); else - accessibility_speak_priority( + navigation_say( accessibility_enable, accessibility_narrator_speech_speed, (char*)msg_hash_to_str(MSG_UNPAUSED), 10); @@ -5312,7 +5312,7 @@ bool command_event(enum event_command cmd, void *data) if (is_accessibility_enabled( accessibility_enable, access_st->enabled)) - accessibility_speak_priority( + navigation_say( accessibility_enable, accessibility_narrator_speech_speed, (char*)msg_hash_to_str(MSG_AI_SERVICE_STOPPED), @@ -5327,7 +5327,7 @@ bool command_event(enum event_command cmd, void *data) access_st->enabled) && (ai_service_mode == 2) && is_narrator_running(accessibility_enable)) - accessibility_speak_priority( + navigation_say( accessibility_enable, accessibility_narrator_speech_speed, (char*)msg_hash_to_str(MSG_AI_SERVICE_STOPPED), @@ -7386,7 +7386,7 @@ bool retroarch_main_init(int argc, char *argv[]) if (is_accessibility_enabled( accessibility_enable, access_st->enabled)) - accessibility_speak_priority( + navigation_say( accessibility_enable, accessibility_narrator_speech_speed, (char*)msg_hash_to_str(MSG_ACCESSIBILITY_STARTUP), @@ -8337,39 +8337,58 @@ void retroarch_favorites_deinit(void) } #ifdef HAVE_ACCESSIBILITY -bool accessibility_speak_priority( +bool navigation_say( bool accessibility_enable, unsigned accessibility_narrator_speech_speed, const char* speak_text, int priority) -{ - const char *voice = get_user_language_iso639_1(false); - return narrator_speak_priority(accessibility_enable, voice, accessibility_narrator_speech_speed, speak_text, priority); -} - -bool narrator_speak_priority( - bool accessibility_enable, - const char *voice, - unsigned accessibility_narrator_speech_speed, - const char 
*speak_text, - int priority) { access_state_t *access_st = access_state_get_ptr(); if (is_accessibility_enabled( accessibility_enable, access_st->enabled)) { - frontend_ctx_driver_t *frontend = - frontend_state_get_ptr()->current_frontend_ctx; + const char *voice = get_user_language_iso639_1(false); + bool native_narrator = accessibility_speak_priority(accessibility_narrator_speech_speed, + speak_text, priority, voice); - RARCH_LOG("Spoke: %s\n", speak_text); - - if (frontend && frontend->accessibility_speak) - return frontend->accessibility_speak(voice, accessibility_narrator_speech_speed, speak_text, - priority); - - RARCH_LOG("Platform not supported for accessibility.\n"); + if (!native_narrator) + { + /* + * The following method is a fallback for other platforms to use the + * AI Service url to do the TTS. However, since the playback is done + * via the audio mixer, which only processes the audio while the + * core is running, this playback method won't work. When the audio + * mixer can handle playing streams while the core is paused, then + * we can use this. 
+ */ +#if 0 +#if defined(HAVE_NETWORKING) + return accessibility_speak_ai_service(speak_text, voice, priority); +#endif +#endif + } } return true; } + +bool accessibility_speak_priority( + unsigned accessibility_narrator_speech_speed, + const char *speak_text, + int priority, + const char *voice) +{ + frontend_ctx_driver_t *frontend = + frontend_state_get_ptr()->current_frontend_ctx; + + RARCH_LOG("Spoke: %s\n", speak_text); + + if (frontend && frontend->accessibility_speak) + return frontend->accessibility_speak(accessibility_narrator_speech_speed, + speak_text, priority, voice); + + RARCH_LOG("Platform not supported for accessibility.\n"); + + return false; +} #endif diff --git a/runloop.c b/runloop.c index 23ed4a33f37..5ee7430ea9b 100644 --- a/runloop.c +++ b/runloop.c @@ -5311,7 +5311,7 @@ void runloop_msg_queue_push(const char *msg, if (is_accessibility_enabled( accessibility_enable, access_st->enabled)) - accessibility_speak_priority( + navigation_say( accessibility_enable, accessibility_narrator_speech_speed, (char*) msg, 0); @@ -7336,7 +7336,7 @@ void runloop_task_msg_queue_push( if (is_accessibility_enabled( accessibility_enable, access_st->enabled)) - accessibility_speak_priority( + navigation_say( accessibility_enable, accessibility_narrator_speech_speed, (char*)msg, 0); diff --git a/tasks/task_translation.c b/tasks/task_translation.c index be24d278c40..b83fd9c223f 100644 --- a/tasks/task_translation.c +++ b/tasks/task_translation.c @@ -303,7 +303,7 @@ static void accessibility_speak(const char *text) bool narrator_on = settings->bools.accessibility_enable; const char* voice = ai_service_get_str(settings->uints.ai_service_target_lang); - narrator_speak_priority(narrator_on, voice, speed, text, 10); + navigation_say(narrator_on, speed, text, 10); #endif } @@ -325,7 +325,7 @@ static void translation_speak(const char *text) /* Force the use of the narrator in Narrator modes (TTS) */ if (mode == 2 || mode == 4 || mode == 5 || narrator_on || 
access_st->enabled) - narrator_speak_priority(true, voice, speed, text, 10); + accessibility_speak_priority(speed, text, 10, voice); #endif }