forked from mayeaux/generate-subtitles
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathconstants.js
105 lines (91 loc) · 4.14 KB
/
constants.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
const languageNameMap = require('language-name-map/map')
/**
 * Whisper-related constants.
 *
 * The code list and the human-readable name list below are each sorted
 * alphabetically on their own, so they are NOT index-aligned (e.g. the second
 * code is `am` while the second name is `Albanian`), and the name list also
 * contains alternative names for the same language.
 */

// Comma-separated short language codes (presumably the codes Whisper accepts — confirm).
// Not referenced by any other visible code in this file.
const whisperLanguagesString = 'af,am,ar,as,az,ba,be,bg,bn,bo,br,bs,ca,cs,cy,da,de,el,en,es,et,eu,fa,fi,fo,fr,gl,gu,ha,haw,hi,hr,ht,hu,hy,id,is,it,iw,ja,jw,ka,kk,km,kn,ko,la,lb,ln,lo,lt,lv,mg,mi,mk,ml,mn,mr,ms,mt,my,ne,nl,nn,no,oc,pa,pl,ps,pt,ro,ru,sa,sd,si,sk,sl,sn,so,sq,sr,su,sv,sw,ta,te,tg,th,tk,tl,tr,tt,uk,ur,uz,vi,yi,yo,zh';

// Comma-separated model identifiers, plus the same values as an array.
const whisperModelsString = 'tiny.en,tiny,base.en,base,small.en,small,medium.en,medium,large';
const whisperModelsArray = whisperModelsString.split(',');

// Comma-separated human-readable language names (one entry, "Haitian Creole", contains a space).
const whisperLanguagesHumanNames = 'Afrikaans,Albanian,Amharic,Arabic,Armenian,Assamese,Azerbaijani,Bashkir,Basque,Belarusian,Bengali,Bosnian,Breton,Bulgarian,Burmese,Castilian,Catalan,Chinese,Croatian,Czech,Danish,Dutch,English,Estonian,Faroese,Finnish,Flemish,French,Galician,Georgian,German,Greek,Gujarati,Haitian,Haitian Creole,Hausa,Hawaiian,Hebrew,Hindi,Hungarian,Icelandic,Indonesian,Italian,Japanese,Javanese,Kannada,Kazakh,Khmer,Korean,Lao,Latin,Latvian,Letzeburgesch,Lingala,Lithuanian,Luxembourgish,Macedonian,Malagasy,Malay,Malayalam,Maltese,Maori,Marathi,Moldavian,Moldovan,Mongolian,Myanmar,Nepali,Norwegian,Nynorsk,Occitan,Panjabi,Pashto,Persian,Polish,Portuguese,Punjabi,Pushto,Romanian,Russian,Sanskrit,Serbian,Shona,Sindhi,Sinhala,Sinhalese,Slovak,Slovenian,Somali,Spanish,Sundanese,Swahili,Swedish,Tagalog,Tajik,Tamil,Tatar,Telugu,Thai,Tibetan,Turkish,Turkmen,Ukrainian,Urdu,Uzbek,Valencian,Vietnamese,Welsh,Yiddish,Yoruba';

// The same names as an array, and re-joined with single spaces instead of commas.
const whisperLanguagesHumanReadableArray = whisperLanguagesHumanNames.split(',');
const whisperLanguagesAsSpacedString = whisperLanguagesHumanReadableArray.join(' ');
/**
 * Finds the key of `languageNameMap` whose entry has the given `name`.
 *
 * Every entry is inspected, so if several entries share the same name the
 * last one in key order is the one returned.
 *
 * @param {string} languageName - Name compared (strict equality, case-sensitive)
 *   against each entry's `name` property.
 * @returns {string|undefined} The matching language code, or `undefined` when
 *   no entry has that name.
 */
function getLanguageCodeForAllLanguages(languageName) {
  let matchedCode;
  for (const [code, details] of Object.entries(languageNameMap)) {
    if (details.name === languageName) {
      matchedCode = code;
    }
  }
  return matchedCode;
}
/**
 * Languages available for translation (per the original note: the models
 * available in LibreTranslate). Each entry pairs a short language code with
 * its human-readable English name.
 *
 * `shouldTranslateFrom` matches on `name` with strict equality, so every name
 * here must be spelled exactly as the language names used elsewhere in this
 * file. The entry for `uk` was previously misspelled "Ukranian", which could
 * never match "Ukrainian".
 *
 * Note: Hebrew uses the code `he` here, whereas the Whisper code list in this
 * file contains `iw` and no `he`.
 */
const translationLanguages = [
  { code: 'ar', name: 'Arabic' },
  { code: 'az', name: 'Azerbaijani' },
  { code: 'zh', name: 'Chinese' },
  { code: 'cs', name: 'Czech' },
  { code: 'da', name: 'Danish' },
  { code: 'nl', name: 'Dutch' },
  { code: 'en', name: 'English' },
  { code: 'fi', name: 'Finnish' },
  { code: 'fr', name: 'French' },
  { code: 'de', name: 'German' },
  { code: 'el', name: 'Greek' },
  { code: 'he', name: 'Hebrew' },
  { code: 'hi', name: 'Hindi' },
  { code: 'hu', name: 'Hungarian' },
  { code: 'id', name: 'Indonesian' },
  { code: 'ga', name: 'Irish' },
  { code: 'it', name: 'Italian' },
  { code: 'ja', name: 'Japanese' },
  { code: 'ko', name: 'Korean' },
  { code: 'fa', name: 'Persian' },
  { code: 'pl', name: 'Polish' },
  { code: 'pt', name: 'Portuguese' },
  { code: 'ru', name: 'Russian' },
  { code: 'sk', name: 'Slovak' },
  { code: 'es', name: 'Spanish' },
  { code: 'sv', name: 'Swedish' },
  { code: 'tr', name: 'Turkish' },
  { code: 'uk', name: 'Ukrainian' }, // spelling fixed (was "Ukranian")
];
// Code/name pairs for the languages that subtitles are translated into
// (presumably the translation targets — this list is not referenced or
// exported by the visible code; confirm usage before relying on it).
//
// Deliberately left out:
//   - Arabic  ("ar"): has not been made to work yet.
//   - Chinese ("zh"): has not been made to work yet; the WebVTT format is too
//     broken after translation.
const languagesToTranslateTo = [
  { code: 'en', name: 'English' },
  { code: 'fr', name: 'French' },
  { code: 'de', name: 'German' },
  { code: 'es', name: 'Spanish' },
  { code: 'ru', name: 'Russian' },
  { code: 'ja', name: 'Japanese' },
];
// Human-readable names of the languages selected for transcription.
// NOTE(review): the original comment suggested that a language is transcribed
// when its human-readable name (or its 'en' version) matches an entry in this
// list — confirm against the code that consumes this export.
// Chinese is intentionally excluded: it could not be made to work.
const languagesToTranscribe = [
  'English',
  'French',
  'German',
  'Spanish',
  'Russian',
  'Japanese',
];
/**
 * Looks up a language, by its human-readable name, in `translationLanguages`.
 *
 * Despite the boolean-sounding name, the result is the matching entry itself
 * (truthy) or `undefined` (falsy), so it can be used directly in a condition.
 *
 * @param {string} languageName - Name compared with strict equality
 *   (case-sensitive) against each entry's `name`.
 * @returns {{code: string, name: string}|undefined} The first entry whose
 *   `name` equals `languageName`, or `undefined` if there is none.
 */
function shouldTranslateFrom(languageName) {
  const matchingEntry = translationLanguages.find(({ name }) => name === languageName);
  return matchingEntry;
}
// Array form of `languageNameMap`: one `{ languageCode, name }` record per key,
// in the map's key order. Only the `name` property of each entry is carried over.
const newLanguagesMap = Object.entries(languageNameMap).map(
  ([languageCode, details]) => ({ languageCode, name: details.name }),
);
// const languagesToTranscribeFrom =
// Public (CommonJS) interface of this constants module.
// Not exported: whisperLanguagesString, whisperModelsString, whisperModelsArray
// and languagesToTranslateTo, which stay private to this file.
module.exports = {
// Whisper language names: raw comma-separated string, array form, and space-joined string.
whisperLanguagesHumanNames,
whisperLanguagesHumanReadableArray,
// Human-readable names of the languages selected for transcription.
languagesToTranscribe,
whisperLanguagesAsSpacedString,
// Translation lookup: returns the matching translationLanguages entry or undefined.
shouldTranslateFrom,
translationLanguages,
// Helpers built on the `language-name-map` data.
getLanguageCodeForAllLanguages,
newLanguagesMap
}