diff --git a/nisaba/scripts/brahmic/data/Beng/mni/vowel.textproto b/nisaba/scripts/brahmic/data/Beng/mni/vowel.textproto index 903a4806..a2850691 100644 --- a/nisaba/scripts/brahmic/data/Beng/mni/vowel.textproto +++ b/nisaba/scripts/brahmic/data/Beng/mni/vowel.textproto @@ -36,15 +36,15 @@ item { uname: "E" raw: "এ" to_raw: "ē" } item { uname: "O" raw: "ও" to_raw: "ō" } # Atiya + Cheinap /ɐi/. -item { uname: "AI" raw: "ঐ" to_raw: "ai" } -item { uname: ["A", "I"] raw: "অই" to_raw: "ai" } +item { uname: "AI" raw: "ঐ" to_raw: "əi" } +item { uname: ["A", "I"] raw: "অই" to_raw: "əi" } # Atiya + Sounap /ɐu/. -item { uname: "AU" raw: "ঔ" to_raw: "au" } -item { uname: ["A", "U"] raw: "অউ" to_raw: "au" } +item { uname: "AU" raw: "ঔ" to_raw: "əu" } +item { uname: ["A", "U"] raw: "অউ" to_raw: "əu" } # Atiya + Nung /ɐŋ/. -item { uname: ["A", "ANUSVARA"] raw: "অং" to_raw: "aṁ" } +item { uname: ["A", "ANUSVARA"] raw: "অং" to_raw: "əṁ" } # TODO: Double-check. item { uname: "VOCALIC R" raw: "ঋ" to_raw: "ri" } diff --git a/nisaba/scripts/brahmic/data/Beng/mni/vowel_sign.textproto b/nisaba/scripts/brahmic/data/Beng/mni/vowel_sign.textproto index 59396388..3ff0b909 100644 --- a/nisaba/scripts/brahmic/data/Beng/mni/vowel_sign.textproto +++ b/nisaba/scripts/brahmic/data/Beng/mni/vowel_sign.textproto @@ -35,10 +35,10 @@ item { uname: "SIGN E" to_raw: "ē" } item { uname: "SIGN O" to_raw: "ō" } # Sounap /ɐu/. -item { uname: "SIGN AU" to_raw: "au" } +item { uname: "SIGN AU" to_raw: "əu" } # Cheinap /ɐi/. -item { uname: "SIGN AI" to_raw: "ai" } +item { uname: "SIGN AI" to_raw: "əi" } # TODO: Double-check. 
item { uname: "SIGN VOCALIC R" to_raw: "ri" } diff --git a/nisaba/scripts/brahmic/data/Mtei/vowel.textproto b/nisaba/scripts/brahmic/data/Mtei/vowel.textproto index 30f28e6e..f365f03a 100644 --- a/nisaba/scripts/brahmic/data/Mtei/vowel.textproto +++ b/nisaba/scripts/brahmic/data/Mtei/vowel.textproto @@ -28,6 +28,11 @@ item { uname: ["ATIYA", "SIGN INAP"] raw: "ꯑꯤ" to_raw: "ī" } item { uname: ["ATIYA", "SIGN UNAP"] raw: "ꯑꯨ" to_raw: "ū" } item { uname: ["ATIYA", "SIGN YENAP"] raw: "ꯑꯦ" to_raw: "ē" } item { uname: ["ATIYA", "SIGN ONAP"] raw: "ꯑꯣ" to_raw: "ō" } -item { uname: ["ATIYA", "SIGN CHEINAP"] raw: "ꯑꯩ" to_raw: "ai" } -item { uname: ["ATIYA", "SIGN SOUNAP"] raw: "ꯑꯧ" to_raw: "au" } -item { uname: ["ATIYA", "SIGN NUNG"] raw: "ꯑꯪ" to_raw: "aṁ" } + +# The "real", more accurate output representation of the diphthongs (and +# the Atiya + Nung sequence) below, consistent with the output notation +# used above, is `ai`, `au` and `aṁ`. To avoid non-determinism, we map them +# to unique output symbols instead, replacing `a` with `ə`. +item { uname: ["ATIYA", "SIGN CHEINAP"] raw: "ꯑꯩ" to_raw: "əi" } +item { uname: ["ATIYA", "SIGN SOUNAP"] raw: "ꯑꯧ" to_raw: "əu" } +item { uname: ["ATIYA", "SIGN NUNG"] raw: "ꯑꯪ" to_raw: "əṁ" } diff --git a/nisaba/scripts/brahmic/data/Mtei/vowel_sign.textproto b/nisaba/scripts/brahmic/data/Mtei/vowel_sign.textproto index 3afe5813..0d19442b 100644 --- a/nisaba/scripts/brahmic/data/Mtei/vowel_sign.textproto +++ b/nisaba/scripts/brahmic/data/Mtei/vowel_sign.textproto @@ -40,11 +40,14 @@ item { uname: "SIGN ONAP" raw: "ꯣ" to_raw: "ō" } + +# Please see the corresponding note on output mapping symbols for diphthongs +# in `vowel.textproto`. 
item { uname: "SIGN CHEINAP" raw: "ꯩ" - to_raw: "ai" + to_raw: "əi" } item { uname: "SIGN SOUNAP" raw: "ꯧ" - to_raw: "au" + to_raw: "əu" } diff --git a/nisaba/scripts/brahmic/testdata/iso.tsv b/nisaba/scripts/brahmic/testdata/iso.tsv index 9191f19b..15e62a4c 100644 --- a/nisaba/scripts/brahmic/testdata/iso.tsv +++ b/nisaba/scripts/brahmic/testdata/iso.tsv @@ -122,19 +122,19 @@ FROM_BENG নিঃহৗঁত niḥhăm̐ta # ------------------------------------------------------------------------------ # Meetei Mayek (Manipuri): # ------------------------------------------------------------------------------ -FROM_MTEI ꯑꯩꯅ aina +FROM_MTEI ꯑꯩꯅ əina FROM_MTEI ꯀꯣꯛ kōk' FROM_MTEI ꯁꯝ sam' FROM_MTEI ꯑꯇꯤꯌꯥ atīyā FROM_MTEI ꯆꯤꯟ cʰīn' FROM_MTEI ꯒ꯭ꯂꯥꯁ glāsa FROM_MTEI ꯀꯐꯣꯢ kapʰōi' -FROM_MTEI ꯀꯨꯁꯨꯝꯂꯩ kūsūm'lai +FROM_MTEI ꯀꯨꯁꯨꯝꯂꯩ kūsūm'ləi FROM_MTEI ꯀꯦꯇꯨꯀꯤ kētūkī FROM_MTEI ꯀꯦꯇꯦꯀꯤ kētēkī FROM_MTEI ꯀꯣꯝꯂꯥ kōm'lā FROM_MTEI ꯁꯤꯡ sīṅ' -FROM_MTEI ꯂꯩꯁꯥꯕꯤ laisābī +FROM_MTEI ꯂꯩꯁꯥꯕꯤ ləisābī FROM_MTEI ꯃꯌꯥꯡꯇꯣꯟ mayāṅ'tōn' FROM_MTEI ꯃꯦꯊꯤ mētʰī FROM_MTEI ꯄꯨꯗꯤꯅꯥ pūdīnā @@ -142,7 +142,7 @@ FROM_MTEI ꯅꯨꯁꯤꯍꯤꯗꯥꯛ nūsīhīdāk' FROM_MTEI ꯆꯝꯄ꯭ꯔꯥ cʰam'prā FROM_MTEI ꯇꯨꯜꯁꯤꯄꯝꯕꯤ tūl'sīpam'bī FROM_MTEI ꯌꯥꯢꯉꯪ yāi'ṅaṁ -FROM_MTEI ꯍꯩꯅꯧ hainau +FROM_MTEI ꯍꯩꯅꯧ həinəu FROM_MTEI ꯎꯁꯤꯡꯁꯥ usīṅ'sā FROM_MTEI ꯑꯗꯨꯔꯒꯨꯂꯥꯕ adūragūlāba FROM_MTEI ꯒꯨꯂꯥꯕ gūlāba @@ -150,19 +150,19 @@ FROM_MTEI ꯖꯥꯢꯐꯜ jāi'pʰal' FROM_MTEI ꯀꯥꯡ kāṅ' FROM_MTEI ꯀꯥ꯬ꯉ kā^ṅa FROM_MTEI ꯂꯦꯡꯖꯨꯝ lēṅ'jūm' -TO_MTEI aina ꯑꯩꯅ +TO_MTEI əina ꯑꯩꯅ TO_MTEI kōk' ꯀꯣꯛ TO_MTEI sam' ꯁꯝ TO_MTEI atīyā ꯑꯇꯤꯌꯥ TO_MTEI cʰīn' ꯆꯤꯟ TO_MTEI glāsa ꯒ꯭ꯂꯥꯁ TO_MTEI kapʰōi' ꯀꯐꯣꯢ -TO_MTEI kūsūm'lai ꯀꯨꯁꯨꯝꯂꯩ +TO_MTEI kūsūm'ləi ꯀꯨꯁꯨꯝꯂꯩ TO_MTEI kētūkī ꯀꯦꯇꯨꯀꯤ TO_MTEI kētēkī ꯀꯦꯇꯦꯀꯤ TO_MTEI kōm'lā ꯀꯣꯝꯂꯥ TO_MTEI sīṅ' ꯁꯤꯡ -TO_MTEI laisābī ꯂꯩꯁꯥꯕꯤ +TO_MTEI ləisābī ꯂꯩꯁꯥꯕꯤ TO_MTEI mayāṅ'tōn' ꯃꯌꯥꯡꯇꯣꯟ TO_MTEI mētʰī ꯃꯦꯊꯤ TO_MTEI pūdīnā ꯄꯨꯗꯤꯅꯥ @@ -170,7 +170,7 @@ TO_MTEI nūsīhīdāk' ꯅꯨꯁꯤꯍꯤꯗꯥꯛ TO_MTEI cʰam'prā ꯆꯝꯄ꯭ꯔꯥ TO_MTEI tūl'sīpam'bī ꯇꯨꯜꯁꯤꯄꯝꯕꯤ TO_MTEI yāi'ṅaṁ ꯌꯥꯢꯉꯪ -TO_MTEI hainau ꯍꯩꯅꯧ +TO_MTEI həinəu ꯍꯩꯅꯧ TO_MTEI usīṅ'sā ꯎꯁꯤꯡꯁꯥ 
TO_MTEI adūragūlāba ꯑꯗꯨꯔꯒꯨꯂꯥꯕ TO_MTEI gūlāba ꯒꯨꯂꯥꯕ @@ -178,6 +178,12 @@ TO_MTEI jāi'pʰal' ꯖꯥꯢꯐꯜ TO_MTEI lēṅ'jūm' ꯂꯦꯡꯖꯨꯝ TO_MTEI kā^ṅa ꯀꯥ꯬ꯉ +# Word-initial diphthongs. +FROM_MTEI ꯑꯧꯇꯗꯣꯔꯒꯤ əutadōragī +FROM_MTEI ꯑꯧꯠ əut' +TO_MTEI əutadōragī ꯑꯧꯇꯗꯣꯔꯒꯤ +TO_MTEI əut' ꯑꯧꯠ + # ------------------------------------------------------------------------------ # Baybayin (Tagalog). # ------------------------------------------------------------------------------