Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Sync with 2.8.1 #81

Merged
merged 3 commits into from
Sep 11, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions scripts/gen-tag-table.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

import collections
from html.parser import HTMLParser
from html import unescape
import html
import io
import itertools
import re
Expand Down Expand Up @@ -371,10 +371,10 @@ def handle_data(self, data):
self._current_tr[-1] += data

def handle_charref(self, name):
self.handle_data(html_unescape(self, '&#%s;' % name))
self.handle_data(html.unescape('&#%s;' % name))

def handle_entityref(self, name):
self.handle_data(html_unescape(self, '&%s;' % name))
self.handle_data(html.unescape('&%s;' % name))

def parse(self, filename):
"""Parse the OpenType language system tag registry.
Expand Down
4 changes: 2 additions & 2 deletions src/fallback.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,13 +59,13 @@ fn recategorize_combining_class(u: u32, mut class: u8) -> u8 {
mcc::CCC15 => Class::Below as u8, // tsere
mcc::CCC16 => Class::Below as u8, // segol
mcc::CCC17 => Class::Below as u8, // patah
mcc::CCC18 => Class::Below as u8, // qamats
mcc::CCC18 => Class::Below as u8, // qamats & qamats qatan
mcc::CCC20 => Class::Below as u8, // qubuts
mcc::CCC22 => Class::Below as u8, // meteg
mcc::CCC23 => Class::AttachedAbove as u8, // rafe
mcc::CCC24 => Class::AboveRight as u8, // shin dot
mcc::CCC25 => Class::AboveLeft as u8, // sin dot
mcc::CCC19 => Class::AboveLeft as u8, // holam
mcc::CCC19 => Class::AboveLeft as u8, // holam & holam haser for vav
mcc::CCC26 => Class::Above as u8, // point varika
mcc::CCC21 => class, // dagesh

Expand Down
14 changes: 7 additions & 7 deletions src/tag_table.rs
Original file line number Diff line number Diff line change
Expand Up @@ -377,7 +377,7 @@ pub const OPEN_TYPE_LANGUAGES: &[LangTag] = &[
LangTag { language: "dik", tag: Tag::from_bytes(b"DNK ") }, // Southwestern Dinka -> Dinka
LangTag { language: "din", tag: Tag::from_bytes(b"DNK ") }, // Dinka [macrolanguage]
LangTag { language: "dip", tag: Tag::from_bytes(b"DNK ") }, // Northeastern Dinka -> Dinka
LangTag { language: "diq", tag: Tag::from_bytes(b"DIQ ") }, // Dimli
LangTag { language: "diq", tag: Tag::from_bytes(b"DIQ ") }, // Dimli
LangTag { language: "diq", tag: Tag::from_bytes(b"ZZA ") }, // Dimli -> Zazaki
LangTag { language: "diw", tag: Tag::from_bytes(b"DNK ") }, // Northwestern Dinka -> Dinka
LangTag { language: "dje", tag: Tag::from_bytes(b"DJR ") }, // Zarma
Expand Down Expand Up @@ -700,7 +700,7 @@ pub const OPEN_TYPE_LANGUAGES: &[LangTag] = &[
// LangTag { language: "khw", tag: Tag::from_bytes(b"KHW ") }, // Khowar
LangTag { language: "ki", tag: Tag::from_bytes(b"KIK ") }, // Kikuyu (Gikuyu)
LangTag { language: "kis", tag: Tag(0) }, // Kis != Kisii
LangTag { language: "kiu", tag: Tag::from_bytes(b"KIU ") }, // Kirmanjki
LangTag { language: "kiu", tag: Tag::from_bytes(b"KIU ") }, // Kirmanjki
LangTag { language: "kiu", tag: Tag::from_bytes(b"ZZA ") }, // Kirmanjki -> Zazaki
LangTag { language: "kj", tag: Tag::from_bytes(b"KUA ") }, // Kuanyama
LangTag { language: "kjb", tag: Tag::from_bytes(b"MYN ") }, // Q'anjob'al -> Mayan
Expand Down Expand Up @@ -729,7 +729,7 @@ pub const OPEN_TYPE_LANGUAGES: &[LangTag] = &[
LangTag { language: "knc", tag: Tag::from_bytes(b"KNR ") }, // Central Kanuri -> Kanuri
LangTag { language: "kng", tag: Tag::from_bytes(b"KON0") }, // Koongo -> Kongo
LangTag { language: "knj", tag: Tag::from_bytes(b"MYN ") }, // Western Kanjobal -> Mayan
LangTag { language: "knn", tag: Tag::from_bytes(b"KOK ") }, // Konkani
LangTag { language: "knn", tag: Tag::from_bytes(b"KOK ") }, // Konkani
LangTag { language: "knr", tag: Tag(0) }, // Kaningra != Kanuri
LangTag { language: "ko", tag: Tag::from_bytes(b"KOR ") }, // Korean
LangTag { language: "ko", tag: Tag::from_bytes(b"KOH ") }, // Korean -> Korean Old Hangul
Expand Down Expand Up @@ -1046,7 +1046,7 @@ pub const OPEN_TYPE_LANGUAGES: &[LangTag] = &[
// LangTag { language: "noe", tag: Tag::from_bytes(b"NOE ") }, // Nimadi
// LangTag { language: "nog", tag: Tag::from_bytes(b"NOG ") }, // Nogai
// LangTag { language: "nov", tag: Tag::from_bytes(b"NOV ") }, // Novial
LangTag { language: "npi", tag: Tag::from_bytes(b"NEP ") }, // Nepali
LangTag { language: "npi", tag: Tag::from_bytes(b"NEP ") }, // Nepali
LangTag { language: "npl", tag: Tag::from_bytes(b"NAH ") }, // Southeastern Puebla Nahuatl -> Nahuatl
LangTag { language: "nqo", tag: Tag::from_bytes(b"NKO ") }, // N’Ko
LangTag { language: "nr", tag: Tag::from_bytes(b"NDB ") }, // South Ndebele -> Ndebele
Expand Down Expand Up @@ -1376,7 +1376,7 @@ pub const OPEN_TYPE_LANGUAGES: &[LangTag] = &[
LangTag { language: "sw", tag: Tag::from_bytes(b"SWK ") }, // Swahili [macrolanguage]
LangTag { language: "swb", tag: Tag::from_bytes(b"CMR ") }, // Maore Comorian -> Comorian
LangTag { language: "swc", tag: Tag::from_bytes(b"SWK ") }, // Congo Swahili -> Swahili
LangTag { language: "swh", tag: Tag::from_bytes(b"SWK ") }, // Swahili
LangTag { language: "swh", tag: Tag::from_bytes(b"SWK ") }, // Swahili
LangTag { language: "swk", tag: Tag(0) }, // Malawi Sena != Swahili
LangTag { language: "swn", tag: Tag::from_bytes(b"BBR ") }, // Sawknah -> Berber
LangTag { language: "swv", tag: Tag::from_bytes(b"MAW ") }, // Shekhawati -> Marwari
Expand Down Expand Up @@ -1592,13 +1592,13 @@ pub const OPEN_TYPE_LANGUAGES: &[LangTag] = &[
LangTag { language: "zhn", tag: Tag::from_bytes(b"ZHA ") }, // Nong Zhuang -> Zhuang
LangTag { language: "zkb", tag: Tag::from_bytes(b"KHA ") }, // Koibal(retired code) -> Khakass
LangTag { language: "zlj", tag: Tag::from_bytes(b"ZHA ") }, // Liujiang Zhuang -> Zhuang
LangTag { language: "zlm", tag: Tag::from_bytes(b"MLY ") }, // Malay
LangTag { language: "zlm", tag: Tag::from_bytes(b"MLY ") }, // Malay
LangTag { language: "zln", tag: Tag::from_bytes(b"ZHA ") }, // Lianshan Zhuang -> Zhuang
LangTag { language: "zlq", tag: Tag::from_bytes(b"ZHA ") }, // Liuqian Zhuang -> Zhuang
LangTag { language: "zmi", tag: Tag::from_bytes(b"MLY ") }, // Negeri Sembilan Malay -> Malay
LangTag { language: "zmz", tag: Tag::from_bytes(b"BAD0") }, // Mbandja -> Banda
LangTag { language: "znd", tag: Tag(0) }, // Zande [family] != Zande
LangTag { language: "zne", tag: Tag::from_bytes(b"ZND ") }, // Zande
LangTag { language: "zne", tag: Tag::from_bytes(b"ZND ") }, // Zande
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Extra space?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I have no idea why rustfmt doesn’t strip the trailing space from the comment.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just remove them manually for now. My editor has a "remove trailing whitespace" feature, so it gets trimmed automatically.

LangTag { language: "zom", tag: Tag::from_bytes(b"QIN ") }, // Zou -> Chin
LangTag { language: "zqe", tag: Tag::from_bytes(b"ZHA ") }, // Qiubei Zhuang -> Zhuang
LangTag { language: "zsm", tag: Tag::from_bytes(b"MLY ") }, // Standard Malay -> Malay
Expand Down
4 changes: 2 additions & 2 deletions src/unicode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,8 @@ pub mod modified_combining_class {
pub const CCC15: u8 = 18; // tsere
pub const CCC16: u8 = 19; // segol
pub const CCC17: u8 = 20; // patah
pub const CCC18: u8 = 21; // qamats
pub const CCC19: u8 = 14; // holam
pub const CCC18: u8 = 21; // qamats & qamats qatan
pub const CCC19: u8 = 14; // holam & holam haser for vav
pub const CCC20: u8 = 24; // qubuts
pub const CCC21: u8 = 12; // dagesh
pub const CCC22: u8 = 25; // meteg
Expand Down