From d5fb8a9c5d3871e6d3d34f57d6db7cff2dd54f53 Mon Sep 17 00:00:00 2001 From: Laurenz Stampfl <47084093+LaurenzV@users.noreply.github.com> Date: Wed, 25 Sep 2024 11:12:14 +0200 Subject: [PATCH 01/29] [cmap] Fix macroman lookup --- src/hb/face.rs | 2 +- tests/shaping/text_rendering_tests.rs | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/hb/face.rs b/src/hb/face.rs index 0c6b6b35..847eaada 100644 --- a/src/hb/face.rs +++ b/src/hb/face.rs @@ -444,5 +444,5 @@ fn unicode_to_macroman(c: u32) -> u32 { let Some(index) = UNICODE_TO_MACROMAN.iter().position(|m| *m == u) else { return 0; }; - (0x7F + index) as u32 + (0x80 + index) as u32 } diff --git a/tests/shaping/text_rendering_tests.rs b/tests/shaping/text_rendering_tests.rs index 4f025ade..c8ca4484 100644 --- a/tests/shaping/text_rendering_tests.rs +++ b/tests/shaping/text_rendering_tests.rs @@ -706,7 +706,7 @@ fn cmap_3_001() { "\u{201C}", "--ned --remove-default-ignorables", ), - "gid196" + "gid200" ); } @@ -742,7 +742,7 @@ fn cmap_3_004() { "\u{00C7}", "--ned --remove-default-ignorables", ), - "gid124" + "gid126" ); } @@ -790,7 +790,7 @@ fn cmap_3_008() { "\u{00D6}", "--ned --remove-default-ignorables", ), - "gid135" + "gid140" ); } @@ -814,7 +814,7 @@ fn cmap_3_010() { "\u{00DC}", "--ned --remove-default-ignorables", ), - "gid140" + "gid145" ); } @@ -826,7 +826,7 @@ fn cmap_3_011() { "\u{201D}", "--ned --remove-default-ignorables", ), - "gid200" + "gid201" ); } @@ -862,7 +862,7 @@ fn cmap_3_014() { "\u{00E7}", "--ned --remove-default-ignorables", ), - "gid152" + "gid154" ); } From 1fad5cbeae8be40455082c3f2c7435986fe6d3f8 Mon Sep 17 00:00:00 2001 From: Laurenz Stampfl <47084093+LaurenzV@users.noreply.github.com> Date: Wed, 25 Sep 2024 11:13:53 +0200 Subject: [PATCH 02/29] Readd broken cmap tests --- scripts/gen-shaping-tests.py | 4 --- tests/shaping/text_rendering_tests.rs | 36 +++++++++++++++++++++++++++ 2 files changed, 36 insertions(+), 4 deletions(-) diff --git 
a/scripts/gen-shaping-tests.py b/scripts/gen-shaping-tests.py index f7a8da11..3d457393 100755 --- a/scripts/gen-shaping-tests.py +++ b/scripts/gen-shaping-tests.py @@ -43,10 +43,6 @@ # Wasn't able to figure out the problem, but the problem occurs during kerning. In harfbuzz, it uses the `drive` # method, while in rustybuzz it uses `state_machine_kerning` which seems to apply some different rules for the flags. "macos_122", - # Broken in HarfBuzz. https://github.com/harfbuzz/harfbuzz/issues/4774 - "cmap_3_016", - "cmap_3_018", - "cmap_3_020", # This custom test fails because harfbuzz uses a set digest in AAT to abort early # which we don't do yet. Is basically the same as morx_20_005, but with `--show-flags` diff --git a/tests/shaping/text_rendering_tests.rs b/tests/shaping/text_rendering_tests.rs index c8ca4484..777bc37c 100644 --- a/tests/shaping/text_rendering_tests.rs +++ b/tests/shaping/text_rendering_tests.rs @@ -878,6 +878,18 @@ fn cmap_3_015() { ); } +#[test] +fn cmap_3_016() { + assert_eq!( + shape( + "tests/fonts/text-rendering-tests/TestCMAPMacTurkish.ttf", + "\u{0131}", + "--ned --remove-default-ignorables", + ), + "gid0" + ); +} + #[test] fn cmap_3_017() { assert_eq!( @@ -890,6 +902,18 @@ fn cmap_3_017() { ); } +#[test] +fn cmap_3_018() { + assert_eq!( + shape( + "tests/fonts/text-rendering-tests/TestCMAPMacTurkish.ttf", + "\u{00F6}", + "--ned --remove-default-ignorables", + ), + "gid168" + ); +} + #[test] fn cmap_3_019() { assert_eq!( @@ -902,6 +926,18 @@ fn cmap_3_019() { ); } +#[test] +fn cmap_3_020() { + assert_eq!( + shape( + "tests/fonts/text-rendering-tests/TestCMAPMacTurkish.ttf", + "\u{00FC}", + "--ned --remove-default-ignorables", + ), + "gid174" + ); +} + #[test] fn cvar_1_001() { assert_eq!( From 39fe3f0e220451a035065e96f21b2bb54c3b43f0 Mon Sep 17 00:00:00 2001 From: Laurenz Stampfl <47084093+LaurenzV@users.noreply.github.com> Date: Wed, 25 Sep 2024 11:15:05 +0200 Subject: [PATCH 03/29] [ChainContext] Fix fast-path deviation from slow path 
--- tests/shaping/in_house.rs | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/tests/shaping/in_house.rs b/tests/shaping/in_house.rs index a63a6676..a5a0ea56 100644 --- a/tests/shaping/in_house.rs +++ b/tests/shaping/in_house.rs @@ -3174,6 +3174,23 @@ fn indic_joiners_006() { ); } +#[test] +fn indic_joiners_007() { + assert_eq!( + shape( + "tests/fonts/in-house/63e224dcb3d559d590f80c83b832cfca789e5dcc.ttf", + "\u{0ABE}\u{0AA8}\u{0ACD}\u{200D}\u{0AA4}\u{0ABF}", + "", + ), + "uni25CC=0+596|\ + uni0ABE=0+251|\ + uni0ABF.05=1+251|\ + uni0AA80ACD=1+293|\ + space=1+0|\ + uni0AA4=1+543" + ); +} + #[test] fn indic_malayalam_dot_reph_001() { assert_eq!( From 03eaa7de55d59289b4c7c8fb7de26505e462febd Mon Sep 17 00:00:00 2001 From: Laurenz Stampfl <47084093+LaurenzV@users.noreply.github.com> Date: Wed, 25 Sep 2024 11:17:22 +0200 Subject: [PATCH 04/29] [arabic] Remove non-sensical code --- src/hb/ot_shaper_arabic.rs | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/hb/ot_shaper_arabic.rs b/src/hb/ot_shaper_arabic.rs index 4e126fa7..4a6d79b9 100644 --- a/src/hb/ot_shaper_arabic.rs +++ b/src/hb/ot_shaper_arabic.rs @@ -247,9 +247,6 @@ fn collect_features(planner: &mut hb_ot_shape_planner_t) { /* https://github.com/harfbuzz/harfbuzz/issues/1573 */ if !planner.ot_map.has_feature(hb_tag_t::from_bytes(b"rclt")) { planner.ot_map.add_gsub_pause(None); - planner - .ot_map - .enable_feature(hb_tag_t::from_bytes(b"rclt"), F_MANUAL_ZWJ, 1); } planner From 618d5f0f99393fbefcbc6a48c7edb41adec19d71 Mon Sep 17 00:00:00 2001 From: Laurenz Stampfl <47084093+LaurenzV@users.noreply.github.com> Date: Wed, 25 Sep 2024 11:18:25 +0200 Subject: [PATCH 05/29] [USE, Unicode 16] Update the data files --- .../IndicPositionalCategory-Additional.txt | 10 ++--- .../IndicSyllabicCategory-Additional.txt | 42 +++++++++++++------ 2 files changed, 35 insertions(+), 17 deletions(-) diff --git a/scripts/ms-use/IndicPositionalCategory-Additional.txt 
b/scripts/ms-use/IndicPositionalCategory-Additional.txt index cb07643b..b607241e 100644 --- a/scripts/ms-use/IndicPositionalCategory-Additional.txt +++ b/scripts/ms-use/IndicPositionalCategory-Additional.txt @@ -9,6 +9,7 @@ # Updated for Unicode 14.0 by Andrew Glass 2021-09-28 # Updated for Unicode 15.0 by Andrew Glass 2022-09-16 # Updated for Unicode 15.1 by Andrew Glass 2023-09-14 +# Updated for Unicode 16.0 by Andrew Glass 2024-09-11 # ================================================ # ================================================ @@ -27,7 +28,6 @@ A9BF ; Bottom # Mc JAVANESE CONSONANT SIGN CAKRA 11130 ; Bottom # Mn CHAKMA VOWEL SIGN OI 1BF2..1BF3 ; Bottom # Mc [2] BATAK PANGOLAT..BATAK PANONGONAN # see USE issue #20 - # ================================================ # Indic_Positional_Category=Left @@ -49,7 +49,7 @@ A9BE ; Right # Mc JAVANESE CONSONANT SIGN PENGKAL # Reduced from AA35   ; Top # Mn       CHAM CONSONANT SIGN 1112A..1112B ; Top # Mn [2] CHAKMA VOWEL SIGN U..CHAKMA VOWEL SIGN UU # see USE issue #25 11131..11132 ; Top # Mn [2] CHAKMA O MARK..CHAKMA AU MARK # see USE issue #25 -1E4EC..1E4EF ; Top # Mn [4] NAG MUNDARI SIGN MUHOR..NAG MUNDARI SIGN SUTUH # 1E4EE is below, but made to for ccc +1E4EC..1E4EF ; Top # Mn [4] NAG MUNDARI SIGN MUHOR..NAG MUNDARI SIGN SUTUH # 1E4EE is below, but made to for ccc # ================================================ @@ -80,9 +80,8 @@ AA35   ; Top # Mn       CHAM CONSONANT SIGN 16F4F ; Bottom # Mn MIAO SIGN CONSONANT MODIFIER BAR 16F51..16F87 ; Bottom # Mc [55] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN UI 16F8F..16F92 ; Bottom # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW -#HIEROGLYPHS defined here while ISC is being used as a proxy for dedicated Hieroglyph cluster -13440 ; Bottom # Mn EGYPTIAN HIEROGLYPH MIRROR HORIZONTALLY -13447..13455 ; Bottom # Mn [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED +1E5EE ; Bottom # Mn OL ONAL SIGN MU # Not really below, but need to override 
to fit into Universal model +1E5EF ; Bottom # Mn OL ONAL SIGN IKIR # ================================================ @@ -98,6 +97,7 @@ AA35   ; Top # Mn       CHAM CONSONANT SIGN 1CF8..1CF9 ; Top # Mn [2] VEDIC TONE RING ABOVE..VEDIC TONE DOUBLE RING ABOVE 10D24..10D27 ; Top # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI 10EAB..10EAC ; Top # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK +10D69..10D6D ; Top # Mn [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK 16B30..16B36 ; Top # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM 1E130..1E136 ; Top # Mn [7] NYIAKENG PUACHUE HMONG TONE-B..NYIAKENG PUACHUE HMONG TONE-D 1E2AE ; Top # Mn TOTO SIGN RISING TONE diff --git a/scripts/ms-use/IndicSyllabicCategory-Additional.txt b/scripts/ms-use/IndicSyllabicCategory-Additional.txt index 43326c12..a42b975a 100644 --- a/scripts/ms-use/IndicSyllabicCategory-Additional.txt +++ b/scripts/ms-use/IndicSyllabicCategory-Additional.txt @@ -7,15 +7,17 @@ # Updated for Unicode 14.0 by Andrew Glass 2021-09-25 # Updated for Unicode 15.0 by Andrew Glass 2022-09-16 # Updated for Unicode 15.1 by Andrew Glass 2023-09-14 +# Updated for Unicode 16.0 by Andrew Glass 2024-09-11 # ================================================ # OVERRIDES TO ASSIGNED VALUES # ================================================ -# Indic_Syllabic_Category=Bindu +# Indic_Syllabic_Category=Bindu 193A ; Bindu # Mn LIMBU SIGN KEMPHRENG AA29 ; Bindu # Mn CHAM VOWEL SIGN AA 10A0D ; Bindu # Mn KHAROSHTHI SIGN DOUBLE RING BELOW +113CE ; Bindu # Mn TULU-TIGALARI SIGN VIRAMA # ================================================ @@ -30,19 +32,30 @@ AA29 ; Bindu # Mn CHAM VOWEL SIGN AA # ================================================ +# Indic_Syllabic_Category=Consonant_With_Stacker +11A3A ; Consonant_With_Stacker # Lo ZANABAZAR SQUARE CLUSTER-INITIAL LETTER RA + +# ================================================ + +# Indic_Syllabic_Category=Consonant_Subjoined 
+11A3B..11A3E ; Consonant_Subjoined # Mn [4] ZANABAZAR SQUARE CLUSTER-FINAL LETTER YA..ZANABAZAR SQUARE CLUSTER-FINAL LETTER VA + +# ================================================ + # Indic_Syllabic_Category=Consonant_Final_Modifier 1C36 ; Consonant_Final_Modifier # Mn LEPCHA SIGN RAN # ================================================ -# Indic_Syllabic_Category=Gemination_Mark +# Indic_Syllabic_Category=Gemination_Mark 11134 ; Gemination_Mark # Mc CHAKMA MAAYYAA # ================================================ -# Indic_Syllabic_Category=Nukta +# Indic_Syllabic_Category=Nukta 0F71 ; Nukta # Mn TIBETAN VOWEL SIGN AA # Reassigned to get this before an above vowel, but see #22 1BF2..1BF3 ; Nukta # Mc [2] BATAK PANGOLAT..BATAK PANONGONAN # see USE issue #20 +113CF ; Nukta # Mc TULU-TIGALARI SIGN LOOPED VIRAMA # ================================================ @@ -71,8 +84,9 @@ AABD ; Vowel_Independent # Lo TAI VIET VOWEL AN 1800 ; Consonant # Po MONGOLIAN BIRGA # Reassigned so that legacy Birga + MFVS sequences still work 1807 ; Consonant # Po MONGOLIAN SIBE SYLLABLE BOUNDARY MARKER 180A ; Consonant # Po MONGOLIAN NIRUGU -1820..1878 ; Consonant # Lo [88] MONGOLIAN LETTER A..MONGOLIAN LETTER CHA WITH TWO DOTS +1820..1842 ; Consonant # Lo [35] MONGOLIAN LETTER A..MONGOLIAN LETTER CHI 1843 ; Consonant # Lm MONGOLIAN LETTER TODO LONG VOWEL SIGN +1844..1878 ; Consonant # Lo [53] MONGOLIAN LETTER TODO E..MONGOLIAN LETTER CHA WITH TWO DOTS 2D30..2D67 ; Consonant # Lo [56] TIFINAGH LETTER YA..TIFINAGH LETTER YO 2D6F ; Consonant # Lm TIFINAGH MODIFIER LETTER LABIALIZATION MARK 10570..1057A ; Consonant # Lo [11] VITHKUQI CAPITAL LETTER A..VITHKUQI CAPITAL LETTER GA @@ -86,6 +100,10 @@ AABD ; Vowel_Independent # Lo TAI VIET VOWEL AN 10AC0..10AC7 ; Consonant # Lo [8] MANICHAEAN LETTER ALEPH..MANICHAEAN LETTER WAW 10AC9..10AE4 ; Consonant # Lo [28] MANICHAEAN LETTER ZAYIN..MANICHAEAN LETTER TAW 10D00..10D23 ; Consonant # Lo [36] HANIFI ROHINGYA LETTER A..HANIFI ROHINGYA MARK NA 
KHONNA +10D4A..10D4F ; Consonant # Lo [6] GARAY VOWEL SIGN A..GARAY SUKUN +10D50..10D65 ; Consonant # Lu [22] GARAY CAPITAL LETTER A..GARAY CAPITAL LETTER OLD NA +10D70..10D85 ; Consonant # Ll [22] GARAY SMALL LETTER A..GARAY SMALL LETTER OLD NA +10D6F ; Consonant # Lm GARAY REDUPLICATION MARK 10E80..10EA9 ; Consonant # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET 10EB0..10EB1 ; Consonant # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10F30..10F45 ; Consonant # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN @@ -95,8 +113,9 @@ AABD ; Vowel_Independent # Lo TAI VIET VOWEL AN 16F00..16F4A ; Consonant # Lo [75] MIAO LETTER PA..MIAO LETTER RTE 16FE4 ; Consonant # Mn KHITAN SMALL SCRIPT FILLER # Avoids Mn pushing this into VOWEL class 18B00..18CD5 ; Consonant # Lo [470] KHITAN SMALL SCRIPT CHARACTER-18B00..KHITAN SMALL SCRIPT CHARACTER-18CD5 +18CFF ; Consonant # Lo KHITAN SMALL SCRIPT CHARACTER-18CFF 1BC00..1BC6A ; Consonant # Lo [107] DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M -1BC70..1BC7C ; Consonant # Lo [13] DUPLOYAN AFFIX LEFT HORIZONTAL SECANT..DUPLOYAN AFFIX ATTACHED TANGENT HOOK +1BC70..1BC7C ; Consonant # Lo [13] DUPLOYAN AFFIX LEFT HORIZONTAL SECANT..DUPLOYAN AFFIX ATTACHED TANGENT HOOK 1BC80..1BC88 ; Consonant # Lo [9] DUPLOYAN AFFIX HIGH ACUTE..DUPLOYAN AFFIX HIGH VERTICAL 1BC90..1BC99 ; Consonant # Lo [10] DUPLOYAN AFFIX LOW ACUTE..DUPLOYAN AFFIX LOW ARROW 1E100..1E12C ; Consonant # Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W @@ -107,6 +126,8 @@ AABD ; Vowel_Independent # Lo TAI VIET VOWEL AN 1E2C0..1E2EB ; Consonant # Lo [44] WANCHO LETTER AA..WANCHO LETTER YIH 1E4D0..1E4EA ; Consonant # Lo [27] NAG MUNDARI LETTER O..NAG MUNDARI LETTER ELL 1E4EB ; Consonant # Lm NAG MUNDARI SIGN OJOD +1E5D0..1E5ED ; Consonant # Lo [30] OL ONAL LETTER O..OL ONAL LETTER EG +1E5F0 ; Consonant # Lo OL ONAL SIGN HODDOND 1E900..1E921 ; Consonant # Lu [34] ADLAM CAPITAL LETTER ALIF..ADLAM CAPITAL LETTER 
SHA 1E922..1E943 ; Consonant # Ll [34] ADLAM SMALL LETTER ALIF..ADLAM SMALL LETTER SHA 1E94B ; Consonant # Lm ADLAM NASALIZATION MARK @@ -149,6 +170,7 @@ FE00..FE0F ; Modifying_Letter # Mn [16] VARIATION SELECTOR-1..VARIATION SEL 1E140..1E149 ; Number # Nd [10] NYIAKENG PUACHUE HMONG DIGIT ZERO..NYIAKENG PUACHUE HMONG DIGIT NINE 1E2F0..1E2F9 ; Number # Nd [10] WANCHO DIGIT ZERO..WANCHO DIGIT NINE 1E4F0..1E4F9 ; Number # Nd [10] NAG MUNDARI DIGIT ZERO..NAG MUNDARI DIGIT NINE +1E5F1..1E5FA ; Number # Nd [10] OL ONAL DIGIT ZERO..OL ONAL DIGIT NINE 1E950..1E959 ; Number # Nd [10] ADLAM DIGIT ZERO..ADLAM DIGIT NINE # ================================================ @@ -173,18 +195,13 @@ FE00..FE0F ; Modifying_Letter # Mn [16] VARIATION SELECTOR-1..VARIATION SEL # ================================================ -# Indic_Syllabic_Category=Vowel_Independent -AAB1 ; Vowel_Independent # Lo TAI VIET VOWEL AA -AABA ; Vowel_Independent # Lo TAI VIET VOWEL UA -AABD ; Vowel_Independent # Lo TAI VIET VOWEL AN - -# ================================================ - # Indic_Syllabic_Category=Vowel_Dependent 0B55 ; Vowel_Dependent # Mn ORIYA SIGN OVERLINE +10D69..10D6D ; Vowel_Dependent # Mn [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK 10EAB..10EAC ; Vowel_Dependent # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK 16F51..16F87 ; Vowel_Dependent # Mc [55] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN UI 1E4EC..1E4EF ; Vowel_Dependent # Mn [4] NAG MUNDARI SIGN MUHOR..NAG MUNDARI SIGN SUTUH +1E5EE..1E5EF ; Vowel_Dependent # Mn [2] OL ONAL SIGN MU..OL ONAL SIGN IKIR # ================================================ @@ -207,6 +224,7 @@ AABD ; Vowel_Independent # Lo TAI VIET VOWEL AN 13000..1342F ; Hieroglyph # Lo [1072] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH V011D 1343C..1343F ; Hieroglyph # Cf [4] EGYPTIAN HIEROGLYPH BEGIN ENCLOSURE..END WALLED ENCLOSURE 13441..13446 ; Hieroglyph # Lo [6] EGYPTIAN HIEROGLYPH FULL BLANK..HIEROGLYPH WIDE LOST SIGN 
+13460..143FA ; Hieroglyph # Lo [3995] EGYPTIAN HIEROGLYPH-13460..EGYPTIAN HIEROGLYPH-143FA # ================================================ From fbdba53ad29ccd97bf92862df63b282e8b0ad4bb Mon Sep 17 00:00:00 2001 From: Laurenz Stampfl <47084093+LaurenzV@users.noreply.github.com> Date: Wed, 25 Sep 2024 11:21:19 +0200 Subject: [PATCH 06/29] [Unicode 16] Add new `hb_script_t` values --- src/hb/common.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/hb/common.rs b/src/hb/common.rs index 43b8c5ee..432ee692 100644 --- a/src/hb/common.rs +++ b/src/hb/common.rs @@ -460,6 +460,14 @@ pub mod script { // Since 15.0 pub const KAWI: Script = Script::from_bytes(b"Kawi"); pub const NAG_MUNDARI: Script = Script::from_bytes(b"Nagm"); + // Since 16.0 + pub const GARAY: Script = Script::from_bytes(b"Gara"); + pub const GURUNG_KHEMA: Script = Script::from_bytes(b"Gukh"); + pub const KIRAT_RAI: Script = Script::from_bytes(b"Krai"); + pub const OL_ONAL: Script = Script::from_bytes(b"Onao"); + pub const SUNUWAR: Script = Script::from_bytes(b"Sunu"); + pub const TODHRI: Script = Script::from_bytes(b"Todr"); + pub const TULU_TIGALARI: Script = Script::from_bytes(b"Tutg"); pub const SCRIPT_MATH: Script = Script::from_bytes(b"Zmth"); From 4e88d3d5e1b528f095b0deb28da8ee53b544b15c Mon Sep 17 00:00:00 2001 From: Laurenz Stampfl <47084093+LaurenzV@users.noreply.github.com> Date: Wed, 25 Sep 2024 11:24:12 +0200 Subject: [PATCH 07/29] Update .gitignore --- .gitignore | 1 - 1 file changed, 1 deletion(-) diff --git a/.gitignore b/.gitignore index e02b4701..843023e8 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,4 @@ /target -Cargo.lock .directory .DS_Store /src/complex/*.ri From 4799922744d9801e3ff2e99bdb9fe700942fd112 Mon Sep 17 00:00:00 2001 From: Laurenz Stampfl <47084093+LaurenzV@users.noreply.github.com> Date: Wed, 25 Sep 2024 11:26:37 +0200 Subject: [PATCH 08/29] Update unicode dependency --- Cargo.toml | 2 +- src/hb/unicode.rs | 2 +- 2 files changed, 2 insertions(+), 2 
deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 5a44b460..c0ed8bbc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -19,7 +19,7 @@ core_maths = "0.1.0" # only for no_std builds smallvec = "1.6" unicode-bidi-mirroring = "0.3.0" unicode-ccc = "0.3.0" -unicode-properties = { version = "0.1.0", default-features = false, features = ["general-category"] } +unicode-properties = { version = "0.1.2", default-features = false, features = ["general-category"] } unicode-script = "0.5.2" wasmi = { version = "0.36.0", optional = true } log = "0.4.22" diff --git a/src/hb/unicode.rs b/src/hb/unicode.rs index 14082521..a80f30b1 100644 --- a/src/hb/unicode.rs +++ b/src/hb/unicode.rs @@ -843,7 +843,7 @@ mod tests { assert_eq!(unicode_bidi_mirroring::UNICODE_VERSION, (15, 1, 0)); assert_eq!(unicode_ccc::UNICODE_VERSION, (15, 0, 0)); assert_eq!(unicode_properties::UNICODE_VERSION, (15, 1, 0)); - assert_eq!(unicode_script::UNICODE_VERSION, (15, 1, 0)); + assert_eq!(unicode_script::UNICODE_VERSION, (16, 0, 0)); assert_eq!(crate::hb::unicode_norm::UNICODE_VERSION, (15, 0, 0)); } } From 07d5bc29e07d934527b9722deac9bc9e27b4a350 Mon Sep 17 00:00:00 2001 From: Laurenz Stampfl <47084093+LaurenzV@users.noreply.github.com> Date: Wed, 25 Sep 2024 11:30:59 +0200 Subject: [PATCH 09/29] [Unicode 16] Update the UCD table --- scripts/gen-unicode-norm-table.py | 4 +-- src/hb/unicode.rs | 2 +- src/hb/unicode_norm.rs | 42 ++++++++++++++++++++++++++++++- 3 files changed, 44 insertions(+), 4 deletions(-) diff --git a/scripts/gen-unicode-norm-table.py b/scripts/gen-unicode-norm-table.py index ee8ae5c3..bf456237 100755 --- a/scripts/gen-unicode-norm-table.py +++ b/scripts/gen-unicode-norm-table.py @@ -6,7 +6,7 @@ URL = 'https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt' FILE_NAME = 'UnicodeData.txt' -EXCLUSIONS_URL = 'https://www.unicode.org/Public/draft/UCD/ucd/CompositionExclusions.txt' +EXCLUSIONS_URL = 'https://www.unicode.org/Public/UCD/latest/ucd/CompositionExclusions.txt' 
EXCLUSIONS_FILE_NAME = 'CompositionExclusions.txt' @@ -39,7 +39,7 @@ def hex_to_char_rs(c): print('//! The current implementation is not the fastest one. Just good enough.') print() print('#[allow(dead_code)]') -print('pub const UNICODE_VERSION: (u8, u8, u8) = (15, 0, 0);') +print('pub const UNICODE_VERSION: (u8, u8, u8) = (16, 0, 0);') print() print('// Rust support `Option` layout optimization, so it will take only 4 bytes.') print('pub const DECOMPOSITION_TABLE: &[(char, char, Option)] = &[') diff --git a/src/hb/unicode.rs b/src/hb/unicode.rs index a80f30b1..cce460f0 100644 --- a/src/hb/unicode.rs +++ b/src/hb/unicode.rs @@ -844,7 +844,7 @@ mod tests { assert_eq!(unicode_ccc::UNICODE_VERSION, (15, 0, 0)); assert_eq!(unicode_properties::UNICODE_VERSION, (15, 1, 0)); assert_eq!(unicode_script::UNICODE_VERSION, (16, 0, 0)); - assert_eq!(crate::hb::unicode_norm::UNICODE_VERSION, (15, 0, 0)); + assert_eq!(crate::hb::unicode_norm::UNICODE_VERSION, (16, 0, 0)); } } diff --git a/src/hb/unicode_norm.rs b/src/hb/unicode_norm.rs index 4047789f..24e79b7e 100644 --- a/src/hb/unicode_norm.rs +++ b/src/hb/unicode_norm.rs @@ -5,7 +5,7 @@ //! The current implementation is not the fastest one. Just good enough. #[allow(dead_code)] -pub const UNICODE_VERSION: (u8, u8, u8) = (15, 0, 0); +pub const UNICODE_VERSION: (u8, u8, u8) = (16, 0, 0); // Rust support `Option` layout optimization, so it will take only 4 bytes. 
pub const DECOMPOSITION_TABLE: &[(char, char, Option)] = &[ @@ -1502,6 +1502,8 @@ pub const DECOMPOSITION_TABLE: &[(char, char, Option)] = &[ ('\u{FB4C}', '\u{05D1}', Some('\u{05BF}')), ('\u{FB4D}', '\u{05DB}', Some('\u{05BF}')), ('\u{FB4E}', '\u{05E4}', Some('\u{05BF}')), + ('\u{105C9}', '\u{105D2}', Some('\u{0307}')), + ('\u{105E4}', '\u{105DA}', Some('\u{0307}')), ('\u{1109A}', '\u{11099}', Some('\u{110BA}')), ('\u{1109C}', '\u{1109B}', Some('\u{110BA}')), ('\u{110AB}', '\u{110A5}', Some('\u{110BA}')), @@ -1509,12 +1511,30 @@ pub const DECOMPOSITION_TABLE: &[(char, char, Option)] = &[ ('\u{1112F}', '\u{11132}', Some('\u{11127}')), ('\u{1134B}', '\u{11347}', Some('\u{1133E}')), ('\u{1134C}', '\u{11347}', Some('\u{11357}')), + ('\u{11383}', '\u{11382}', Some('\u{113C9}')), + ('\u{11385}', '\u{11384}', Some('\u{113BB}')), + ('\u{1138E}', '\u{1138B}', Some('\u{113C2}')), + ('\u{11391}', '\u{11390}', Some('\u{113C9}')), + ('\u{113C5}', '\u{113C2}', Some('\u{113C2}')), + ('\u{113C7}', '\u{113C2}', Some('\u{113B8}')), + ('\u{113C8}', '\u{113C2}', Some('\u{113C9}')), ('\u{114BB}', '\u{114B9}', Some('\u{114BA}')), ('\u{114BC}', '\u{114B9}', Some('\u{114B0}')), ('\u{114BE}', '\u{114B9}', Some('\u{114BD}')), ('\u{115BA}', '\u{115B8}', Some('\u{115AF}')), ('\u{115BB}', '\u{115B9}', Some('\u{115AF}')), ('\u{11938}', '\u{11935}', Some('\u{11930}')), + ('\u{16121}', '\u{1611E}', Some('\u{1611E}')), + ('\u{16122}', '\u{1611E}', Some('\u{16129}')), + ('\u{16123}', '\u{1611E}', Some('\u{1611F}')), + ('\u{16124}', '\u{16129}', Some('\u{1611F}')), + ('\u{16125}', '\u{1611E}', Some('\u{16120}')), + ('\u{16126}', '\u{16121}', Some('\u{1611F}')), + ('\u{16127}', '\u{16122}', Some('\u{1611F}')), + ('\u{16128}', '\u{16121}', Some('\u{16120}')), + ('\u{16D68}', '\u{16D67}', Some('\u{16D67}')), + ('\u{16D69}', '\u{16D63}', Some('\u{16D67}')), + ('\u{16D6A}', '\u{16D69}', Some('\u{16D67}')), ('\u{1D15E}', '\u{1D157}', Some('\u{1D165}')), ('\u{1D15F}', '\u{1D158}', Some('\u{1D165}')), 
('\u{1D160}', '\u{1D15F}', Some('\u{1D16E}')), @@ -3007,6 +3027,8 @@ pub const COMPOSITION_TABLE: &[(u64, char)] = &[ (53811645264025, '\u{30F9}'), (53815940231321, '\u{30FA}'), (53863184871577, '\u{30FE}'), + (287874477982471, '\u{105C9}'), + (287908837720839, '\u{105E4}'), (299724292821178, '\u{1109A}'), (299732882755770, '\u{1109C}'), (299775832428730, '\u{110AB}'), @@ -3014,10 +3036,28 @@ pub const COMPOSITION_TABLE: &[(u64, char)] = &[ (300381422817575, '\u{1112F}'), (302670640386878, '\u{1134B}'), (302670640386903, '\u{1134C}'), + (302924043457481, '\u{11383}'), + (302932633392059, '\u{11385}'), + (302962698163138, '\u{1138E}'), + (302984172999625, '\u{11391}'), + (303198921364408, '\u{113C7}'), + (303198921364418, '\u{113C5}'), + (303198921364425, '\u{113C8}'), (304259778286768, '\u{114BC}'), (304259778286778, '\u{114BB}'), (304259778286781, '\u{114BE}'), (305354994947503, '\u{115BA}'), (305359289914799, '\u{115BB}'), (309190400743728, '\u{11938}'), + (388256453714206, '\u{16121}'), + (388256453714207, '\u{16123}'), + (388256453714208, '\u{16125}'), + (388256453714217, '\u{16122}'), + (388269338616095, '\u{16126}'), + (388269338616096, '\u{16128}'), + (388273633583391, '\u{16127}'), + (388303698354463, '\u{16124}'), + (401746945994087, '\u{16D69}'), + (401764125863271, '\u{16D68}'), + (401772715797863, '\u{16D6A}'), ]; From 7b8cf3118565f31740d2aadef28f0a998101a786 Mon Sep 17 00:00:00 2001 From: Laurenz Stampfl <47084093+LaurenzV@users.noreply.github.com> Date: Wed, 25 Sep 2024 11:31:43 +0200 Subject: [PATCH 10/29] Add missing font --- .../63e224dcb3d559d590f80c83b832cfca789e5dcc.ttf | Bin 0 -> 9080 bytes 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 tests/fonts/in-house/63e224dcb3d559d590f80c83b832cfca789e5dcc.ttf diff --git a/tests/fonts/in-house/63e224dcb3d559d590f80c83b832cfca789e5dcc.ttf b/tests/fonts/in-house/63e224dcb3d559d590f80c83b832cfca789e5dcc.ttf new file mode 100644 index 
0000000000000000000000000000000000000000..4abb1560a6861ca0b721682a6e26c1bf01c7cd77 GIT binary patch literal 9080 zcmb7J2XI@*nci1W-a`i-3Q3RvK>}HJ7smQJKs* zGwCiVlbhsxaV~e0Nt-y%TpXR1IFchdkz6FJ7pqyML=x--f&2Eo2apn#8D}x??e4$- z{`>d+0T6}|BH_i8$ilYy3w|N~ypj-&5zwD3T)CqCmq*^jQ2!-D*xH5dYg_;0!hfA0 zMEeUunB$8cox8fX>CaCR!kq`wHKi>#r7UdkOPBnD5-uv6uE% z;4ee3-?DT6=H(gx_gg|t-yuYIbqnxk?>Bvu5a)A*n90^n9UFa;^(657fM2^60PWie zdR`ai`CE5&J(uwodlUR^#C*}t-5Wa0Zm)BM0b z$Rqv+doJ(Uy|?S!OIiD&FU-jVVF;5<5(|1l?Bp;w{W{UfGXT`aQ7lMwJ<$R^jKyCW ze?!~$01n-)w0g3ae4894H<&DDCG%zGXUx0I5ZlatlkMU9xqcwgv*=qmi~~Phd*a)l z{`b|}6l6Rh&d&IrTMTUu`l_m`PDe$jEQO*e=jhQ(8}@9a`cq&=dBux2FoV8`{6y4qD2^zykCCUC5*+Iz z^{kC}p{cd{q@D?qEaodjWOw6f0)7+8XV;JldK}@|YUUe614=J~CLv3e zo>@mKnN~nq4Xf2?Se8G)Q?C4UkK7inSg{c5@duBqdaq@Fh6PzB*2AvE*iY%o(sJk} zLhVF*p3qe*J{0a3UlV^S{!u*PHwBNw)+xwaDM<3l(7K^73@sRHxXhoOX;_%16pqooU?o z@x0DU744TQ*Iq4KaZMxKVY$IEX*@nI@1%Gf^+($7^9hDf52E}a%Hattm7k%M?Ied3 zlUlNXtS0NocJf7q5_CahQLrrI1<&A zisMu?j~SPayfUmVf(Cdz8mwsX;SkLRj7`u8jba`+RI^hLg=+E8y;Q2%c*RqY^ zZ&)%XGE3umO{~qUF?(b+Q-@$h(Vn9s5l>DSpoeJ@qJkD$@&^RD`XP;G4~}8sNj#%r zw2Y3?GXf(r2FA#k7&BvGtc*?f>@(ZKbLUp8eZA7R&R6>dN_t8;lj_Q*_C*36qnnW+qi+Z2n5D;sf<7c;blF-hMu4hQAY$Jk5r8=Y5L{n$7PoyEx*w&7n zn@P@|&P{uXq`avfv5SLE6|KX687~ZJ;t!d{i(G%GS#RXCwkL!cmm0_DzvG3Cby$|m$Iu26+r78nL#P$_68P|l(I zd0^93N>{uOBM0ylWdp`EDtO>1BT8M#up)Vcbg8zim2Fuo+cG5EvUZv+Yq1{<)>Gps zJy&eYt>3wC7x8c0wzrF9?&|2=A;)D(k8~^Lk|SrxENE^@2X7N^H}{BaP(w7GE^%ihv*AH;~et`jHzD0#{n97Q*&0H(|cN3=ja@?lsWj3*T0=! zSLQ!T&v~pqZ&&s@Yl1F9Cy;#{TXTC~OhPo^a zeWqynr&06Ipyz*p-qijk;I6=DA!$m}`s{q&&*f+1WgcYML-xy(D|++8{$V_o$mvvH zo6t|uS_#bE(9R~TT@+ITUF)E3uv8^%jI?lLIt5vjm>S7 zsw!*i+*<9mS&vjye6H@Ly{(IUwSjSWfy*bY5^N>bVpEU{u3cYOy(qN0u_1qPn=c?) 
zJ%u^U&D_ax(d}pxC7kEQc}+Oah8uxsW>60XWEQF_b6B^`gi~S$+#$D5S}7`LBwn-^ z=etWQN|v`)n7hhc8i&nYH2si|T%qy|Z;sAj;7U!l01A4BtW8cZLHSfNQ=;?kkjv*> zCD==>MaH0J2Gh!Z_Rri4$inIUR8N#kT^S2yX`N{Wop%S_K4l))lA+z3ypTP-MwA zx`lw%S6S6sp3ItC&hv{|jiq=QzT(MJvM+yTE~(HJ>lZg~`@&*xLE(lSwGH!qDJ~Tq zTz~rcIVB5Lg)$nz<(3tbeJL)fKDppObyCBg_9b80x2Ygd_u_XJwXH3<|D;P#M_Y^A zR)(_c+RJLTubdpd@1!sPQzy0j>*MX;e{o;AFY?+i*DQUi^!}61b{v{lgp+b=+so^A zu9}=kpH!cmWPZf_3U_om4k{47B4N8Hwfb9M$hc{c6ngqq^VTUYol6jT+ zI;{%oX8QBD6#p%K?2%^c_sU!+6?fGejC`@|uBdOo$#rm&i3pLyq0Vh)Wk0BLruit^6@81 zW+b?2LY)-sJMS^6Ikv#o1MJk;tvMFN@hc){Dx>P4bZY z>CM+pp2EGqk96bLqVJQ}$*;*Dkv0NU%~h3@>*-^tIt?$FHvUI&R^uAA5#0b)&5hpXA0Fp;sbS%pIWI<2a_p)OC6k1`_zt? z)%i0i#MfmW45Bshr_`uCHQ1jTd^I(=lp35!4Vc{2Xh~`?MTyywhNX8hRVi2+lqQxw zke0cO{LpVQjfghdBJx`zuP}aQ8*_s#X8)7R<^G);<(Kil(zI)x+ROBFSpG%j0Eb^} zXqByrfD*Q9A}sx-1x59EwEF;w(-$34yhnhyLd9F5;)S$w;Owzu7g?9mtUo@6q@<{`9Tu&}1GlvLt)PL^oOl#YeX`EDnS*@`wd zvf%TNK0?+4@8LFD0*;ob94%Kl%KzK>UYVn;T%XO593G0>3|32$9|nihS@g;FaY-V> zFgk;FLa+oXD=X`2iqc-1UzF#Nc}|GFl8V}fIg1+VBJ-P@!c+z-M71J>PeX->jcVYf zQijK>Ne&O7-Mf>r(;>^k<~&)dxUG6&d&jQlBjjt2XLs$`v0=k%D(EyxeX>Dr_3PoI zf#QNpUA&hve)W9HLWgAU3_|jtDtStice(uGxenB%zJ7!3-Q!QJe5lyB1_U@=ZecJ(8@~&# zZ(co@7F)@Q)8$Ux42U%>}um!8Ds~a9} z?%Z%~eM4mF?D_@EXa56p_sMcQJZ{9>2*NFP?{->lbK2bYnkA2J-q%XLfnRREZh!t? 
zwm;qRaGPK4qr&?OLs=R{_{$%qg_k3)3Z7o2MqHKZ=^o^5G)`x?=rt3gvHpw4jvYI5 zXMhY&-GfG*d2Vy+F6`^}==kL1T7Ch^8MEF-rjDnpx4611dGui3c@67l0?bj4F-dLkDaj5%w&?`IIUp3 z{pp?CH;}Q-W~7Rti3Bg2y1iDj0DD?9C5O#onc&<3Tb@5WXU?3uB0u4xnJ8H_8l%kHlwJd7IDy^;hE{>lx6VtX}PkvnX0G4qCmAJ zT2TfFLW0%XoJOOR4WV;09EG_KqX9Q6X7a67RWXZHM6+hjRM(WgXyrl6 zE2(+WA1C`&4enPp*d!O9TAkTrFpONK)n`wvcbGRA41(2c5ZU27D9CVyeRAx> zTf+vY(~0R_)MxLX_$&E1dLR^!-M;q8P1LD~H77sb-`D4G2ooa%{jq_;VT)0K!)OJQ zV`kaXq}MViT6l{m7<6aC-fqe6&5&$18}3+1g56O6qInyugE`sor8=}){0K9zh0Kc< zpm^2lO>Q87NPIHM>BW(e3BqZ?xhRf|j-e(S8W|l)>3gaUYEnvCU1m`vQaG!qsHh;2 z6Uxg$p_SwJ=j7z)=NHH=wBfNxRRtJZhxX)?>%Z8!opeSE5(Mt-3_-Jq-tmcq#%znl zM&e0Ur^DCq*x=Cc@Nj=@FqX2yRPih4I{J^p3#tuXP;DUn`j4ls45PA#`p=v_efkf- z`|ZUWLkB~*-aj|q&s;rx`0(3DjvP6a96bNt@%K)izI6Q#t&2x{FCTg1ZzJSLG}`!J zL2SgO-*@HsyPvR)O>zGF^I{ zKFMd4H?%vP&;6ki1< d<6}dy*g$N26uB`*%f~o=lA%?&WMI3={{#B-Dd7MB literal 0 HcmV?d00001 From 14b63bf502984be160d89037ccbce6c0cb08af45 Mon Sep 17 00:00:00 2001 From: Laurenz Stampfl <47084093+LaurenzV@users.noreply.github.com> Date: Wed, 25 Sep 2024 11:33:35 +0200 Subject: [PATCH 11/29] [Unicode 16] Update the Arabic table --- src/hb/ot_shaper_arabic_table.rs | 35 ++++++++++++++++++++------------ 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/src/hb/ot_shaper_arabic_table.rs b/src/hb/ot_shaper_arabic_table.rs index 7b757260..a20a4788 100644 --- a/src/hb/ot_shaper_arabic_table.rs +++ b/src/hb/ot_shaper_arabic_table.rs @@ -99,6 +99,10 @@ pub const JOINING_TABLE: &[hb_arabic_joining_type_t] = &[ /* 10D00 */ L,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D, /* 10D20 */ D,D,R,D, + /* Arabic Extended-C */ + + /* 10EC0 */ R,D,D, + /* Sogdian */ /* 10F20 */ D,D,D,R,D,D,D,D,D,D,D,D,D,D,D,D, @@ -135,55 +139,59 @@ const JOINING_OFFSET_0XA840: usize = 998; const JOINING_OFFSET_0X10AC0: usize = 1050; const JOINING_OFFSET_0X10B80: usize = 1098; const JOINING_OFFSET_0X10D00: usize = 1146; -const JOINING_OFFSET_0X10F30: usize = 1182; -const JOINING_OFFSET_0X110BD: usize = 
1338; -const JOINING_OFFSET_0X1E900: usize = 1355; +const JOINING_OFFSET_0X10EC2: usize = 1182; +const JOINING_OFFSET_0X10F30: usize = 1185; +const JOINING_OFFSET_0X110BD: usize = 1341; +const JOINING_OFFSET_0X1E900: usize = 1358; pub fn joining_type(u: char) -> hb_arabic_joining_type_t { let u = u as u32; match u >> 12 { 0x0 => { if (0x0600..=0x08E2).contains(&u) { - return JOINING_TABLE[u as usize - 0x0600 + JOINING_OFFSET_0X0600]; + return JOINING_TABLE[u as usize - 0x0600 + JOINING_OFFSET_0X0600] } } 0x1 => { if (0x1806..=0x18AA).contains(&u) { - return JOINING_TABLE[u as usize - 0x1806 + JOINING_OFFSET_0X1806]; + return JOINING_TABLE[u as usize - 0x1806 + JOINING_OFFSET_0X1806] } } 0x2 => { if (0x200C..=0x2069).contains(&u) { - return JOINING_TABLE[u as usize - 0x200C + JOINING_OFFSET_0X200C]; + return JOINING_TABLE[u as usize - 0x200C + JOINING_OFFSET_0X200C] } } 0xA => { if (0xA840..=0xA873).contains(&u) { - return JOINING_TABLE[u as usize - 0xA840 + JOINING_OFFSET_0XA840]; + return JOINING_TABLE[u as usize - 0xA840 + JOINING_OFFSET_0XA840] } } 0x10 => { if (0x10AC0..=0x10AEF).contains(&u) { - return JOINING_TABLE[u as usize - 0x10AC0 + JOINING_OFFSET_0X10AC0]; + return JOINING_TABLE[u as usize - 0x10AC0 + JOINING_OFFSET_0X10AC0] } if (0x10B80..=0x10BAF).contains(&u) { - return JOINING_TABLE[u as usize - 0x10B80 + JOINING_OFFSET_0X10B80]; + return JOINING_TABLE[u as usize - 0x10B80 + JOINING_OFFSET_0X10B80] } if (0x10D00..=0x10D23).contains(&u) { - return JOINING_TABLE[u as usize - 0x10D00 + JOINING_OFFSET_0X10D00]; + return JOINING_TABLE[u as usize - 0x10D00 + JOINING_OFFSET_0X10D00] + } + if (0x10EC2..=0x10EC4).contains(&u) { + return JOINING_TABLE[u as usize - 0x10EC2 + JOINING_OFFSET_0X10EC2] } if (0x10F30..=0x10FCB).contains(&u) { - return JOINING_TABLE[u as usize - 0x10F30 + JOINING_OFFSET_0X10F30]; + return JOINING_TABLE[u as usize - 0x10F30 + JOINING_OFFSET_0X10F30] } } 0x11 => { if (0x110BD..=0x110CD).contains(&u) { - return JOINING_TABLE[u as usize 
- 0x110BD + JOINING_OFFSET_0X110BD]; + return JOINING_TABLE[u as usize - 0x110BD + JOINING_OFFSET_0X110BD] } } 0x1E => { if (0x1E900..=0x1E94B).contains(&u) { - return JOINING_TABLE[u as usize - 0x1E900 + JOINING_OFFSET_0X1E900]; + return JOINING_TABLE[u as usize - 0x1E900 + JOINING_OFFSET_0X1E900] } } _ => {} @@ -191,3 +199,4 @@ pub fn joining_type(u: char) -> hb_arabic_joining_type_t { X } + From 915cfeaf15f2be36a792773d41ece4bbe4971d01 Mon Sep 17 00:00:00 2001 From: Laurenz Stampfl <47084093+LaurenzV@users.noreply.github.com> Date: Wed, 25 Sep 2024 11:38:40 +0200 Subject: [PATCH 12/29] [Unicode 16] Update the Indic table --- scripts/gen-indic-table.py | 1 + src/hb/ot_shaper_indic_table.rs | 12 ++++++++++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/scripts/gen-indic-table.py b/scripts/gen-indic-table.py index dd54e5d2..ab9e400e 100755 --- a/scripts/gen-indic-table.py +++ b/scripts/gen-indic-table.py @@ -37,6 +37,7 @@ 'Devanagari Extended', 'Myanmar Extended-B', 'Myanmar Extended-A', + 'Myanmar Extended-C', ] files = [io.open(x, encoding='utf-8') for x in DEPENDENCIES] diff --git a/src/hb/ot_shaper_indic_table.rs b/src/hb/ot_shaper_indic_table.rs index 43910a64..4c97e5db 100644 --- a/src/hb/ot_shaper_indic_table.rs +++ b/src/hb/ot_shaper_indic_table.rs @@ -21,7 +21,7 @@ use OT_MR as _OT_MR; /* 1 chars; MR */ use OT_MW as _OT_MW; /* 2 chars; MW */ use OT_MY as _OT_MY; /* 3 chars; MY */ use OT_N as _OT_N; /* 17 chars; N */ -use OT_PLACEHOLDER as _OT_GB; /* 165 chars; PLACEHOLDER */ +use OT_PLACEHOLDER as _OT_GB; /* 185 chars; PLACEHOLDER */ use OT_PT as _OT_PT; /* 8 chars; PT */ use OT_Ra as _OT_R; /* 14 chars; Ra */ use OT_Repha as _OT_Rf; /* 1 chars; Repha */ @@ -44,7 +44,7 @@ use POS_ABOVE_C as _POS_T; /* 22 chars; ABOVE_C */ use POS_AFTER_MAIN as _POS_A; /* 3 chars; AFTER_MAIN */ use POS_AFTER_POST as _POS_AP; /* 50 chars; AFTER_POST */ use POS_AFTER_SUB as _POS_AS; /* 51 chars; AFTER_SUB */ -use POS_BASE_C as _POS_C; /* 833 chars; BASE_C */ 
+use POS_BASE_C as _POS_C; /* 853 chars; BASE_C */ use POS_BEFORE_SUB as _POS_BS; /* 25 chars; BEFORE_SUB */ use POS_BELOW_C as _POS_B; /* 13 chars; BELOW_C */ use POS_END as _POS_X; /* 71 chars; END */ @@ -343,6 +343,12 @@ const TABLE: &[(SyllabicCategory, MatraCategory)] = &[ /* 11300 */ (_OT_X,_POS_X),(_OT_SM,_POS_SM),(_OT_SM,_POS_SM),(_OT_SM,_POS_SM), (_OT_X,_POS_X), (_OT_X,_POS_X), (_OT_X,_POS_X), (_OT_X,_POS_X), /* 11338 */ (_OT_X,_POS_X), (_OT_X,_POS_X), (_OT_X,_POS_X), (_OT_N,_POS_X), (_OT_N,_POS_X), (_OT_X,_POS_X), (_OT_X,_POS_X), (_OT_X,_POS_X), + /* Myanmar Extended-C */ + + /* 116D0 */(_OT_GB,_POS_C),(_OT_GB,_POS_C),(_OT_GB,_POS_C),(_OT_GB,_POS_C),(_OT_GB,_POS_C),(_OT_GB,_POS_C),(_OT_GB,_POS_C),(_OT_GB,_POS_C), + /* 116D8 */(_OT_GB,_POS_C),(_OT_GB,_POS_C),(_OT_GB,_POS_C),(_OT_GB,_POS_C),(_OT_GB,_POS_C),(_OT_GB,_POS_C),(_OT_GB,_POS_C),(_OT_GB,_POS_C), + /* 116E0 */(_OT_GB,_POS_C),(_OT_GB,_POS_C),(_OT_GB,_POS_C),(_OT_GB,_POS_C), (_OT_X,_POS_X), (_OT_X,_POS_X), (_OT_X,_POS_X), (_OT_X,_POS_X), + ]; const OFFSET_0X0028: usize = 0; @@ -360,6 +366,7 @@ const OFFSET_0XAA60: usize = 1664; const OFFSET_0XFE00: usize = 1696; const OFFSET_0X11300: usize = 1712; const OFFSET_0X11338: usize = 1720; +const OFFSET_0X116D0: usize = 1728; #[rustfmt::skip] pub fn get_categories(u: u32) -> (SyllabicCategory, MatraCategory) { @@ -392,6 +399,7 @@ pub fn get_categories(u: u32) -> (SyllabicCategory, MatraCategory) { 0x11 => { if (0x11300..=0x11307).contains(&u) { return TABLE[u as usize - 0x11300 + OFFSET_0X11300]; } if (0x11338..=0x1133F).contains(&u) { return TABLE[u as usize - 0x11338 + OFFSET_0X11338]; } + if (0x116D0..=0x116E7).contains(&u) { return TABLE[u as usize - 0x116D0 + OFFSET_0X116D0]; } } _ => {} } From eeb11563f18b6301eb99d0391c40b9f7b8bba2cd Mon Sep 17 00:00:00 2001 From: Laurenz Stampfl <47084093+LaurenzV@users.noreply.github.com> Date: Wed, 25 Sep 2024 11:43:47 +0200 Subject: [PATCH 13/29] Reformat --- src/hb/ot_shaper_arabic_table.rs | 23 
+++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/src/hb/ot_shaper_arabic_table.rs b/src/hb/ot_shaper_arabic_table.rs index a20a4788..e84fc394 100644 --- a/src/hb/ot_shaper_arabic_table.rs +++ b/src/hb/ot_shaper_arabic_table.rs @@ -149,49 +149,49 @@ pub fn joining_type(u: char) -> hb_arabic_joining_type_t { match u >> 12 { 0x0 => { if (0x0600..=0x08E2).contains(&u) { - return JOINING_TABLE[u as usize - 0x0600 + JOINING_OFFSET_0X0600] + return JOINING_TABLE[u as usize - 0x0600 + JOINING_OFFSET_0X0600]; } } 0x1 => { if (0x1806..=0x18AA).contains(&u) { - return JOINING_TABLE[u as usize - 0x1806 + JOINING_OFFSET_0X1806] + return JOINING_TABLE[u as usize - 0x1806 + JOINING_OFFSET_0X1806]; } } 0x2 => { if (0x200C..=0x2069).contains(&u) { - return JOINING_TABLE[u as usize - 0x200C + JOINING_OFFSET_0X200C] + return JOINING_TABLE[u as usize - 0x200C + JOINING_OFFSET_0X200C]; } } 0xA => { if (0xA840..=0xA873).contains(&u) { - return JOINING_TABLE[u as usize - 0xA840 + JOINING_OFFSET_0XA840] + return JOINING_TABLE[u as usize - 0xA840 + JOINING_OFFSET_0XA840]; } } 0x10 => { if (0x10AC0..=0x10AEF).contains(&u) { - return JOINING_TABLE[u as usize - 0x10AC0 + JOINING_OFFSET_0X10AC0] + return JOINING_TABLE[u as usize - 0x10AC0 + JOINING_OFFSET_0X10AC0]; } if (0x10B80..=0x10BAF).contains(&u) { - return JOINING_TABLE[u as usize - 0x10B80 + JOINING_OFFSET_0X10B80] + return JOINING_TABLE[u as usize - 0x10B80 + JOINING_OFFSET_0X10B80]; } if (0x10D00..=0x10D23).contains(&u) { - return JOINING_TABLE[u as usize - 0x10D00 + JOINING_OFFSET_0X10D00] + return JOINING_TABLE[u as usize - 0x10D00 + JOINING_OFFSET_0X10D00]; } if (0x10EC2..=0x10EC4).contains(&u) { - return JOINING_TABLE[u as usize - 0x10EC2 + JOINING_OFFSET_0X10EC2] + return JOINING_TABLE[u as usize - 0x10EC2 + JOINING_OFFSET_0X10EC2]; } if (0x10F30..=0x10FCB).contains(&u) { - return JOINING_TABLE[u as usize - 0x10F30 + JOINING_OFFSET_0X10F30] + return JOINING_TABLE[u as usize - 0x10F30 + 
JOINING_OFFSET_0X10F30]; } } 0x11 => { if (0x110BD..=0x110CD).contains(&u) { - return JOINING_TABLE[u as usize - 0x110BD + JOINING_OFFSET_0X110BD] + return JOINING_TABLE[u as usize - 0x110BD + JOINING_OFFSET_0X110BD]; } } 0x1E => { if (0x1E900..=0x1E94B).contains(&u) { - return JOINING_TABLE[u as usize - 0x1E900 + JOINING_OFFSET_0X1E900] + return JOINING_TABLE[u as usize - 0x1E900 + JOINING_OFFSET_0X1E900]; } } _ => {} @@ -199,4 +199,3 @@ pub fn joining_type(u: char) -> hb_arabic_joining_type_t { X } - From 874f8e23b4dbbc7d18284801939fb1aba4c1ff6c Mon Sep 17 00:00:00 2001 From: Laurenz Stampfl <47084093+LaurenzV@users.noreply.github.com> Date: Wed, 25 Sep 2024 11:44:38 +0200 Subject: [PATCH 14/29] [Unicode 16] Update the USE table --- src/hb/ot_shaper_use_table.rs | 205 +++++++++++++++++++--------------- 1 file changed, 112 insertions(+), 93 deletions(-) diff --git a/src/hb/ot_shaper_use_table.rs b/src/hb/ot_shaper_use_table.rs index dc0745f5..8b4d40ec 100644 --- a/src/hb/ot_shaper_use_table.rs +++ b/src/hb/ot_shaper_use_table.rs @@ -6,18 +6,18 @@ * * on files with these headers: * - * # IndicSyllabicCategory-15.0.0.txt - * # Date: 2022-05-26, 02:18:00 GMT [KW, RP] - * # IndicPositionalCategory-15.0.0.txt - * # Date: 2022-05-26, 02:18:00 GMT [KW, RP] - * # ArabicShaping-15.0.0.txt - * # Date: 2022-02-14, 18:50:00 GMT [KW, RP] - * # DerivedCoreProperties-15.0.0.txt - * # Date: 2022-08-05, 22:17:05 GMT - * # Blocks-15.0.0.txt - * # Date: 2022-01-28, 20:58:00 GMT [KW] - * # Scripts-15.0.0.txt - * # Date: 2022-04-26, 23:15:02 GMT + * # IndicSyllabicCategory-16.0.0.txt + * # Date: 2024-04-30, 21:48:21 GMT + * # IndicPositionalCategory-16.0.0.txt + * # Date: 2024-04-30, 21:48:21 GMT + * # ArabicShaping-16.0.0.txt + * # Date: 2024-07-30 + * # DerivedCoreProperties-16.0.0.txt + * # Date: 2024-05-31, 18:09:32 GMT + * # Blocks-16.0.0.txt + * # Date: 2024-02-02 + * # Scripts-16.0.0.txt + * # Date: 2024-04-30, 21:48:40 GMT * # Override values For Indic_Syllabic_Category * # 
Not derivable * # Initial version based on Unicode 7.0 by Andrew Glass 2014-03-17 @@ -27,6 +27,7 @@ * # Updated for Unicode 14.0 by Andrew Glass 2021-09-25 * # Updated for Unicode 15.0 by Andrew Glass 2022-09-16 * # Updated for Unicode 15.1 by Andrew Glass 2023-09-14 + * # Updated for Unicode 16.0 by Andrew Glass 2024-09-11 * # Override values For Indic_Positional_Category * # Not derivable * # Initial version based on Unicode 7.0 by Andrew Glass 2014-03-17 @@ -38,21 +39,26 @@ * # Updated for Unicode 14.0 by Andrew Glass 2021-09-28 * # Updated for Unicode 15.0 by Andrew Glass 2022-09-16 * # Updated for Unicode 15.1 by Andrew Glass 2023-09-14 + * # Updated for Unicode 16.0 by Andrew Glass 2024-09-11 * UnicodeData.txt does not have a header. */ +// Note that in rustybuzz, this table is not autogenerated yet. +// There were plans to port harfbuzz's `packTab` to Rust, but we +// haven't gotten around to it yet. + use super::ot_shaper_use::category::*; -const hb_use_u8: [u8; 3187] = +const hb_use_u8: [u8; 3345] = [ - 16, 50, 51, 51, 51, 52, 51, 83, 118, 131, 51, 57, 58, 179, 195, 61, + 16, 50, 51, 51, 51, 52, 51, 83, 118, 131, 57, 58, 59, 195, 211, 62, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, - 14, 0, 1, 2, 2, 2, 2, 3, 2, 2, 2, 2, 2, 4, 2, 2, + 15, 0, 1, 2, 2, 2, 2, 3, 2, 2, 2, 2, 2, 4, 2, 2, 5, 6, 2, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 2, 2, 17, 18, 19, 20, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 2, 33, 2, 2, 2, @@ -65,24 +71,26 @@ const hb_use_u8: [u8; 3187] = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 47, 48, 2, 49, 2, 2, 2, 2, 2, 2, 2, 2, 2, 
2, 50, 51, 2, 2, 2, - 2, 2, 2, 2, 2, 52, 53, 2, 54, 2, 2, 55, 2, 2, 56, 57, - 58, 59, 60, 61, 62, 63, 64, 65, 2, 66, 67, 2, 68, 69, 70, 71, - 2, 72, 2, 73, 74, 75, 76, 2, 2, 77, 78, 79, 80, 2, 81, 82, - 2, 83, 83, 83, 83, 83, 83, 83, 83, 84, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 52, 53, 2, 54, 2, 2, 55, 56, 2, 57, 58, + 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 2, 70, 71, 72, 73, + 2, 74, 2, 75, 76, 77, 78, 2, 2, 79, 80, 81, 82, 2, 83, 84, + 2, 85, 85, 85, 85, 85, 85, 85, 85, 86, 85, 85, 85, 85, 85, 85, + 85, 85, 85, 85, 85, 85, 85, 85, 85, 85, 85, 85, 85, 85, 85, 85, + 85, 85, 85, 85, 85, 85, 85, 85, 87, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 85, 86, 2, 2, 2, 2, 2, 2, 2, 87, - 88, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 89, 89, 89, 90, 2, 2, 2, 2, 2, + 2, 2, 2, 88, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 89, 90, 2, 2, 2, 91, 2, 2, 2, 92, + 93, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 94, 94, 94, 95, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 91, 92, 2, 2, 2, 2, 2, - 2, 2, 2, 93, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 96, 97, 2, 2, 2, 2, 2, + 2, 2, 2, 98, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 94, 2, 2, 95, 2, 2, 2, 96, 2, 2, 2, 2, 2, - 2, 2, 2, 97, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 98, 98, 99, 100, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, - 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, - 98, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, + 2, 2, 2, 99, 2, 2, 100, 2, 2, 2, 101, 2, 102, 2, 2, 2, + 2, 2, 2, 103, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 104, 104, 105, 106, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, + 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, + 104, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 1, 0, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 3, 4, 0, 5, 0, 0, 0, 0, 0, 6, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -111,7 +119,7 @@ const hb_use_u8: [u8; 3187] = 0, 0, 0, 27, 31, 2, 9, 0, 0, 10, 29, 30, 2, 2, 2, 9, 2, 2, 2, 30, 2, 2, 0, 17, 45, 0, 0, 35, 47, 0, 0, 0, 9, 50, 51, 0, 0, 0, 0, 0, 0, 11, 29, 2, 2, 2, 2, 9, - 2, 2, 2, 2, 2, 2, 52, 53, 23, 23, 19, 31, 48, 33, 48, 34, + 2, 2, 2, 2, 2, 2, 52, 53, 23, 19, 20, 31, 48, 33, 48, 34, 54, 0, 0, 0, 35, 0, 0, 0, 30, 12, 29, 30, 2, 2, 2, 2, 2, 2, 2, 2, 9, 0, 2, 2, 2, 2, 30, 2, 2, 2, 2, 30, 0, 2, 2, 2, 9, 0, 55, 0, 35, 23, 22, 31, 31, 18, 48, 48, @@ -170,6 +178,7 @@ const hb_use_u8: [u8; 3187] = 146, 2, 2, 30, 2, 30, 2, 2, 2, 2, 2, 2, 0, 14, 37, 0, 147, 2, 2, 13, 37, 0, 30, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 30, 2, 2, 9, 2, 2, 11, 41, 0, 0, 0, + 0, 2, 2, 2, 0, 27, 22, 22, 30, 2, 2, 2, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 27, 38, 0, 2, 2, 2, 116, 116, 116, 116, 116, 148, 2, 9, 0, 0, 0, 0, 0, 2, 14, 14, 0, 0, 0, 0, 0, 9, 2, 2, 9, 2, 2, 2, 2, 30, 2, 9, 0, 30, 2, 0, @@ -188,39 +197,45 @@ const hb_use_u8: [u8; 3187] = 0, 11, 11, 30, 2, 2, 2, 9, 30, 9, 2, 30, 2, 2, 58, 17, 23, 16, 23, 47, 32, 33, 32, 34, 0, 0, 0, 0, 35, 0, 0, 0, 2, 2, 23, 0, 11, 11, 11, 46, 0, 11, 11, 46, 0, 0, 0, 0, - 0, 2, 2, 65, 25, 20, 20, 20, 22, 23, 126, 15, 17, 0, 0, 0, - 0, 2, 2, 2, 2, 2, 0, 0, 163, 164, 0, 0, 0, 0, 0, 0, - 0, 18, 19, 20, 20, 66, 99, 25, 160, 11, 165, 9, 0, 0, 0, 0, - 0, 2, 2, 2, 2, 2, 2, 2, 65, 25, 20, 20, 0, 48, 48, 11, - 166, 37, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 20, - 0, 23, 19, 20, 20, 21, 16, 82, 166, 38, 0, 0, 0, 0, 0, 0, - 0, 2, 2, 2, 2, 2, 10, 167, 25, 20, 22, 22, 165, 9, 0, 0, - 0, 2, 2, 2, 2, 2, 9, 43, 136, 23, 22, 20, 76, 21, 22, 0, - 0, 2, 2, 2, 9, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 18, - 19, 20, 21, 22, 105, 166, 37, 0, 0, 2, 2, 2, 9, 30, 0, 2, - 2, 2, 2, 30, 9, 2, 2, 2, 2, 23, 23, 18, 32, 33, 12, 168, - 169, 170, 171, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 0, 2, 2, - 2, 65, 25, 20, 
20, 0, 22, 23, 29, 108, 0, 33, 0, 0, 0, 0, - 0, 52, 20, 22, 22, 22, 140, 2, 2, 2, 172, 173, 11, 15, 174, 72, - 175, 0, 0, 1, 147, 0, 0, 0, 0, 52, 20, 22, 16, 19, 20, 2, - 2, 2, 2, 158, 158, 158, 176, 176, 176, 176, 176, 176, 15, 177, 0, 30, - 0, 22, 20, 20, 31, 22, 22, 11, 166, 0, 61, 61, 61, 61, 61, 61, - 61, 66, 21, 82, 46, 0, 0, 0, 0, 2, 2, 2, 9, 2, 30, 2, - 2, 52, 22, 22, 31, 0, 38, 22, 27, 11, 159, 178, 174, 0, 0, 0, - 0, 2, 2, 2, 30, 9, 2, 2, 2, 2, 2, 2, 2, 2, 23, 23, - 47, 22, 35, 82, 68, 0, 0, 0, 0, 2, 179, 66, 47, 0, 0, 0, - 0, 11, 180, 2, 2, 2, 2, 2, 2, 2, 2, 23, 22, 20, 31, 0, - 48, 16, 143, 0, 0, 0, 0, 0, 0, 181, 181, 181, 181, 181, 181, 181, - 181, 182, 182, 182, 183, 184, 182, 181, 181, 185, 181, 181, 186, 187, 187, 187, - 187, 187, 187, 187, 0, 0, 0, 0, 0, 11, 11, 11, 46, 0, 0, 0, - 0, 2, 2, 2, 2, 2, 9, 0, 58, 188, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 0, 0, 0, - 40, 116, 26, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, 0, - 0, 2, 2, 2, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 0, 58, - 37, 0, 6, 120, 120, 120, 121, 0, 0, 11, 11, 11, 49, 2, 2, 2, - 0, 2, 2, 2, 2, 2, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, - 46, 2, 2, 2, 2, 2, 2, 11, 11, 2, 2, 2, 2, 2, 2, 22, - 22, 2, 2, 44, 44, 44, 92, 0, 0, O, O, O, GB, B, B, O, + 0, 2, 2, 2, 2, 2, 30, 0, 9, 2, 2, 2, 30, 45, 59, 20, + 20, 31, 33, 32, 32, 25, 163, 29, 164, 165, 37, 0, 0, 0, 0, 0, + 0, 12, 26, 0, 0, 0, 0, 0, 0, 2, 2, 65, 25, 20, 20, 20, + 22, 23, 126, 15, 17, 0, 0, 0, 0, 2, 2, 2, 2, 2, 0, 0, + 166, 167, 0, 0, 0, 0, 0, 0, 0, 18, 19, 20, 20, 66, 99, 25, + 160, 11, 168, 9, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, + 65, 25, 20, 20, 0, 48, 48, 11, 169, 37, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 2, 2, 20, 0, 23, 19, 20, 20, 21, 16, 82, + 169, 38, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 10, 170, + 25, 20, 22, 22, 168, 9, 0, 0, 0, 2, 2, 2, 2, 2, 9, 43, + 136, 23, 22, 20, 76, 21, 22, 0, 0, 2, 2, 2, 9, 0, 0, 0, + 0, 2, 2, 2, 2, 2, 2, 18, 19, 20, 21, 22, 105, 169, 37, 0, + 0, 2, 2, 2, 9, 30, 0, 2, 2, 2, 2, 30, 9, 2, 
2, 2, + 2, 23, 23, 18, 32, 33, 12, 171, 165, 172, 173, 0, 0, 0, 0, 0, + 0, 2, 2, 2, 2, 0, 2, 2, 2, 65, 25, 20, 20, 0, 22, 23, + 29, 108, 0, 33, 0, 0, 0, 0, 0, 52, 20, 22, 22, 22, 140, 2, + 2, 2, 174, 175, 11, 15, 176, 61, 177, 0, 0, 1, 147, 0, 0, 0, + 0, 52, 20, 22, 16, 19, 20, 2, 2, 2, 2, 158, 158, 158, 178, 178, + 178, 178, 178, 178, 15, 179, 0, 30, 0, 22, 20, 20, 31, 22, 22, 11, + 169, 0, 61, 61, 61, 61, 61, 61, 61, 66, 21, 82, 46, 0, 0, 0, + 0, 2, 2, 2, 9, 2, 30, 2, 2, 52, 22, 22, 31, 0, 38, 22, + 27, 11, 159, 180, 181, 0, 0, 0, 0, 2, 2, 2, 30, 9, 2, 2, + 2, 2, 2, 2, 2, 2, 23, 23, 47, 22, 35, 82, 68, 0, 0, 0, + 0, 2, 182, 66, 47, 0, 0, 0, 0, 11, 183, 2, 2, 2, 2, 2, + 2, 2, 2, 23, 22, 20, 31, 0, 48, 16, 143, 0, 0, 0, 0, 0, + 0, 2, 2, 2, 2, 2, 156, 0, 0, 184, 184, 184, 184, 184, 184, 184, + 184, 185, 185, 185, 186, 187, 185, 184, 184, 188, 184, 184, 189, 190, 190, 190, + 190, 190, 190, 190, 0, 0, 0, 0, 0, 184, 184, 184, 184, 184, 191, 0, + 0, 2, 2, 2, 2, 2, 2, 2, 22, 22, 22, 22, 22, 22, 192, 193, + 194, 11, 11, 11, 46, 0, 0, 0, 0, 29, 74, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 65, 47, 0, 2, 2, 2, 2, 2, 9, 0, + 58, 195, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 0, 0, 0, 40, 116, 26, 0, 0, 0, 0, 0, + 0, 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 30, 2, 2, 2, 2, 2, 0, 58, 37, 0, 6, 120, 120, 120, 121, 0, + 0, 11, 11, 11, 49, 2, 2, 2, 0, 2, 2, 2, 2, 2, 0, 0, + 2, 2, 2, 2, 2, 2, 2, 2, 46, 2, 2, 2, 2, 2, 2, 11, + 11, 2, 2, 2, 2, 2, 2, 22, 22, 2, 2, 2, 2, 2, 2, 2, + 20, 2, 2, 44, 44, 44, 92, 0, 0, O, O, O, GB, B, B, O, SB, O, SE, GB, O, O, WJ,FMPst,FMPst, O, CGJ, B, O, B,VMAbv,VMAbv, VMAbv, O,VMAbv, B,CMBlw,CMBlw,CMBlw,VMAbv,VMPst, VAbv, VPst,CMBlw, B, VPst, VPre, VPst, VBlw, VBlw, VBlw, VBlw, VAbv, VAbv, VAbv, VPst, VPst, VPst, H, VPre, VPst,VMBlw, O, O, @@ -240,14 +255,15 @@ const hb_use_u8: [u8; 3187] = CMAbv, VAbv, MBlw, MPst, MBlw, H, O, VBlw, MPst, MPre, MAbv, MBlw, O, B, FAbv, FAbv, FPst, VBlw, B, B, VPre, O,VMPst, IS, O,VMPst, 
VBlw, VPst,VMBlw,VMBlw,VMAbv, O, IS,VMBlw, B,VMPst,VMAbv,VMPst, CS, CS, B, N, N, O, HN, VPre, VBlw, VAbv, - IS,CMAbv, O, VPst, B, R, R,CMBlw, VAbv, VPre,VMAbv,VMAbv, H, VAbv,CMBlw,FMAbv, - B, CS, CS, H,CMBlw,VMPst, H,VMPst, VAbv,VMAbv, VPst, IS, R, MPst, R, MPst, - CMBlw, B,FMBlw, VBlw,VMAbv, R, MBlw, MBlw, GB, FBlw, FBlw,CMAbv, IS, VBlw, IS, GB, - VAbv, R,VMPst, G, G, J, J, J, SB, SE, J, HR, G, G, HM, HM, - HM, O, VBlw, + IS,CMAbv, O, VPst, B, R, R,CMBlw, VAbv, VPre,VMAbv,VMAbv, H, VAbv,CMBlw,VMPst, + O,VMAbv,CMBlw, IS, R,FMAbv, B, CS, CS, H,CMBlw,VMPst, H,VMPst, VAbv,VMAbv, + VPst, MPst, R, MPst,CMBlw, B,FMBlw, VBlw,VMAbv, CS, SUB, SUB, GB, FBlw, FBlw,CMAbv, + IS, VBlw, IS, R, MBlw, GB, VAbv, R,VMPst, G, G, J, J, J, SB, SE, + J, HR, G, G, HM, HM, HM, G, O, MPre, MPre, MPst,VMAbv, MBlw, VBlw, O, + VBlw, ]; -const hb_use_u16: [u16; 808] = +const hb_use_u16: [u16; 856] = [ 0, 0, 1, 2, 0, 3, 0, 3, 0, 0, 4, 5, 0, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, @@ -276,28 +292,31 @@ const hb_use_u16: [u16; 808] = 0, 0, 0, 0, 0, 0, 0,164, 0, 0, 0, 0, 0, 0, 0,165, 0, 0, 0, 0, 0, 0, 0,166,166,167, 34,168, 0, 0, 0, 0, 169,170, 10,171, 95, 0, 0, 0, 0, 0, 0, 0, 70, 10,172, 0, - 10,173,174, 0, 0, 0, 0, 0, 10, 10,175, 2, 0, 0, 0, 0, - 10, 10,176,173, 0, 0, 0, 0, 0, 0, 0, 10,177,178, 0, 10, - 179, 0, 0,180,181, 0, 0, 0,182, 10, 10,183,184,185,186,187, - 188, 10, 10,189,190, 0, 0, 0,191, 10,192,193,194, 10, 10,195, - 188, 10, 10,196,197,106,198,103, 10, 34,199,200,201, 0, 0, 0, - 202,203, 95, 10, 10,204,205, 2,206, 21, 22,207,208,209,210,211, - 10, 10, 10,212,213,214,215, 0,198, 10, 10,216,217, 2, 0, 0, - 10, 10,218,219,220,221, 0, 0, 10, 10, 10,222,223, 2, 0, 0, - 10, 10,224,225, 2, 0, 0, 0, 10,226,227,104,228, 0, 0, 0, - 10, 10,229,230, 0, 0, 0, 0,231,232, 10,233,234, 2, 0, 0, - 0, 0,235, 10, 10,236,237, 0,238, 10, 10,239,240,241, 10, 10, - 242,243, 0, 0, 0, 0, 0, 0, 22, 10,218,244, 8, 10, 71, 19, - 10,245, 74,246, 0, 0, 0, 0,247, 10, 10,248,249, 2,250, 10, - 251,252, 
2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10,253, - 254, 49, 10,255,256, 2, 0, 0,257,257,257,257,257,257,257,257, - 257,257,257,258,259,260, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, - 10, 10, 10,261, 0, 0, 0, 0, 10, 10, 10, 10,262,263,264,264, - 265,266, 0, 0, 0, 0,267, 0, 10, 10, 10, 10, 10, 10, 10, 10, - 10, 10, 10, 10, 10,268, 0, 0, 10, 10, 10, 10, 10, 10,106, 71, - 95,269, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,270, - 10, 10, 71,271,272, 0, 0, 0, 0, 10,273, 0, 10, 10,274, 2, - 0, 0, 0, 0, 0, 10,275, 2, 10, 10, 10, 10,276, 2, 0, 0, + 10,173,174, 0, 0, 0, 0, 0, 10, 10,175, 2, 9, 10,176, 10, + 177, 0, 0, 0, 0, 0, 0, 0, 10, 10,178,173, 0, 0, 0, 0, + 0, 0, 0, 10,179,180, 0, 10,181, 0, 0,182,183, 0, 0, 0, + 184, 10, 10,185,186,187,188,189,190, 10, 10,191,192, 0, 0, 0, + 193, 10,194,195,196, 10, 10,197,190, 10, 10,198,199,106,200,103, + 10, 34,201,202,203, 0, 0, 0,204,205, 95, 10, 10,206,207, 2, + 208, 21, 22,209,210,211,212,213,214, 10, 10,215,216,217,218, 0, + 10, 10, 10,219,220,221,222, 0,200, 10, 10,223,224, 2, 0, 0, + 10, 10,225,226,227,228, 0, 0, 10, 10, 10,229,230, 2, 0, 0, + 10, 10,231,232, 2, 10,141, 0, 10,233,234,104,235, 0, 0, 0, + 10, 10,236,237, 0, 0, 0, 0,238,239, 10,240,241, 2, 0, 0, + 0, 0,242, 10, 10,243,244, 0,245, 10, 10,246,247,248, 10, 10, + 249,250, 0, 0, 0, 0, 0, 0, 22, 10,225,251, 8, 10, 71, 19, + 10,252, 74,253, 0, 0, 0, 0,254, 10, 10,255,256, 2,257, 10, + 258,259, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10,260, + 261, 49, 10,262,263,264, 0, 0,265,265,265,265,265,265,265,265, + 265,265,265,266,267,268,265,265,265,265,265,265,265,265,265,269, + 10,270,271, 2, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, + 10, 10, 10,272, 0, 0, 0, 0, 0, 0, 0, 0,273, 10,274, 2, + 10, 10, 10, 10,275,276,277,277,278,279, 0, 0, 0, 0,280, 0, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,177, 0,281, + 10, 10, 10, 10, 10, 10,106, 71, 95,282, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0,283, 10, 10, 71,284,285, 0, 0, 0, + 0, 10,286, 0, 10, 10,287, 2, 0, 0, 0, 0, 0, 10,288, 2, + 0, 0, 0, 0, 0, 
10,289,106, 10, 10, 10, 10,290, 2, 0, 0, 130,130,130,130,130,130,130,130,163,163,163,163,163,163,163,163, 163,163,163,163,163,163,163,130, ]; @@ -308,7 +327,7 @@ fn hb_use_b4(a: &[u8], i: u32) -> u32 { pub(crate) fn hb_use_get_category(u: u32) -> u8 { if u<921600 { - hb_use_u8[2809+(((hb_use_u8[593+(((hb_use_u16[((hb_use_u8[113+(((hb_use_b4(&hb_use_u8, u>>1>>3>>3>>5) as usize)<<5)+((u as usize>>1>>3>>3)&31usize))] as usize)<<3)+((u as usize>>1>>3)&7usize)])<<3) as usize +((u as usize>>1)&7usize))] as usize)<<1)+((u as usize)&1usize))] + hb_use_u8[2953+(((hb_use_u8[625+(((hb_use_u16[((hb_use_u8[113+(((hb_use_b4(&hb_use_u8, u>>1>>3>>3>>5) as usize)<<5)+((u as usize>>1>>3>>3)&31usize))] as usize)<<3)+((u as usize>>1>>3)&7usize)])<<3) as usize +((u as usize>>1)&7usize))] as usize)<<1)+((u as usize)&1usize))] } else { O } From cc3c2792d3a06d8939092cbfa0085e0f25748c99 Mon Sep 17 00:00:00 2001 From: Laurenz Stampfl <47084093+LaurenzV@users.noreply.github.com> Date: Wed, 25 Sep 2024 11:45:48 +0200 Subject: [PATCH 15/29] [Unicode 16] Send the new scripts to USE --- src/hb/ot_shaper.rs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/hb/ot_shaper.rs b/src/hb/ot_shaper.rs index 4eda4a58..ea8f7c27 100644 --- a/src/hb/ot_shaper.rs +++ b/src/hb/ot_shaper.rs @@ -324,6 +324,15 @@ pub fn hb_ot_shape_complex_categorize( | script::KAWI | script::NAG_MUNDARI + // Unicode-16.0 additions + | script::GARAY + | script::GURUNG_KHEMA + | script::KIRAT_RAI + | script::OL_ONAL + | script::SUNUWAR + | script::TODHRI + | script::TULU_TIGALARI + => { // If the designer designed the font for the 'DFLT' script, // (or we ended up arbitrarily pick 'latn'), use the default shaper. 
From 46c8339a736ddd64d9b4364303543a8f4df13f97 Mon Sep 17 00:00:00 2001 From: Laurenz Stampfl <47084093+LaurenzV@users.noreply.github.com> Date: Wed, 25 Sep 2024 11:57:56 +0200 Subject: [PATCH 16/29] [buffer] Add hb_buffer_[sg]et_not_found_variation_selector_glyph() --- src/hb/buffer.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/hb/buffer.rs b/src/hb/buffer.rs index 82402f93..fd69413c 100644 --- a/src/hb/buffer.rs +++ b/src/hb/buffer.rs @@ -341,6 +341,7 @@ pub struct hb_buffer_t { pub flags: BufferFlags, pub cluster_level: hb_buffer_cluster_level_t, pub invisible: Option, + pub not_found_variation_selector: Option, // Buffer contents. pub direction: Direction, @@ -398,6 +399,7 @@ impl hb_buffer_t { cluster_level: HB_BUFFER_CLUSTER_LEVEL_DEFAULT, invisible: None, scratch_flags: HB_BUFFER_SCRATCH_FLAG_DEFAULT, + not_found_variation_selector: None, max_len: Self::MAX_LEN_DEFAULT, max_ops: Self::MAX_OPS_DEFAULT, direction: Direction::Invalid, From a7c5485603a9e1e1a2c888ce63c50f60077aa55a Mon Sep 17 00:00:00 2001 From: Laurenz Stampfl <47084093+LaurenzV@users.noreply.github.com> Date: Wed, 25 Sep 2024 12:09:48 +0200 Subject: [PATCH 17/29] [buffer] Hook up not-found-variation-selector-glyph --- examples/shape.rs | 60 ++++++++++-------- scripts/gen-shaping-tests.py | 2 + src/hb/buffer.rs | 7 ++ src/hb/ot_layout.rs | 7 ++ src/hb/ot_shape_normalize.rs | 10 ++- ...c24004e776f348a0f72287d24b0124867ee750.ttf | Bin 0 -> 1656 bytes tests/shaping/in_house.rs | 27 ++++++++ tests/shaping/main.rs | 7 ++ 8 files changed, 92 insertions(+), 28 deletions(-) create mode 100644 tests/fonts/in-house/bbc24004e776f348a0f72287d24b0124867ee750.ttf diff --git a/examples/shape.rs b/examples/shape.rs index 1faf57cb..e7a66a78 100644 --- a/examples/shape.rs +++ b/examples/shape.rs @@ -6,32 +6,33 @@ USAGE: shape [OPTIONS] [TEXT] OPTIONS: - -h, --help Show help options - --version Show version number - --font-file PATH Set font file-name - --face-index INDEX Set face index [default: 0] - 
--font-ptem NUMBER Set font point-size - --variations LIST Set comma-separated list of font variations - --text TEXT Set input text - --text-file PATH Set input text file - -u, --unicodes LIST Set comma-separated list of input Unicode codepoints - Examples: 'U+0056,U+0057' - --direction DIRECTION Set text direction - [possible values: ltr, rtl, ttb, btt] - --language LANG Set text language [default: LC_CTYPE] - --script TAG Set text script as ISO-15924 tag - --utf8-clusters Use UTF-8 byte indices, not char indices - --cluster-level N Cluster merging level [default: 0] - [possible values: 0, 1, 2] - --features LIST Set comma-separated list of font features - --no-glyph-names Output glyph indices instead of names - --no-positions Do not output glyph positions - --no-advances Do not output glyph advances - --no-clusters Do not output cluster indices - --show-extents Output glyph extents - --show-flags Output glyph flags - --single-par Treat the input string as a single paragraph - --ned No Extra Data; Do not output clusters or advances + -h, --help Show help options + --version Show version number + --font-file PATH Set font file-name + --face-index INDEX Set face index [default: 0] + --font-ptem NUMBER Set font point-size + --variations LIST Set comma-separated list of font variations + --text TEXT Set input text + --text-file PATH Set input text file + -u, --unicodes LIST Set comma-separated list of input Unicode codepoints + Examples: 'U+0056,U+0057' + --direction DIRECTION Set text direction + [possible values: ltr, rtl, ttb, btt] + --language LANG Set text language [default: LC_CTYPE] + --script TAG Set text script as ISO-15924 tag + --not-found-variation-selector-glyph N Glyph value to replace not-found variation-selector characters with + --utf8-clusters Use UTF-8 byte indices, not char indices + --cluster-level N Cluster merging level [default: 0] + [possible values: 0, 1, 2] + --features LIST Set comma-separated list of font features + --no-glyph-names Output 
glyph indices instead of names + --no-positions Do not output glyph positions + --no-advances Do not output glyph advances + --no-clusters Do not output cluster indices + --show-extents Output glyph extents + --show-flags Output glyph flags + --single-par Treat the input string as a single paragraph + --ned No Extra Data; Do not output clusters or advances ARGS: A font file @@ -51,6 +52,7 @@ struct Args { direction: Option, language: rustybuzz::Language, script: Option, + not_found_variation_selector_glyph: Option, utf8_clusters: bool, cluster_level: rustybuzz::BufferClusterLevel, features: Vec, @@ -85,6 +87,8 @@ fn parse_args() -> Result { .unwrap_or(system_language()), script: args.opt_value_from_str("--script")?, utf8_clusters: args.contains("--utf8-clusters"), + not_found_variation_selector_glyph: args + .opt_value_from_str("--not-found-variation-selector-glyph")?, cluster_level: args .opt_value_from_fn("--cluster-level", parse_cluster)? .unwrap_or_default(), @@ -194,6 +198,10 @@ fn main() { buffer.reset_clusters(); } + if let Some(g) = args.not_found_variation_selector_glyph { + buffer.set_not_found_variation_selector_glyph(g); + } + let glyph_buffer = rustybuzz::shape(&face, &args.features, buffer); let mut format_flags = rustybuzz::SerializeFlags::default(); diff --git a/scripts/gen-shaping-tests.py b/scripts/gen-shaping-tests.py index 3d457393..53cb0a8f 100755 --- a/scripts/gen-shaping-tests.py +++ b/scripts/gen-shaping-tests.py @@ -88,6 +88,8 @@ def prune_test_options(options): options = options.replace(" --font-funcs=ot", "").replace("--font-funcs=ot", "") # we don't support font scaling options = options.replace("--font-size=1000", "") + # We don't support glyphs > u16 + options = options.replace("--not-found-variation-selector-glyph=1000000", "--not-found-variation-selector-glyph=64000") options = options.strip() return options diff --git a/src/hb/buffer.rs b/src/hb/buffer.rs index fd69413c..72391997 100644 --- a/src/hb/buffer.rs +++ b/src/hb/buffer.rs 
@@ -512,6 +512,7 @@ impl hb_buffer_t { self.serial = 0; self.scratch_flags = HB_BUFFER_SCRATCH_FLAG_DEFAULT; self.cluster_level = HB_BUFFER_CLUSTER_LEVEL_DEFAULT; + self.not_found_variation_selector = None; } #[inline] @@ -1640,6 +1641,12 @@ impl UnicodeBuffer { self.0.language = Some(lang); } + /// Set the glyph value to replace not-found variation-selector characters with. + #[inline] + pub fn set_not_found_variation_selector_glyph(&mut self, glyph: u32) { + self.0.not_found_variation_selector = Some(glyph) + } + /// Get the buffer language. #[inline] pub fn language(&self) -> Option { diff --git a/src/hb/ot_layout.rs b/src/hb/ot_layout.rs index 649181c3..5e5fb3f8 100644 --- a/src/hb/ot_layout.rs +++ b/src/hb/ot_layout.rs @@ -493,6 +493,13 @@ pub(crate) fn _hb_glyph_info_is_default_ignorable(info: &hb_glyph_info_t) -> boo n != 0 && !_hb_glyph_info_substituted(info) } +#[inline] +pub(crate) fn _hb_glyph_info_clear_default_ignorable(info: &mut hb_glyph_info_t) { + let mut n = info.unicode_props(); + n &= !UnicodeProps::IGNORABLE.bits(); + info.set_unicode_props(n); +} + // static inline bool // _hb_glyph_info_is_default_ignorable_and_not_hidden (const hb_glyph_info_t *info) // { diff --git a/src/hb/ot_shape_normalize.rs b/src/hb/ot_shape_normalize.rs index b747f623..3fea8ae0 100644 --- a/src/hb/ot_shape_normalize.rs +++ b/src/hb/ot_shape_normalize.rs @@ -225,8 +225,14 @@ fn handle_variation_selector_cluster( // Just pass on the two characters separately, let GSUB do its magic. set_glyph(buffer.cur_mut(0), face); buffer.next_glyph(); - set_glyph(buffer.cur_mut(0), face); - buffer.next_glyph(); + + if let Some(not_found_variation_selector) = buffer.not_found_variation_selector { + _hb_glyph_info_clear_default_ignorable(buffer.cur_mut(0)); + next_char(buffer, not_found_variation_selector); + } else { + set_glyph(buffer.cur_mut(0), face); + buffer.next_glyph(); + } } // Skip any further variation selectors. 
diff --git a/tests/fonts/in-house/bbc24004e776f348a0f72287d24b0124867ee750.ttf b/tests/fonts/in-house/bbc24004e776f348a0f72287d24b0124867ee750.ttf new file mode 100644 index 0000000000000000000000000000000000000000..6967986c330fe03e29c1303a72d8ac024764a146 GIT binary patch literal 1656 zcmZ`(Urbw782_DnZwqbErATLRDz|iDMh9c<_!F!X2BTr?)~&!|$V5uZpTaHFwsT-& z;*$?%gvAFpafz?GE@m$-wo#v!XmG}8#)A(m`e5+E7|nE9#%TSWdkf;6=jMLr`igHJ-=w{Dk_*m)?IYrI#JeYx**SfW@-y*OiH+GN&*4IxGAo0^$!WXtxp2W>9CpN&E{gs9)o4(@XJIH+DSr+bcT)f1#FF%5nX7 zSCgDqMC8wVyEaxF&6OEK2eaQCQj=NMP|DNPumbIzow9Cxbc`dhF3gH->7}1amr6@E z?O)ld+FRm0s{q@Q&CeUCvu5=>j7yS#C<#O-#pVcx!$EXHUl(>1f$@qZNh>~(cCEv9 zl|V0N=foVYKr`=ZBsQNmFfx%Io5qL{&CcKn-fczK&|w`%x+4RWy^$^*O~=9^%Hv(9 zC?nxxlmn5Yw3_!U&u-omPbhVTal(uXOgiL*`%i&Rv%a$A(4y` zLVdZL1ZRO(zH-+1$ht}nYlXdM*1C!vV>5m2^p`QLUXvWw4kuSD%^s8yWVUP{49T6E zgZeN|mvzCeJyF&be(i_Ky1H#$+os#lz;D2cW2Xj0FpE*XS-y=#W0=4h7>FW`Q$%TG zkf3dbcjH47_QQ`hJa3|AqP7Qow#VzHUkdY#kzmwGa>y9cYd^Iht+QM=$fBc<3^B&n z(Zv;mb7adaW1=x;BJ+8TGR~a>0bb*>-3>S3cX?a#PXBO^?^*R}ZxMcGlSI8^mBf z|5cmO-fpyM9ip!B;K89lfW1Rb?G8B`=q+zWQTB3HP{+^f8#9;M|I0?kU(V&e{XCVc zd*ju!iPcr5uTWrCr$1SK;gWB9dYD;h9WokO>^?@@!@uA|&gS{gNxd$QOUB=RVsYN0 zN}o;Kx|J`cw0H9P?KQAyrwmD}=1gjV3#`aVZGjar>mg1S3Pghc9;okAWB&<{i*#-h Ysz*2O=2h3mhf1ZmU3^4ac&~WzZ_BD4^#A|> literal 0 HcmV?d00001 diff --git a/tests/shaping/in_house.rs b/tests/shaping/in_house.rs index a5a0ea56..8170e21b 100644 --- a/tests/shaping/in_house.rs +++ b/tests/shaping/in_house.rs @@ -17114,6 +17114,33 @@ fn use_017() { ); } +#[test] +fn variation_selectors_001() { + assert_eq!( + shape( + "tests/fonts/in-house/bbc24004e776f348a0f72287d24b0124867ee750.ttf", + "\u{0066}\u{FE00}\u{0069}", + "", + ), + "gid5=0+1134|\ + gid1=0+0" + ); +} + +#[test] +fn variation_selectors_002() { + assert_eq!( + shape( + "tests/fonts/in-house/bbc24004e776f348a0f72287d24b0124867ee750.ttf", + "\u{0066}\u{FE00}\u{0069}", + "--not-found-variation-selector-glyph=64000", + ), + "gid2=0+711|\ + gid64000=0+0|\ + gid3=2+497" + ); +} + #[test] 
fn variations_rvrn_001() { assert_eq!( diff --git a/tests/shaping/main.rs b/tests/shaping/main.rs index 8918e6f5..2aa8dc4d 100644 --- a/tests/shaping/main.rs +++ b/tests/shaping/main.rs @@ -20,6 +20,7 @@ struct Args { #[allow(dead_code)] remove_default_ignorables: bool, unsafe_to_concat: bool, + not_found_variation_selector_glyph: Option<u32>, cluster_level: rustybuzz::BufferClusterLevel, features: Vec, pre_context: Option, @@ -48,6 +49,8 @@ fn parse_args(args: Vec) -> Result { script: parser.opt_value_from_str("--script")?, remove_default_ignorables: parser.contains("--remove-default-ignorables"), unsafe_to_concat: parser.contains("--unsafe-to-concat"), + not_found_variation_selector_glyph: parser + .opt_value_from_str("--not-found-variation-selector-glyph")?, cluster_level: parser .opt_value_from_fn("--cluster-level", parse_cluster)? .unwrap_or_default(), @@ -133,6 +136,10 @@ pub fn shape(font_path: &str, text: &str, options: &str) -> String { buffer.set_direction(d); } + if let Some(g) = args.not_found_variation_selector_glyph { + buffer.set_not_found_variation_selector_glyph(g); + } + if let Some(lang) = args.language { buffer.set_language(lang); } From e43c9e752f9e8e7e12430bdcce18be196342c06b Mon Sep 17 00:00:00 2001 From: Laurenz Stampfl <47084093+LaurenzV@users.noreply.github.com> Date: Wed, 25 Sep 2024 19:38:21 +0200 Subject: [PATCH 18/29] Add Cargo.lock --- Cargo.lock | 342 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 342 insertions(+) create mode 100644 Cargo.lock diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 00000000..4ae143d6 --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,342 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 3 + +[[package]] +name = "ahash" +version = "0.8.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" +dependencies = [ + "cfg-if", + "once_cell", + "version_check", + "zerocopy", +] + +[[package]] +name = "arrayvec" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" + +[[package]] +name = "autocfg" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" + +[[package]] +name = "bitflags" +version = "2.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" + +[[package]] +name = "bytemuck" +version = "1.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94bbb0ad554ad961ddc5da507a12a29b14e4ae5bda06b19f575a3e6079d2e2ae" + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "core_maths" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3b02505ccb8c50b0aa21ace0fc08c3e53adebd4e58caa18a36152803c7709a3" +dependencies = [ + "libm", +] + +[[package]] +name = "downcast-rs" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75b325c5dbd37f80359721ad39aca5a29fb04c89279657cffdda8736d0c0b9d2" + +[[package]] +name = "hashbrown" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" +dependencies = [ + "ahash", +] + +[[package]] +name = "indexmap-nostd" +version = 
"0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e04e2fd2b8188ea827b32ef11de88377086d690286ab35747ef7f9bf3ccb590" + +[[package]] +name = "libc" +version = "0.2.159" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "561d97a539a36e26a9a5fad1ea11a3039a67714694aaa379433e580854bc3dc5" + +[[package]] +name = "libm" +version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058" + +[[package]] +name = "log" +version = "0.4.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" + +[[package]] +name = "multi-stash" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "685a9ac4b61f4e728e1d2c6a7844609c16527aeb5e6c865915c08e619c16410f" + +[[package]] +name = "num-derive" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed3955f1a9c7c0c15e092f9c887db08b1fc683305fdf6eb6684f22555355e202" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + +[[package]] +name = "once_cell" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" + +[[package]] +name = "paste" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" + +[[package]] +name = "pico-args" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"5be167a7af36ee22fe3115051bc51f6e6c7054c9348e28deb4f49bd6f705a315" + +[[package]] +name = "proc-macro2" +version = "1.0.86" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rustybuzz" +version = "0.18.0" +dependencies = [ + "bitflags", + "bytemuck", + "core_maths", + "libc", + "log", + "pico-args", + "smallvec", + "ttf-parser", + "unicode-bidi-mirroring", + "unicode-ccc", + "unicode-properties", + "unicode-script", + "wasmi", +] + +[[package]] +name = "serde" +version = "1.0.210" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8e3592472072e6e22e0a54d5904d9febf8508f65fb8552499a1abc7d1078c3a" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.210" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "243902eda00fad750862fc144cea25caca5e20d615af0a81bee94ca738f1df1f" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "smallvec" +version = "1.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" + +[[package]] +name = "spin" +version = "0.9.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" + +[[package]] +name = "string-interner" +version = "0.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1c6a0d765f5807e98a091107bae0a56ea3799f66a5de47b2c84c94a39c09974e" +dependencies = [ + "cfg-if", + "hashbrown", + "serde", +] + +[[package]] 
+name = "syn" +version = "2.0.77" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f35bcdf61fd8e7be6caf75f429fdca8beb3ed76584befb503b1569faee373ed" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "ttf-parser" +version = "0.24.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5be21190ff5d38e8b4a2d3b6a3ae57f612cc39c96e83cedeaf7abc338a8bac4a" +dependencies = [ + "core_maths", +] + +[[package]] +name = "unicode-bidi-mirroring" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64af057ad7466495ca113126be61838d8af947f41d93a949980b2389a118082f" + +[[package]] +name = "unicode-ccc" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "260bc6647b3893a9a90668360803a15f96b85a5257b1c3a0c3daf6ae2496de42" + +[[package]] +name = "unicode-ident" +version = "1.0.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e91b56cd4cadaeb79bbf1a5645f6b4f8dc5bde8834ad5894a8db35fda9efa1fe" + +[[package]] +name = "unicode-properties" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52ea75f83c0137a9b98608359a5f1af8144876eb67bcb1ce837368e906a9f524" + +[[package]] +name = "unicode-script" +version = "0.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fb421b350c9aff471779e262955939f565ec18b86c15364e6bdf0d662ca7c1f" + +[[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + +[[package]] +name = "wasmi" +version = "0.36.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81eacbefcfb4fc0d0af5424752e10895a131e1d0edb4b87554aac024bd294bdd" +dependencies = [ + "arrayvec", + "multi-stash", + "num-derive", + "num-traits", + 
"smallvec", + "spin", + "wasmi_collections", + "wasmi_core", + "wasmparser-nostd", +] + +[[package]] +name = "wasmi_collections" +version = "0.36.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4d1ff23df2c456c8b5d9a0ae7eed03a40f0c4520466b4aa87135c5fc557476e8" +dependencies = [ + "ahash", + "hashbrown", + "string-interner", +] + +[[package]] +name = "wasmi_core" +version = "0.36.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac1b21ded145eb313d44a5895442c28e18904fb95718dc83893779f55945d342" +dependencies = [ + "downcast-rs", + "libm", + "num-traits", + "paste", +] + +[[package]] +name = "wasmparser-nostd" +version = "0.100.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d5a015fe95f3504a94bb1462c717aae75253e39b9dd6c3fb1062c934535c64aa" +dependencies = [ + "indexmap-nostd", +] + +[[package]] +name = "zerocopy" +version = "0.7.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.7.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] From 6edbf4ecbca2698a1efc8a8eaeffb69a426b9314 Mon Sep 17 00:00:00 2001 From: Laurenz Stampfl <47084093+LaurenzV@users.noreply.github.com> Date: Wed, 25 Sep 2024 22:54:05 +0200 Subject: [PATCH 19/29] Follow up to variation-selector-not-found glyph --- src/hb/buffer.rs | 1 + src/hb/ot_layout.rs | 5 ++ src/hb/ot_shape.rs | 31 ++++++++- src/hb/ot_shape_normalize.rs | 17 +++-- src/hb/unicode.rs | 123 ++++++++++++++++++++++++++++++++++- tests/shaping/in_house.rs | 5 +- 6 files changed, 166 insertions(+), 16 deletions(-) diff --git a/src/hb/buffer.rs b/src/hb/buffer.rs index 72391997..1f7a1bb4 100644 --- 
a/src/hb/buffer.rs +++ b/src/hb/buffer.rs @@ -1555,6 +1555,7 @@ pub const HB_BUFFER_SCRATCH_FLAG_HAS_GPOS_ATTACHMENT: u32 = 0x00000008; pub const HB_BUFFER_SCRATCH_FLAG_HAS_CGJ: u32 = 0x00000010; pub const HB_BUFFER_SCRATCH_FLAG_HAS_GLYPH_FLAGS: u32 = 0x00000020; pub const HB_BUFFER_SCRATCH_FLAG_HAS_BROKEN_SYLLABLE: u32 = 0x00000040; +pub const HB_BUFFER_SCRATCH_FLAG_HAS_VARIATION_SELECTOR_FALLBACK: u32 = 0x00000080; /* Reserved for shapers' internal use. */ pub const HB_BUFFER_SCRATCH_FLAG_SHAPER0: u32 = 0x01000000; diff --git a/src/hb/ot_layout.rs b/src/hb/ot_layout.rs index 5e5fb3f8..75212806 100644 --- a/src/hb/ot_layout.rs +++ b/src/hb/ot_layout.rs @@ -412,6 +412,11 @@ pub fn _hb_glyph_info_set_general_category( ) { /* Clears top-byte. */ let gen_cat = gen_cat.to_rb(); + _hb_glyph_info_set_general_category_from_u32(info, gen_cat); +} + +#[inline] +pub fn _hb_glyph_info_set_general_category_from_u32(info: &mut hb_glyph_info_t, gen_cat: u32) { let n = (gen_cat as u16) | (info.unicode_props() & (0xFF & !UnicodeProps::GENERAL_CATEGORY.bits())); info.set_unicode_props(n); diff --git a/src/hb/ot_shape.rs b/src/hb/ot_shape.rs index ef6aa8d8..5162b6ea 100644 --- a/src/hb/ot_shape.rs +++ b/src/hb/ot_shape.rs @@ -11,9 +11,9 @@ use crate::hb::aat_layout::hb_aat_layout_remove_deleted_glyphs; use crate::hb::algs::{rb_flag, rb_flag_unsafe}; use crate::hb::buffer::glyph_flag::{SAFE_TO_INSERT_TATWEEL, UNSAFE_TO_BREAK, UNSAFE_TO_CONCAT}; use crate::hb::unicode::hb_gc::{ - RB_UNICODE_GENERAL_CATEGORY_LOWERCASE_LETTER, RB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER, - RB_UNICODE_GENERAL_CATEGORY_SPACE_SEPARATOR, RB_UNICODE_GENERAL_CATEGORY_TITLECASE_LETTER, - RB_UNICODE_GENERAL_CATEGORY_UPPERCASE_LETTER, + HB_UNICODE_GENERAL_CATEGORY_VARIATION_SELECTOR, RB_UNICODE_GENERAL_CATEGORY_LOWERCASE_LETTER, + RB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER, RB_UNICODE_GENERAL_CATEGORY_SPACE_SEPARATOR, + RB_UNICODE_GENERAL_CATEGORY_TITLECASE_LETTER, RB_UNICODE_GENERAL_CATEGORY_UPPERCASE_LETTER, }; use 
crate::BufferFlags; use crate::{Direction, Feature, Language, Script}; @@ -355,6 +355,7 @@ fn substitute_post(ctx: &mut hb_ot_shape_context_t) { aat_layout::hb_aat_layout_remove_deleted_glyphs(ctx.buffer); } + deal_with_variation_selectors(ctx.buffer); hide_default_ignorables(ctx.buffer, ctx.face); if let Some(func) = ctx.plan.shaper.postprocess_glyphs { @@ -856,6 +857,30 @@ fn zero_width_default_ignorables(buffer: &mut hb_buffer_t) { } } +fn deal_with_variation_selectors(buffer: &mut hb_buffer_t) { + if !(buffer.scratch_flags & HB_BUFFER_SCRATCH_FLAG_HAS_VARIATION_SELECTOR_FALLBACK != 0) { + return; + } + + // Note: In harfbuzz, this is part of the condition above (with OR), so it needs to stay + // in sync. + let Some(nf) = buffer.not_found_variation_selector else { + return; + }; + + let count = buffer.len; + let info = &mut buffer.info; + + for i in 0..count { + if _hb_glyph_info_get_general_category(&info[i]).to_rb() + == HB_UNICODE_GENERAL_CATEGORY_VARIATION_SELECTOR + { + _hb_glyph_info_clear_default_ignorable(&mut info[i]); + info[i].glyph_id = nf; + } + } +} + fn zero_mark_widths_by_gdef(buffer: &mut hb_buffer_t, adjust_offsets: bool) { let len = buffer.len; for (info, pos) in buffer.info[..len].iter().zip(&mut buffer.pos[..len]) { diff --git a/src/hb/ot_shape_normalize.rs b/src/hb/ot_shape_normalize.rs index 3fea8ae0..b868b1ec 100644 --- a/src/hb/ot_shape_normalize.rs +++ b/src/hb/ot_shape_normalize.rs @@ -5,6 +5,7 @@ use super::ot_layout::*; use super::ot_shape_plan::hb_ot_shape_plan_t; use super::ot_shaper::{ComposeFn, DecomposeFn, MAX_COMBINING_MARKS}; use super::unicode::{hb_unicode_funcs_t, CharExt}; +use crate::hb::unicode::hb_gc::HB_UNICODE_GENERAL_CATEGORY_VARIATION_SELECTOR; pub struct hb_ot_shape_normalize_context_t<'a> { pub plan: &'a hb_ot_shape_plan_t, @@ -226,13 +227,15 @@ fn handle_variation_selector_cluster( set_glyph(buffer.cur_mut(0), face); buffer.next_glyph(); - if let Some(not_found_variation_selector) = 
buffer.not_found_variation_selector { - _hb_glyph_info_clear_default_ignorable(buffer.cur_mut(0)); - next_char(buffer, not_found_variation_selector); - } else { - set_glyph(buffer.cur_mut(0), face); - buffer.next_glyph(); - } + buffer.scratch_flags |= HB_BUFFER_SCRATCH_FLAG_HAS_VARIATION_SELECTOR_FALLBACK; + + _hb_glyph_info_set_general_category_from_u32( + buffer.cur_mut(0), + HB_UNICODE_GENERAL_CATEGORY_VARIATION_SELECTOR, + ); + + set_glyph(buffer.cur_mut(0), face); + buffer.next_glyph(); } // Skip any further variation selectors. diff --git a/src/hb/unicode.rs b/src/hb/unicode.rs index cce460f0..d4222879 100644 --- a/src/hb/unicode.rs +++ b/src/hb/unicode.rs @@ -1,8 +1,121 @@ use core::convert::TryFrom; pub use unicode_ccc::CanonicalCombiningClass; +use unicode_properties::GeneralCategory; // TODO: prefer unic-ucd-normal::CanonicalCombiningClass -pub use unicode_properties::GeneralCategory as hb_unicode_general_category_t; + +// The reason this is duplicated from unicode_properties::GeneralCategory is +// that harfbuzz has a custom non-standard `VariationSelector` property which +// we need to add on our side, too. +#[derive(Copy, Clone, Hash, Eq, PartialEq, Ord, PartialOrd, Debug)] +/// The most general classification of a character. 
+pub enum hb_unicode_general_category_t { + /// `Lu`, an uppercase letter + UppercaseLetter, + /// `Ll`, a lowercase letter + LowercaseLetter, + /// `Lt`, a digraphic character, with first part uppercase + TitlecaseLetter, + /// `Lm`, a modifier letter + ModifierLetter, + /// `Lo`, other letters, including syllables and ideographs + OtherLetter, + /// `Mn`, a nonspacing combining mark (zero advance width) + NonspacingMark, + /// `Mc`, a spacing combining mark (positive advance width) + SpacingMark, + /// `Me`, an enclosing combining mark + EnclosingMark, + /// `Nd`, a decimal digit + DecimalNumber, + /// `Nl`, a letterlike numeric character + LetterNumber, + /// `No`, a numeric character of other type + OtherNumber, + /// `Pc`, a connecting punctuation mark, like a tie + ConnectorPunctuation, + /// `Pd`, a dash or hyphen punctuation mark + DashPunctuation, + /// `Ps`, an opening punctuation mark (of a pair) + OpenPunctuation, + /// `Pe`, a closing punctuation mark (of a pair) + ClosePunctuation, + /// `Pi`, an initial quotation mark + InitialPunctuation, + /// `Pf`, a final quotation mark + FinalPunctuation, + /// `Po`, a punctuation mark of other type + OtherPunctuation, + /// `Sm`, a symbol of mathematical use + MathSymbol, + /// `Sc`, a currency sign + CurrencySymbol, + /// `Sk`, a non-letterlike modifier symbol + ModifierSymbol, + /// `So`, a symbol of other type + OtherSymbol, + /// `Zs`, a space character (of various non-zero widths) + SpaceSeparator, + /// `Zl`, U+2028 LINE SEPARATOR only + LineSeparator, + /// `Zp`, U+2029 PARAGRAPH SEPARATOR only + ParagraphSeparator, + /// `Cc`, a C0 or C1 control code + Control, + /// `Cf`, a format control character + Format, + /// `Cs`, a surrogate code point + Surrogate, + /// `Co`, a private-use character + PrivateUse, + /// `Cn`, a reserved unassigned code point or a noncharacter + Unassigned, + /// harfbuzz-private category. 
+ VariationSelector, +} + +impl From<GeneralCategory> for hb_unicode_general_category_t { + fn from(value: GeneralCategory) -> Self { + match value { + GeneralCategory::UppercaseLetter => hb_unicode_general_category_t::UppercaseLetter, + GeneralCategory::LowercaseLetter => hb_unicode_general_category_t::LowercaseLetter, + GeneralCategory::TitlecaseLetter => hb_unicode_general_category_t::TitlecaseLetter, + GeneralCategory::ModifierLetter => hb_unicode_general_category_t::ModifierLetter, + GeneralCategory::OtherLetter => hb_unicode_general_category_t::OtherLetter, + GeneralCategory::NonspacingMark => hb_unicode_general_category_t::NonspacingMark, + GeneralCategory::SpacingMark => hb_unicode_general_category_t::SpacingMark, + GeneralCategory::EnclosingMark => hb_unicode_general_category_t::EnclosingMark, + GeneralCategory::DecimalNumber => hb_unicode_general_category_t::DecimalNumber, + GeneralCategory::LetterNumber => hb_unicode_general_category_t::LetterNumber, + GeneralCategory::OtherNumber => hb_unicode_general_category_t::OtherNumber, + GeneralCategory::ConnectorPunctuation => { + hb_unicode_general_category_t::ConnectorPunctuation + } + GeneralCategory::DashPunctuation => hb_unicode_general_category_t::DashPunctuation, + GeneralCategory::OpenPunctuation => hb_unicode_general_category_t::OpenPunctuation, + GeneralCategory::ClosePunctuation => hb_unicode_general_category_t::ClosePunctuation, + GeneralCategory::InitialPunctuation => { + hb_unicode_general_category_t::InitialPunctuation + } + GeneralCategory::FinalPunctuation => hb_unicode_general_category_t::FinalPunctuation, + GeneralCategory::OtherPunctuation => hb_unicode_general_category_t::OtherPunctuation, + GeneralCategory::MathSymbol => hb_unicode_general_category_t::MathSymbol, + GeneralCategory::CurrencySymbol => hb_unicode_general_category_t::CurrencySymbol, + GeneralCategory::ModifierSymbol => hb_unicode_general_category_t::ModifierSymbol, + GeneralCategory::OtherSymbol => hb_unicode_general_category_t::OtherSymbol, + 
GeneralCategory::SpaceSeparator => hb_unicode_general_category_t::SpaceSeparator, + GeneralCategory::LineSeparator => hb_unicode_general_category_t::LineSeparator, + GeneralCategory::ParagraphSeparator => { + hb_unicode_general_category_t::ParagraphSeparator + } + GeneralCategory::Control => hb_unicode_general_category_t::Control, + GeneralCategory::Format => hb_unicode_general_category_t::Format, + GeneralCategory::Surrogate => hb_unicode_general_category_t::Surrogate, + GeneralCategory::PrivateUse => hb_unicode_general_category_t::PrivateUse, + GeneralCategory::Unassigned => hb_unicode_general_category_t::Unassigned, + } + } +} use crate::Script; @@ -258,6 +371,7 @@ impl GeneralCategoryExt for hb_unicode_general_category_t { hb_unicode_general_category_t::TitlecaseLetter => hb_gc::RB_UNICODE_GENERAL_CATEGORY_TITLECASE_LETTER, hb_unicode_general_category_t::Unassigned => hb_gc::RB_UNICODE_GENERAL_CATEGORY_UNASSIGNED, hb_unicode_general_category_t::UppercaseLetter => hb_gc::RB_UNICODE_GENERAL_CATEGORY_UPPERCASE_LETTER, + hb_unicode_general_category_t::VariationSelector => hb_gc::HB_UNICODE_GENERAL_CATEGORY_VARIATION_SELECTOR } } @@ -293,7 +407,8 @@ impl GeneralCategoryExt for hb_unicode_general_category_t { hb_gc::RB_UNICODE_GENERAL_CATEGORY_TITLECASE_LETTER => hb_unicode_general_category_t::TitlecaseLetter, hb_gc::RB_UNICODE_GENERAL_CATEGORY_UNASSIGNED => hb_unicode_general_category_t::Unassigned, hb_gc::RB_UNICODE_GENERAL_CATEGORY_UPPERCASE_LETTER => hb_unicode_general_category_t::UppercaseLetter, - _ => unreachable!(), + hb_gc::HB_UNICODE_GENERAL_CATEGORY_VARIATION_SELECTOR => hb_unicode_general_category_t::VariationSelector, + _ => unreachable!() } } @@ -497,7 +612,7 @@ impl CharExt for char { } fn general_category(self) -> hb_unicode_general_category_t { - unicode_properties::general_category::UnicodeGeneralCategory::general_category(self) + unicode_properties::general_category::UnicodeGeneralCategory::general_category(self).into() } fn space_fallback(self) -> 
hb_unicode_funcs_t::space_t { @@ -880,4 +995,6 @@ pub mod hb_gc { pub const RB_UNICODE_GENERAL_CATEGORY_LINE_SEPARATOR: u32 = 27; pub const RB_UNICODE_GENERAL_CATEGORY_PARAGRAPH_SEPARATOR: u32 = 28; pub const RB_UNICODE_GENERAL_CATEGORY_SPACE_SEPARATOR: u32 = 29; + // Hack. See: https://github.com/harfbuzz/harfbuzz/pull/4529#discussion_r1769638033 + pub const HB_UNICODE_GENERAL_CATEGORY_VARIATION_SELECTOR: u32 = 30; } diff --git a/tests/shaping/in_house.rs b/tests/shaping/in_house.rs index 8170e21b..e0fb22b9 100644 --- a/tests/shaping/in_house.rs +++ b/tests/shaping/in_house.rs @@ -17135,9 +17135,8 @@ fn variation_selectors_002() { "\u{0066}\u{FE00}\u{0069}", "--not-found-variation-selector-glyph=64000", ), - "gid2=0+711|\ - gid64000=0+0|\ - gid3=2+497" + "gid5=0+1134|\ + gid64000=0+0" ); } From 7f9e05aa364312c30693aa5bd7216dbafde1d6e5 Mon Sep 17 00:00:00 2001 From: Laurenz Stampfl <47084093+LaurenzV@users.noreply.github.com> Date: Wed, 25 Sep 2024 23:01:48 +0200 Subject: [PATCH 20/29] Don't make variation-selectors default-ignorable if not-found set --- src/hb/ot_shape.rs | 10 +++++++++- src/hb/ot_shape_normalize.rs | 4 ++++ tests/shaping/in_house.rs | 5 +++-- 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/src/hb/ot_shape.rs b/src/hb/ot_shape.rs index 5162b6ea..4911e572 100644 --- a/src/hb/ot_shape.rs +++ b/src/hb/ot_shape.rs @@ -870,13 +870,21 @@ fn deal_with_variation_selectors(buffer: &mut hb_buffer_t) { let count = buffer.len; let info = &mut buffer.info; + let pos = &mut buffer.pos; for i in 0..count { if _hb_glyph_info_get_general_category(&info[i]).to_rb() == HB_UNICODE_GENERAL_CATEGORY_VARIATION_SELECTOR { - _hb_glyph_info_clear_default_ignorable(&mut info[i]); info[i].glyph_id = nf; + pos[i].x_advance = 0; + pos[i].y_advance = 0; + pos[i].x_offset = 0; + pos[i].y_offset = 0; + _hb_glyph_info_set_general_category( + &mut info[i], + hb_unicode_general_category_t::NonspacingMark, + ) } } } diff --git a/src/hb/ot_shape_normalize.rs 
b/src/hb/ot_shape_normalize.rs index b868b1ec..6af4fe5e 100644 --- a/src/hb/ot_shape_normalize.rs +++ b/src/hb/ot_shape_normalize.rs @@ -234,6 +234,10 @@ fn handle_variation_selector_cluster( HB_UNICODE_GENERAL_CATEGORY_VARIATION_SELECTOR, ); + if buffer.not_found_variation_selector.is_some() { + _hb_glyph_info_clear_default_ignorable(buffer.cur_mut(0)) + } + set_glyph(buffer.cur_mut(0), face); buffer.next_glyph(); } diff --git a/tests/shaping/in_house.rs b/tests/shaping/in_house.rs index e0fb22b9..8170e21b 100644 --- a/tests/shaping/in_house.rs +++ b/tests/shaping/in_house.rs @@ -17135,8 +17135,9 @@ fn variation_selectors_002() { "\u{0066}\u{FE00}\u{0069}", "--not-found-variation-selector-glyph=64000", ), - "gid5=0+1134|\ - gid64000=0+0" + "gid2=0+711|\ + gid64000=0+0|\ + gid3=2+497" ); } From fd7e2d4c6ccc968fb8ca415fd4e7c68302764632 Mon Sep 17 00:00:00 2001 From: Laurenz Stampfl <47084093+LaurenzV@users.noreply.github.com> Date: Wed, 25 Sep 2024 23:08:02 +0200 Subject: [PATCH 21/29] Re-add _hb_glyph_info_is_default_ignorable_and_not_hidden --- src/hb/ot_layout.rs | 16 +++++++--------- src/hb/ot_layout_gsubgpos.rs | 2 +- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/src/hb/ot_layout.rs b/src/hb/ot_layout.rs index 75212806..98a649c8 100644 --- a/src/hb/ot_layout.rs +++ b/src/hb/ot_layout.rs @@ -330,12 +330,12 @@ fn apply_backward(ctx: &mut OT::hb_ot_apply_context_t, lookup: &impl Apply) -> b /* Design: * unicode_props() is a two-byte number. The low byte includes: - * - General_Category: 5 bits. + * - Extended General_Category: 5 bits. * - A bit each for: * * Is it Default_Ignorable(); we have a modified Default_Ignorable(). * * Whether it's one of the four Mongolian Free Variation Selectors, * CGJ, or other characters that are hidden but should not be ignored - * like most other Default_Ignorable()s do during matching. + * like most other Default_Ignorable()s do during GSUB matching. * * Whether it's a grapheme continuation. 
* * The high-byte has different meanings, switched by the Gen-Cat: @@ -505,13 +505,11 @@ pub(crate) fn _hb_glyph_info_clear_default_ignorable(info: &mut hb_glyph_info_t) info.set_unicode_props(n); } -// static inline bool -// _hb_glyph_info_is_default_ignorable_and_not_hidden (const hb_glyph_info_t *info) -// { -// return ((info->unicode_props() & (UPROPS_MASK_IGNORABLE|UPROPS_MASK_HIDDEN)) -// == UPROPS_MASK_IGNORABLE) && -// !_hb_glyph_info_substituted (info); -// } +#[inline] +pub(crate) fn _hb_glyph_info_is_default_ignorable_and_not_hidden(info: &hb_glyph_info_t) -> bool { + let n = info.unicode_props() & (UnicodeProps::IGNORABLE.bits() | UnicodeProps::HIDDEN.bits()); + n == UnicodeProps::IGNORABLE.bits() && !_hb_glyph_info_substituted(info) +} // static inline void // _hb_glyph_info_unhide (hb_glyph_info_t *info) diff --git a/src/hb/ot_layout_gsubgpos.rs b/src/hb/ot_layout_gsubgpos.rs index 2ca248f7..1ad88535 100644 --- a/src/hb/ot_layout_gsubgpos.rs +++ b/src/hb/ot_layout_gsubgpos.rs @@ -384,7 +384,7 @@ impl<'a, 'b> skipping_iterator_t<'a, 'b> { return may_skip_t::SKIP_YES; } - if _hb_glyph_info_is_default_ignorable(info) + if _hb_glyph_info_is_default_ignorable_and_not_hidden(info) && !info.is_hidden() && (self.ignore_zwnj || !_hb_glyph_info_is_zwnj(info)) && (self.ignore_zwj || !_hb_glyph_info_is_zwj(info)) From a1f2bcefed051426ca10cda607ec0182ad851c83 Mon Sep 17 00:00:00 2001 From: Laurenz Stampfl <47084093+LaurenzV@users.noreply.github.com> Date: Wed, 25 Sep 2024 23:21:42 +0200 Subject: [PATCH 22/29] Ignore CGJ and Mongolian Variation Selectors during GPOS --- src/hb/ot_layout.rs | 5 ++- src/hb/ot_layout_gsubgpos.rs | 7 ++-- ...e442574141a0304e780b27dd872519f7d229db.ttf | Bin 0 -> 6076 bytes tests/shaping/in_house.rs | 32 +++++++++++++----- 4 files changed, 31 insertions(+), 13 deletions(-) create mode 100644 tests/fonts/in-house/cee442574141a0304e780b27dd872519f7d229db.ttf diff --git a/src/hb/ot_layout.rs b/src/hb/ot_layout.rs index 98a649c8..11f10411 
100644 --- a/src/hb/ot_layout.rs +++ b/src/hb/ot_layout.rs @@ -506,9 +506,8 @@ pub(crate) fn _hb_glyph_info_clear_default_ignorable(info: &mut hb_glyph_info_t) } #[inline] -pub(crate) fn _hb_glyph_info_is_default_ignorable_and_not_hidden(info: &hb_glyph_info_t) -> bool { - let n = info.unicode_props() & (UnicodeProps::IGNORABLE.bits() | UnicodeProps::HIDDEN.bits()); - n == UnicodeProps::IGNORABLE.bits() && !_hb_glyph_info_substituted(info) +pub(crate) fn _hb_glyph_info_is_hidden(info: &hb_glyph_info_t) -> bool { + (info.unicode_props() & UnicodeProps::HIDDEN.bits()) != 0 } // static inline void diff --git a/src/hb/ot_layout_gsubgpos.rs b/src/hb/ot_layout_gsubgpos.rs index 1ad88535..94b9b4cc 100644 --- a/src/hb/ot_layout_gsubgpos.rs +++ b/src/hb/ot_layout_gsubgpos.rs @@ -199,6 +199,7 @@ pub struct skipping_iterator_t<'a, 'b> { lookup_props: u32, ignore_zwnj: bool, ignore_zwj: bool, + ignore_hidden: bool, mask: hb_mask_t, syllable: u8, matching: Option<&'a match_func_t<'a>>, @@ -241,6 +242,8 @@ impl<'a, 'b> skipping_iterator_t<'a, 'b> { ignore_zwnj: ctx.table_index == TableIndex::GPOS || (context_match && ctx.auto_zwnj), // Ignore ZWJ if we are matching context, or asked to. ignore_zwj: context_match || ctx.auto_zwj, + // Ignore hidden glyphs (like CGJ) during GPOS. 
+ ignore_hidden: ctx.table_index == TableIndex::GPOS, mask: if context_match { u32::MAX } else { @@ -384,10 +387,10 @@ impl<'a, 'b> skipping_iterator_t<'a, 'b> { return may_skip_t::SKIP_YES; } - if _hb_glyph_info_is_default_ignorable_and_not_hidden(info) - && !info.is_hidden() + if _hb_glyph_info_is_default_ignorable(info) && (self.ignore_zwnj || !_hb_glyph_info_is_zwnj(info)) && (self.ignore_zwj || !_hb_glyph_info_is_zwj(info)) + && (self.ignore_hidden || !_hb_glyph_info_is_hidden(info)) { return may_skip_t::SKIP_MAYBE; } diff --git a/tests/fonts/in-house/cee442574141a0304e780b27dd872519f7d229db.ttf b/tests/fonts/in-house/cee442574141a0304e780b27dd872519f7d229db.ttf new file mode 100644 index 0000000000000000000000000000000000000000..5842bf764c09f4591faa1bf5569baa9a7f616d55 GIT binary patch literal 6076 zcmb_g3vg7`8UD|`ckkZpBQJIn5@BZ{F9N&dv1}GAYJenUvWrBBl&USR5Rynn_33aU4gSmfC7NVmlqDIMivUGo98Molfg?DudR{bQqNyQAB8fRv^=mY_{Kj?&gKB zIQDGLJ?B5?KmX(V|8vg0AwfhgJT$7^P*>kzd->^qfcOPQf3=}`)0Xyq-(N~(jT0%m zHf*_Lo#OT1M5McjL}%mHwVPks^B(5Z7r;NT5%fo6Z&iRd0=i+-mWt{}mexIp`9{$9 zY-w5B^3aj}KZSfR=mXKc(Z0ZsYoT2H9(3uBu4u3G4rdkC{{Zy1U6?=gLC23kkAS|r zE!y9UHXHmb-)QUHzw3>L*B&8qG!t2rHt-kkf9EPuP77>oZ*Pt6EIYq;JJ!Qqs^I}+ ziMAQ*S$=VQS8QLwqv9u^he5CE?AZ~ul^Jh=#(MH}MfdemjbQs?puOGEuGX3}t=}ce z_!u&G_4f3~ns12f&<*|cXA&e*Dat~h$W7I>fp*fn;$HE0rAq!fm3>MAbC5`x)I&~s z5-n3R#lcd5*vFH11E=Gut;rf~ScSehg}Jy0vc5{M(lCAS1e4ihJL;vvqm{J$Xf4%Z z1V}d1N7O)PQPZvVJMwGC$*+DpHKI18M(ADeZR7Rn!m@>U{mdJ?`?qJUxk{?3|8$9Z z7jvAxf2<33U4vbhKoY&92YC9mfy{%ed| zcvMmpaXsa)^<6P zjr9U@tz_Ka)*hq3g7HbE?)L*x>SW*8UTh@zdarEhytB7#0=sJ>wte2X*@^)>MZa>2~F=j_#;)is*0e>5Jvgp(KO3DiAbR z1%h&Ln&@iX*)dIYba%vz{@&=0)|Ihc=DJ{&*{-Tj)0NUa8*q7?)VhX93%R0w(R<0& z8I5%VDaM$v?>HM6Q57ScbJl3IGe*s9GBvRxs$<=BOK}aFv825QU;N+4| zmL{;;l*Lbu4o94yIOXXzdXb){y@=R1kZrl?d5%5jIsYeI9-yAb21G~QiPnZdgR**Y!HhJK#p;~C~;t+P4!JY&t)S*w`lliggwlxd#=d$_X2 z3Z85w;du-EGqEn7g>~^X)|~Ao(qM;&=AHubyIcFZsZu_dcSZYl)6%ZpUAtxe^Et`8 z)bxe*PptwLVjWODNcYle*g1s^w1{F+jAthH1D`4?#gyHk*iI4Wv);rMScn(I3wS<0 
z&ABJ_Tk#Y)fh0KjEMi;~tl*M|=&=uGyx6AeITq4FjG6f3iUlWEwqh!_j|$e_2aN{Z zO`CDIG`L*hPYFJuXE9LjL!KImr470@Vns?@0*rB;!Y1KArgiVdJtok)h0@Es~U-f`ys}!^Ji`!9#d2`2#cw z4L2Dbqseq%!0u1deFf`3O!rlsU%yHBHJti~(|rpN{$9GTWBeYokNdzuE$O}vd~4Hv zEBvZS_l+6(IQ1OxSK5O+H5*dRz8xMRnrtWTRzCIXX&=Q9^(f-qL!Ef1nrgRDD|J!F zG`$h@PS67wji7C#cC4IPKQs1Ca8AuPfWIGnQB*&&suk-7peG8QTS4_9wEf!0baz4YD zbd+zdFV94bnR6VR8e)#O=0F+P)nLF~6|By4yYxfhMm2oIVHiR=mtiE`;c(cMlbtuC zM zu1iaU0asq2x(1D|i(n|F+T7Re@?2EuehdwkwcnH)DS_(I&zd!jhTb{D0los-8idCoX(F z5YoZ>+E_U7O+@ORSy&X32WNT(t)ey9O--Lh0#S7rAqiI11VZVY2+cF}%AcluKXT-V zJtM=89N;{7`Ab4R$J=l?FFVH-lkty!aine1yDWHT?mLcjF_q(_szGj>d-iL%v$awE zYJQmEn!f{hcM!+uBV{$dvGFyq0H_B8`EFOBI=HG7n+hAvrJJz`1@d*BXKF(E`p=h^ ztWXv%R8?h#susn)MXI_&QPqV;j$Ysv$9$!$l~8%PyjEM3j8`LI{`R^W|MRTKqPeg(lsfsW9a#5WNWI(mw*fF(Eg%wTcSa`L>$ zJ#+rw7R$+#7;%mai}B&%q9Xga-Cj^2{^J}TJ`-0gInR0<@9H#!bp5YD(17F=gBK?PzpQQ3FkKs@u>;v2votj|)%rZ7 z`h1C8Orwxz2`_Lj_jroz56f8G34DI(6;+k+9N>HfkgPewDDy^Y7;spS9`k@JRKvzt zI3`^1<7$T4g}^(La{k)IPZ~9 z<(8H@fr8ASaT19FaS>(w?Gu;Ioj8$`Gj?ul%xb;#-Z)sM@ftdyUis&cA4{%FE5DBw zF;1M>rcxX-3L%4i$eJ_D48Du zU)Ax4;_CdIHgkc$dHFj%PNRvzIC&C-=6EH>!(Op4AODL|`Q%KeGCB$pS_sG4D_2gR zzIgG%J18Fv>K89uy>j{7NL)`&Q1Wsznb0PZl$=O_zI=5wNsz>XuohvqZ~~x6N%m|< zMt;6w7#VqadLkp!nVFfHVX-*u33q0uW*GL&O#I&pgLcEvEEe1-GG6$fBH6rx65`~^ bK~=f*ugO79pg2C)ohAq0Dg1?e5QY8+ Date: Wed, 25 Sep 2024 23:24:32 +0200 Subject: [PATCH 23/29] [USE] Update the data files --- .../IndicPositionalCategory-Additional.txt | 1 + src/hb/ot_shaper_use_table.rs | 153 +++++++++--------- 2 files changed, 77 insertions(+), 77 deletions(-) diff --git a/scripts/ms-use/IndicPositionalCategory-Additional.txt b/scripts/ms-use/IndicPositionalCategory-Additional.txt index b607241e..42dd1547 100644 --- a/scripts/ms-use/IndicPositionalCategory-Additional.txt +++ b/scripts/ms-use/IndicPositionalCategory-Additional.txt @@ -94,6 +94,7 @@ AA35   ; Top # Mn       CHAM CONSONANT SIGN 07EB..07F3 ; Top # Mn [9] NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE 07FD ; Top # Mn NKO 
DANTAYALAN # Not really top, but assigned here to allow ccc to control mark order 1885..1886 ; Top # Mn [2] MONGOLIAN LETTER ALI GALI BALUDA..MONGOLIAN LETTER ALI GALI THREE BALUDA +1B6C ; Top # Mn BALINESE MUSICAL SYMBOL COMBINING ENDEP 1CF8..1CF9 ; Top # Mn [2] VEDIC TONE RING ABOVE..VEDIC TONE DOUBLE RING ABOVE 10D24..10D27 ; Top # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI 10EAB..10EAC ; Top # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK diff --git a/src/hb/ot_shaper_use_table.rs b/src/hb/ot_shaper_use_table.rs index 8b4d40ec..1ca8fc69 100644 --- a/src/hb/ot_shaper_use_table.rs +++ b/src/hb/ot_shaper_use_table.rs @@ -49,7 +49,7 @@ use super::ot_shaper_use::category::*; -const hb_use_u8: [u8; 3345] = +const hb_use_u8: [u8; 3343] = [ 16, 50, 51, 51, 51, 52, 51, 83, 118, 131, 57, 58, 59, 195, 211, 62, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, @@ -147,91 +147,91 @@ const hb_use_u8: [u8; 3345] = 0, 2, 2, 100, 101, 102, 103, 61, 63, 104, 16, 45, 22, 59, 21, 80, 48, 48, 76, 11, 11, 11, 105, 46, 40, 11, 106, 74, 2, 2, 2, 2, 2, 2, 2, 107, 22, 20, 20, 22, 48, 48, 22, 108, 2, 2, 2, 9, - 0, 0, 0, 0, 0, 0, 109, 110, 111, 111, 111, 0, 0, 0, 0, 0, - 0, 106, 74, 2, 2, 2, 2, 2, 2, 60, 61, 59, 25, 22, 112, 61, + 0, 0, 0, 0, 0, 0, 109, 110, 110, 110, 110, 0, 0, 0, 0, 0, + 0, 106, 74, 2, 2, 2, 2, 2, 2, 60, 61, 59, 25, 22, 111, 61, 2, 2, 2, 2, 107, 22, 23, 45, 45, 102, 14, 0, 0, 0, 0, 0, - 0, 2, 2, 61, 18, 48, 23, 113, 102, 102, 102, 114, 115, 0, 0, 0, - 0, 2, 2, 2, 2, 2, 0, 30, 2, 11, 46, 116, 116, 116, 11, 116, - 116, 15, 116, 116, 116, 26, 0, 40, 0, 0, 0, 117, 51, 11, 5, 0, - 0, 0, 0, 0, 0, 0, 118, 0, 0, 0, 0, 0, 0, 0, 6, 119, - 120, 42, 42, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 120, 120, - 121, 120, 120, 120, 120, 120, 120, 120, 120, 0, 0, 122, 0, 0, 0, 0, - 0, 0, 7, 122, 0, 0, 0, 0, 0, 46, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, 0, 123, 123, 0, 0, + 0, 2, 2, 61, 18, 48, 23, 112, 102, 102, 102, 113, 114, 0, 
0, 0, + 0, 2, 2, 2, 2, 2, 0, 30, 2, 11, 46, 115, 115, 115, 11, 115, + 115, 15, 115, 115, 115, 26, 0, 40, 0, 0, 0, 116, 51, 11, 5, 0, + 0, 0, 0, 0, 0, 0, 117, 0, 0, 0, 0, 0, 0, 0, 6, 118, + 119, 42, 42, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 119, 119, + 120, 119, 119, 119, 119, 119, 119, 119, 119, 0, 0, 121, 0, 0, 0, 0, + 0, 0, 7, 121, 0, 0, 0, 0, 0, 46, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, 0, 122, 122, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 30, 0, 0, 0, 0, 0, 0, 0, - 124, 0, 123, 123, 0, 0, 0, 0, 0, 2, 53, 2, 108, 2, 10, 2, + 123, 0, 122, 122, 0, 0, 0, 0, 0, 2, 53, 2, 108, 2, 10, 2, 2, 2, 65, 19, 16, 0, 0, 31, 0, 2, 2, 0, 0, 0, 0, 0, - 0, 29, 2, 2, 2, 2, 2, 2, 2, 2, 2, 125, 23, 23, 23, 23, - 23, 23, 23, 126, 0, 0, 0, 0, 0, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 2, 0, 0, 0, 0, 0, 52, 2, 2, 2, 22, 22, 127, 116, - 0, 2, 2, 2, 128, 20, 59, 20, 113, 102, 129, 0, 0, 0, 0, 0, - 0, 11, 130, 2, 2, 2, 2, 2, 2, 2, 131, 23, 22, 20, 48, 132, - 133, 134, 0, 0, 0, 0, 0, 0, 0, 2, 2, 52, 30, 2, 2, 2, - 2, 2, 2, 2, 2, 10, 22, 59, 99, 76, 135, 136, 137, 0, 0, 0, - 0, 2, 138, 2, 2, 2, 2, 139, 0, 30, 2, 42, 5, 0, 79, 15, - 2, 53, 22, 140, 52, 53, 2, 2, 105, 10, 9, 0, 0, 0, 0, 0, - 0, 2, 2, 2, 2, 2, 141, 21, 25, 0, 0, 142, 143, 0, 0, 0, - 0, 2, 65, 45, 23, 80, 47, 144, 0, 81, 81, 81, 81, 81, 81, 81, - 81, 0, 0, 0, 0, 0, 0, 0, 6, 120, 120, 120, 120, 121, 0, 0, + 0, 29, 2, 2, 2, 2, 2, 2, 2, 2, 2, 124, 23, 23, 23, 23, + 23, 23, 23, 125, 0, 0, 0, 0, 0, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 2, 0, 0, 0, 0, 0, 52, 2, 2, 2, 22, 22, 126, 115, + 0, 2, 2, 2, 127, 20, 59, 20, 112, 102, 128, 0, 0, 0, 0, 0, + 0, 11, 129, 2, 2, 2, 2, 2, 2, 2, 130, 23, 22, 20, 48, 131, + 132, 133, 0, 0, 0, 0, 0, 0, 0, 2, 2, 52, 30, 2, 2, 2, + 2, 2, 2, 2, 2, 10, 22, 59, 99, 76, 134, 135, 136, 0, 0, 0, + 0, 2, 137, 2, 2, 2, 2, 138, 0, 30, 2, 42, 5, 0, 79, 15, + 2, 53, 22, 139, 52, 53, 2, 2, 105, 10, 9, 0, 0, 0, 0, 0, + 0, 2, 2, 2, 2, 2, 140, 21, 25, 0, 0, 141, 142, 0, 0, 0, + 0, 2, 65, 45, 23, 80, 47, 143, 0, 81, 81, 81, 
81, 81, 81, 81, + 81, 0, 0, 0, 0, 0, 0, 0, 6, 119, 119, 119, 119, 120, 0, 0, 0, 2, 2, 2, 2, 2, 9, 2, 2, 2, 9, 2, 30, 2, 2, 2, - 2, 2, 30, 2, 2, 2, 30, 9, 0, 128, 20, 27, 31, 0, 0, 145, - 146, 2, 2, 30, 2, 30, 2, 2, 2, 2, 2, 2, 0, 14, 37, 0, - 147, 2, 2, 13, 37, 0, 30, 2, 2, 2, 0, 0, 0, 0, 0, 0, + 2, 2, 30, 2, 2, 2, 30, 9, 0, 127, 20, 27, 31, 0, 0, 144, + 145, 2, 2, 30, 2, 30, 2, 2, 2, 2, 2, 2, 0, 14, 37, 0, + 146, 2, 2, 13, 37, 0, 30, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 30, 2, 2, 9, 2, 2, 11, 41, 0, 0, 0, 0, 2, 2, 2, 0, 27, 22, 22, 30, 2, 2, 2, 0, 0, 0, 0, - 0, 2, 2, 2, 2, 2, 27, 38, 0, 2, 2, 2, 116, 116, 116, 116, - 116, 148, 2, 9, 0, 0, 0, 0, 0, 2, 14, 14, 0, 0, 0, 0, + 0, 2, 2, 2, 2, 2, 27, 38, 0, 2, 2, 2, 115, 115, 115, 115, + 115, 147, 2, 9, 0, 0, 0, 0, 0, 2, 14, 14, 0, 0, 0, 0, 0, 9, 2, 2, 9, 2, 2, 2, 2, 30, 2, 9, 0, 30, 2, 0, - 0, 149, 150, 151, 2, 2, 2, 2, 2, 2, 2, 2, 2, 22, 22, 20, - 20, 20, 22, 22, 134, 0, 0, 0, 0, 0, 152, 152, 152, 152, 152, 152, - 152, 152, 152, 152, 2, 2, 2, 2, 2, 53, 52, 53, 0, 0, 0, 0, - 153, 11, 74, 2, 2, 2, 2, 2, 2, 18, 19, 21, 16, 24, 37, 0, + 0, 148, 149, 150, 2, 2, 2, 2, 2, 2, 2, 2, 2, 22, 22, 20, + 20, 20, 22, 22, 133, 0, 0, 0, 0, 0, 151, 151, 151, 151, 151, 151, + 151, 151, 151, 151, 2, 2, 2, 2, 2, 53, 52, 53, 0, 0, 0, 0, + 152, 11, 74, 2, 2, 2, 2, 2, 2, 18, 19, 21, 16, 24, 37, 0, 0, 0, 31, 0, 0, 0, 0, 0, 0, 11, 49, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 128, 20, 22, 154, 22, 21, 155, 156, 2, 2, 2, 2, - 2, 0, 0, 65, 157, 0, 0, 0, 0, 2, 13, 0, 0, 0, 0, 0, - 0, 2, 65, 25, 20, 20, 20, 22, 22, 108, 158, 0, 0, 56, 159, 31, - 160, 30, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 23, - 19, 22, 22, 161, 44, 0, 0, 0, 49, 128, 0, 0, 0, 0, 0, 0, + 2, 2, 2, 2, 127, 20, 22, 153, 22, 21, 154, 155, 2, 2, 2, 2, + 2, 0, 0, 65, 156, 0, 0, 0, 0, 2, 13, 0, 0, 0, 0, 0, + 0, 2, 65, 25, 20, 20, 20, 22, 22, 108, 157, 0, 0, 56, 158, 31, + 159, 30, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 23, + 19, 22, 22, 160, 44, 0, 0, 0, 49, 127, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 
9, 9, 2, 2, 30, 2, 2, 2, 2, 2, 2, 2, - 30, 2, 2, 2, 2, 2, 2, 2, 10, 18, 19, 21, 22, 162, 31, 0, + 30, 2, 2, 2, 2, 2, 2, 2, 10, 18, 19, 21, 22, 161, 31, 0, 0, 11, 11, 30, 2, 2, 2, 9, 30, 9, 2, 30, 2, 2, 58, 17, 23, 16, 23, 47, 32, 33, 32, 34, 0, 0, 0, 0, 35, 0, 0, 0, 2, 2, 23, 0, 11, 11, 11, 46, 0, 11, 11, 46, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 30, 0, 9, 2, 2, 2, 30, 45, 59, 20, - 20, 31, 33, 32, 32, 25, 163, 29, 164, 165, 37, 0, 0, 0, 0, 0, + 20, 31, 33, 32, 32, 25, 162, 29, 163, 164, 37, 0, 0, 0, 0, 0, 0, 12, 26, 0, 0, 0, 0, 0, 0, 2, 2, 65, 25, 20, 20, 20, - 22, 23, 126, 15, 17, 0, 0, 0, 0, 2, 2, 2, 2, 2, 0, 0, - 166, 167, 0, 0, 0, 0, 0, 0, 0, 18, 19, 20, 20, 66, 99, 25, - 160, 11, 168, 9, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, - 65, 25, 20, 20, 0, 48, 48, 11, 169, 37, 0, 0, 0, 0, 0, 0, + 22, 23, 125, 15, 17, 0, 0, 0, 0, 2, 2, 2, 2, 2, 0, 0, + 165, 166, 0, 0, 0, 0, 0, 0, 0, 18, 19, 20, 20, 66, 99, 25, + 159, 11, 167, 9, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, + 65, 25, 20, 20, 0, 48, 48, 11, 168, 37, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 20, 0, 23, 19, 20, 20, 21, 16, 82, - 169, 38, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 10, 170, - 25, 20, 22, 22, 168, 9, 0, 0, 0, 2, 2, 2, 2, 2, 9, 43, - 136, 23, 22, 20, 76, 21, 22, 0, 0, 2, 2, 2, 9, 0, 0, 0, - 0, 2, 2, 2, 2, 2, 2, 18, 19, 20, 21, 22, 105, 169, 37, 0, + 168, 38, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 10, 169, + 25, 20, 22, 22, 167, 9, 0, 0, 0, 2, 2, 2, 2, 2, 9, 43, + 135, 23, 22, 20, 76, 21, 22, 0, 0, 2, 2, 2, 9, 0, 0, 0, + 0, 2, 2, 2, 2, 2, 2, 18, 19, 20, 21, 22, 105, 168, 37, 0, 0, 2, 2, 2, 9, 30, 0, 2, 2, 2, 2, 30, 9, 2, 2, 2, - 2, 23, 23, 18, 32, 33, 12, 171, 165, 172, 173, 0, 0, 0, 0, 0, + 2, 23, 23, 18, 32, 33, 12, 170, 164, 171, 172, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 0, 2, 2, 2, 65, 25, 20, 20, 0, 22, 23, - 29, 108, 0, 33, 0, 0, 0, 0, 0, 52, 20, 22, 22, 22, 140, 2, - 2, 2, 174, 175, 11, 15, 176, 61, 177, 0, 0, 1, 147, 0, 0, 0, - 0, 52, 20, 22, 16, 19, 20, 2, 2, 2, 2, 158, 158, 158, 178, 178, - 178, 178, 178, 178, 15, 179, 0, 
30, 0, 22, 20, 20, 31, 22, 22, 11, - 169, 0, 61, 61, 61, 61, 61, 61, 61, 66, 21, 82, 46, 0, 0, 0, + 29, 108, 0, 33, 0, 0, 0, 0, 0, 52, 20, 22, 22, 22, 139, 2, + 2, 2, 173, 174, 11, 15, 175, 61, 176, 0, 0, 1, 146, 0, 0, 0, + 0, 52, 20, 22, 16, 19, 20, 2, 2, 2, 2, 157, 157, 157, 177, 177, + 177, 177, 177, 177, 15, 178, 0, 30, 0, 22, 20, 20, 31, 22, 22, 11, + 168, 0, 61, 61, 61, 61, 61, 61, 61, 66, 21, 82, 46, 0, 0, 0, 0, 2, 2, 2, 9, 2, 30, 2, 2, 52, 22, 22, 31, 0, 38, 22, - 27, 11, 159, 180, 181, 0, 0, 0, 0, 2, 2, 2, 30, 9, 2, 2, + 27, 11, 158, 179, 180, 0, 0, 0, 0, 2, 2, 2, 30, 9, 2, 2, 2, 2, 2, 2, 2, 2, 23, 23, 47, 22, 35, 82, 68, 0, 0, 0, - 0, 2, 182, 66, 47, 0, 0, 0, 0, 11, 183, 2, 2, 2, 2, 2, - 2, 2, 2, 23, 22, 20, 31, 0, 48, 16, 143, 0, 0, 0, 0, 0, - 0, 2, 2, 2, 2, 2, 156, 0, 0, 184, 184, 184, 184, 184, 184, 184, - 184, 185, 185, 185, 186, 187, 185, 184, 184, 188, 184, 184, 189, 190, 190, 190, - 190, 190, 190, 190, 0, 0, 0, 0, 0, 184, 184, 184, 184, 184, 191, 0, - 0, 2, 2, 2, 2, 2, 2, 2, 22, 22, 22, 22, 22, 22, 192, 193, - 194, 11, 11, 11, 46, 0, 0, 0, 0, 29, 74, 2, 2, 2, 2, 2, + 0, 2, 181, 66, 47, 0, 0, 0, 0, 11, 182, 2, 2, 2, 2, 2, + 2, 2, 2, 23, 22, 20, 31, 0, 48, 16, 142, 0, 0, 0, 0, 0, + 0, 2, 2, 2, 2, 2, 155, 0, 0, 183, 183, 183, 183, 183, 183, 183, + 183, 184, 184, 184, 185, 186, 184, 183, 183, 187, 183, 183, 188, 189, 189, 189, + 189, 189, 189, 189, 0, 0, 0, 0, 0, 183, 183, 183, 183, 183, 190, 0, + 0, 2, 2, 2, 2, 2, 2, 2, 22, 22, 22, 22, 22, 22, 191, 192, + 193, 11, 11, 11, 46, 0, 0, 0, 0, 29, 74, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 65, 47, 0, 2, 2, 2, 2, 2, 9, 0, - 58, 195, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 0, 0, 0, 40, 116, 26, 0, 0, 0, 0, 0, + 58, 194, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 0, 0, 0, 40, 115, 26, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 30, 2, 2, 2, 2, 2, 0, 58, 37, 0, 6, 120, 120, 120, 121, 0, + 30, 2, 2, 2, 2, 2, 0, 58, 37, 0, 6, 
119, 119, 119, 120, 0, 0, 11, 11, 11, 49, 2, 2, 2, 0, 2, 2, 2, 2, 2, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 46, 2, 2, 2, 2, 2, 2, 11, 11, 2, 2, 2, 2, 2, 2, 22, 22, 2, 2, 2, 2, 2, 2, 2, @@ -249,18 +249,17 @@ const hb_use_u8: [u8; 3345] = FMAbv, FAbv,CMAbv,FMAbv,VMAbv,FMAbv, VAbv, IS,FMAbv, B,FMAbv, B, CGJ, WJ, CGJ, GB, CMAbv,CMAbv, B, GB, B, VAbv, SUB, FPst, FPst,VMBlw, FPst, FPst, FBlw,VMAbv,FMBlw, VAbv, VPre, B, MPre, MBlw, SUB, FAbv, FAbv, MAbv, SUB, Sk, VPst, VAbv,VMAbv,VMAbv, FAbv,CMAbv, - VPst, H, B, O,SMAbv,SMBlw,SMAbv,SMAbv,SMAbv, VPst, IS, VBlw, FAbv,VMPre,VMPre,FMAbv, - CMBlw,VMBlw,VMBlw,VMAbv, CS, O,FMAbv, ZWNJ, CGJ, WJ, WJ, WJ, O,FMPst, O, SB, - SE, O, H, MPst, VPst, H,VMAbv, VAbv,VMBlw, B, VBlw, FPst, VPst, FAbv,VMPst, B, - CMAbv, VAbv, MBlw, MPst, MBlw, H, O, VBlw, MPst, MPre, MAbv, MBlw, O, B, FAbv, FAbv, - FPst, VBlw, B, B, VPre, O,VMPst, IS, O,VMPst, VBlw, VPst,VMBlw,VMBlw,VMAbv, O, - IS,VMBlw, B,VMPst,VMAbv,VMPst, CS, CS, B, N, N, O, HN, VPre, VBlw, VAbv, - IS,CMAbv, O, VPst, B, R, R,CMBlw, VAbv, VPre,VMAbv,VMAbv, H, VAbv,CMBlw,VMPst, - O,VMAbv,CMBlw, IS, R,FMAbv, B, CS, CS, H,CMBlw,VMPst, H,VMPst, VAbv,VMAbv, - VPst, MPst, R, MPst,CMBlw, B,FMBlw, VBlw,VMAbv, CS, SUB, SUB, GB, FBlw, FBlw,CMAbv, - IS, VBlw, IS, R, MBlw, GB, VAbv, R,VMPst, G, G, J, J, J, SB, SE, - J, HR, G, G, HM, HM, HM, G, O, MPre, MPre, MPst,VMAbv, MBlw, VBlw, O, - VBlw, + VPst, H, B, O,SMAbv,SMAbv,SMAbv, VPst, IS, VBlw, FAbv,VMPre,VMPre,FMAbv,CMBlw,VMBlw, + VMBlw,VMAbv, CS, O,FMAbv, ZWNJ, CGJ, WJ, WJ, WJ, O,FMPst, O, SB, SE, O, + H, MPst, VPst, H,VMAbv, VAbv,VMBlw, B, VBlw, FPst, VPst, FAbv,VMPst, B,CMAbv, VAbv, + MBlw, MPst, MBlw, H, O, VBlw, MPst, MPre, MAbv, MBlw, O, B, FAbv, FAbv, FPst, VBlw, + B, B, VPre, O,VMPst, IS, O,VMPst, VBlw, VPst,VMBlw,VMBlw,VMAbv, O, IS,VMBlw, + B,VMPst,VMAbv,VMPst, CS, CS, B, N, N, O, HN, VPre, VBlw, VAbv, IS,CMAbv, + O, VPst, B, R, R,CMBlw, VAbv, VPre,VMAbv,VMAbv, H, VAbv,CMBlw,VMPst, O,VMAbv, + CMBlw, IS, R,FMAbv, B, CS, CS, H,CMBlw,VMPst, H,VMPst, 
VAbv,VMAbv, VPst, MPst, + R, MPst,CMBlw, B,FMBlw, VBlw,VMAbv, CS, SUB, SUB, GB, FBlw, FBlw,CMAbv, IS, VBlw, + IS, R, MBlw, GB, VAbv, R,VMPst, G, G, J, J, J, SB, SE, J, HR, + G, G, HM, HM, HM, G, O, MPre, MPre, MPst,VMAbv, MBlw, VBlw, O, VBlw, ]; const hb_use_u16: [u16; 856] = From ec0e44453c1a0f268e0116da4484ee8dbafb51a9 Mon Sep 17 00:00:00 2001 From: Laurenz Stampfl <47084093+LaurenzV@users.noreply.github.com> Date: Wed, 25 Sep 2024 23:24:37 +0200 Subject: [PATCH 24/29] Remove unused method --- src/hb/buffer.rs | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/hb/buffer.rs b/src/hb/buffer.rs index 1f7a1bb4..b6373f9d 100644 --- a/src/hb/buffer.rs +++ b/src/hb/buffer.rs @@ -268,11 +268,6 @@ impl hb_glyph_info_t { self.set_unicode_props(props); } - #[inline] - pub(crate) fn is_hidden(&self) -> bool { - self.unicode_props() & UnicodeProps::HIDDEN.bits() != 0 - } - #[inline] pub(crate) fn unhide(&mut self) { let mut n = self.unicode_props(); From ee9c923355d1578c8654cce9e7c4874d9037c873 Mon Sep 17 00:00:00 2001 From: Laurenz Stampfl <47084093+LaurenzV@users.noreply.github.com> Date: Wed, 25 Sep 2024 23:27:29 +0200 Subject: [PATCH 25/29] Bump MacOS in CI --- .github/workflows/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index cc2572c5..98ee8edd 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -10,7 +10,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - os: [ubuntu-latest, macos-14] + os: [ubuntu-latest, macos-15] rust: [stable] steps: - name: Checkout From 1b89c9e0abb435134bab98c09bb07cc8a80ded05 Mon Sep 17 00:00:00 2001 From: Laurenz Stampfl <47084093+LaurenzV@users.noreply.github.com> Date: Thu, 26 Sep 2024 10:07:24 +0200 Subject: [PATCH 26/29] Bump more unicode crates --- Cargo.lock | 8 ++++---- Cargo.toml | 4 ++-- src/hb/unicode.rs | 4 ++-- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 
4ae143d6..a85bbeab 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -238,15 +238,15 @@ dependencies = [ [[package]] name = "unicode-bidi-mirroring" -version = "0.3.0" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64af057ad7466495ca113126be61838d8af947f41d93a949980b2389a118082f" +checksum = "5dfa6e8c60bb66d49db113e0125ee8711b7647b5579dc7f5f19c42357ed039fe" [[package]] name = "unicode-ccc" -version = "0.3.0" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "260bc6647b3893a9a90668360803a15f96b85a5257b1c3a0c3daf6ae2496de42" +checksum = "ce61d488bcdc9bc8b5d1772c404828b17fc481c0a582b5581e95fb233aef503e" [[package]] name = "unicode-ident" diff --git a/Cargo.toml b/Cargo.toml index c0ed8bbc..4d395c7c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,8 +17,8 @@ bitflags = "2.4.1" bytemuck = { version = "1.5", features = ["extern_crate_alloc"] } core_maths = "0.1.0" # only for no_std builds smallvec = "1.6" -unicode-bidi-mirroring = "0.3.0" -unicode-ccc = "0.3.0" +unicode-bidi-mirroring = "0.4.0" +unicode-ccc = "0.4.0" unicode-properties = { version = "0.1.2", default-features = false, features = ["general-category"] } unicode-script = "0.5.2" wasmi = { version = "0.36.0", optional = true } diff --git a/src/hb/unicode.rs b/src/hb/unicode.rs index d4222879..f8a72554 100644 --- a/src/hb/unicode.rs +++ b/src/hb/unicode.rs @@ -955,8 +955,8 @@ pub fn decompose_hangul(ab: char) -> Option<(char, char)> { mod tests { #[test] fn check_unicode_version() { - assert_eq!(unicode_bidi_mirroring::UNICODE_VERSION, (15, 1, 0)); - assert_eq!(unicode_ccc::UNICODE_VERSION, (15, 0, 0)); + assert_eq!(unicode_bidi_mirroring::UNICODE_VERSION, (16, 0, 0)); + assert_eq!(unicode_ccc::UNICODE_VERSION, (16, 0, 0)); assert_eq!(unicode_properties::UNICODE_VERSION, (15, 1, 0)); assert_eq!(unicode_script::UNICODE_VERSION, (16, 0, 0)); assert_eq!(crate::hb::unicode_norm::UNICODE_VERSION, (16, 0, 0)); From 
d4bebc6eb3b0d4811522f52a4338de76ad5c8be3 Mon Sep 17 00:00:00 2001 From: Laurenz Stampfl <47084093+LaurenzV@users.noreply.github.com> Date: Thu, 26 Sep 2024 10:25:55 +0200 Subject: [PATCH 27/29] Update README --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index b50f5a79..e8e8244b 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ `rustybuzz` is a complete [harfbuzz](https://github.com/harfbuzz/harfbuzz)'s shaping algorithm port to Rust. -Matches `harfbuzz` v9.0.0 +Matches `harfbuzz` v10.0.1, commit [c7ef6a2e](https://github.com/harfbuzz/harfbuzz/commit/c7ef6a2ed58ae8ec108ee0962bef46f42c73a60c) (one commit after v10.0.1) ## Why? From f67f55506fb932e8299fa39cd7e6129ac75dbf98 Mon Sep 17 00:00:00 2001 From: Laurenz Stampfl <47084093+LaurenzV@users.noreply.github.com> Date: Thu, 26 Sep 2024 10:45:05 +0200 Subject: [PATCH 28/29] Remove the hack re variation-selectors --- src/hb/buffer.rs | 5 +- src/hb/ot_layout.rs | 30 +++++++-- src/hb/ot_shape.rs | 15 ++--- src/hb/ot_shape_normalize.rs | 6 +- src/hb/unicode.rs | 121 +---------------------------------- 5 files changed, 35 insertions(+), 142 deletions(-) diff --git a/src/hb/buffer.rs b/src/hb/buffer.rs index b6373f9d..f1853242 100644 --- a/src/hb/buffer.rs +++ b/src/hb/buffer.rs @@ -248,7 +248,7 @@ impl hb_glyph_info_t { // https://github.com/harfbuzz/harfbuzz/issues/463 0xE0020..=0xE007F => props |= UnicodeProps::HIDDEN.bits(), - // COMBINING GRAPHEME JOINER should not be skipped; at least some times. + // COMBINING GRAPHEME JOINER should not be skipped during GSUB either. // https://github.com/harfbuzz/harfbuzz/issues/554 0x034F => { props |= UnicodeProps::HIDDEN.bits(); @@ -1513,13 +1513,14 @@ bitflags::bitflags! 
{ pub struct UnicodeProps: u16 { const GENERAL_CATEGORY = 0x001F; const IGNORABLE = 0x0020; - // MONGOLIAN FREE VARIATION SELECTOR 1..4, or TAG characters + // MONGOLIAN FREE VARIATION SELECTOR 1..4, or TAG characters, or CGJ sometimes const HIDDEN = 0x0040; const CONTINUATION = 0x0080; // If GEN_CAT=FORMAT, top byte masks: const CF_ZWJ = 0x0100; const CF_ZWNJ = 0x0200; + const CF_VS = 0x0400; } } diff --git a/src/hb/ot_layout.rs b/src/hb/ot_layout.rs index 11f10411..d7cdf30a 100644 --- a/src/hb/ot_layout.rs +++ b/src/hb/ot_layout.rs @@ -330,7 +330,7 @@ fn apply_backward(ctx: &mut OT::hb_ot_apply_context_t, lookup: &impl Apply) -> b /* Design: * unicode_props() is a two-byte number. The low byte includes: - * - Extended General_Category: 5 bits. + * - Modified General_Category: 5 bits. * - A bit each for: * * Is it Default_Ignorable(); we have a modified Default_Ignorable(). * * Whether it's one of the four Mongolian Free Variation Selectors, @@ -343,6 +343,11 @@ fn apply_backward(ctx: &mut OT::hb_ot_apply_context_t, lookup: &impl Apply) -> b * - For Cf: whether it's ZWJ, ZWNJ, or something else. * - For Ws: index of which space character this is, if space fallback * is needed, ie. we don't set this by default, only if asked to. + * + * Above I said "modified" General_Category. This is because we need to + * remember Variation Selectors, and we don't have bits left. So we + * change their Gen_Cat from Mn to Cf, and use a bit of the high byte to + * remember them. */ // enum hb_unicode_props_flags_t { @@ -412,11 +417,6 @@ pub fn _hb_glyph_info_set_general_category( ) { /* Clears top-byte. 
*/ let gen_cat = gen_cat.to_rb(); - _hb_glyph_info_set_general_category_from_u32(info, gen_cat); -} - -#[inline] -pub fn _hb_glyph_info_set_general_category_from_u32(info: &mut hb_glyph_info_t, gen_cat: u32) { let n = (gen_cat as u16) | (info.unicode_props() & (0xFF & !UnicodeProps::GENERAL_CATEGORY.bits())); info.set_unicode_props(n); @@ -492,6 +492,24 @@ pub(crate) fn _hb_glyph_info_get_unicode_space_fallback_type( } } +#[inline] +pub(crate) fn _hb_glyph_info_is_variation_selector(info: &hb_glyph_info_t) -> bool { + let a = _hb_glyph_info_get_general_category(info) == hb_unicode_general_category_t::Format; + let b = (info.unicode_props() & UnicodeProps::CF_VS.bits()) != 0; + a && b +} + +#[inline] +pub(crate) fn _hb_glyph_info_set_variation_selector(info: &mut hb_glyph_info_t, customize: bool) { + if customize { + _hb_glyph_info_set_general_category(info, hb_unicode_general_category_t::Format); + info.set_unicode_props(info.unicode_props() | UnicodeProps::CF_VS.bits()) + } else { + // Reset to their original condition + _hb_glyph_info_set_general_category(info, hb_unicode_general_category_t::NonspacingMark); + } +} + #[inline] pub(crate) fn _hb_glyph_info_is_default_ignorable(info: &hb_glyph_info_t) -> bool { let n = info.unicode_props() & UnicodeProps::IGNORABLE.bits(); diff --git a/src/hb/ot_shape.rs b/src/hb/ot_shape.rs index 4911e572..5b3131b9 100644 --- a/src/hb/ot_shape.rs +++ b/src/hb/ot_shape.rs @@ -11,9 +11,9 @@ use crate::hb::aat_layout::hb_aat_layout_remove_deleted_glyphs; use crate::hb::algs::{rb_flag, rb_flag_unsafe}; use crate::hb::buffer::glyph_flag::{SAFE_TO_INSERT_TATWEEL, UNSAFE_TO_BREAK, UNSAFE_TO_CONCAT}; use crate::hb::unicode::hb_gc::{ - HB_UNICODE_GENERAL_CATEGORY_VARIATION_SELECTOR, RB_UNICODE_GENERAL_CATEGORY_LOWERCASE_LETTER, - RB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER, RB_UNICODE_GENERAL_CATEGORY_SPACE_SEPARATOR, - RB_UNICODE_GENERAL_CATEGORY_TITLECASE_LETTER, RB_UNICODE_GENERAL_CATEGORY_UPPERCASE_LETTER, + 
RB_UNICODE_GENERAL_CATEGORY_LOWERCASE_LETTER, RB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER, + RB_UNICODE_GENERAL_CATEGORY_SPACE_SEPARATOR, RB_UNICODE_GENERAL_CATEGORY_TITLECASE_LETTER, + RB_UNICODE_GENERAL_CATEGORY_UPPERCASE_LETTER, }; use crate::BufferFlags; use crate::{Direction, Feature, Language, Script}; @@ -873,18 +873,13 @@ fn deal_with_variation_selectors(buffer: &mut hb_buffer_t) { let pos = &mut buffer.pos; for i in 0..count { - if _hb_glyph_info_get_general_category(&info[i]).to_rb() - == HB_UNICODE_GENERAL_CATEGORY_VARIATION_SELECTOR - { + if _hb_glyph_info_is_variation_selector(&info[i]) { info[i].glyph_id = nf; pos[i].x_advance = 0; pos[i].y_advance = 0; pos[i].x_offset = 0; pos[i].y_offset = 0; - _hb_glyph_info_set_general_category( - &mut info[i], - hb_unicode_general_category_t::NonspacingMark, - ) + _hb_glyph_info_set_variation_selector(&mut info[i], false); } } } diff --git a/src/hb/ot_shape_normalize.rs b/src/hb/ot_shape_normalize.rs index 6af4fe5e..8167680d 100644 --- a/src/hb/ot_shape_normalize.rs +++ b/src/hb/ot_shape_normalize.rs @@ -5,7 +5,6 @@ use super::ot_layout::*; use super::ot_shape_plan::hb_ot_shape_plan_t; use super::ot_shaper::{ComposeFn, DecomposeFn, MAX_COMBINING_MARKS}; use super::unicode::{hb_unicode_funcs_t, CharExt}; -use crate::hb::unicode::hb_gc::HB_UNICODE_GENERAL_CATEGORY_VARIATION_SELECTOR; pub struct hb_ot_shape_normalize_context_t<'a> { pub plan: &'a hb_ot_shape_plan_t, @@ -229,10 +228,7 @@ fn handle_variation_selector_cluster( buffer.scratch_flags |= HB_BUFFER_SCRATCH_FLAG_HAS_VARIATION_SELECTOR_FALLBACK; - _hb_glyph_info_set_general_category_from_u32( - buffer.cur_mut(0), - HB_UNICODE_GENERAL_CATEGORY_VARIATION_SELECTOR, - ); + _hb_glyph_info_set_variation_selector(buffer.cur_mut(0), true); if buffer.not_found_variation_selector.is_some() { _hb_glyph_info_clear_default_ignorable(buffer.cur_mut(0)) diff --git a/src/hb/unicode.rs b/src/hb/unicode.rs index f8a72554..660bbfd0 100644 --- a/src/hb/unicode.rs +++ 
b/src/hb/unicode.rs @@ -1,121 +1,8 @@ use core::convert::TryFrom; pub use unicode_ccc::CanonicalCombiningClass; -use unicode_properties::GeneralCategory; // TODO: prefer unic-ucd-normal::CanonicalCombiningClass - -// The reason this is duplicated from unicode_properties::GeneralCategory is -// that harfbuzz has a custom non-standard `VariationSelector` property which -// we need to add on our side, too. -#[derive(Copy, Clone, Hash, Eq, PartialEq, Ord, PartialOrd, Debug)] -/// The most general classification of a character. -pub enum hb_unicode_general_category_t { - /// `Lu`, an uppercase letter - UppercaseLetter, - /// `Ll`, a lowercase letter - LowercaseLetter, - /// `Lt`, a digraphic character, with first part uppercase - TitlecaseLetter, - /// `Lm`, a modifier letter - ModifierLetter, - /// `Lo`, other letters, including syllables and ideographs - OtherLetter, - /// `Mn`, a nonspacing combining mark (zero advance width) - NonspacingMark, - /// `Mc`, a spacing combining mark (positive advance width) - SpacingMark, - /// `Me`, an enclosing combining mark - EnclosingMark, - /// `Nd`, a decimal digit - DecimalNumber, - /// `Nl`, a letterlike numeric character - LetterNumber, - /// `No`, a numeric character of other type - OtherNumber, - /// `Pc`, a connecting punctuation mark, like a tie - ConnectorPunctuation, - /// `Pd`, a dash or hyphen punctuation mark - DashPunctuation, - /// `Ps`, an opening punctuation mark (of a pair) - OpenPunctuation, - /// `Pe`, a closing punctuation mark (of a pair) - ClosePunctuation, - /// `Pi`, an initial quotation mark - InitialPunctuation, - /// `Pf`, a final quotation mark - FinalPunctuation, - /// `Po`, a punctuation mark of other type - OtherPunctuation, - /// `Sm`, a symbol of mathematical use - MathSymbol, - /// `Sc`, a currency sign - CurrencySymbol, - /// `Sk`, a non-letterlike modifier symbol - ModifierSymbol, - /// `So`, a symbol of other type - OtherSymbol, - /// `Zs`, a space character (of various non-zero widths) - 
SpaceSeparator, - /// `Zl`, U+2028 LINE SEPARATOR only - LineSeparator, - /// `Zp`, U+2029 PARAGRAPH SEPARATOR only - ParagraphSeparator, - /// `Cc`, a C0 or C1 control code - Control, - /// `Cf`, a format control character - Format, - /// `Cs`, a surrogate code point - Surrogate, - /// `Co`, a private-use character - PrivateUse, - /// `Cn`, a reserved unassigned code point or a noncharacter - Unassigned, - /// harfbuzz-private category. - VariationSelector, -} - -impl From for hb_unicode_general_category_t { - fn from(value: GeneralCategory) -> Self { - match value { - GeneralCategory::UppercaseLetter => hb_unicode_general_category_t::UppercaseLetter, - GeneralCategory::LowercaseLetter => hb_unicode_general_category_t::LowercaseLetter, - GeneralCategory::TitlecaseLetter => hb_unicode_general_category_t::TitlecaseLetter, - GeneralCategory::ModifierLetter => hb_unicode_general_category_t::ModifierLetter, - GeneralCategory::OtherLetter => hb_unicode_general_category_t::OtherLetter, - GeneralCategory::NonspacingMark => hb_unicode_general_category_t::NonspacingMark, - GeneralCategory::SpacingMark => hb_unicode_general_category_t::SpacingMark, - GeneralCategory::EnclosingMark => hb_unicode_general_category_t::EnclosingMark, - GeneralCategory::DecimalNumber => hb_unicode_general_category_t::DecimalNumber, - GeneralCategory::LetterNumber => hb_unicode_general_category_t::LetterNumber, - GeneralCategory::OtherNumber => hb_unicode_general_category_t::OtherNumber, - GeneralCategory::ConnectorPunctuation => { - hb_unicode_general_category_t::ConnectorPunctuation - } - GeneralCategory::DashPunctuation => hb_unicode_general_category_t::DashPunctuation, - GeneralCategory::OpenPunctuation => hb_unicode_general_category_t::OpenPunctuation, - GeneralCategory::ClosePunctuation => hb_unicode_general_category_t::ClosePunctuation, - GeneralCategory::InitialPunctuation => { - hb_unicode_general_category_t::InitialPunctuation - } - GeneralCategory::FinalPunctuation => 
hb_unicode_general_category_t::FinalPunctuation, - GeneralCategory::OtherPunctuation => hb_unicode_general_category_t::OtherPunctuation, - GeneralCategory::MathSymbol => hb_unicode_general_category_t::MathSymbol, - GeneralCategory::CurrencySymbol => hb_unicode_general_category_t::CurrencySymbol, - GeneralCategory::ModifierSymbol => hb_unicode_general_category_t::ModifierSymbol, - GeneralCategory::OtherSymbol => hb_unicode_general_category_t::OtherSymbol, - GeneralCategory::SpaceSeparator => hb_unicode_general_category_t::SpaceSeparator, - GeneralCategory::LineSeparator => hb_unicode_general_category_t::LineSeparator, - GeneralCategory::ParagraphSeparator => { - hb_unicode_general_category_t::ParagraphSeparator - } - GeneralCategory::Control => hb_unicode_general_category_t::Control, - GeneralCategory::Format => hb_unicode_general_category_t::Format, - GeneralCategory::Surrogate => hb_unicode_general_category_t::Surrogate, - GeneralCategory::PrivateUse => hb_unicode_general_category_t::PrivateUse, - GeneralCategory::Unassigned => hb_unicode_general_category_t::Unassigned, - } - } -} +pub use unicode_properties::GeneralCategory as hb_unicode_general_category_t; use crate::Script; @@ -370,8 +257,7 @@ impl GeneralCategoryExt for hb_unicode_general_category_t { hb_unicode_general_category_t::Surrogate => hb_gc::RB_UNICODE_GENERAL_CATEGORY_SURROGATE, hb_unicode_general_category_t::TitlecaseLetter => hb_gc::RB_UNICODE_GENERAL_CATEGORY_TITLECASE_LETTER, hb_unicode_general_category_t::Unassigned => hb_gc::RB_UNICODE_GENERAL_CATEGORY_UNASSIGNED, - hb_unicode_general_category_t::UppercaseLetter => hb_gc::RB_UNICODE_GENERAL_CATEGORY_UPPERCASE_LETTER, - hb_unicode_general_category_t::VariationSelector => hb_gc::HB_UNICODE_GENERAL_CATEGORY_VARIATION_SELECTOR + hb_unicode_general_category_t::UppercaseLetter => hb_gc::RB_UNICODE_GENERAL_CATEGORY_UPPERCASE_LETTER } } @@ -407,7 +293,6 @@ impl GeneralCategoryExt for hb_unicode_general_category_t { 
hb_gc::RB_UNICODE_GENERAL_CATEGORY_TITLECASE_LETTER => hb_unicode_general_category_t::TitlecaseLetter, hb_gc::RB_UNICODE_GENERAL_CATEGORY_UNASSIGNED => hb_unicode_general_category_t::Unassigned, hb_gc::RB_UNICODE_GENERAL_CATEGORY_UPPERCASE_LETTER => hb_unicode_general_category_t::UppercaseLetter, - hb_gc::HB_UNICODE_GENERAL_CATEGORY_VARIATION_SELECTOR => hb_unicode_general_category_t::VariationSelector, _ => unreachable!() } } @@ -995,6 +880,4 @@ pub mod hb_gc { pub const RB_UNICODE_GENERAL_CATEGORY_LINE_SEPARATOR: u32 = 27; pub const RB_UNICODE_GENERAL_CATEGORY_PARAGRAPH_SEPARATOR: u32 = 28; pub const RB_UNICODE_GENERAL_CATEGORY_SPACE_SEPARATOR: u32 = 29; - // Hack. See: https://github.com/harfbuzz/harfbuzz/pull/4529#discussion_r1769638033 - pub const HB_UNICODE_GENERAL_CATEGORY_VARIATION_SELECTOR: u32 = 30; } From 5b7ad36ebb5b62660a78907decca1b63bf3b7591 Mon Sep 17 00:00:00 2001 From: Laurenz Stampfl Date: Mon, 30 Sep 2024 20:01:44 +0200 Subject: [PATCH 29/29] Bump unicode-properties --- Cargo.lock | 4 ++-- Cargo.toml | 2 +- src/hb/unicode.rs | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a85bbeab..35d70c0d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -256,9 +256,9 @@ checksum = "e91b56cd4cadaeb79bbf1a5645f6b4f8dc5bde8834ad5894a8db35fda9efa1fe" [[package]] name = "unicode-properties" -version = "0.1.2" +version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "52ea75f83c0137a9b98608359a5f1af8144876eb67bcb1ce837368e906a9f524" +checksum = "e70f2a8b45122e719eb623c01822704c4e0907e7e426a05927e1a1cfff5b75d0" [[package]] name = "unicode-script" diff --git a/Cargo.toml b/Cargo.toml index 4d395c7c..d13bdbab 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -19,7 +19,7 @@ core_maths = "0.1.0" # only for no_std builds smallvec = "1.6" unicode-bidi-mirroring = "0.4.0" unicode-ccc = "0.4.0" -unicode-properties = { version = "0.1.2", default-features = false, features = ["general-category"] } 
+unicode-properties = { version = "0.1.3", default-features = false, features = ["general-category"] } unicode-script = "0.5.2" wasmi = { version = "0.36.0", optional = true } log = "0.4.22" diff --git a/src/hb/unicode.rs b/src/hb/unicode.rs index 660bbfd0..346e775b 100644 --- a/src/hb/unicode.rs +++ b/src/hb/unicode.rs @@ -842,7 +842,7 @@ mod tests { fn check_unicode_version() { assert_eq!(unicode_bidi_mirroring::UNICODE_VERSION, (16, 0, 0)); assert_eq!(unicode_ccc::UNICODE_VERSION, (16, 0, 0)); - assert_eq!(unicode_properties::UNICODE_VERSION, (15, 1, 0)); + assert_eq!(unicode_properties::UNICODE_VERSION, (16, 0, 0)); assert_eq!(unicode_script::UNICODE_VERSION, (16, 0, 0)); assert_eq!(crate::hb::unicode_norm::UNICODE_VERSION, (16, 0, 0)); }