From bad0531784bc13c255a3f1328a8323f83f58b893 Mon Sep 17 00:00:00 2001 From: Dmitry Atamanov Date: Sun, 6 Oct 2024 06:43:11 +0500 Subject: [PATCH] Update to Unicode 16.0 --- scripts/unicode/CaseFolding.txt | 35 +++- scripts/unicode/DerivedGeneralCategory.txt | 206 +++++++++++++++------ src/md4c.c | 91 ++++----- 3 files changed, 226 insertions(+), 106 deletions(-) diff --git a/scripts/unicode/CaseFolding.txt b/scripts/unicode/CaseFolding.txt index 69c5c64b..1b7a9c15 100644 --- a/scripts/unicode/CaseFolding.txt +++ b/scripts/unicode/CaseFolding.txt @@ -1,8 +1,8 @@ -# CaseFolding-15.1.0.txt -# Date: 2023-05-12, 21:53:10 GMT -# © 2023 Unicode®, Inc. +# CaseFolding-16.0.0.txt +# Date: 2024-04-30, 21:48:11 GMT +# © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. -# For terms of use, see https://www.unicode.org/terms_of_use.html +# For terms of use and license, see https://www.unicode.org/terms_of_use.html # # Unicode Character Database # For documentation, see https://www.unicode.org/reports/tr44/ @@ -603,6 +603,7 @@ 1C86; C; 044A; # CYRILLIC SMALL LETTER TALL HARD SIGN 1C87; C; 0463; # CYRILLIC SMALL LETTER TALL YAT 1C88; C; A64B; # CYRILLIC SMALL LETTER UNBLENDED UK +1C89; C; 1C8A; # CYRILLIC CAPITAL LETTER TJE 1C90; C; 10D0; # GEORGIAN MTAVRULI CAPITAL LETTER AN 1C91; C; 10D1; # GEORGIAN MTAVRULI CAPITAL LETTER BAN 1C92; C; 10D2; # GEORGIAN MTAVRULI CAPITAL LETTER GAN @@ -1240,9 +1241,13 @@ A7C5; C; 0282; # LATIN CAPITAL LETTER S WITH HOOK A7C6; C; 1D8E; # LATIN CAPITAL LETTER Z WITH PALATAL HOOK A7C7; C; A7C8; # LATIN CAPITAL LETTER D WITH SHORT STROKE OVERLAY A7C9; C; A7CA; # LATIN CAPITAL LETTER S WITH SHORT STROKE OVERLAY +A7CB; C; 0264; # LATIN CAPITAL LETTER RAMS HORN +A7CC; C; A7CD; # LATIN CAPITAL LETTER S WITH DIAGONAL STROKE A7D0; C; A7D1; # LATIN CAPITAL LETTER CLOSED INSULAR G A7D6; C; A7D7; # LATIN CAPITAL LETTER MIDDLE SCOTS S A7D8; C; A7D9; # LATIN CAPITAL LETTER SIGMOID S +A7DA; C; A7DB; # LATIN CAPITAL LETTER LAMBDA +A7DC; C; 019B; # LATIN CAPITAL LETTER LAMBDA WITH STROKE A7F5; C; A7F6; # LATIN CAPITAL LETTER REVERSED HALF H AB70; C; 13A0; # CHEROKEE SMALL LETTER A AB71; C; 13A1; # CHEROKEE SMALL LETTER E @@ -1525,6 +1530,28 @@ FF3A; C; FF5A; # FULLWIDTH LATIN CAPITAL LETTER Z 10CB0; C; 10CF0; # OLD HUNGARIAN CAPITAL LETTER EZS 10CB1; C; 10CF1; # OLD HUNGARIAN CAPITAL LETTER ENT-SHAPED SIGN 10CB2; C; 10CF2; # OLD HUNGARIAN CAPITAL LETTER US +10D50; C; 10D70; # GARAY CAPITAL LETTER A +10D51; C; 10D71; # GARAY CAPITAL LETTER CA +10D52; C; 10D72; # GARAY CAPITAL LETTER MA +10D53; C; 10D73; # GARAY CAPITAL LETTER KA +10D54; C; 10D74; # GARAY CAPITAL LETTER BA +10D55; C; 10D75; # GARAY CAPITAL LETTER JA +10D56; C; 10D76; # GARAY CAPITAL LETTER SA +10D57; C; 10D77; # GARAY CAPITAL LETTER WA +10D58; C; 10D78; # GARAY CAPITAL LETTER LA +10D59; C; 10D79; # GARAY CAPITAL LETTER GA +10D5A; C; 10D7A; # GARAY CAPITAL LETTER DA +10D5B; C; 10D7B; # GARAY CAPITAL LETTER XA +10D5C; C; 10D7C; # GARAY CAPITAL LETTER YA +10D5D; C; 10D7D; # GARAY CAPITAL LETTER TA +10D5E; C; 10D7E; # GARAY CAPITAL LETTER RA +10D5F; C; 10D7F; # GARAY CAPITAL LETTER NYA +10D60; C; 10D80; # GARAY CAPITAL LETTER FA +10D61; C; 10D81; # GARAY CAPITAL LETTER NA +10D62; C; 10D82; # GARAY CAPITAL LETTER PA +10D63; C; 10D83; # GARAY CAPITAL LETTER HA +10D64; C; 10D84; # GARAY CAPITAL LETTER OLD KA +10D65; C; 10D85; # GARAY CAPITAL LETTER OLD NA 118A0; C; 118C0; # WARANG CITI CAPITAL LETTER NGAA 118A1; C; 118C1; # WARANG CITI CAPITAL LETTER A 118A2; C; 118C2; # WARANG CITI CAPITAL LETTER WI diff --git a/scripts/unicode/DerivedGeneralCategory.txt b/scripts/unicode/DerivedGeneralCategory.txt index 285ffa8f..07bf7bca 100644 --- a/scripts/unicode/DerivedGeneralCategory.txt +++ b/scripts/unicode/DerivedGeneralCategory.txt @@ -1,8 +1,8 @@ -# DerivedGeneralCategory-15.1.0.txt -# Date: 2023-07-28, 23:34:02 GMT -# © 2023 Unicode®, Inc. +# DerivedGeneralCategory-16.0.0.txt +# Date: 2024-04-30, 21:48:17 GMT +# © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. -# For terms of use, see https://www.unicode.org/terms_of_use.html +# For terms of use and license, see https://www.unicode.org/terms_of_use.html # # Unicode Character Database # For documentation, see https://www.unicode.org/reports/tr44/ @@ -37,7 +37,7 @@ 085F ; Cn # 086B..086F ; Cn # [5] .. 088F ; Cn # -0892..0897 ; Cn # [6] .. +0892..0896 ; Cn # [5] .. 0984 ; Cn # 098D..098E ; Cn # [2] .. 0991..0992 ; Cn # [2] .. @@ -229,12 +229,11 @@ 1A9A..1A9F ; Cn # [6] .. 1AAE..1AAF ; Cn # [2] .. 1ACF..1AFF ; Cn # [49] .. -1B4D..1B4F ; Cn # [3] .. -1B7F ; Cn # +1B4D ; Cn # 1BF4..1BFB ; Cn # [8] .. 1C38..1C3A ; Cn # [3] .. 1C4A..1C4C ; Cn # [3] .. -1C89..1C8F ; Cn # [7] .. +1C8B..1C8F ; Cn # [5] .. 1CBB..1CBC ; Cn # [2] .. 1CC8..1CCF ; Cn # [8] .. 1CFB..1CFF ; Cn # [5] .. @@ -261,7 +260,7 @@ 20C1..20CF ; Cn # [15] .. 20F1..20FF ; Cn # [15] .. 218C..218F ; Cn # [4] .. -2427..243F ; Cn # [25] .. +242A..243F ; Cn # [22] .. 244B..245F ; Cn # [21] .. 2B74..2B75 ; Cn # [2] .. 2B96 ; Cn # @@ -289,16 +288,16 @@ 3100..3104 ; Cn # [5] .. 3130 ; Cn # 318F ; Cn # -31E4..31EE ; Cn # [11] .. +31E6..31EE ; Cn # [9] .. 321F ; Cn # A48D..A48F ; Cn # [3] .. A4C7..A4CF ; Cn # [9] .. A62C..A63F ; Cn # [20] .. A6F8..A6FF ; Cn # [8] .. -A7CB..A7CF ; Cn # [5] .. +A7CE..A7CF ; Cn # [2] .. A7D2 ; Cn # A7D4 ; Cn # -A7DA..A7F1 ; Cn # [24] .. +A7DD..A7F1 ; Cn # [21] .. A82D..A82F ; Cn # [3] .. A83A..A83F ; Cn # [6] .. A878..A87F ; Cn # [8] .. @@ -388,7 +387,8 @@ FFFE..FFFF ; Cn # [2] .. 105A2 ; Cn # 105B2 ; Cn # 105BA ; Cn # -105BD..105FF ; Cn # [67] .. +105BD..105BF ; Cn # [3] .. +105F4..105FF ; Cn # [12] .. 10737..1073F ; Cn # [9] .. 10756..1075F ; Cn # [10] .. 10768..1077F ; Cn # [24] .. @@ -431,11 +431,15 @@ FFFE..FFFF ; Cn # [2] .. 10CB3..10CBF ; Cn # [13] .. 10CF3..10CF9 ; Cn # [7] .. 10D28..10D2F ; Cn # [8] .. -10D3A..10E5F ; Cn # [294] .. +10D3A..10D3F ; Cn # [6] .. +10D66..10D68 ; Cn # [3] .. +10D86..10D8D ; Cn # [8] .. +10D90..10E5F ; Cn # [208] .. 10E7F ; Cn # 10EAA ; Cn # 10EAE..10EAF ; Cn # [2] .. -10EB2..10EFC ; Cn # [75] .. +10EB2..10EC1 ; Cn # [16] .. +10EC5..10EFB ; Cn # [55] .. 10F28..10F2F ; Cn # [8] .. 10F5A..10F6F ; Cn # [22] .. 10F8A..10FAF ; Cn # [38] .. @@ -475,7 +479,18 @@ FFFE..FFFF ; Cn # [2] .. 11358..1135C ; Cn # [5] .. 11364..11365 ; Cn # [2] .. 1136D..1136F ; Cn # [3] .. -11375..113FF ; Cn # [139] .. +11375..1137F ; Cn # [11] .. +1138A ; Cn # +1138C..1138D ; Cn # [2] .. +1138F ; Cn # +113B6 ; Cn # +113C1 ; Cn # +113C3..113C4 ; Cn # [2] .. +113C6 ; Cn # +113CB ; Cn # +113D6 ; Cn # +113D9..113E0 ; Cn # [8] .. +113E3..113FF ; Cn # [29] .. 1145C ; Cn # 11462..1147F ; Cn # [30] .. 114C8..114CF ; Cn # [8] .. @@ -486,7 +501,8 @@ FFFE..FFFF ; Cn # [2] .. 1165A..1165F ; Cn # [6] .. 1166D..1167F ; Cn # [19] .. 116BA..116BF ; Cn # [6] .. -116CA..116FF ; Cn # [54] .. +116CA..116CF ; Cn # [6] .. +116E4..116FF ; Cn # [28] .. 1171B..1171C ; Cn # [2] .. 1172C..1172F ; Cn # [4] .. 11747..117FF ; Cn # [185] .. @@ -506,7 +522,9 @@ FFFE..FFFF ; Cn # [2] .. 11A48..11A4F ; Cn # [8] .. 11AA3..11AAF ; Cn # [13] .. 11AF9..11AFF ; Cn # [7] .. -11B0A..11BFF ; Cn # [246] .. +11B0A..11BBF ; Cn # [182] .. +11BE2..11BEF ; Cn # [14] .. +11BFA..11BFF ; Cn # [6] .. 11C09 ; Cn # 11C37 ; Cn # 11C46..11C4F ; Cn # [10] .. @@ -530,7 +548,7 @@ FFFE..FFFF ; Cn # [2] .. 11EF9..11EFF ; Cn # [7] .. 11F11 ; Cn # 11F3B..11F3D ; Cn # [3] .. -11F5A..11FAF ; Cn # [86] .. +11F5B..11FAF ; Cn # [85] .. 11FB1..11FBF ; Cn # [15] .. 11FF2..11FFE ; Cn # [13] .. 1239A..123FF ; Cn # [102] .. @@ -538,8 +556,10 @@ FFFE..FFFF ; Cn # [2] .. 12475..1247F ; Cn # [11] .. 12544..12F8F ; Cn # [2636] .. 12FF3..12FFF ; Cn # [13] .. -13456..143FF ; Cn # [4010] .. -14647..167FF ; Cn # [8633] .. +13456..1345F ; Cn # [10] .. +143FB..143FF ; Cn # [5] .. +14647..160FF ; Cn # [6841] .. +1613A..167FF ; Cn # [1734] .. 16A39..16A3F ; Cn # [7] .. 16A5F ; Cn # 16A6A..16A6D ; Cn # [4] .. @@ -551,7 +571,8 @@ FFFE..FFFF ; Cn # [2] .. 16B5A ; Cn # 16B62 ; Cn # 16B78..16B7C ; Cn # [5] .. -16B90..16E3F ; Cn # [688] .. +16B90..16D3F ; Cn # [432] .. +16D7A..16E3F ; Cn # [198] .. 16E9B..16EFF ; Cn # [101] .. 16F4B..16F4E ; Cn # [4] .. 16F88..16F8E ; Cn # [7] .. @@ -559,7 +580,7 @@ FFFE..FFFF ; Cn # [2] .. 16FE5..16FEF ; Cn # [11] .. 16FF2..16FFF ; Cn # [14] .. 187F8..187FF ; Cn # [8] .. -18CD6..18CFF ; Cn # [42] .. +18CD6..18CFE ; Cn # [41] .. 18D09..1AFEF ; Cn # [8935] .. 1AFF4 ; Cn # 1AFFC ; Cn # @@ -574,7 +595,9 @@ FFFE..FFFF ; Cn # [2] .. 1BC7D..1BC7F ; Cn # [3] .. 1BC89..1BC8F ; Cn # [7] .. 1BC9A..1BC9B ; Cn # [2] .. -1BCA4..1CEFF ; Cn # [4700] .. +1BCA4..1CBFF ; Cn # [3932] .. +1CCFA..1CCFF ; Cn # [6] .. +1CEB4..1CEFF ; Cn # [76] .. 1CF2E..1CF2F ; Cn # [2] .. 1CF47..1CF4F ; Cn # [9] .. 1CFC4..1CFFF ; Cn # [60] .. @@ -625,7 +648,9 @@ FFFE..FFFF ; Cn # [2] .. 1E2AF..1E2BF ; Cn # [17] .. 1E2FA..1E2FE ; Cn # [5] .. 1E300..1E4CF ; Cn # [464] .. -1E4FA..1E7DF ; Cn # [742] .. +1E4FA..1E5CF ; Cn # [214] .. +1E5FB..1E5FE ; Cn # [4] .. +1E600..1E7DF ; Cn # [480] .. 1E7E7 ; Cn # 1E7EC ; Cn # 1E7EF ; Cn # @@ -695,18 +720,17 @@ FFFE..FFFF ; Cn # [2] .. 1F85A..1F85F ; Cn # [6] .. 1F888..1F88F ; Cn # [8] .. 1F8AE..1F8AF ; Cn # [2] .. -1F8B2..1F8FF ; Cn # [78] .. +1F8BC..1F8BF ; Cn # [4] .. +1F8C2..1F8FF ; Cn # [62] .. 1FA54..1FA5F ; Cn # [12] .. 1FA6E..1FA6F ; Cn # [2] .. 1FA7D..1FA7F ; Cn # [3] .. -1FA89..1FA8F ; Cn # [7] .. -1FABE ; Cn # -1FAC6..1FACD ; Cn # [8] .. -1FADC..1FADF ; Cn # [4] .. -1FAE9..1FAEF ; Cn # [7] .. +1FA8A..1FA8E ; Cn # [5] .. +1FAC7..1FACD ; Cn # [7] .. +1FADD..1FADE ; Cn # [2] .. +1FAEA..1FAEF ; Cn # [6] .. 1FAF9..1FAFF ; Cn # [7] .. 1FB93 ; Cn # -1FBCB..1FBEF ; Cn # [37] .. 1FBFA..1FFFF ; Cn # [1030] .. 2A6E0..2A6FF ; Cn # [32] .. 2B73A..2B73F ; Cn # [6] .. @@ -723,7 +747,7 @@ E01F0..EFFFF ; Cn # [65040] .. FFFFE..FFFFF ; Cn # [2] .. 10FFFE..10FFFF; Cn # [2] .. -# Total code points: 824718 +# Total code points: 819533 # ================================================ @@ -1005,6 +1029,7 @@ FFFFE..FFFFF ; Cn # [2] .. 10C7 ; Lu # GEORGIAN CAPITAL LETTER YN 10CD ; Lu # GEORGIAN CAPITAL LETTER AEN 13A0..13F5 ; Lu # [86] CHEROKEE LETTER A..CHEROKEE LETTER MV +1C89 ; Lu # CYRILLIC CAPITAL LETTER TJE 1C90..1CBA ; Lu # [43] GEORGIAN MTAVRULI CAPITAL LETTER AN..GEORGIAN MTAVRULI CAPITAL LETTER AIN 1CBD..1CBF ; Lu # [3] GEORGIAN MTAVRULI CAPITAL LETTER AEN..GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN 1E00 ; Lu # LATIN CAPITAL LETTER A WITH RING BELOW @@ -1329,9 +1354,12 @@ A7C0 ; Lu # LATIN CAPITAL LETTER OLD POLISH O A7C2 ; Lu # LATIN CAPITAL LETTER ANGLICANA W A7C4..A7C7 ; Lu # [4] LATIN CAPITAL LETTER C WITH PALATAL HOOK..LATIN CAPITAL LETTER D WITH SHORT STROKE OVERLAY A7C9 ; Lu # LATIN CAPITAL LETTER S WITH SHORT STROKE OVERLAY +A7CB..A7CC ; Lu # [2] LATIN CAPITAL LETTER RAMS HORN..LATIN CAPITAL LETTER S WITH DIAGONAL STROKE A7D0 ; Lu # LATIN CAPITAL LETTER CLOSED INSULAR G A7D6 ; Lu # LATIN CAPITAL LETTER MIDDLE SCOTS S A7D8 ; Lu # LATIN CAPITAL LETTER SIGMOID S +A7DA ; Lu # LATIN CAPITAL LETTER LAMBDA +A7DC ; Lu # LATIN CAPITAL LETTER LAMBDA WITH STROKE A7F5 ; Lu # LATIN CAPITAL LETTER REVERSED HALF H FF21..FF3A ; Lu # [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z 10400..10427 ; Lu # [40] DESERET CAPITAL LETTER LONG I..DESERET CAPITAL LETTER EW @@ -1341,6 +1369,7 @@ FF21..FF3A ; Lu # [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAP 1058C..10592 ; Lu # [7] VITHKUQI CAPITAL LETTER SE..VITHKUQI CAPITAL LETTER XE 10594..10595 ; Lu # [2] VITHKUQI CAPITAL LETTER Y..VITHKUQI CAPITAL LETTER ZE 10C80..10CB2 ; Lu # [51] OLD HUNGARIAN CAPITAL LETTER A..OLD HUNGARIAN CAPITAL LETTER US +10D50..10D65 ; Lu # [22] GARAY CAPITAL LETTER A..GARAY CAPITAL LETTER OLD NA 118A0..118BF ; Lu # [32] WARANG CITI CAPITAL LETTER NGAA..WARANG CITI CAPITAL LETTER VIYO 16E40..16E5F ; Lu # [32] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN CAPITAL LETTER Y 1D400..1D419 ; Lu # [26] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL BOLD CAPITAL Z @@ -1376,7 +1405,7 @@ FF21..FF3A ; Lu # [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAP 1D7CA ; Lu # MATHEMATICAL BOLD CAPITAL DIGAMMA 1E900..1E921 ; Lu # [34] ADLAM CAPITAL LETTER ALIF..ADLAM CAPITAL LETTER SHA -# Total code points: 1831 +# Total code points: 1858 # ================================================ @@ -1656,6 +1685,7 @@ FF21..FF3A ; Lu # [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAP 10FD..10FF ; Ll # [3] GEORGIAN LETTER AEN..GEORGIAN LETTER LABIAL SIGN 13F8..13FD ; Ll # [6] CHEROKEE SMALL LETTER YE..CHEROKEE SMALL LETTER MV 1C80..1C88 ; Ll # [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK +1C8A ; Ll # CYRILLIC SMALL LETTER TJE 1D00..1D2B ; Ll # [44] LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTER SMALL CAPITAL EL 1D6B..1D77 ; Ll # [13] LATIN SMALL LETTER UE..LATIN SMALL LETTER TURNED G 1D79..1D9A ; Ll # [34] LATIN SMALL LETTER INSULAR G..LATIN SMALL LETTER EZH WITH RETROFLEX HOOK @@ -1986,11 +2016,13 @@ A7C1 ; Ll # LATIN SMALL LETTER OLD POLISH O A7C3 ; Ll # LATIN SMALL LETTER ANGLICANA W A7C8 ; Ll # LATIN SMALL LETTER D WITH SHORT STROKE OVERLAY A7CA ; Ll # LATIN SMALL LETTER S WITH SHORT STROKE OVERLAY +A7CD ; Ll # LATIN SMALL LETTER S WITH DIAGONAL STROKE A7D1 ; Ll # LATIN SMALL LETTER CLOSED INSULAR G A7D3 ; Ll # LATIN SMALL LETTER DOUBLE THORN A7D5 ; Ll # LATIN SMALL LETTER DOUBLE WYNN A7D7 ; Ll # LATIN SMALL LETTER MIDDLE SCOTS S A7D9 ; Ll # LATIN SMALL LETTER SIGMOID S +A7DB ; Ll # LATIN SMALL LETTER LAMBDA A7F6 ; Ll # LATIN SMALL LETTER REVERSED HALF H A7FA ; Ll # LATIN LETTER SMALL CAPITAL TURNED M AB30..AB5A ; Ll # [43] LATIN SMALL LETTER BARRED ALPHA..LATIN SMALL LETTER Y WITH SHORT RIGHT LEG @@ -2006,6 +2038,7 @@ FF41..FF5A ; Ll # [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL 105B3..105B9 ; Ll # [7] VITHKUQI SMALL LETTER SE..VITHKUQI SMALL LETTER XE 105BB..105BC ; Ll # [2] VITHKUQI SMALL LETTER Y..VITHKUQI SMALL LETTER ZE 10CC0..10CF2 ; Ll # [51] OLD HUNGARIAN SMALL LETTER A..OLD HUNGARIAN SMALL LETTER US +10D70..10D85 ; Ll # [22] GARAY SMALL LETTER A..GARAY SMALL LETTER OLD NA 118C0..118DF ; Ll # [32] WARANG CITI SMALL LETTER NGAA..WARANG CITI SMALL LETTER VIYO 16E60..16E7F ; Ll # [32] MEDEFAIDRIN SMALL LETTER M..MEDEFAIDRIN SMALL LETTER Y 1D41A..1D433 ; Ll # [26] MATHEMATICAL BOLD SMALL A..MATHEMATICAL BOLD SMALL Z @@ -2041,7 +2074,7 @@ FF41..FF5A ; Ll # [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL 1DF25..1DF2A ; Ll # [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK 1E922..1E943 ; Ll # [34] ADLAM SMALL LETTER ALIF..ADLAM SMALL LETTER SHA -# Total code points: 2233 +# Total code points: 2258 # ================================================ @@ -2124,7 +2157,11 @@ FF9E..FF9F ; Lm # [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAK 10780..10785 ; Lm # [6] MODIFIER LETTER SMALL CAPITAL AA..MODIFIER LETTER SMALL B WITH HOOK 10787..107B0 ; Lm # [42] MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK 107B2..107BA ; Lm # [9] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL +10D4E ; Lm # GARAY VOWEL LENGTH MARK +10D6F ; Lm # GARAY REDUPLICATION MARK 16B40..16B43 ; Lm # [4] PAHAWH HMONG SIGN VOS SEEV..PAHAWH HMONG SIGN IB YAM +16D40..16D42 ; Lm # [3] KIRAT RAI SIGN ANUSVARA..KIRAT RAI SIGN VISARGA +16D6B..16D6C ; Lm # [2] KIRAT RAI SIGN VIRAMA..KIRAT RAI SIGN SAAT 16F93..16F9F ; Lm # [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 16FE0..16FE1 ; Lm # [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK 16FE3 ; Lm # OLD CHINESE ITERATION MARK @@ -2136,7 +2173,7 @@ FF9E..FF9F ; Lm # [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAK 1E4EB ; Lm # NAG MUNDARI SIGN OJOD 1E94B ; Lm # ADLAM NASALIZATION MARK -# Total code points: 397 +# Total code points: 404 # ================================================ @@ -2451,6 +2488,7 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 10450..1049D ; Lo # [78] SHAVIAN LETTER PEEP..OSMANYA LETTER OO 10500..10527 ; Lo # [40] ELBASAN LETTER A..ELBASAN LETTER KHE 10530..10563 ; Lo # [52] CAUCASIAN ALBANIAN LETTER ALT..CAUCASIAN ALBANIAN LETTER KIW +105C0..105F3 ; Lo # [52] TODHRI LETTER A..TODHRI LETTER OO 10600..10736 ; Lo # [311] LINEAR A SIGN AB001..LINEAR A SIGN A664 10740..10755 ; Lo # [22] LINEAR A SIGN A701 A..LINEAR A SIGN A732 JE 10760..10767 ; Lo # [8] LINEAR A SIGN A800..LINEAR A SIGN A807 @@ -2482,8 +2520,11 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 10B80..10B91 ; Lo # [18] PSALTER PAHLAVI LETTER ALEPH..PSALTER PAHLAVI LETTER TAW 10C00..10C48 ; Lo # [73] OLD TURKIC LETTER ORKHON A..OLD TURKIC LETTER ORKHON BASH 10D00..10D23 ; Lo # [36] HANIFI ROHINGYA LETTER A..HANIFI ROHINGYA MARK NA KHONNA +10D4A..10D4D ; Lo # [4] GARAY VOWEL SIGN A..GARAY VOWEL SIGN EE +10D4F ; Lo # GARAY SUKUN 10E80..10EA9 ; Lo # [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET 10EB0..10EB1 ; Lo # [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE +10EC2..10EC4 ; Lo # [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW 10F00..10F1C ; Lo # [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F27 ; Lo # OLD SOGDIAN LIGATURE AYIN-DALETH 10F30..10F45 ; Lo # [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN @@ -2522,6 +2563,13 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 1133D ; Lo # GRANTHA SIGN AVAGRAHA 11350 ; Lo # GRANTHA OM 1135D..11361 ; Lo # [5] GRANTHA SIGN PLUTA..GRANTHA LETTER VOCALIC LL +11380..11389 ; Lo # [10] TULU-TIGALARI LETTER A..TULU-TIGALARI LETTER VOCALIC LL +1138B ; Lo # TULU-TIGALARI LETTER EE +1138E ; Lo # TULU-TIGALARI LETTER AI +11390..113B5 ; Lo # [38] TULU-TIGALARI LETTER OO..TULU-TIGALARI LETTER LLLA +113B7 ; Lo # TULU-TIGALARI SIGN AVAGRAHA +113D1 ; Lo # TULU-TIGALARI REPHA +113D3 ; Lo # TULU-TIGALARI SIGN PLUTA 11400..11434 ; Lo # [53] NEWA LETTER A..NEWA LETTER HA 11447..1144A ; Lo # [4] NEWA SIGN AVAGRAHA..NEWA SIDDHI 1145F..11461 ; Lo # [3] NEWA LETTER VEDIC ANUSVARA..NEWA SIGN UPADHMANIYA @@ -2555,6 +2603,7 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 11A5C..11A89 ; Lo # [46] SOYOMBO LETTER KA..SOYOMBO CLUSTER-INITIAL LETTER SA 11A9D ; Lo # SOYOMBO MARK PLUTA 11AB0..11AF8 ; Lo # [73] CANADIAN SYLLABICS NATTILIK HI..PAU CIN HAU GLOTTAL STOP FINAL +11BC0..11BE0 ; Lo # [33] SUNUWAR LETTER DEVI..SUNUWAR LETTER KLOKO 11C00..11C08 ; Lo # [9] BHAIKSUKI LETTER A..BHAIKSUKI LETTER VOCALIC L 11C0A..11C2E ; Lo # [37] BHAIKSUKI LETTER E..BHAIKSUKI LETTER HA 11C40 ; Lo # BHAIKSUKI SIGN AVAGRAHA @@ -2577,7 +2626,9 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 12F90..12FF0 ; Lo # [97] CYPRO-MINOAN SIGN CM001..CYPRO-MINOAN SIGN CM114 13000..1342F ; Lo # [1072] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH V011D 13441..13446 ; Lo # [6] EGYPTIAN HIEROGLYPH FULL BLANK..EGYPTIAN HIEROGLYPH WIDE LOST SIGN +13460..143FA ; Lo # [3995] EGYPTIAN HIEROGLYPH-13460..EGYPTIAN HIEROGLYPH-143FA 14400..14646 ; Lo # [583] ANATOLIAN HIEROGLYPH A001..ANATOLIAN HIEROGLYPH A530 +16100..1611D ; Lo # [30] GURUNG KHEMA LETTER A..GURUNG KHEMA LETTER SA 16800..16A38 ; Lo # [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ 16A40..16A5E ; Lo # [31] MRO LETTER TA..MRO LETTER TEK 16A70..16ABE ; Lo # [79] TANGSA LETTER OZ..TANGSA LETTER ZA @@ -2585,11 +2636,12 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 16B00..16B2F ; Lo # [48] PAHAWH HMONG VOWEL KEEB..PAHAWH HMONG CONSONANT CAU 16B63..16B77 ; Lo # [21] PAHAWH HMONG SIGN VOS LUB..PAHAWH HMONG SIGN CIM NRES TOS 16B7D..16B8F ; Lo # [19] PAHAWH HMONG CLAN SIGN TSHEEJ..PAHAWH HMONG CLAN SIGN VWJ +16D43..16D6A ; Lo # [40] KIRAT RAI LETTER A..KIRAT RAI VOWEL SIGN AU 16F00..16F4A ; Lo # [75] MIAO LETTER PA..MIAO LETTER RTE 16F50 ; Lo # MIAO LETTER NASALIZATION 17000..187F7 ; Lo # [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 18800..18CD5 ; Lo # [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18D00..18D08 ; Lo # [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08 +18CFF..18D08 ; Lo # [10] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D08 1B000..1B122 ; Lo # [291] KATAKANA LETTER ARCHAIC E..KATAKANA LETTER ARCHAIC WU 1B132 ; Lo # HIRAGANA LETTER SMALL KO 1B150..1B152 ; Lo # [3] HIRAGANA LETTER SMALL WI..HIRAGANA LETTER SMALL WO @@ -2606,6 +2658,8 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 1E290..1E2AD ; Lo # [30] TOTO LETTER PA..TOTO LETTER A 1E2C0..1E2EB ; Lo # [44] WANCHO LETTER AA..WANCHO LETTER YIH 1E4D0..1E4EA ; Lo # [27] NAG MUNDARI LETTER O..NAG MUNDARI LETTER ELL +1E5D0..1E5ED ; Lo # [30] OL ONAL LETTER O..OL ONAL LETTER EG +1E5F0 ; Lo # OL ONAL SIGN HODDOND 1E7E0..1E7E6 ; Lo # [7] ETHIOPIC SYLLABLE HHYA..ETHIOPIC SYLLABLE HHYO 1E7E8..1E7EB ; Lo # [4] ETHIOPIC SYLLABLE GURAGE HHWA..ETHIOPIC SYLLABLE HHWE 1E7ED..1E7EE ; Lo # [2] ETHIOPIC SYLLABLE GURAGE MWI..ETHIOPIC SYLLABLE GURAGE MWEE @@ -2654,7 +2708,7 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 30000..3134A ; Lo # [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Lo # [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 132234 +# Total code points: 136477 # ================================================ @@ -2684,7 +2738,7 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 0825..0827 ; Mn # [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U 0829..082D ; Mn # [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA 0859..085B ; Mn # [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK -0898..089F ; Mn # [8] ARABIC SMALL HIGH WORD AL-JUZ..ARABIC HALF MADDA OVER MADDA +0897..089F ; Mn # [9] ARABIC PEPET..ARABIC HALF MADDA OVER MADDA 08CA..08E1 ; Mn # [24] ARABIC SMALL HIGH FARSI YEH..ARABIC SMALL HIGH SIGN SAFHA 08E3..0902 ; Mn # [32] ARABIC TURNED DAMMA BELOW..DEVANAGARI SIGN ANUSVARA 093A ; Mn # DEVANAGARI VOWEL SIGN OE @@ -2882,8 +2936,9 @@ FE20..FE2F ; Mn # [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITL 10A3F ; Mn # KHAROSHTHI VIRAMA 10AE5..10AE6 ; Mn # [2] MANICHAEAN ABBREVIATION MARK ABOVE..MANICHAEAN ABBREVIATION MARK BELOW 10D24..10D27 ; Mn # [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI +10D69..10D6D ; Mn # [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK 10EAB..10EAC ; Mn # [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK -10EFD..10EFF ; Mn # [3] ARABIC SMALL LOW WORD SAKTA..ARABIC SMALL LOW WORD MADDA +10EFC..10EFF ; Mn # [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA 10F46..10F50 ; Mn # [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW 10F82..10F85 ; Mn # [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW 11001 ; Mn # BRAHMI SIGN ANUSVARA @@ -2914,6 +2969,11 @@ FE20..FE2F ; Mn # [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITL 11340 ; Mn # GRANTHA VOWEL SIGN II 11366..1136C ; Mn # [7] COMBINING GRANTHA DIGIT ZERO..COMBINING GRANTHA DIGIT SIX 11370..11374 ; Mn # [5] COMBINING GRANTHA LETTER A..COMBINING GRANTHA LETTER PA +113BB..113C0 ; Mn # [6] TULU-TIGALARI VOWEL SIGN U..TULU-TIGALARI VOWEL SIGN VOCALIC LL +113CE ; Mn # TULU-TIGALARI SIGN VIRAMA +113D0 ; Mn # TULU-TIGALARI CONJOINER +113D2 ; Mn # TULU-TIGALARI GEMINATION MARK +113E1..113E2 ; Mn # [2] TULU-TIGALARI VEDIC TONE SVARITA..TULU-TIGALARI VEDIC TONE ANUDATTA 11438..1143F ; Mn # [8] NEWA VOWEL SIGN U..NEWA VOWEL SIGN AI 11442..11444 ; Mn # [3] NEWA SIGN VIRAMA..NEWA SIGN ANUSVARA 11446 ; Mn # NEWA SIGN NUKTA @@ -2933,7 +2993,8 @@ FE20..FE2F ; Mn # [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITL 116AD ; Mn # TAKRI VOWEL SIGN AA 116B0..116B5 ; Mn # [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU 116B7 ; Mn # TAKRI SIGN NUKTA -1171D..1171F ; Mn # [3] AHOM CONSONANT SIGN MEDIAL LA..AHOM CONSONANT SIGN MEDIAL LIGATING RA +1171D ; Mn # AHOM CONSONANT SIGN MEDIAL LA +1171F ; Mn # AHOM CONSONANT SIGN MEDIAL LIGATING RA 11722..11725 ; Mn # [4] AHOM VOWEL SIGN I..AHOM VOWEL SIGN UU 11727..1172B ; Mn # [5] AHOM VOWEL SIGN AW..AHOM SIGN KILLER 1182F..11837 ; Mn # [9] DOGRA VOWEL SIGN U..DOGRA SIGN ANUSVARA @@ -2972,8 +3033,11 @@ FE20..FE2F ; Mn # [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITL 11F36..11F3A ; Mn # [5] KAWI VOWEL SIGN I..KAWI VOWEL SIGN VOCALIC R 11F40 ; Mn # KAWI VOWEL SIGN EU 11F42 ; Mn # KAWI CONJOINER +11F5A ; Mn # KAWI SIGN NUKTA 13440 ; Mn # EGYPTIAN HIEROGLYPH MIRROR HORIZONTALLY 13447..13455 ; Mn # [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED +1611E..16129 ; Mn # [12] GURUNG KHEMA VOWEL SIGN AA..GURUNG KHEMA VOWEL LENGTH MARK +1612D..1612F ; Mn # [3] GURUNG KHEMA SIGN ANUSVARA..GURUNG KHEMA SIGN THOLHOMA 16AF0..16AF4 ; Mn # [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE 16B30..16B36 ; Mn # [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM 16F4F ; Mn # MIAO SIGN CONSONANT MODIFIER BAR @@ -3003,11 +3067,12 @@ FE20..FE2F ; Mn # [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITL 1E2AE ; Mn # TOTO SIGN RISING TONE 1E2EC..1E2EF ; Mn # [4] WANCHO TONE TUP..WANCHO TONE KOINI 1E4EC..1E4EF ; Mn # [4] NAG MUNDARI SIGN MUHOR..NAG MUNDARI SIGN SUTUH +1E5EE..1E5EF ; Mn # [2] OL ONAL SIGN MU..OL ONAL SIGN IKIR 1E8D0..1E8D6 ; Mn # [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS 1E944..1E94A ; Mn # [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA E0100..E01EF ; Mn # [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 1985 +# Total code points: 2020 # ================================================ @@ -3159,6 +3224,12 @@ ABEC ; Mc # MEETEI MAYEK LUM IYEK 1134B..1134D ; Mc # [3] GRANTHA VOWEL SIGN OO..GRANTHA SIGN VIRAMA 11357 ; Mc # GRANTHA AU LENGTH MARK 11362..11363 ; Mc # [2] GRANTHA VOWEL SIGN VOCALIC L..GRANTHA VOWEL SIGN VOCALIC LL +113B8..113BA ; Mc # [3] TULU-TIGALARI VOWEL SIGN AA..TULU-TIGALARI VOWEL SIGN II +113C2 ; Mc # TULU-TIGALARI VOWEL SIGN EE +113C5 ; Mc # TULU-TIGALARI VOWEL SIGN AI +113C7..113CA ; Mc # [4] TULU-TIGALARI VOWEL SIGN OO..TULU-TIGALARI SIGN CANDRA ANUNASIKA +113CC..113CD ; Mc # [2] TULU-TIGALARI SIGN ANUSVARA..TULU-TIGALARI SIGN VISARGA +113CF ; Mc # TULU-TIGALARI SIGN LOOPED VIRAMA 11435..11437 ; Mc # [3] NEWA VOWEL SIGN AA..NEWA VOWEL SIGN II 11440..11441 ; Mc # [2] NEWA VOWEL SIGN O..NEWA VOWEL SIGN AU 11445 ; Mc # NEWA SIGN VISARGA @@ -3175,6 +3246,7 @@ ABEC ; Mc # MEETEI MAYEK LUM IYEK 116AC ; Mc # TAKRI SIGN VISARGA 116AE..116AF ; Mc # [2] TAKRI VOWEL SIGN I..TAKRI VOWEL SIGN II 116B6 ; Mc # TAKRI SIGN VIRAMA +1171E ; Mc # AHOM CONSONANT SIGN MEDIAL RA 11720..11721 ; Mc # [2] AHOM VOWEL SIGN A..AHOM VOWEL SIGN AA 11726 ; Mc # AHOM VOWEL SIGN E 1182C..1182E ; Mc # [3] DOGRA VOWEL SIGN AA..DOGRA VOWEL SIGN II @@ -3203,12 +3275,13 @@ ABEC ; Mc # MEETEI MAYEK LUM IYEK 11F34..11F35 ; Mc # [2] KAWI VOWEL SIGN AA..KAWI VOWEL SIGN ALTERNATE AA 11F3E..11F3F ; Mc # [2] KAWI VOWEL SIGN E..KAWI VOWEL SIGN AI 11F41 ; Mc # KAWI SIGN KILLER +1612A..1612C ; Mc # [3] GURUNG KHEMA CONSONANT SIGN MEDIAL YA..GURUNG KHEMA CONSONANT SIGN MEDIAL HA 16F51..16F87 ; Mc # [55] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN UI 16FF0..16FF1 ; Mc # [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY 1D165..1D166 ; Mc # [2] MUSICAL SYMBOL COMBINING STEM..MUSICAL SYMBOL COMBINING SPRECHGESANG STEM 1D16D..1D172 ; Mc # [6] MUSICAL SYMBOL COMBINING AUGMENTATION DOT..MUSICAL SYMBOL COMBINING FLAG-5 -# Total code points: 452 +# Total code points: 468 # ================================================ @@ -3253,6 +3326,7 @@ ABF0..ABF9 ; Nd # [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE FF10..FF19 ; Nd # [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE 104A0..104A9 ; Nd # [10] OSMANYA DIGIT ZERO..OSMANYA DIGIT NINE 10D30..10D39 ; Nd # [10] HANIFI ROHINGYA DIGIT ZERO..HANIFI ROHINGYA DIGIT NINE +10D40..10D49 ; Nd # [10] GARAY DIGIT ZERO..GARAY DIGIT NINE 11066..1106F ; Nd # [10] BRAHMI DIGIT ZERO..BRAHMI DIGIT NINE 110F0..110F9 ; Nd # [10] SORA SOMPENG DIGIT ZERO..SORA SOMPENG DIGIT NINE 11136..1113F ; Nd # [10] CHAKMA DIGIT ZERO..CHAKMA DIGIT NINE @@ -3262,24 +3336,30 @@ FF10..FF19 ; Nd # [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE 114D0..114D9 ; Nd # [10] TIRHUTA DIGIT ZERO..TIRHUTA DIGIT NINE 11650..11659 ; Nd # [10] MODI DIGIT ZERO..MODI DIGIT NINE 116C0..116C9 ; Nd # [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE +116D0..116E3 ; Nd # [20] MYANMAR PAO DIGIT ZERO..MYANMAR EASTERN PWO KAREN DIGIT NINE 11730..11739 ; Nd # [10] AHOM DIGIT ZERO..AHOM DIGIT NINE 118E0..118E9 ; Nd # [10] WARANG CITI DIGIT ZERO..WARANG CITI DIGIT NINE 11950..11959 ; Nd # [10] DIVES AKURU DIGIT ZERO..DIVES AKURU DIGIT NINE +11BF0..11BF9 ; Nd # [10] SUNUWAR DIGIT ZERO..SUNUWAR DIGIT NINE 11C50..11C59 ; Nd # [10] BHAIKSUKI DIGIT ZERO..BHAIKSUKI DIGIT NINE 11D50..11D59 ; Nd # [10] MASARAM GONDI DIGIT ZERO..MASARAM GONDI DIGIT NINE 11DA0..11DA9 ; Nd # [10] GUNJALA GONDI DIGIT ZERO..GUNJALA GONDI DIGIT NINE 11F50..11F59 ; Nd # [10] KAWI DIGIT ZERO..KAWI DIGIT NINE +16130..16139 ; Nd # [10] GURUNG KHEMA DIGIT ZERO..GURUNG KHEMA DIGIT NINE 16A60..16A69 ; Nd # [10] MRO DIGIT ZERO..MRO DIGIT NINE 16AC0..16AC9 ; Nd # [10] TANGSA DIGIT ZERO..TANGSA DIGIT NINE 16B50..16B59 ; Nd # [10] PAHAWH HMONG DIGIT ZERO..PAHAWH HMONG DIGIT NINE +16D70..16D79 ; Nd # [10] KIRAT RAI DIGIT ZERO..KIRAT RAI DIGIT NINE +1CCF0..1CCF9 ; Nd # [10] OUTLINED DIGIT ZERO..OUTLINED DIGIT NINE 1D7CE..1D7FF ; Nd # [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE 1E140..1E149 ; Nd # [10] NYIAKENG PUACHUE HMONG DIGIT ZERO..NYIAKENG PUACHUE HMONG DIGIT NINE 1E2F0..1E2F9 ; Nd # [10] WANCHO DIGIT ZERO..WANCHO DIGIT NINE 1E4F0..1E4F9 ; Nd # [10] NAG MUNDARI DIGIT ZERO..NAG MUNDARI DIGIT NINE +1E5F1..1E5FA ; Nd # [10] OL ONAL DIGIT ZERO..OL ONAL DIGIT NINE 1E950..1E959 ; Nd # [10] ADLAM DIGIT ZERO..ADLAM DIGIT NINE 1FBF0..1FBF9 ; Nd # [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE -# Total code points: 680 +# Total code points: 760 # ================================================ @@ -3486,9 +3566,10 @@ FE31..FE32 ; Pd # [2] PRESENTATION FORM FOR VERTICAL EM DASH..PRESENTATION FE58 ; Pd # SMALL EM DASH FE63 ; Pd # SMALL HYPHEN-MINUS FF0D ; Pd # FULLWIDTH HYPHEN-MINUS +10D6E ; Pd # GARAY HYPHEN 10EAD ; Pd # YEZIDI HYPHENATION MARK -# Total code points: 26 +# Total code points: 27 # ================================================ @@ -3735,8 +3816,9 @@ FF3F ; Pc # FULLWIDTH LOW LINE 1A1E..1A1F ; Po # [2] BUGINESE PALLAWA..BUGINESE END OF SECTION 1AA0..1AA6 ; Po # [7] TAI THAM SIGN WIANG..TAI THAM SIGN REVERSED ROTATED RANA 1AA8..1AAD ; Po # [6] TAI THAM SIGN KAAN..TAI THAM SIGN CAANG +1B4E..1B4F ; Po # [2] BALINESE INVERTED CARIK SIKI..BALINESE INVERTED CARIK PAREREN 1B5A..1B60 ; Po # [7] BALINESE PANTI..BALINESE PAMENENG -1B7D..1B7E ; Po # [2] BALINESE PANTI LANTANG..BALINESE PAMADA LANTANG +1B7D..1B7F ; Po # [3] BALINESE PANTI LANTANG..BALINESE PANTI BAWAK 1BFC..1BFF ; Po # [4] BATAK SYMBOL BINDU NA METEK..BATAK SYMBOL BINDU PANGOLAT 1C3B..1C3F ; Po # [5] LEPCHA PUNCTUATION TA-ROL..LEPCHA PUNCTUATION TSHOOK 1C7E..1C7F ; Po # [2] OL CHIKI PUNCTUATION MUCAAD..OL CHIKI PUNCTUATION DOUBLE MUCAAD @@ -3831,6 +3913,8 @@ FF64..FF65 ; Po # [2] HALFWIDTH IDEOGRAPHIC COMMA..HALFWIDTH KATAKANA MIDDL 111DD..111DF ; Po # [3] SHARADA CONTINUATION SIGN..SHARADA SECTION MARK-2 11238..1123D ; Po # [6] KHOJKI DANDA..KHOJKI ABBREVIATION SIGN 112A9 ; Po # MULTANI SECTION MARK +113D4..113D5 ; Po # [2] TULU-TIGALARI DANDA..TULU-TIGALARI DOUBLE DANDA +113D7..113D8 ; Po # [2] TULU-TIGALARI SIGN OM PUSHPIKA..TULU-TIGALARI SIGN SHRII PUSHPIKA 1144B..1144F ; Po # [5] NEWA DANDA..NEWA ABBREVIATION SIGN 1145A..1145B ; Po # [2] NEWA DOUBLE COMMA..NEWA PLACEHOLDER MARK 1145D ; Po # NEWA INSERTION SIGN @@ -3847,6 +3931,7 @@ FF64..FF65 ; Po # [2] HALFWIDTH IDEOGRAPHIC COMMA..HALFWIDTH KATAKANA MIDDL 11A9A..11A9C ; Po # [3] SOYOMBO MARK TSHEG..SOYOMBO MARK DOUBLE SHAD 11A9E..11AA2 ; Po # [5] SOYOMBO HEAD MARK WITH MOON AND SUN AND TRIPLE FLAME..SOYOMBO TERMINAL MARK-2 11B00..11B09 ; Po # [10] DEVANAGARI HEAD MARK..DEVANAGARI SIGN MINDU +11BE1 ; Po # SUNUWAR SIGN PVO 11C41..11C45 ; Po # [5] BHAIKSUKI DANDA..BHAIKSUKI GAP FILLER-2 11C70..11C71 ; Po # [2] MARCHEN HEAD MARK..MARCHEN MARK SHAD 11EF7..11EF8 ; Po # [2] MAKASAR PASSIMBANG..MAKASAR END OF SECTION @@ -3858,13 +3943,15 @@ FF64..FF65 ; Po # [2] HALFWIDTH IDEOGRAPHIC COMMA..HALFWIDTH KATAKANA MIDDL 16AF5 ; Po # BASSA VAH FULL STOP 16B37..16B3B ; Po # [5] PAHAWH HMONG SIGN VOS THOM..PAHAWH HMONG SIGN VOS FEEM 16B44 ; Po # PAHAWH HMONG SIGN XAUS +16D6D..16D6F ; Po # [3] KIRAT RAI SIGN YUPI..KIRAT RAI DOUBLE DANDA 16E97..16E9A ; Po # [4] MEDEFAIDRIN COMMA..MEDEFAIDRIN EXCLAMATION OH 16FE2 ; Po # OLD CHINESE HOOK MARK 1BC9F ; Po # DUPLOYAN PUNCTUATION CHINOOK FULL STOP 1DA87..1DA8B ; Po # [5] SIGNWRITING COMMA..SIGNWRITING PARENTHESIS +1E5FF ; Po # OL ONAL ABBREVIATION SIGN 1E95E..1E95F ; Po # [2] ADLAM INITIAL EXCLAMATION MARK..ADLAM INITIAL QUESTION MARK -# Total code points: 628 +# Total code points: 640 # ================================================ @@ -3923,6 +4010,7 @@ FF5C ; Sm # FULLWIDTH VERTICAL LINE FF5E ; Sm # FULLWIDTH TILDE FFE2 ; Sm # FULLWIDTH NOT SIGN FFE9..FFEC ; Sm # [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS ARROW +10D8E..10D8F ; Sm # [2] GARAY PLUS SIGN..GARAY MINUS SIGN 1D6C1 ; Sm # MATHEMATICAL BOLD NABLA 1D6DB ; Sm # MATHEMATICAL BOLD PARTIAL DIFFERENTIAL 1D6FB ; Sm # MATHEMATICAL ITALIC NABLA @@ -3935,7 +4023,7 @@ FFE9..FFEC ; Sm # [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS ARROW 1D7C3 ; Sm # MATHEMATICAL SANS-SERIF BOLD ITALIC PARTIAL DIFFERENTIAL 1EEF0..1EEF1 ; Sm # [2] ARABIC MATHEMATICAL OPERATOR MEEM WITH HAH WITH TATWEEL..ARABIC MATHEMATICAL OPERATOR HAH WITH DAL -# Total code points: 948 +# Total code points: 950 # ================================================ @@ -4073,7 +4161,7 @@ FFE3 ; Sk # FULLWIDTH MACRON 232B..237B ; So # [81] ERASE TO THE LEFT..NOT CHECK MARK 237D..239A ; So # [30] SHOULDERED OPEN BOX..CLEAR SCREEN SYMBOL 23B4..23DB ; So # [40] TOP SQUARE BRACKET..FUSE -23E2..2426 ; So # [69] WHITE TRAPEZIUM..SYMBOL FOR SUBSTITUTE FORM TWO +23E2..2429 ; So # [72] WHITE TRAPEZIUM..SYMBOL FOR DELETE MEDIUM SHADE FORM 2440..244A ; So # [11] OCR HOOK..OCR DOUBLE BACKSLASH 249C..24E9 ; So # [78] PARENTHESIZED LATIN SMALL LETTER A..CIRCLED LATIN SMALL LETTER Z 2500..25B6 ; So # [183] BOX DRAWINGS LIGHT HORIZONTAL..BLACK RIGHT-POINTING TRIANGLE @@ -4101,7 +4189,7 @@ FFE3 ; Sk # FULLWIDTH MACRON 303E..303F ; So # [2] IDEOGRAPHIC VARIATION INDICATOR..IDEOGRAPHIC HALF FILL SPACE 3190..3191 ; So # [2] IDEOGRAPHIC ANNOTATION LINKING MARK..IDEOGRAPHIC ANNOTATION REVERSE MARK 3196..319F ; So # [10] IDEOGRAPHIC ANNOTATION TOP MARK..IDEOGRAPHIC ANNOTATION MAN MARK -31C0..31E3 ; So # [36] CJK STROKE T..CJK STROKE Q +31C0..31E5 ; So # [38] CJK STROKE T..CJK STROKE SZP 31EF ; So # IDEOGRAPHIC DESCRIPTION CHARACTER SUBTRACTION 3200..321E ; So # [31] PARENTHESIZED HANGUL KIYEOK..PARENTHESIZED KOREAN CHARACTER O HU 322A..3247 ; So # [30] PARENTHESIZED IDEOGRAPH MOON..CIRCLED IDEOGRAPH KOTO @@ -4136,6 +4224,8 @@ FFFC..FFFD ; So # [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 16B3C..16B3F ; So # [4] PAHAWH HMONG SIGN XYEEM NTXIV..PAHAWH HMONG SIGN XYEEM FAIB 16B45 ; So # PAHAWH HMONG SIGN CIM TSOV ROG 1BC9C ; So # DUPLOYAN SIGN O WITH CROSS +1CC00..1CCEF ; So # [240] UP-POINTING GO-KART..OUTLINED LATIN CAPITAL LETTER Z +1CD00..1CEB3 ; So # [436] BLOCK OCTANT-3..BLACK RIGHT TRIANGLE CARET 1CF50..1CFC3 ; So # [116] ZNAMENNY NEUME KRYUK..ZNAMENNY NEUME PAUK 1D000..1D0F5 ; So # [246] BYZANTINE MUSICAL SYMBOL PSILI..BYZANTINE MUSICAL SYMBOL GORGON NEO KATO 1D100..1D126 ; So # [39] MUSICAL SYMBOL SINGLE BARLINE..MUSICAL SYMBOL DRUM CLEF-2 @@ -4180,20 +4270,20 @@ FFFC..FFFD ; So # [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 1F850..1F859 ; So # [10] LEFTWARDS SANS-SERIF ARROW..UP DOWN SANS-SERIF ARROW 1F860..1F887 ; So # [40] WIDE-HEADED LEFTWARDS LIGHT BARB ARROW..WIDE-HEADED SOUTH WEST VERY HEAVY BARB ARROW 1F890..1F8AD ; So # [30] LEFTWARDS TRIANGLE ARROWHEAD..WHITE ARROW SHAFT WIDTH TWO THIRDS -1F8B0..1F8B1 ; So # [2] ARROW POINTING UPWARDS THEN NORTH WEST..ARROW POINTING RIGHTWARDS THEN CURVING SOUTH WEST +1F8B0..1F8BB ; So # [12] ARROW POINTING UPWARDS THEN NORTH WEST..SOUTH WEST ARROW FROM BAR +1F8C0..1F8C1 ; So # [2] LEFTWARDS ARROW FROM DOWNWARDS ARROW..RIGHTWARDS ARROW FROM DOWNWARDS ARROW 1F900..1FA53 ; So # [340] CIRCLED CROSS FORMEE WITH FOUR DOTS..BLACK CHESS KNIGHT-BISHOP 1FA60..1FA6D ; So # [14] XIANGQI RED GENERAL..XIANGQI BLACK SOLDIER 1FA70..1FA7C ; So # [13] BALLET SHOES..CRUTCH -1FA80..1FA88 ; So # [9] YO-YO..FLUTE -1FA90..1FABD ; So # [46] RINGED PLANET..WING -1FABF..1FAC5 ; So # [7] GOOSE..PERSON WITH CROWN -1FACE..1FADB ; So # [14] MOOSE..PEA POD -1FAE0..1FAE8 ; So # [9] MELTING FACE..SHAKING FACE +1FA80..1FA89 ; So # [10] YO-YO..HARP +1FA8F..1FAC6 ; So # [56] SHOVEL..FINGERPRINT +1FACE..1FADC ; So # [15] MOOSE..ROOT VEGETABLE +1FADF..1FAE9 ; So # [11] SPLATTER..FACE WITH BAGS UNDER EYES 1FAF0..1FAF8 ; So # [9] HAND WITH INDEX FINGER AND THUMB CROSSED..RIGHTWARDS PUSHING HAND 1FB00..1FB92 ; So # [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK -1FB94..1FBCA ; So # [55] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..WHITE UP-POINTING CHEVRON +1FB94..1FBEF ; So # [92] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..TOP LEFT JUSTIFIED LOWER RIGHT QUARTER BLACK CIRCLE -# Total code points: 6639 +# Total code points: 7376 # ================================================ diff --git a/src/md4c.c b/src/md4c.c index e3f5cf9d..1cd51b24 100644 --- a/src/md4c.c +++ b/src/md4c.c @@ -621,17 +621,17 @@ struct MD_UNICODE_FOLD_INFO_tag { R(0x109e,0x109f), S(0x10fb), R(0x1360,0x1368), R(0x1390,0x1399), S(0x1400), R(0x166d,0x166e), R(0x169b,0x169c), R(0x16eb,0x16ed), R(0x1735,0x1736), R(0x17d4,0x17d6), R(0x17d8,0x17db), R(0x1800,0x180a), S(0x1940), R(0x1944,0x1945), R(0x19de,0x19ff), R(0x1a1e,0x1a1f), R(0x1aa0,0x1aa6), - R(0x1aa8,0x1aad), R(0x1b5a,0x1b6a), R(0x1b74,0x1b7e), R(0x1bfc,0x1bff), R(0x1c3b,0x1c3f), - R(0x1c7e,0x1c7f), R(0x1cc0,0x1cc7), S(0x1cd3), S(0x1fbd), R(0x1fbf,0x1fc1), R(0x1fcd,0x1fcf), - R(0x1fdd,0x1fdf), R(0x1fed,0x1fef), R(0x1ffd,0x1ffe), R(0x2010,0x2027), R(0x2030,0x205e), - R(0x207a,0x207e), R(0x208a,0x208e), R(0x20a0,0x20c0), R(0x2100,0x2101), R(0x2103,0x2106), - R(0x2108,0x2109), S(0x2114), R(0x2116,0x2118), R(0x211e,0x2123), S(0x2125), S(0x2127), S(0x2129), - S(0x212e), R(0x213a,0x213b), R(0x2140,0x2144), R(0x214a,0x214d), S(0x214f), R(0x218a,0x218b), - R(0x2190,0x2426), R(0x2440,0x244a), R(0x249c,0x24e9), R(0x2500,0x2775), R(0x2794,0x2b73), + R(0x1aa8,0x1aad), R(0x1b4e,0x1b4f), R(0x1b5a,0x1b6a), R(0x1b74,0x1b7f), R(0x1bfc,0x1bff), + R(0x1c3b,0x1c3f), R(0x1c7e,0x1c7f), R(0x1cc0,0x1cc7), S(0x1cd3), S(0x1fbd), R(0x1fbf,0x1fc1), + R(0x1fcd,0x1fcf), R(0x1fdd,0x1fdf), R(0x1fed,0x1fef), R(0x1ffd,0x1ffe), R(0x2010,0x2027), + R(0x2030,0x205e), R(0x207a,0x207e), R(0x208a,0x208e), R(0x20a0,0x20c0), R(0x2100,0x2101), + R(0x2103,0x2106), R(0x2108,0x2109), S(0x2114), R(0x2116,0x2118), R(0x211e,0x2123), S(0x2125), S(0x2127), + S(0x2129), S(0x212e), R(0x213a,0x213b), R(0x2140,0x2144), R(0x214a,0x214d), S(0x214f), R(0x218a,0x218b), + R(0x2190,0x2429), R(0x2440,0x244a), R(0x249c,0x24e9), R(0x2500,0x2775), R(0x2794,0x2b73), R(0x2b76,0x2b95), R(0x2b97,0x2bff), R(0x2ce5,0x2cea), R(0x2cf9,0x2cfc), R(0x2cfe,0x2cff), S(0x2d70), R(0x2e00,0x2e2e), R(0x2e30,0x2e5d), R(0x2e80,0x2e99), R(0x2e9b,0x2ef3), R(0x2f00,0x2fd5), R(0x2ff0,0x2fff), R(0x3001,0x3004), R(0x3008,0x3020), S(0x3030), R(0x3036,0x3037), R(0x303d,0x303f), - R(0x309b,0x309c), S(0x30a0), S(0x30fb), R(0x3190,0x3191), R(0x3196,0x319f), R(0x31c0,0x31e3), S(0x31ef), + R(0x309b,0x309c), S(0x30a0), S(0x30fb), R(0x3190,0x3191), R(0x3196,0x319f), R(0x31c0,0x31e5), S(0x31ef), R(0x3200,0x321e), R(0x322a,0x3247), S(0x3250), R(0x3260,0x327f), R(0x328a,0x32b0), R(0x32c0,0x33ff), R(0x4dc0,0x4dff), R(0xa490,0xa4c6), R(0xa4fe,0xa4ff), R(0xa60d,0xa60f), S(0xa673), S(0xa67e), R(0xa6f2,0xa6f7), R(0xa700,0xa716), R(0xa720,0xa721), R(0xa789,0xa78a), R(0xa828,0xa82b), @@ -643,28 +643,30 @@ struct MD_UNICODE_FOLD_INFO_tag { R(0xffe8,0xffee), R(0xfffc,0xfffd), R(0x10100,0x10102), R(0x10137,0x1013f), R(0x10179,0x10189), R(0x1018c,0x1018e), R(0x10190,0x1019c), S(0x101a0), R(0x101d0,0x101fc), S(0x1039f), S(0x103d0), S(0x1056f), S(0x10857), R(0x10877,0x10878), S(0x1091f), S(0x1093f), R(0x10a50,0x10a58), S(0x10a7f), - S(0x10ac8), R(0x10af0,0x10af6), R(0x10b39,0x10b3f), R(0x10b99,0x10b9c), S(0x10ead), R(0x10f55,0x10f59), - R(0x10f86,0x10f89), R(0x11047,0x1104d), R(0x110bb,0x110bc), R(0x110be,0x110c1), R(0x11140,0x11143), - R(0x11174,0x11175), R(0x111c5,0x111c8), S(0x111cd), S(0x111db), R(0x111dd,0x111df), R(0x11238,0x1123d), - S(0x112a9), R(0x1144b,0x1144f), R(0x1145a,0x1145b), S(0x1145d), S(0x114c6), R(0x115c1,0x115d7), - R(0x11641,0x11643), R(0x11660,0x1166c), S(0x116b9), R(0x1173c,0x1173f), S(0x1183b), R(0x11944,0x11946), - S(0x119e2), R(0x11a3f,0x11a46), R(0x11a9a,0x11a9c), R(0x11a9e,0x11aa2), R(0x11b00,0x11b09), + S(0x10ac8), R(0x10af0,0x10af6), R(0x10b39,0x10b3f), R(0x10b99,0x10b9c), S(0x10d6e), R(0x10d8e,0x10d8f), + S(0x10ead), R(0x10f55,0x10f59), R(0x10f86,0x10f89), R(0x11047,0x1104d), R(0x110bb,0x110bc), + R(0x110be,0x110c1), R(0x11140,0x11143), R(0x11174,0x11175), R(0x111c5,0x111c8), S(0x111cd), S(0x111db), + R(0x111dd,0x111df), R(0x11238,0x1123d), S(0x112a9), R(0x113d4,0x113d5), R(0x113d7,0x113d8), + R(0x1144b,0x1144f), R(0x1145a,0x1145b), S(0x1145d), S(0x114c6), R(0x115c1,0x115d7), R(0x11641,0x11643), + R(0x11660,0x1166c), S(0x116b9), R(0x1173c,0x1173f), S(0x1183b), R(0x11944,0x11946), S(0x119e2), + R(0x11a3f,0x11a46), R(0x11a9a,0x11a9c), R(0x11a9e,0x11aa2), R(0x11b00,0x11b09), S(0x11be1), R(0x11c41,0x11c45), R(0x11c70,0x11c71), R(0x11ef7,0x11ef8), R(0x11f43,0x11f4f), R(0x11fd5,0x11ff1), S(0x11fff), R(0x12470,0x12474), R(0x12ff1,0x12ff2), R(0x16a6e,0x16a6f), S(0x16af5), R(0x16b37,0x16b3f), - R(0x16b44,0x16b45), R(0x16e97,0x16e9a), S(0x16fe2), S(0x1bc9c), S(0x1bc9f), R(0x1cf50,0x1cfc3), - R(0x1d000,0x1d0f5), R(0x1d100,0x1d126), R(0x1d129,0x1d164), R(0x1d16a,0x1d16c), R(0x1d183,0x1d184), - R(0x1d18c,0x1d1a9), R(0x1d1ae,0x1d1ea), R(0x1d200,0x1d241), S(0x1d245), R(0x1d300,0x1d356), S(0x1d6c1), - S(0x1d6db), S(0x1d6fb), S(0x1d715), S(0x1d735), S(0x1d74f), S(0x1d76f), S(0x1d789), S(0x1d7a9), - S(0x1d7c3), R(0x1d800,0x1d9ff), R(0x1da37,0x1da3a), R(0x1da6d,0x1da74), R(0x1da76,0x1da83), - R(0x1da85,0x1da8b), S(0x1e14f), S(0x1e2ff), R(0x1e95e,0x1e95f), S(0x1ecac), S(0x1ecb0), S(0x1ed2e), - R(0x1eef0,0x1eef1), R(0x1f000,0x1f02b), R(0x1f030,0x1f093), R(0x1f0a0,0x1f0ae), R(0x1f0b1,0x1f0bf), - R(0x1f0c1,0x1f0cf), R(0x1f0d1,0x1f0f5), R(0x1f10d,0x1f1ad), R(0x1f1e6,0x1f202), R(0x1f210,0x1f23b), - R(0x1f240,0x1f248), R(0x1f250,0x1f251), R(0x1f260,0x1f265), R(0x1f300,0x1f6d7), R(0x1f6dc,0x1f6ec), - R(0x1f6f0,0x1f6fc), R(0x1f700,0x1f776), R(0x1f77b,0x1f7d9), R(0x1f7e0,0x1f7eb), S(0x1f7f0), - R(0x1f800,0x1f80b), R(0x1f810,0x1f847), R(0x1f850,0x1f859), R(0x1f860,0x1f887), R(0x1f890,0x1f8ad), - R(0x1f8b0,0x1f8b1), R(0x1f900,0x1fa53), R(0x1fa60,0x1fa6d), R(0x1fa70,0x1fa7c), R(0x1fa80,0x1fa88), - R(0x1fa90,0x1fabd), R(0x1fabf,0x1fac5), R(0x1face,0x1fadb), R(0x1fae0,0x1fae8), R(0x1faf0,0x1faf8), - R(0x1fb00,0x1fb92), R(0x1fb94,0x1fbca) + R(0x16b44,0x16b45), R(0x16d6d,0x16d6f), R(0x16e97,0x16e9a), S(0x16fe2), S(0x1bc9c), S(0x1bc9f), + R(0x1cc00,0x1ccef), R(0x1cd00,0x1ceb3), R(0x1cf50,0x1cfc3), R(0x1d000,0x1d0f5), R(0x1d100,0x1d126), + R(0x1d129,0x1d164), R(0x1d16a,0x1d16c), R(0x1d183,0x1d184), R(0x1d18c,0x1d1a9), R(0x1d1ae,0x1d1ea), + R(0x1d200,0x1d241), S(0x1d245), R(0x1d300,0x1d356), S(0x1d6c1), S(0x1d6db), S(0x1d6fb), S(0x1d715), + S(0x1d735), S(0x1d74f), S(0x1d76f), S(0x1d789), S(0x1d7a9), S(0x1d7c3), R(0x1d800,0x1d9ff), + R(0x1da37,0x1da3a), R(0x1da6d,0x1da74), R(0x1da76,0x1da83), R(0x1da85,0x1da8b), S(0x1e14f), S(0x1e2ff), + S(0x1e5ff), R(0x1e95e,0x1e95f), S(0x1ecac), S(0x1ecb0), S(0x1ed2e), R(0x1eef0,0x1eef1), + R(0x1f000,0x1f02b), R(0x1f030,0x1f093), R(0x1f0a0,0x1f0ae), R(0x1f0b1,0x1f0bf), R(0x1f0c1,0x1f0cf), + R(0x1f0d1,0x1f0f5), R(0x1f10d,0x1f1ad), R(0x1f1e6,0x1f202), R(0x1f210,0x1f23b), R(0x1f240,0x1f248), + R(0x1f250,0x1f251), R(0x1f260,0x1f265), R(0x1f300,0x1f6d7), R(0x1f6dc,0x1f6ec), R(0x1f6f0,0x1f6fc), + R(0x1f700,0x1f776), R(0x1f77b,0x1f7d9), R(0x1f7e0,0x1f7eb), S(0x1f7f0), R(0x1f800,0x1f80b), + R(0x1f810,0x1f847), R(0x1f850,0x1f859), R(0x1f860,0x1f887), R(0x1f890,0x1f8ad), R(0x1f8b0,0x1f8bb), + R(0x1f8c0,0x1f8c1), R(0x1f900,0x1fa53), R(0x1fa60,0x1fa6d), R(0x1fa70,0x1fa7c), R(0x1fa80,0x1fa89), + R(0x1fa8f,0x1fac6), R(0x1face,0x1fadc), R(0x1fadf,0x1fae9), R(0x1faf0,0x1faf8), R(0x1fb00,0x1fb92), + R(0x1fb94,0x1fbef) }; #undef R #undef S @@ -699,7 +701,7 @@ struct MD_UNICODE_FOLD_INFO_tag { S(0x03f4), S(0x03f5), S(0x03f7), S(0x03f9), S(0x03fa), R(0x03fd,0x03ff), R(0x0400,0x040f), R(0x0410,0x042f), R(0x0460,0x0480), R(0x048a,0x04be), S(0x04c0), R(0x04c1,0x04cd), R(0x04d0,0x052e), R(0x0531,0x0556), R(0x10a0,0x10c5), S(0x10c7), S(0x10cd), R(0x13f8,0x13fd), S(0x1c80), S(0x1c81), - S(0x1c82), S(0x1c83), S(0x1c84), S(0x1c85), S(0x1c86), S(0x1c87), S(0x1c88), R(0x1c90,0x1cba), + S(0x1c82), S(0x1c83), S(0x1c84), S(0x1c85), S(0x1c86), S(0x1c87), S(0x1c88), S(0x1c89), R(0x1c90,0x1cba), R(0x1cbd,0x1cbf), R(0x1e00,0x1e94), S(0x1e9b), R(0x1ea0,0x1efe), R(0x1f08,0x1f0f), R(0x1f18,0x1f1d), R(0x1f28,0x1f2f), R(0x1f38,0x1f3f), R(0x1f48,0x1f4d), S(0x1f59), S(0x1f5b), S(0x1f5d), S(0x1f5f), R(0x1f68,0x1f6f), S(0x1fb8), S(0x1fb9), S(0x1fba), S(0x1fbb), S(0x1fbe), R(0x1fc8,0x1fcb), S(0x1fd8), @@ -710,10 +712,10 @@ struct MD_UNICODE_FOLD_INFO_tag { S(0x2ceb), S(0x2ced), S(0x2cf2), R(0xa640,0xa66c), R(0xa680,0xa69a), R(0xa722,0xa72e), R(0xa732,0xa76e), S(0xa779), S(0xa77b), S(0xa77d), R(0xa77e,0xa786), S(0xa78b), S(0xa78d), S(0xa790), S(0xa792), R(0xa796,0xa7a8), S(0xa7aa), S(0xa7ab), S(0xa7ac), S(0xa7ad), S(0xa7ae), S(0xa7b0), S(0xa7b1), S(0xa7b2), - S(0xa7b3), R(0xa7b4,0xa7c2), S(0xa7c4), S(0xa7c5), S(0xa7c6), S(0xa7c7), S(0xa7c9), S(0xa7d0), S(0xa7d6), - S(0xa7d8), S(0xa7f5), R(0xab70,0xabbf), R(0xff21,0xff3a), R(0x10400,0x10427), R(0x104b0,0x104d3), - R(0x10570,0x1057a), R(0x1057c,0x1058a), R(0x1058c,0x10592), S(0x10594), S(0x10595), R(0x10c80,0x10cb2), - R(0x118a0,0x118bf), R(0x16e40,0x16e5f), R(0x1e900,0x1e921) + S(0xa7b3), R(0xa7b4,0xa7c2), S(0xa7c4), S(0xa7c5), S(0xa7c6), S(0xa7c7), S(0xa7c9), S(0xa7cb), S(0xa7cc), + S(0xa7d0), R(0xa7d6,0xa7da), S(0xa7dc), S(0xa7f5), R(0xab70,0xabbf), R(0xff21,0xff3a), R(0x10400,0x10427), + R(0x104b0,0x104d3), R(0x10570,0x1057a), R(0x1057c,0x1058a), R(0x1058c,0x10592), S(0x10594), S(0x10595), + R(0x10c80,0x10cb2), R(0x10d50,0x10d65), R(0x118a0,0x118bf), R(0x16e40,0x16e5f), R(0x1e900,0x1e921) }; static const unsigned FOLD_MAP_1_DATA[] = { 0x0061, 0x007a, 0x03bc, 0x00e0, 0x00f6, 0x00f8, 0x00fe, 0x0101, 0x012f, 0x0133, 0x0137, 0x013a, 0x0148, @@ -727,17 +729,18 @@ struct MD_UNICODE_FOLD_INFO_tag { 0x03d9, 0x03ef, 0x03ba, 0x03c1, 0x03b8, 0x03b5, 0x03f8, 0x03f2, 0x03fb, 0x037b, 0x037d, 0x0450, 0x045f, 0x0430, 0x044f, 0x0461, 0x0481, 0x048b, 0x04bf, 0x04cf, 0x04c2, 0x04ce, 0x04d1, 0x052f, 0x0561, 0x0586, 0x2d00, 0x2d25, 0x2d27, 0x2d2d, 0x13f0, 0x13f5, 0x0432, 0x0434, 0x043e, 0x0441, 0x0442, 0x0442, 0x044a, - 0x0463, 0xa64b, 0x10d0, 0x10fa, 0x10fd, 0x10ff, 0x1e01, 0x1e95, 0x1e61, 0x1ea1, 0x1eff, 0x1f00, 0x1f07, - 0x1f10, 0x1f15, 0x1f20, 0x1f27, 0x1f30, 0x1f37, 0x1f40, 0x1f45, 0x1f51, 0x1f53, 0x1f55, 0x1f57, 0x1f60, - 0x1f67, 0x1fb0, 0x1fb1, 0x1f70, 0x1f71, 0x03b9, 0x1f72, 0x1f75, 0x1fd0, 0x1fd1, 0x1f76, 0x1f77, 0x1fe0, - 0x1fe1, 0x1f7a, 0x1f7b, 0x1fe5, 0x1f78, 0x1f79, 0x1f7c, 0x1f7d, 0x03c9, 0x006b, 0x00e5, 0x214e, 0x2170, - 0x217f, 0x2184, 0x24d0, 0x24e9, 0x2c30, 0x2c5f, 0x2c61, 0x026b, 0x1d7d, 0x027d, 0x2c68, 0x2c6c, 0x0251, - 0x0271, 0x0250, 0x0252, 0x2c73, 0x2c76, 0x023f, 0x0240, 0x2c81, 0x2ce3, 0x2cec, 0x2cee, 0x2cf3, 0xa641, - 0xa66d, 0xa681, 0xa69b, 0xa723, 0xa72f, 0xa733, 0xa76f, 0xa77a, 0xa77c, 0x1d79, 0xa77f, 0xa787, 0xa78c, - 0x0265, 0xa791, 0xa793, 0xa797, 0xa7a9, 0x0266, 0x025c, 0x0261, 0x026c, 0x026a, 0x029e, 0x0287, 0x029d, - 0xab53, 0xa7b5, 0xa7c3, 0xa794, 0x0282, 0x1d8e, 0xa7c8, 0xa7ca, 0xa7d1, 0xa7d7, 0xa7d9, 0xa7f6, 0x13a0, - 0x13ef, 0xff41, 0xff5a, 0x10428, 0x1044f, 0x104d8, 0x104fb, 0x10597, 0x105a1, 0x105a3, 0x105b1, 0x105b3, - 0x105b9, 0x105bb, 0x105bc, 0x10cc0, 0x10cf2, 0x118c0, 0x118df, 0x16e60, 0x16e7f, 0x1e922, 0x1e943 + 0x0463, 0xa64b, 0x1c8a, 0x10d0, 0x10fa, 0x10fd, 0x10ff, 0x1e01, 0x1e95, 0x1e61, 0x1ea1, 0x1eff, 0x1f00, + 0x1f07, 0x1f10, 0x1f15, 0x1f20, 0x1f27, 0x1f30, 0x1f37, 0x1f40, 0x1f45, 0x1f51, 0x1f53, 0x1f55, 0x1f57, + 0x1f60, 0x1f67, 0x1fb0, 0x1fb1, 0x1f70, 0x1f71, 0x03b9, 0x1f72, 0x1f75, 0x1fd0, 0x1fd1, 0x1f76, 0x1f77, + 0x1fe0, 0x1fe1, 0x1f7a, 0x1f7b, 0x1fe5, 0x1f78, 0x1f79, 0x1f7c, 0x1f7d, 0x03c9, 0x006b, 0x00e5, 0x214e, + 0x2170, 0x217f, 0x2184, 0x24d0, 0x24e9, 0x2c30, 0x2c5f, 0x2c61, 0x026b, 0x1d7d, 0x027d, 0x2c68, 0x2c6c, + 0x0251, 0x0271, 0x0250, 0x0252, 0x2c73, 0x2c76, 0x023f, 0x0240, 0x2c81, 0x2ce3, 0x2cec, 0x2cee, 0x2cf3, + 0xa641, 0xa66d, 0xa681, 0xa69b, 0xa723, 0xa72f, 0xa733, 0xa76f, 0xa77a, 0xa77c, 0x1d79, 0xa77f, 0xa787, + 0xa78c, 0x0265, 0xa791, 0xa793, 0xa797, 0xa7a9, 0x0266, 0x025c, 0x0261, 0x026c, 0x026a, 0x029e, 0x0287, + 0x029d, 0xab53, 0xa7b5, 0xa7c3, 0xa794, 0x0282, 0x1d8e, 0xa7c8, 0xa7ca, 0x0264, 0xa7cd, 0xa7d1, 0xa7d7, + 0xa7db, 0x019b, 0xa7f6, 0x13a0, 0x13ef, 0xff41, 0xff5a, 0x10428, 0x1044f, 0x104d8, 0x104fb, 0x10597, + 0x105a1, 0x105a3, 0x105b1, 0x105b3, 0x105b9, 0x105bb, 0x105bc, 0x10cc0, 0x10cf2, 0x10d70, 0x10d85, + 0x118c0, 0x118df, 0x16e60, 0x16e7f, 0x1e922, 0x1e943 }; static const unsigned FOLD_MAP_2[] = { S(0x00df), S(0x0130), S(0x0149), S(0x01f0), S(0x0587), S(0x1e96), S(0x1e97), S(0x1e98), S(0x1e99),