From 653308d29651ae3800259bc257caec3cd2446263 Mon Sep 17 00:00:00 2001 From: Jeff Muizelaar Date: Fri, 3 May 2024 21:36:06 -0400 Subject: [PATCH] Expand the glyph name list and remove duplicates Fixes #88 --- src/glyphlist-export.py | 43 ++++++++++++++++++++++++-------- src/glyphnames.rs | 55 ++++++++++++++++++++++++----------------- 2 files changed, 66 insertions(+), 32 deletions(-) diff --git a/src/glyphlist-export.py b/src/glyphlist-export.py index c055156..1064a90 100644 --- a/src/glyphlist-export.py +++ b/src/glyphlist-export.py @@ -1,18 +1,41 @@ -f = open("glyphlist-extended.txt") -lines = f.readlines() -import re glyphlist = [] -for l in lines: - if l[0] == '#' or l[0] == '\n': - continue - name, code = re.split('[; ]+', l)[0:2] - glyphlist.append((name,int(code,16))) +glyphs_seen = {} +def read_glyphs(name): + f = open(name) + lines = f.readlines() + import re + for l in lines: + if l[0] == '#' or l[0] == '\n': + continue + split = re.split('[; ,]+', l) + name = split[0] + val = int(split[1], 16) + if val > 0xffff: + val = int(split[-1], 16) + if val == 0xf766 and name != "Fsmall": + continue + if name in glyphs_seen: + continue + glyphs_seen[name] = True + glyphlist.append((name,val)) +read_glyphs("glyphlist-extended.txt") +read_glyphs("texglyphlist.txt") +read_glyphs("additional.txt") +# there are some conflicts between these files +# e.g. tildewide=0x02dc, vs tildewide=0x0303 +# for now we just ignore the subsequent ones +glyphlist.append(('mapsto', 0x21A6)) +glyphlist = list(set(glyphlist)) glyphlist.sort() -print "/* Autogenerated from https://github.com/michal-h21/htfgen/commits/master/glyphlist-extended.txt */" +print "/* Autogenerated from:" +print " https://github.com/michal-h21/htfgen/commits/master/glyphlist-extended.txt" +print " https://github.com/2ion/lcdf-typetools/blob/master/texglyphlist.txt" +print " https://github.com/apache/pdfbox/blob/trunk/pdfbox/src/main/resources/org/apache/pdfbox/resources/glyphlist/additional.txt" +print " */" print "pub fn name_to_unicode(name: &str) -> Option {" print " let names = [" print ",\n".join('(\"%s\", 0x%04x)' % (g[0], g[1]) for g in glyphlist) print " ];" -print " let result = names.binary_search_by_key(&name, |&(name,code)| &name);" +print " let result = names.binary_search_by_key(&name, |&(name,_code)| &name);" print " result.ok().map(|indx| names[indx].1)" print "}" diff --git a/src/glyphnames.rs b/src/glyphnames.rs index dc0f6b2..39f8d62 100644 --- a/src/glyphnames.rs +++ b/src/glyphnames.rs @@ -1,5 +1,8 @@ -/* Autogenerated from https://github.com/michal-h21/htfgen/commits/master/glyphlist-extended.txt - and https://github.com/2ion/lcdf-typetools/blob/master/texglyphlist.txt */ +/* Autogenerated from: + https://github.com/michal-h21/htfgen/commits/master/glyphlist-extended.txt + https://github.com/2ion/lcdf-typetools/blob/master/texglyphlist.txt + https://github.com/apache/pdfbox/blob/trunk/pdfbox/src/main/resources/org/apache/pdfbox/resources/glyphlist/additional.txt + */ pub fn name_to_unicode(name: &str) -> Option { let names = [ ("A", 0x0041), @@ -228,10 +231,8 @@ pub fn name_to_unicode(name: &str) -> Option { ("Gdot", 0x0120), ("Gdotaccent", 0x0120), ("Gecyrillic", 0x0413), -("Germandbls", 0x0053), ("Germandbls", 0x1e9e), ("Germandblssmall", 0xd803), -("Germandblssmall", 0xf773), ("Ghadarmenian", 0x0542), ("Ghemiddlehookcyrillic", 0x0494), ("Ghestrokecyrillic", 0x0492), @@ -561,7 +562,6 @@ pub fn name_to_unicode(name: &str) -> Option { ("SF540000", 0x256a), ("SS", 0x0053), ("SSsmall", 0xd803), -("SSsmall", 0xf773), ("Sacute", 0x015a), ("Sacutedotaccent", 0x1e64), ("Sampigreek", 0x03e0), @@ -1108,7 +1108,15 @@ pub fn name_to_unicode(name: &str) -> Option { ("anbopomofo", 0x3122), ("angbopomofo", 0x3124), ("angbracketleft", 0x27e8), +("angbracketleftBig", 0x2329), +("angbracketleftBigg", 0x2329), +("angbracketleftbig", 0x2329), +("angbracketleftbigg", 0x2329), ("angbracketright", 0x27e9), +("angbracketrightBig", 0x232a), +("angbracketrightBigg", 0x232a), +("angbracketrightbig", 0x232a), +("angbracketrightbigg", 0x232a), ("angkhankhuthai", 0x0e5a), ("angle", 0x2220), ("anglebracketleft", 0x3008), @@ -1175,6 +1183,8 @@ pub fn name_to_unicode(name: &str) -> Option { ("arrowheadleftmod", 0x02c2), ("arrowheadrightmod", 0x02c3), ("arrowheadupmod", 0x02c4), +("arrowhookleft", 0x21aa), +("arrowhookright", 0x21a9), ("arrowhorizex", 0xf8e7), ("arrowleft", 0x2190), ("arrowleftbothalf", 0x21bd), @@ -1211,7 +1221,6 @@ pub fn name_to_unicode(name: &str) -> Option { ("arrowupleftofdown", 0x21c5), ("arrowupright", 0x2197), ("arrowupwhite", 0x21e7), -("arrowvertex", 0x2195), ("arrowvertex", 0xf8e6), ("ascendercompwordmark", 0xd80a), ("asciicircum", 0x005e), @@ -1339,15 +1348,17 @@ pub fn name_to_unicode(name: &str) -> Option { ("bokatakana", 0x30dc), ("bparen", 0x249d), ("bqsquare", 0x33c3), -("braceex", 0x007c), ("braceex", 0xf8f4), +("bracehtipdownleft", 0xfe37), +("bracehtipdownright", 0xfe37), +("bracehtipupleft", 0xfe38), +("bracehtipupright", 0xfe38), ("braceleft", 0x007b), ("braceleftBig", 0x007b), ("braceleftBigg", 0x007b), ("braceleftbig", 0x007b), ("braceleftbigg", 0x007b), ("braceleftbt", 0xf8f3), -("braceleftmid", 0x007c), ("braceleftmid", 0xf8f2), ("braceleftmonospace", 0xff5b), ("braceleftsmall", 0xfe5b), @@ -1359,7 +1370,6 @@ pub fn name_to_unicode(name: &str) -> Option { ("bracerightbig", 0x007d), ("bracerightbigg", 0x007d), ("bracerightbt", 0xf8fe), -("bracerightmid", 0x2016), ("bracerightmid", 0xf8fd), ("bracerightmonospace", 0xff5d), ("bracerightsmall", 0xfe5c), @@ -1372,6 +1382,7 @@ pub fn name_to_unicode(name: &str) -> Option { ("bracketleftbigg", 0x005b), ("bracketleftbt", 0xf8f0), ("bracketleftex", 0xf8ef), +("bracketleftmath", 0x005b), ("bracketleftmonospace", 0xff3b), ("bracketlefttp", 0xf8ee), ("bracketright", 0x005d), @@ -1381,6 +1392,7 @@ pub fn name_to_unicode(name: &str) -> Option { ("bracketrightbigg", 0x005d), ("bracketrightbt", 0xf8fb), ("bracketrightex", 0xf8fa), +("bracketrightmath", 0x005d), ("bracketrightmonospace", 0xff3d), ("bracketrighttp", 0xf8f9), ("breve", 0x02d8), @@ -1571,6 +1583,7 @@ pub fn name_to_unicode(name: &str) -> Option { ("controlHT", 0x0009), ("controlLF", 0x000a), ("controlNAK", 0x0015), +("controlNULL", 0x0000), ("controlRS", 0x001e), ("controlSI", 0x000f), ("controlSO", 0x000e), @@ -1727,7 +1740,6 @@ pub fn name_to_unicode(name: &str) -> Option { ("dhook", 0x0257), ("dialytikatonos", 0x0385), ("dialytikatonoscmb", 0x0344), -("diamond", 0x2662), ("diamond", 0x2666), ("diamondmath", 0x22c4), ("diamondsolid", 0x2666), @@ -1771,7 +1783,6 @@ pub fn name_to_unicode(name: &str) -> Option { ("dotbelowcomb", 0x0323), ("dotkatakana", 0x30fb), ("dotlessi", 0x0131), -("dotlessj", 0x0237), ("dotlessj", 0xf6be), ("dotlessjstrokehook", 0x0284), ("dotmath", 0x22c5), @@ -1908,6 +1919,7 @@ pub fn name_to_unicode(name: &str) -> Option { ("equal", 0x003d), ("equaldotleftright", 0x2252), ("equaldotrightleft", 0x2253), +("equalmath", 0x003d), ("equalmonospace", 0xff1d), ("equalorfollows", 0x22df), ("equalorgreater", 0x2a96), @@ -2059,7 +2071,6 @@ pub fn name_to_unicode(name: &str) -> Option { ("followornoteqvlnt", 0x22e9), ("follows", 0x227b), ("followsequal", 0x227d), -("followsequal", 0x2ab0), ("followsorcurly", 0x227d), ("followsorequal", 0x227f), ("fongmanthai", 0x0e4f), @@ -2279,6 +2290,7 @@ pub fn name_to_unicode(name: &str) -> Option { ("hatwide", 0x0302), ("hatwider", 0x0302), ("hatwiderr", 0x0302), +("hatwidest", 0x0302), ("hbar", 0x0127), ("hbopomofo", 0x310f), ("hbrevebelow", 0x1e2b), @@ -2289,7 +2301,6 @@ pub fn name_to_unicode(name: &str) -> Option { ("hdotaccent", 0x1e23), ("hdotbelow", 0x1e25), ("he", 0x05d4), -("heart", 0x2661), ("heart", 0x2665), ("heartsuitblack", 0x2665), ("heartsuitwhite", 0x2661), @@ -2496,7 +2507,6 @@ pub fn name_to_unicode(name: &str) -> Option { ("integraltp", 0x2320), ("intercal", 0x22ba), ("interrobang", 0x203d), -("interrobangdown", 0x2e18), ("interrobangdown", 0xd80b), ("intersection", 0x2229), ("intersectiondbl", 0x22d2), @@ -2839,6 +2849,7 @@ pub fn name_to_unicode(name: &str) -> Option { ("male", 0x2642), ("maltesecross", 0x2720), ("mansyonsquare", 0x3347), +("mapsto", 0x21a6), ("maqafhebrew", 0x05be), ("mars", 0x2642), ("masoracirclehebrew", 0x05af), @@ -3281,9 +3292,9 @@ pub fn name_to_unicode(name: &str) -> Option { ("parenleftbig", 0x0028), ("parenleftbigg", 0x0028), ("parenleftbt", 0xf8ed), -("parenleftex", 0x007c), ("parenleftex", 0xf8ec), ("parenleftinferior", 0x208d), +("parenleftmath", 0x0028), ("parenleftmonospace", 0xff08), ("parenleftsmall", 0xfe59), ("parenleftsuperior", 0x207d), @@ -3296,9 +3307,9 @@ pub fn name_to_unicode(name: &str) -> Option { ("parenrightbig", 0x0029), ("parenrightbigg", 0x0029), ("parenrightbt", 0xf8f8), -("parenrightex", 0x007c), ("parenrightex", 0xf8f7), ("parenrightinferior", 0x208e), +("parenrightmath", 0x0029), ("parenrightmonospace", 0xff09), ("parenrightsmall", 0xfe5a), ("parenrightsuperior", 0x207e), @@ -3360,8 +3371,6 @@ pub fn name_to_unicode(name: &str) -> Option { ("phagujarati", 0x0aab), ("phagurmukhi", 0x0a2b), ("phi", 0x03c6), -("phi", 0x03d5), -("phi1", 0x03c6), ("phi1", 0x03d5), ("phieuphacirclekorean", 0x327a), ("phieuphaparenkorean", 0x321a), @@ -3398,6 +3407,7 @@ pub fn name_to_unicode(name: &str) -> Option { ("plus", 0x002b), ("plusbelowcmb", 0x031f), ("pluscircle", 0x2295), +("plusmath", 0x002b), ("plusminus", 0x00b1), ("plusmod", 0x02d6), ("plusmonospace", 0xff0b), @@ -3420,7 +3430,6 @@ pub fn name_to_unicode(name: &str) -> Option { ("precedeornoteqvlnt", 0x22e8), ("precedes", 0x227a), ("precedesequal", 0x227c), -("precedesequal", 0x2aaf), ("precedesorcurly", 0x227c), ("precedesorequal", 0x227e), ("prescription", 0x211e), @@ -3544,6 +3553,8 @@ pub fn name_to_unicode(name: &str) -> Option { ("radicalbigg", 0x221a), ("radicalbt", 0x221a), ("radicalex", 0xf8e5), +("radicaltp", 0x221a), +("radicalvertex", 0x221a), ("radoverssquare", 0x33ae), ("radoverssquaredsquare", 0x33af), ("radsquare", 0x33ad), @@ -4170,6 +4181,7 @@ pub fn name_to_unicode(name: &str) -> Option { ("tildewide", 0x0303), ("tildewider", 0x0303), ("tildewiderr", 0x0303), +("tildewidest", 0x02dc), ("timescircle", 0x2297), ("tipehahebrew", 0x0596), ("tipehalefthebrew", 0x0596), @@ -4214,10 +4226,8 @@ pub fn name_to_unicode(name: &str) -> Option { ("triangledownsld", 0x25bc), ("triangleinv", 0x25bd), ("triangleleft", 0x25b9), -("triangleleft", 0x25c1), ("triangleleftequal", 0x22b4), ("triangleleftsld", 0x25c0), -("triangleright", 0x25b7), ("triangleright", 0x25c3), ("trianglerightequal", 0x22b5), ("trianglerightsld", 0x25b6), @@ -4263,7 +4273,6 @@ pub fn name_to_unicode(name: &str) -> Option { ("twelveparen", 0x247f), ("twelveperiod", 0x2493), ("twelveroman", 0x217b), -("twelveudash", 0xd80c), ("twelveudash", 0xf6de), ("twentycircle", 0x2473), ("twentyhangzhou", 0x5344), @@ -4422,6 +4431,8 @@ pub fn name_to_unicode(name: &str) -> Option { ("verticallinelowmod", 0x02cc), ("verticallinemod", 0x02c8), ("vewarmenian", 0x057e), +("vextenddouble", 0x2225), +("vextendsingle", 0x2223), ("vhook", 0x028b), ("vikatakana", 0x30f8), ("viramabengali", 0x09cd),