Skip to content

Commit

Permalink
Expand the glyph name list and remove duplicates
Browse files Browse the repository at this point in the history
Fixes #88
  • Loading branch information
jrmuizel committed May 4, 2024
1 parent d91dd43 commit 653308d
Show file tree
Hide file tree
Showing 2 changed files with 66 additions and 32 deletions.
43 changes: 33 additions & 10 deletions src/glyphlist-export.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,41 @@
f = open("glyphlist-extended.txt")
lines = f.readlines()
import re
glyphlist = []
for l in lines:
if l[0] == '#' or l[0] == '\n':
continue
name, code = re.split('[; ]+', l)[0:2]
glyphlist.append((name,int(code,16)))
glyphs_seen = {}
def read_glyphs(name):
    """Read one glyph-list file and append its new entries to ``glyphlist``.

    ``name`` is the path of a text file whose data lines hold a glyph name
    followed by one or more hexadecimal code points, separated by any mix
    of ``;``, space and ``,``.  Lines starting with ``#`` and empty lines
    are skipped.

    Side effects: appends ``(glyph_name, value)`` tuples to the module-level
    ``glyphlist`` and records each accepted name in the module-level
    ``glyphs_seen`` so that a name defined by an earlier file (or an earlier
    line) is never overridden by a later one.
    """
    import re
    # Use a context manager so the file handle is closed even if a line
    # fails to parse (the handle was previously left open).
    with open(name) as f:
        for line in f:
            if line[0] == '#' or line[0] == '\n':
                continue
            fields = re.split('[; ,]+', line)
            # Kept distinct from the `name` parameter (the file path) to
            # avoid shadowing it.
            glyph_name = fields[0]
            val = int(fields[1], 16)
            # When the first code point does not fit in 16 bits, fall back
            # to the last field on the line.
            if val > 0xffff:
                val = int(fields[-1], 16)
            # Only "Fsmall" may map to 0xf766; other names with that value
            # are dropped.  NOTE(review): presumably these are bogus aliases
            # in the upstream lists -- confirm.
            if val == 0xf766 and glyph_name != "Fsmall":
                continue
            # First definition wins; later duplicates are ignored.
            if glyph_name in glyphs_seen:
                continue
            glyphs_seen[glyph_name] = True
            glyphlist.append((glyph_name, val))
read_glyphs("glyphlist-extended.txt")
read_glyphs("texglyphlist.txt")
read_glyphs("additional.txt")
# The input files disagree on the value of some glyph names,
# e.g. tildewide=0x02dc in one file vs. tildewide=0x0303 in another.
# For now the first definition read is kept and later ones are ignored.
glyphlist.append(('mapsto', 0x21A6))
glyphlist = list(set(glyphlist))
glyphlist.sort()
print "/* Autogenerated from https://github.com/michal-h21/htfgen/commits/master/glyphlist-extended.txt */"
print "/* Autogenerated from:"
print " https://github.com/michal-h21/htfgen/commits/master/glyphlist-extended.txt"
print " https://github.com/2ion/lcdf-typetools/blob/master/texglyphlist.txt"
print " https://github.com/apache/pdfbox/blob/trunk/pdfbox/src/main/resources/org/apache/pdfbox/resources/glyphlist/additional.txt"
print " */"
print "pub fn name_to_unicode(name: &str) -> Option<u16> {"
print " let names = ["
print ",\n".join('(\"%s\", 0x%04x)' % (g[0], g[1]) for g in glyphlist)
print " ];"
print " let result = names.binary_search_by_key(&name, |&(name,code)| &name);"
print " let result = names.binary_search_by_key(&name, |&(name,_code)| &name);"
print " result.ok().map(|indx| names[indx].1)"
print "}"
55 changes: 33 additions & 22 deletions src/glyphnames.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
/* Autogenerated from https://github.com/michal-h21/htfgen/commits/master/glyphlist-extended.txt
and https://github.com/2ion/lcdf-typetools/blob/master/texglyphlist.txt */
/* Autogenerated from:
https://github.com/michal-h21/htfgen/commits/master/glyphlist-extended.txt
https://github.com/2ion/lcdf-typetools/blob/master/texglyphlist.txt
https://github.com/apache/pdfbox/blob/trunk/pdfbox/src/main/resources/org/apache/pdfbox/resources/glyphlist/additional.txt
*/
pub fn name_to_unicode(name: &str) -> Option<u16> {
let names = [
("A", 0x0041),
Expand Down Expand Up @@ -228,10 +231,8 @@ pub fn name_to_unicode(name: &str) -> Option<u16> {
("Gdot", 0x0120),
("Gdotaccent", 0x0120),
("Gecyrillic", 0x0413),
("Germandbls", 0x0053),
("Germandbls", 0x1e9e),
("Germandblssmall", 0xd803),
("Germandblssmall", 0xf773),
("Ghadarmenian", 0x0542),
("Ghemiddlehookcyrillic", 0x0494),
("Ghestrokecyrillic", 0x0492),
Expand Down Expand Up @@ -561,7 +562,6 @@ pub fn name_to_unicode(name: &str) -> Option<u16> {
("SF540000", 0x256a),
("SS", 0x0053),
("SSsmall", 0xd803),
("SSsmall", 0xf773),
("Sacute", 0x015a),
("Sacutedotaccent", 0x1e64),
("Sampigreek", 0x03e0),
Expand Down Expand Up @@ -1108,7 +1108,15 @@ pub fn name_to_unicode(name: &str) -> Option<u16> {
("anbopomofo", 0x3122),
("angbopomofo", 0x3124),
("angbracketleft", 0x27e8),
("angbracketleftBig", 0x2329),
("angbracketleftBigg", 0x2329),
("angbracketleftbig", 0x2329),
("angbracketleftbigg", 0x2329),
("angbracketright", 0x27e9),
("angbracketrightBig", 0x232a),
("angbracketrightBigg", 0x232a),
("angbracketrightbig", 0x232a),
("angbracketrightbigg", 0x232a),
("angkhankhuthai", 0x0e5a),
("angle", 0x2220),
("anglebracketleft", 0x3008),
Expand Down Expand Up @@ -1175,6 +1183,8 @@ pub fn name_to_unicode(name: &str) -> Option<u16> {
("arrowheadleftmod", 0x02c2),
("arrowheadrightmod", 0x02c3),
("arrowheadupmod", 0x02c4),
("arrowhookleft", 0x21aa),
("arrowhookright", 0x21a9),
("arrowhorizex", 0xf8e7),
("arrowleft", 0x2190),
("arrowleftbothalf", 0x21bd),
Expand Down Expand Up @@ -1211,7 +1221,6 @@ pub fn name_to_unicode(name: &str) -> Option<u16> {
("arrowupleftofdown", 0x21c5),
("arrowupright", 0x2197),
("arrowupwhite", 0x21e7),
("arrowvertex", 0x2195),
("arrowvertex", 0xf8e6),
("ascendercompwordmark", 0xd80a),
("asciicircum", 0x005e),
Expand Down Expand Up @@ -1339,15 +1348,17 @@ pub fn name_to_unicode(name: &str) -> Option<u16> {
("bokatakana", 0x30dc),
("bparen", 0x249d),
("bqsquare", 0x33c3),
("braceex", 0x007c),
("braceex", 0xf8f4),
("bracehtipdownleft", 0xfe37),
("bracehtipdownright", 0xfe37),
("bracehtipupleft", 0xfe38),
("bracehtipupright", 0xfe38),
("braceleft", 0x007b),
("braceleftBig", 0x007b),
("braceleftBigg", 0x007b),
("braceleftbig", 0x007b),
("braceleftbigg", 0x007b),
("braceleftbt", 0xf8f3),
("braceleftmid", 0x007c),
("braceleftmid", 0xf8f2),
("braceleftmonospace", 0xff5b),
("braceleftsmall", 0xfe5b),
Expand All @@ -1359,7 +1370,6 @@ pub fn name_to_unicode(name: &str) -> Option<u16> {
("bracerightbig", 0x007d),
("bracerightbigg", 0x007d),
("bracerightbt", 0xf8fe),
("bracerightmid", 0x2016),
("bracerightmid", 0xf8fd),
("bracerightmonospace", 0xff5d),
("bracerightsmall", 0xfe5c),
Expand All @@ -1372,6 +1382,7 @@ pub fn name_to_unicode(name: &str) -> Option<u16> {
("bracketleftbigg", 0x005b),
("bracketleftbt", 0xf8f0),
("bracketleftex", 0xf8ef),
("bracketleftmath", 0x005b),
("bracketleftmonospace", 0xff3b),
("bracketlefttp", 0xf8ee),
("bracketright", 0x005d),
Expand All @@ -1381,6 +1392,7 @@ pub fn name_to_unicode(name: &str) -> Option<u16> {
("bracketrightbigg", 0x005d),
("bracketrightbt", 0xf8fb),
("bracketrightex", 0xf8fa),
("bracketrightmath", 0x005d),
("bracketrightmonospace", 0xff3d),
("bracketrighttp", 0xf8f9),
("breve", 0x02d8),
Expand Down Expand Up @@ -1571,6 +1583,7 @@ pub fn name_to_unicode(name: &str) -> Option<u16> {
("controlHT", 0x0009),
("controlLF", 0x000a),
("controlNAK", 0x0015),
("controlNULL", 0x0000),
("controlRS", 0x001e),
("controlSI", 0x000f),
("controlSO", 0x000e),
Expand Down Expand Up @@ -1727,7 +1740,6 @@ pub fn name_to_unicode(name: &str) -> Option<u16> {
("dhook", 0x0257),
("dialytikatonos", 0x0385),
("dialytikatonoscmb", 0x0344),
("diamond", 0x2662),
("diamond", 0x2666),
("diamondmath", 0x22c4),
("diamondsolid", 0x2666),
Expand Down Expand Up @@ -1771,7 +1783,6 @@ pub fn name_to_unicode(name: &str) -> Option<u16> {
("dotbelowcomb", 0x0323),
("dotkatakana", 0x30fb),
("dotlessi", 0x0131),
("dotlessj", 0x0237),
("dotlessj", 0xf6be),
("dotlessjstrokehook", 0x0284),
("dotmath", 0x22c5),
Expand Down Expand Up @@ -1908,6 +1919,7 @@ pub fn name_to_unicode(name: &str) -> Option<u16> {
("equal", 0x003d),
("equaldotleftright", 0x2252),
("equaldotrightleft", 0x2253),
("equalmath", 0x003d),
("equalmonospace", 0xff1d),
("equalorfollows", 0x22df),
("equalorgreater", 0x2a96),
Expand Down Expand Up @@ -2059,7 +2071,6 @@ pub fn name_to_unicode(name: &str) -> Option<u16> {
("followornoteqvlnt", 0x22e9),
("follows", 0x227b),
("followsequal", 0x227d),
("followsequal", 0x2ab0),
("followsorcurly", 0x227d),
("followsorequal", 0x227f),
("fongmanthai", 0x0e4f),
Expand Down Expand Up @@ -2279,6 +2290,7 @@ pub fn name_to_unicode(name: &str) -> Option<u16> {
("hatwide", 0x0302),
("hatwider", 0x0302),
("hatwiderr", 0x0302),
("hatwidest", 0x0302),
("hbar", 0x0127),
("hbopomofo", 0x310f),
("hbrevebelow", 0x1e2b),
Expand All @@ -2289,7 +2301,6 @@ pub fn name_to_unicode(name: &str) -> Option<u16> {
("hdotaccent", 0x1e23),
("hdotbelow", 0x1e25),
("he", 0x05d4),
("heart", 0x2661),
("heart", 0x2665),
("heartsuitblack", 0x2665),
("heartsuitwhite", 0x2661),
Expand Down Expand Up @@ -2496,7 +2507,6 @@ pub fn name_to_unicode(name: &str) -> Option<u16> {
("integraltp", 0x2320),
("intercal", 0x22ba),
("interrobang", 0x203d),
("interrobangdown", 0x2e18),
("interrobangdown", 0xd80b),
("intersection", 0x2229),
("intersectiondbl", 0x22d2),
Expand Down Expand Up @@ -2839,6 +2849,7 @@ pub fn name_to_unicode(name: &str) -> Option<u16> {
("male", 0x2642),
("maltesecross", 0x2720),
("mansyonsquare", 0x3347),
("mapsto", 0x21a6),
("maqafhebrew", 0x05be),
("mars", 0x2642),
("masoracirclehebrew", 0x05af),
Expand Down Expand Up @@ -3281,9 +3292,9 @@ pub fn name_to_unicode(name: &str) -> Option<u16> {
("parenleftbig", 0x0028),
("parenleftbigg", 0x0028),
("parenleftbt", 0xf8ed),
("parenleftex", 0x007c),
("parenleftex", 0xf8ec),
("parenleftinferior", 0x208d),
("parenleftmath", 0x0028),
("parenleftmonospace", 0xff08),
("parenleftsmall", 0xfe59),
("parenleftsuperior", 0x207d),
Expand All @@ -3296,9 +3307,9 @@ pub fn name_to_unicode(name: &str) -> Option<u16> {
("parenrightbig", 0x0029),
("parenrightbigg", 0x0029),
("parenrightbt", 0xf8f8),
("parenrightex", 0x007c),
("parenrightex", 0xf8f7),
("parenrightinferior", 0x208e),
("parenrightmath", 0x0029),
("parenrightmonospace", 0xff09),
("parenrightsmall", 0xfe5a),
("parenrightsuperior", 0x207e),
Expand Down Expand Up @@ -3360,8 +3371,6 @@ pub fn name_to_unicode(name: &str) -> Option<u16> {
("phagujarati", 0x0aab),
("phagurmukhi", 0x0a2b),
("phi", 0x03c6),
("phi", 0x03d5),
("phi1", 0x03c6),
("phi1", 0x03d5),
("phieuphacirclekorean", 0x327a),
("phieuphaparenkorean", 0x321a),
Expand Down Expand Up @@ -3398,6 +3407,7 @@ pub fn name_to_unicode(name: &str) -> Option<u16> {
("plus", 0x002b),
("plusbelowcmb", 0x031f),
("pluscircle", 0x2295),
("plusmath", 0x002b),
("plusminus", 0x00b1),
("plusmod", 0x02d6),
("plusmonospace", 0xff0b),
Expand All @@ -3420,7 +3430,6 @@ pub fn name_to_unicode(name: &str) -> Option<u16> {
("precedeornoteqvlnt", 0x22e8),
("precedes", 0x227a),
("precedesequal", 0x227c),
("precedesequal", 0x2aaf),
("precedesorcurly", 0x227c),
("precedesorequal", 0x227e),
("prescription", 0x211e),
Expand Down Expand Up @@ -3544,6 +3553,8 @@ pub fn name_to_unicode(name: &str) -> Option<u16> {
("radicalbigg", 0x221a),
("radicalbt", 0x221a),
("radicalex", 0xf8e5),
("radicaltp", 0x221a),
("radicalvertex", 0x221a),
("radoverssquare", 0x33ae),
("radoverssquaredsquare", 0x33af),
("radsquare", 0x33ad),
Expand Down Expand Up @@ -4170,6 +4181,7 @@ pub fn name_to_unicode(name: &str) -> Option<u16> {
("tildewide", 0x0303),
("tildewider", 0x0303),
("tildewiderr", 0x0303),
("tildewidest", 0x02dc),
("timescircle", 0x2297),
("tipehahebrew", 0x0596),
("tipehalefthebrew", 0x0596),
Expand Down Expand Up @@ -4214,10 +4226,8 @@ pub fn name_to_unicode(name: &str) -> Option<u16> {
("triangledownsld", 0x25bc),
("triangleinv", 0x25bd),
("triangleleft", 0x25b9),
("triangleleft", 0x25c1),
("triangleleftequal", 0x22b4),
("triangleleftsld", 0x25c0),
("triangleright", 0x25b7),
("triangleright", 0x25c3),
("trianglerightequal", 0x22b5),
("trianglerightsld", 0x25b6),
Expand Down Expand Up @@ -4263,7 +4273,6 @@ pub fn name_to_unicode(name: &str) -> Option<u16> {
("twelveparen", 0x247f),
("twelveperiod", 0x2493),
("twelveroman", 0x217b),
("twelveudash", 0xd80c),
("twelveudash", 0xf6de),
("twentycircle", 0x2473),
("twentyhangzhou", 0x5344),
Expand Down Expand Up @@ -4422,6 +4431,8 @@ pub fn name_to_unicode(name: &str) -> Option<u16> {
("verticallinelowmod", 0x02cc),
("verticallinemod", 0x02c8),
("vewarmenian", 0x057e),
("vextenddouble", 0x2225),
("vextendsingle", 0x2223),
("vhook", 0x028b),
("vikatakana", 0x30f8),
("viramabengali", 0x09cd),
Expand Down

0 comments on commit 653308d

Please sign in to comment.