From 4a08159a95a924ad01e23c1d6e27ba30486b8e3f Mon Sep 17 00:00:00 2001 From: pantuts Date: Thu, 12 Dec 2019 09:13:01 +0800 Subject: [PATCH] fix PSTypeError and encoding cmap PR: https://github.com/euske/pdfminer/pull/179 --- .gitignore | 1 + pdfminer/cmapdb.py | 15 +++++++++++++++ pdfminer/pdffont.py | 6 +++++- 3 files changed, 21 insertions(+), 1 deletion(-) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..9f11b75 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +.idea/ diff --git a/pdfminer/cmapdb.py b/pdfminer/cmapdb.py index fa72f0d..79f9217 100644 --- a/pdfminer/cmapdb.py +++ b/pdfminer/cmapdb.py @@ -247,6 +247,10 @@ def get_cmap(klass, name): return IdentityCMap(WMode=0) elif name == 'Identity-V': return IdentityCMap(WMode=1) + elif name == 'OneByteIdentityH': + return IdentityCMapByte(WMode=0) + elif name == 'OneByteIdentityV': + return IdentityCMapByte(WMode=1) try: return klass._cmap_cache[name] except KeyError: @@ -560,6 +564,17 @@ def dump_unicodemap(self, fp): fp.write(marshal.dumps(data)) return + +class IdentityCMapByte(IdentityCMap): + + def decode(self, code): + n = len(code) + if n: + return struct.unpack('>%dB' % n, code) + else: + return () + + # convert_cmap def convert_cmap(outdir, regname, enc2codec, paths): converter = CMapConverter(enc2codec) diff --git a/pdfminer/pdffont.py b/pdfminer/pdffont.py index 9653649..0ca2c2f 100644 --- a/pdfminer/pdffont.py +++ b/pdfminer/pdffont.py @@ -643,7 +643,11 @@ def __init__(self, rsrcmgr, spec): ordering = bytes_value(self.cidsysteminfo.get('Ordering', b'unknown')) self.cidcoding = (registry + b'-' + ordering).decode('ascii') try: - name = literal_name(spec['Encoding']) + spec_encoding = spec['Encoding'] + if hasattr(spec_encoding, 'name'): + name = literal_name(spec['Encoding']) + else: + name = literal_name(spec_encoding['CMapName']) except KeyError: if STRICT: raise PDFFontError('Encoding is unspecified')