Skip to content

Commit

Permalink
Implement OneByteIdentityH/V encoding cmap
Browse files Browse the repository at this point in the history
  • Loading branch information
eladkehat committed May 1, 2019
1 parent a1421ba commit a24b226
Show file tree
Hide file tree
Showing 7 changed files with 48 additions and 2 deletions.
13 changes: 13 additions & 0 deletions pdfminer/cmapdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,15 @@ def decode(self, code):
return ()


class IdentityCMapByte(IdentityCMap):
    """Identity CMap variant that decodes its input one byte at a time."""

    def decode(self, code):
        """Unpack ``code`` into a tuple of unsigned byte values.

        One integer is produced per byte of ``code``; an empty input
        yields an empty tuple.
        """
        length = len(code)
        if not length:
            return ()
        # 'B' is an unsigned char, so each byte becomes its own integer.
        return struct.unpack('>%dB' % length, code)


class UnicodeMap(CMapBase):
def __init__(self, **kwargs):
CMapBase.__init__(self, **kwargs)
Expand Down Expand Up @@ -224,6 +233,10 @@ def get_cmap(klass, name):
return IdentityCMap(WMode=0)
elif name == 'Identity-V':
return IdentityCMap(WMode=1)
elif name == 'OneByteIdentityH':
return IdentityCMapByte(WMode=0)
elif name == 'OneByteIdentityV':
return IdentityCMapByte(WMode=1)
try:
return klass._cmap_cache[name]
except KeyError:
Expand Down
6 changes: 5 additions & 1 deletion pdfminer/pdffont.py
Original file line number Diff line number Diff line change
Expand Up @@ -946,7 +946,11 @@ def __init__(self, rsrcmgr, spec, strict=settings.STRICT):
self.cidcoding = '%s-%s' % (resolve1(self.cidsysteminfo.get('Registry', b'unknown')).decode("latin1"),
resolve1(self.cidsysteminfo.get('Ordering', b'unknown')).decode("latin1"))
try:
name = literal_name(spec['Encoding'])
spec_encoding = spec['Encoding']
if hasattr(spec_encoding, 'name'):
name = literal_name(spec['Encoding'])
else:
name = literal_name(spec_encoding['CMapName'])
except KeyError:
if strict:
raise PDFFontError('Encoding is unspecified')
Expand Down
3 changes: 2 additions & 1 deletion samples/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@ NONFREE= \
nonfree/i1040nr \
nonfree/kampo \
nonfree/naacl06-shinyama \
nonfree/nlp2004slides
nonfree/nlp2004slides \
nonfree/one-byte-identity

TESTS=$(FREE) $(NONFREE)

Expand Down
8 changes: 8 additions & 0 deletions samples/nonfree/one-byte-identity.html.ref
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
<html><head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
</head><body>
<span style="position:absolute; border: gray 1px solid; left:0px; top:50px; width:595px; height:841px;"></span>
<div style="position:absolute; top:50px;"><a name="1">Page 1</a></div>
<div style="position:absolute; border: textbox 1px solid; writing-mode:lr-tb; left:30px; top:161px; width:108px; height:76px;"><span style="font-family: VGOQEA+SegoeUI; font-size:76px">abc
<br></span></div><div style="position:absolute; top:0px;">Page: <a href="#1">1</a></div>
</body></html>
Binary file added samples/nonfree/one-byte-identity.pdf
Binary file not shown.
3 changes: 3 additions & 0 deletions samples/nonfree/one-byte-identity.txt.ref
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
abc


17 changes: 17 additions & 0 deletions samples/nonfree/one-byte-identity.xml.ref
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
<?xml version="1.0" encoding="utf-8" ?>
<pages>
<page id="1" bbox="0.000,0.000,595.280,841.890" rotate="0">
<textbox id="0" bbox="30.000,653.550,138.990,730.270">
<textline bbox="30.000,653.550,138.990,730.270">
<text font="VGOQEA+SegoeUI" bbox="30.000,653.550,65.630,730.270" colourspace="CalGray" ncolour="None" size="76.720">a</text>
<text font="VGOQEA+SegoeUI" bbox="65.560,653.550,106.720,730.270" colourspace="CalGray" ncolour="None" size="76.720">b</text>
<text font="VGOQEA+SegoeUI" bbox="106.650,653.550,138.990,730.270" colourspace="CalGray" ncolour="None" size="76.720">c</text>
<text>
</text>
</textline>
</textbox>
<layout>
<textbox id="0" bbox="30.000,653.550,138.990,730.270" />
</layout>
</page>
</pages>

0 comments on commit a24b226

Please sign in to comment.