diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..339582dd --- /dev/null +++ b/.gitignore @@ -0,0 +1,18 @@ +*.class +*.pyc +*.pyo +.svn +_svn +.pythoscope +.ipynb_checkpoints +.settings +_update.bat +docs/_build +/Goulib.egg-info/ +/build/ +/dist/ +/pdfminer.six.egg-info/ +tests/*.xml +tests/*.txt +.idea/ +.tox/ diff --git a/.travis.yml b/.travis.yml index 4f3c2f0d..d4107e0d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,7 +2,12 @@ language: python python: - "2.6" - "2.7" + - "3.4" + - "3.5" + - "3.6" install: - - pip install pycrypto + - pip install six + - pip install pycryptodome + - pip install chardet script: - - make test + nosetests --nologcapture diff --git a/MANIFEST.in b/MANIFEST.in index 910eee5d..26ba2aa9 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,9 +1,11 @@ include Makefile include LICENSE include *.txt +include *.md include *.py graft cmaprsrc graft docs graft pdfminer graft samples graft tools +global-exclude *.pyc diff --git a/Makefile b/Makefile index 0ffd84f7..640624cd 100644 --- a/Makefile +++ b/Makefile @@ -3,7 +3,7 @@ PACKAGE=pdfminer -PYTHON=python2 +PYTHON=python GIT=git RM=rm -f CP=cp -f @@ -55,12 +55,5 @@ $(CMAPDST)/to-unicode-Adobe-Korea1.pickle.gz: $(CMAPDST) $(CMAPDST) Adobe-Korea1 $(CMAPSRC)/cid2code_Adobe_Korea1.txt test: cmap - $(PYTHON) -m doctest \ - pdfminer/arcfour.py \ - pdfminer/lzw.py \ - pdfminer/ascii85.py \ - pdfminer/runlength.py \ - pdfminer/rijndael.py - $(PYTHON) -m pdfminer.ccitt - $(PYTHON) -m pdfminer.psparser + nosetests cd samples && $(MAKE) test diff --git a/README.md b/README.md index c74b1bd4..a323316a 100644 --- a/README.md +++ b/README.md @@ -1,20 +1,21 @@ -PDFMiner -======== +PDFMiner.six +============ -[![Build Status](https://travis-ci.org/euske/pdfminer.svg?branch=master)](https://travis-ci.org/euske/pdfminer) +PDFMiner.six is a fork of PDFMiner using six for Python 2+3 compatibility + +[![Build 
Status](https://travis-ci.org/pdfminer/pdfminer.six.svg?branch=master)](https://travis-ci.org/pdfminer/pdfminer.six) [![PyPI version](https://img.shields.io/pypi/v/pdfminer.six.svg)](https://pypi.python.org/pypi/pdfminer.six/) PDFMiner is a tool for extracting information from PDF documents. -Unlike other PDF-related tools, it focuses entirely on getting +Unlike other PDF-related tools, it focuses entirely on getting and analyzing text data. PDFMiner allows one to obtain -the exact location of text in a page, as well as +the exact location of text in a page, as well as other information such as fonts or lines. It includes a PDF converter that can transform PDF files into other text formats (such as HTML). It has an extensible PDF parser that can be used for other purposes than text analysis. - * Webpage: https://euske.github.io/pdfminer/ - * Download (PyPI): https://pypi.python.org/pypi/pdfminer/ - * Demo WebApp: http://pdf2html.tabesugi.net:8080/ + * Webpage: https://github.com/pdfminer/ + * Download (PyPI): https://pypi.python.org/pypi/pdfminer.six/ Features @@ -34,42 +35,16 @@ Features How to Install -------------- - * Install Python 2.6 or newer. (**For Python 3 support have a look at [pdfminer.six](https://github.com/goulu/pdfminer)**). - * Download the source code. - * Unpack it. - * Run `setup.py`: + * Install Python 2.7 or newer. (Python 3.x is supported in pdfminer.six) + * Install - $ python setup.py install + $ pip install pdfminer.six - * Do the following test: + * Run the following test: $ pdf2txt.py samples/simple1.pdf -For CJK Languages ------------------ - -In order to process CJK languages, do the following before -running setup.py install: - - $ make cmap - python tools/conv_cmap.py pdfminer/cmap Adobe-CNS1 cmaprsrc/cid2code_Adobe_CNS1.txt - reading 'cmaprsrc/cid2code_Adobe_CNS1.txt'... - writing 'CNS1_H.py'... - ... 
- $ python setup.py install - -On Windows machines which don't have `make` command, -paste the following commands on a command line prompt: - - mkdir pdfminer\cmap - python tools\conv_cmap.py -c B5=cp950 -c UniCNS-UTF8=utf-8 pdfminer\cmap Adobe-CNS1 cmaprsrc\cid2code_Adobe_CNS1.txt - python tools\conv_cmap.py -c GBK-EUC=cp936 -c UniGB-UTF8=utf-8 pdfminer\cmap Adobe-GB1 cmaprsrc\cid2code_Adobe_GB1.txt - python tools\conv_cmap.py -c RKSJ=cp932 -c EUC=euc-jp -c UniJIS-UTF8=utf-8 pdfminer\cmap Adobe-Japan1 cmaprsrc\cid2code_Adobe_Japan1.txt - python tools\conv_cmap.py -c KSC-EUC=euc-kr -c KSC-Johab=johab -c KSCms-UHC=cp949 -c UniKS-UTF8=utf-8 pdfminer\cmap Adobe-Korea1 cmaprsrc\cid2code_Adobe_Korea1.txt - python setup.py install - - Command Line Tools ------------------ @@ -91,45 +66,18 @@ You cannot extract any text from a PDF document which does not have extraction p **dumppdf.py** -dumppdf.py dumps the internal contents of a PDF file in pseudo-XML format. +dumppdf.py dumps the internal contents of a PDF file in pseudo-XML format. This program is primarily for debugging purposes, but it's also possible to extract some meaningful contents (e.g. images). (For details, refer to the html document.) -API Changes ------------ - -As of November 2013, there were a few changes made to the PDFMiner API -prior to October 2013. This is the result of code restructuring. Here -is a list of the changes: - - * PDFDocument class is moved to pdfdocument.py. - * PDFDocument class now takes a PDFParser object as an argument. - PDFDocument.set_parser() and PDFParser.set_document() is removed. - * PDFPage class is moved to pdfpage.py - * process_pdf function is implemented as a class method PDFPage.get_pages. - - TODO ---- - * Replace STRICT variable with something better. - * Use logging module instead of sys.stderr. - * Proper test cases. * PEP-8 and PEP-257 conformance. * Better documentation. - * Crypt stream filter support. 
- - -Related Projects ----------------- - - * pyPdf - * xpdf - * pdfbox - * mupdf Terms and Conditions @@ -137,7 +85,7 @@ Terms and Conditions (This is so-called MIT/X License) -Copyright (c) 2004-2016 Yusuke Shinyama +Copyright (c) 2004-2014 Yusuke Shinyama Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation diff --git a/docs/index.html b/docs/index.html index 1e13cf8f..8037bf8f 100644 --- a/docs/index.html +++ b/docs/index.html @@ -9,7 +9,7 @@
-Last Modified: Mon Sep 26 09:04:15 UTC 2016 +Last Modified: Wed Jun 25 10:27:52 UTC 2014
@@ -82,14 +82,14 @@

Features

Download

Source distribution:
- -http://pypi.python.org/pypi/pdfminer/ + +http://pypi.python.org/pypi/pdfminer_six/

github:
- -https://github.com/euske/pdfminer/ + +https://github.com/goulu/pdfminer/

Where to Ask

@@ -100,11 +100,9 @@

Where to Ask

http://groups.google.com/group/pdfminer-users/ -

How to Install

  1. Install Python 2.6 or newer. - (Python 3 is not supported.)
  2. Download the PDFMiner source.
  3. Unpack it.
  4. Run setup.py to install:
    @@ -268,7 +266,6 @@

    Options

    Specifies how much a horizontal and vertical position of a text matters when determining a text order. The value should be within the range of -1.0 (only horizontal position matters) to +1.0 (only vertical position matters). -When this value is out of the range (e.g. +2), a simpler ordering rule is used. The default value is 0.5.

    -C @@ -373,82 +370,10 @@

    Options

    Increases the debug level. -

    Changes

    +

    Changes:

      -
    • 2014/03/28: Further bugfixes. -
    • 2014/03/24: Bugfixes and improvements for fauly PDFs.
      -API changes: -
        -
      • PDFDocument.initialize() method is removed and no longer needed. - A password is given as an argument of a PDFDocument constructor. -
      -
    • 2013/11/13: Bugfixes and minor improvements.
      -As of November 2013, there were a few changes made to the PDFMiner API -prior to October 2013. This is the result of code restructuring. Here -is a list of the changes: -
        -
      • PDFDocument class is moved to pdfdocument.py. -
      • PDFDocument class now takes a PDFParser object as an argument. -
      • PDFDocument.set_parser() and PDFParser.set_document() is removed. -
      • PDFPage class is moved to pdfpage.py. -
      • process_pdf function is implemented as PDFPage.get_pages. -
      -
    • 2013/10/22: Sudden resurge of interests. API changes. -Incorporated a lot of patches and robust handling of broken PDFs. -
    • 2011/05/15: Speed improvements for layout analysis. -
    • 2011/05/15: API changes. LTText.get_text() is added. -
    • 2011/04/20: API changes. LTPolygon class was renamed as LTCurve. -
    • 2011/04/20: LTLine now represents horizontal/vertical lines only. Thanks to Koji Nakagawa. -
    • 2011/03/07: Documentation improvements by Jakub Wilk. Memory usage patch by Jonathan Hunt. -
    • 2011/02/27: Bugfixes and layout analysis improvements. Thanks to fujimoto.report. -
    • 2010/12/26: A couple of bugfixes and minor improvements. Thanks to Kevin Brubeck Unhammer and Daniel Gerber. -
    • 2010/10/17: A couple of bugfixes and minor improvements. Thanks to standardabweichung and Alastair Irving. -
    • 2010/09/07: A minor bugfix. Thanks to Alexander Garden. -
    • 2010/08/29: A couple of bugfixes. Thanks to Sahan Malagi, pk, and Humberto Pereira. -
    • 2010/07/06: Minor bugfixes. Thanks to Federico Brega. -
    • 2010/06/13: Bugfixes and improvements on CMap data compression. Thanks to Jakub Wilk. -
    • 2010/04/24: Bugfixes and improvements on TOC extraction. Thanks to Jose Maria. -
    • 2010/03/26: Bugfixes. Thanks to Brian Berry and Lubos Pintes. -
    • 2010/03/22: Improved layout analysis. Added regression tests. -
    • 2010/03/12: A couple of bugfixes. Thanks to Sean Manefield. -
    • 2010/02/27: Changed the way of internal layout handling. (LTTextItem -> LTChar) -
    • 2010/02/15: Several bugfixes. Thanks to Sean. -
    • 2010/02/13: Bugfix and enhancement. Thanks to André Auzi. -
    • 2010/02/07: Several bugfixes. Thanks to Hiroshi Manabe. -
    • 2010/01/31: JPEG image extraction supported. Page rotation bug fixed. -
    • 2010/01/04: Python 2.6 warning removal. More doctest conversion. -
    • 2010/01/01: CMap bug fix. Thanks to Winfried Plappert. -
    • 2009/12/24: RunLengthDecode filter added. Thanks to Troy Bollinger. -
    • 2009/12/20: Experimental polygon shape extraction added. Thanks to Yusuf Dewaswala for reporting. -
    • 2009/12/19: CMap resources are now the part of the package. Thanks to Adobe for open-sourcing them. -
    • 2009/11/29: Password encryption bug fixed. Thanks to Yannick Gingras. -
    • 2009/10/31: SGML output format is changed and renamed as XML. -
    • 2009/10/24: Charspace bug fixed. Adjusted for 4-space indentation. -
    • 2009/10/04: Another matrix operation bug fixed. Thanks to Vitaly Sedelnik. -
    • 2009/09/12: Fixed rectangle handling. Able to extract image boundaries. -
    • 2009/08/30: Fixed page rotation handling. -
    • 2009/08/26: Fixed zlib decoding bug. Thanks to Shon Urbas. -
    • 2009/08/24: Fixed a bug in character placing. Thanks to Pawan Jain. -
    • 2009/07/21: Improvement in layout analysis. -
    • 2009/07/11: Improvement in layout analysis. Thanks to Lubos Pintes. -
    • 2009/05/17: Bugfixes, massive code restructuring, and simple graphic element support added. setup.py is supported. -
    • 2009/03/30: Text output mode added. -
    • 2009/03/25: Encoding problems fixed. Word splitting option added. -
    • 2009/02/28: Robust handling of corrupted PDFs. Thanks to Troy Bollinger. -
    • 2009/02/01: Various bugfixes. Thanks to Hiroshi Manabe. -
    • 2009/01/17: Handling a trailer correctly that contains both /XrefStm and /Prev entries. -
    • 2009/01/10: Handling Type3 font metrics correctly. -
    • 2008/12/28: Better handling of word spacing. Thanks to Christian Nentwich. -
    • 2008/09/06: A sample pdf2html webapp added. -
    • 2008/08/30: ASCII85 encoding filter support. -
    • 2008/07/27: Tagged contents extraction support. -
    • 2008/07/10: Outline (TOC) extraction support. -
    • 2008/06/29: HTML output added. Reorganized the directory structure. -
    • 2008/04/29: Bugfix for Win32. Thanks to Chris Clark. -
    • 2008/04/27: Basic encryption and LZW decoding support added. -
    • 2008/01/07: Several bugfixes. Thanks to Nick Fabry for his vast contribution. -
    • 2007/12/31: Initial release. -
    • 2004/12/24: Start writing the code out of boredom... +
    • 2014/09/15: pushed to PyPI
    • +
    • 2014/09/10: pdfminer_six was forked from pdfminer, since Yusuke didn't want to merge and pdfminer3k is outdated

    TODO

    diff --git a/pdfminer/__init__.py b/pdfminer/__init__.py index a7bc049f..82a41fdd 100644 --- a/pdfminer/__init__.py +++ b/pdfminer/__init__.py @@ -1,5 +1,16 @@ -#!/usr/bin/env python -__version__ = '20140328' +# -*- coding: utf-8 -*- +""" +Fork of PDFMiner using six for Python 2+3 compatibility + +PDFMiner is a tool for extracting information from PDF documents. +Unlike other PDF-related tools, it focuses entirely on getting and analyzing +text data. PDFMiner allows to obtain the exact location of texts in a page, +as well as other information such as fonts or lines. +It includes a PDF converter that can transform PDF files into other text +formats (such as HTML). It has an extensible PDF parser that can be used for +other purposes instead of text analysis. +""" +__version__ = '20170720' if __name__ == '__main__': - print (__version__) + print(__version__) diff --git a/pdfminer/arcfour.py b/pdfminer/arcfour.py index 523d991a..5c0e64c9 100644 --- a/pdfminer/arcfour.py +++ b/pdfminer/arcfour.py @@ -1,31 +1,22 @@ -#!/usr/bin/env python -""" Python implementation of Arcfour encryption algorithm. +""" Python implementation of Arcfour encryption algorithm. +See https://en.wikipedia.org/wiki/RC4 This code is in the public domain. 
""" - +import six # Python 2+3 compatibility ## Arcfour ## class Arcfour(object): - """ - >>> Arcfour(b'Key').process(b'Plaintext').encode('hex') - 'bbf316e8d940af0ad3' - >>> Arcfour(b'Wiki').process(b'pedia').encode('hex') - '1021bf0420' - >>> Arcfour(b'Secret').process(b'Attack at dawn').encode('hex') - '45a01f645fc35b383552544b9bf5' - """ - def __init__(self, key): - s = range(256) + s = [i for i in range(256)] #because Py3 range is not indexable j = 0 klen = len(key) - for i in xrange(256): - j = (j + s[i] + ord(key[i % klen])) % 256 + for i in range(256): + j = (j + s[i] + six.indexbytes(key,i % klen)) % 256 (s[i], s[j]) = (s[j], s[i]) self.s = s (self.i, self.j) = (0, 0) @@ -35,20 +26,15 @@ def process(self, data): (i, j) = (self.i, self.j) s = self.s r = b'' - for c in data: + for c in six.iterbytes(data): i = (i+1) % 256 j = (j+s[i]) % 256 (s[i], s[j]) = (s[j], s[i]) k = s[(s[i]+s[j]) % 256] - r += chr(ord(c) ^ k) + r += six.int2byte(c ^ k) (self.i, self.j) = (i, j) return r encrypt = decrypt = process new = Arcfour - -# test -if __name__ == '__main__': - import doctest - doctest.testmod() diff --git a/pdfminer/ascii85.py b/pdfminer/ascii85.py index 067fccd5..a9f501da 100644 --- a/pdfminer/ascii85.py +++ b/pdfminer/ascii85.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python + """ Python implementation of ASCII85/ASCIIHex decoder (Adobe version). @@ -9,6 +9,8 @@ import re import struct +import six #Python 2+3 compatibility + # ascii85decode(data) def ascii85decode(data): @@ -21,17 +23,11 @@ def ascii85decode(data): The Adobe's ASCII85 implementation is slightly different from its original in handling the last characters. - The sample string is taken from: - http://en.wikipedia.org/w/index.php?title=Ascii85 - - >>> ascii85decode(b'9jqo^BlbD-BleB1DJ+*+F(f,q') - 'Man is distinguished' - >>> ascii85decode(b'E,9)oF*2M7/c~>') - 'pleasure.' """ n = b = 0 out = b'' - for c in data: + for i in six.iterbytes(data): + c=six.int2byte(i) if b'!' 
<= c and c <= b'u': n += 1 b = b*85+(ord(c)-33) @@ -39,7 +35,7 @@ def ascii85decode(data): out += struct.pack('>L', b) n = b = 0 elif c == b'z': - assert n == 0 + assert n == 0, str(n) out += b'\0\0\0\0' elif c == b'~': if n: @@ -50,8 +46,8 @@ def ascii85decode(data): return out # asciihexdecode(data) -hex_re = re.compile(r'([a-f\d]{2})', re.IGNORECASE) -trail_re = re.compile(r'^(?:[a-f\d]{2}|\s)*([a-f\d])[\s>]*$', re.IGNORECASE) +hex_re = re.compile(b'([a-f\d]{2})', re.IGNORECASE) +trail_re = re.compile(b'^(?:[a-f\d]{2}|\s)*([a-f\d])[\s>]*$', re.IGNORECASE) def asciihexdecode(data): @@ -63,22 +59,16 @@ def asciihexdecode(data): EOD. Any other characters will cause an error. If the filter encounters the EOD marker after reading an odd number of hexadecimal digits, it will behave as if a 0 followed the last digit. - - >>> asciihexdecode(b'61 62 2e6364 65') - 'ab.cde' - >>> asciihexdecode(b'61 62 2e6364 657>') - 'ab.cdep' - >>> asciihexdecode(b'7>') - 'p' """ - decode = (lambda hx: chr(int(hx, 16))) - out = map(decode, hex_re.findall(data)) - m = trail_re.search(data) - if m: - out.append(decode('%c0' % m.group(1))) - return b''.join(out) + def decode(x): + i=int(x,16) + return six.int2byte(i) + out=b'' + for x in hex_re.findall(data): + out+=decode(x) -if __name__ == '__main__': - import doctest - doctest.testmod() + m = trail_re.search(data) + if m: + out+=decode(m.group(1)+b'0') + return out diff --git a/pdfminer/ccitt.py b/pdfminer/ccitt.py index d0cc0934..efc34823 100644 --- a/pdfminer/ccitt.py +++ b/pdfminer/ccitt.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python + # CCITT Fax decoder # # Bugs: uncompressed mode untested. 
@@ -13,6 +13,17 @@ import sys import array +import six #Python 2+3 compatibility + +if six.PY3: + def get_bytes(data): + for byte in data: + yield byte +else: + def get_bytes(data): + for char in data: + yield ord(char) + ## BitParser ## @@ -26,7 +37,7 @@ def __init__(self): def add(klass, root, v, bits): p = root b = None - for i in xrange(len(bits)): + for i in range(len(bits)): if 0 < i: if p[b] is None: p[b] = [None, None] @@ -39,10 +50,9 @@ def add(klass, root, v, bits): return def feedbytes(self, data): - for c in data: - b = ord(c) + for byte in get_bytes(data): for m in (128, 64, 32, 16, 8, 4, 2, 1): - self._parse_bit(b & m) + self._parse_bit(byte & m) return def _parse_bit(self, x): @@ -327,11 +337,10 @@ def __init__(self, width, bytealign=False): return def feedbytes(self, data): - for c in data: - b = ord(c) + for byte in get_bytes(data): try: for m in (128, 64, 32, 16, 8, 4, 2, 1): - self._parse_bit(b & m) + self._parse_bit(byte & m) except self.ByteSkip: self._accept = self._parse_mode self._state = self.MODE @@ -462,10 +471,10 @@ def _do_vertical(self, dx): x0 = max(0, self._curpos) x1 = max(0, min(self.width, x1)) if x1 < x0: - for x in xrange(x1, x0): + for x in range(x1, x0): self._curline[x] = self._color elif x0 < x1: - for x in xrange(x0, x1): + for x in range(x0, x1): self._curline[x] = self._color self._curpos = x1 self._color = 1-self._color @@ -495,7 +504,7 @@ def _do_pass(self): self._refline[x1] == self._color): break x1 += 1 - for x in xrange(self._curpos, x1): + for x in range(self._curpos, x1): self._curline[x] = self._color self._curpos = x1 return @@ -505,12 +514,12 @@ def _do_horizontal(self, n1, n2): if self._curpos < 0: self._curpos = 0 x = self._curpos - for _ in xrange(n1): + for _ in range(n1): if len(self._curline) <= x: break self._curline[x] = self._color x += 1 - for _ in xrange(n2): + for _ in range(n2): if len(self._curline) <= x: break self._curline[x] = 1-self._color @@ -526,166 +535,9 @@ def _do_uncompressed(self, 
bits): self._flush_line() return -import unittest - - -## Test cases -## -class TestCCITTG4Parser(unittest.TestCase): - - def get_parser(self, bits): - parser = CCITTG4Parser(len(bits)) - parser._curline = [int(c) for c in bits] - parser._reset_line() - return parser - - def test_b1(self): - parser = self.get_parser('00000') - parser._do_vertical(0) - self.assertEqual(parser._curpos, 0) - return - - def test_b2(self): - parser = self.get_parser('10000') - parser._do_vertical(-1) - self.assertEqual(parser._curpos, 0) - return - def test_b3(self): - parser = self.get_parser('000111') - parser._do_pass() - self.assertEqual(parser._curpos, 3) - self.assertEqual(parser._get_bits(), '111') - return - def test_b4(self): - parser = self.get_parser('00000') - parser._do_vertical(+2) - self.assertEqual(parser._curpos, 2) - self.assertEqual(parser._get_bits(), '11') - return - - def test_b5(self): - parser = self.get_parser('11111111100') - parser._do_horizontal(0, 3) - self.assertEqual(parser._curpos, 3) - parser._do_vertical(1) - self.assertEqual(parser._curpos, 10) - self.assertEqual(parser._get_bits(), '0001111111') - return - - def test_e1(self): - parser = self.get_parser('10000') - parser._do_vertical(0) - self.assertEqual(parser._curpos, 1) - parser._do_vertical(0) - self.assertEqual(parser._curpos, 5) - self.assertEqual(parser._get_bits(), '10000') - return - - def test_e2(self): - parser = self.get_parser('10011') - parser._do_vertical(0) - self.assertEqual(parser._curpos, 1) - parser._do_vertical(2) - self.assertEqual(parser._curpos, 5) - self.assertEqual(parser._get_bits(), '10000') - return - - def test_e3(self): - parser = self.get_parser('011111') - parser._color = 0 - parser._do_vertical(0) - self.assertEqual(parser._color, 1) - self.assertEqual(parser._curpos, 1) - parser._do_vertical(-2) - self.assertEqual(parser._color, 0) - self.assertEqual(parser._curpos, 4) - parser._do_vertical(0) - self.assertEqual(parser._curpos, 6) - 
self.assertEqual(parser._get_bits(), '011100') - return - def test_e4(self): - parser = self.get_parser('10000') - parser._do_vertical(0) - self.assertEqual(parser._curpos, 1) - parser._do_vertical(-2) - self.assertEqual(parser._curpos, 3) - parser._do_vertical(0) - self.assertEqual(parser._curpos, 5) - self.assertEqual(parser._get_bits(), '10011') - return - - def test_e5(self): - parser = self.get_parser('011000') - parser._color = 0 - parser._do_vertical(0) - self.assertEqual(parser._curpos, 1) - parser._do_vertical(3) - self.assertEqual(parser._curpos, 6) - self.assertEqual(parser._get_bits(), '011111') - return - - def test_e6(self): - parser = self.get_parser('11001') - parser._do_pass() - self.assertEqual(parser._curpos, 4) - parser._do_vertical(0) - self.assertEqual(parser._curpos, 5) - self.assertEqual(parser._get_bits(), '11111') - return - - def test_e7(self): - parser = self.get_parser('0000000000') - parser._curpos = 2 - parser._color = 1 - parser._do_horizontal(2, 6) - self.assertEqual(parser._curpos, 10) - self.assertEqual(parser._get_bits(), '1111000000') - return - - def test_e8(self): - parser = self.get_parser('001100000') - parser._curpos = 1 - parser._color = 0 - parser._do_vertical(0) - self.assertEqual(parser._curpos, 2) - parser._do_horizontal(7, 0) - self.assertEqual(parser._curpos, 9) - self.assertEqual(parser._get_bits(), '101111111') - return - - def test_m1(self): - parser = self.get_parser('10101') - parser._do_pass() - self.assertEqual(parser._curpos, 2) - parser._do_pass() - self.assertEqual(parser._curpos, 4) - self.assertEqual(parser._get_bits(), '1111') - return - - def test_m2(self): - parser = self.get_parser('101011') - parser._do_vertical(-1) - parser._do_vertical(-1) - parser._do_vertical(1) - parser._do_horizontal(1, 1) - self.assertEqual(parser._get_bits(), '011101') - return - - def test_m3(self): - parser = self.get_parser('10111011') - parser._do_vertical(-1) - parser._do_pass() - parser._do_vertical(1) - 
parser._do_vertical(1) - self.assertEqual(parser._get_bits(), '00000001') - return - - -## CCITTFaxDecoder -## class CCITTFaxDecoder(CCITTG4Parser): def __init__(self, width, bytealign=False, reversed=False): diff --git a/pdfminer/cmap/78-EUC-H.pickle.gz b/pdfminer/cmap/78-EUC-H.pickle.gz new file mode 100644 index 00000000..c6f412cf Binary files /dev/null and b/pdfminer/cmap/78-EUC-H.pickle.gz differ diff --git a/pdfminer/cmap/78-EUC-V.pickle.gz b/pdfminer/cmap/78-EUC-V.pickle.gz new file mode 100644 index 00000000..06e39f08 Binary files /dev/null and b/pdfminer/cmap/78-EUC-V.pickle.gz differ diff --git a/pdfminer/cmap/78-H.pickle.gz b/pdfminer/cmap/78-H.pickle.gz new file mode 100644 index 00000000..807b1a34 Binary files /dev/null and b/pdfminer/cmap/78-H.pickle.gz differ diff --git a/pdfminer/cmap/78-RKSJ-H.pickle.gz b/pdfminer/cmap/78-RKSJ-H.pickle.gz new file mode 100644 index 00000000..66aee98b Binary files /dev/null and b/pdfminer/cmap/78-RKSJ-H.pickle.gz differ diff --git a/pdfminer/cmap/78-RKSJ-V.pickle.gz b/pdfminer/cmap/78-RKSJ-V.pickle.gz new file mode 100644 index 00000000..c5c73e5d Binary files /dev/null and b/pdfminer/cmap/78-RKSJ-V.pickle.gz differ diff --git a/pdfminer/cmap/78-V.pickle.gz b/pdfminer/cmap/78-V.pickle.gz new file mode 100644 index 00000000..37ae2cc5 Binary files /dev/null and b/pdfminer/cmap/78-V.pickle.gz differ diff --git a/pdfminer/cmap/78ms-RKSJ-H.pickle.gz b/pdfminer/cmap/78ms-RKSJ-H.pickle.gz new file mode 100644 index 00000000..73971fb7 Binary files /dev/null and b/pdfminer/cmap/78ms-RKSJ-H.pickle.gz differ diff --git a/pdfminer/cmap/78ms-RKSJ-V.pickle.gz b/pdfminer/cmap/78ms-RKSJ-V.pickle.gz new file mode 100644 index 00000000..cccb224c Binary files /dev/null and b/pdfminer/cmap/78ms-RKSJ-V.pickle.gz differ diff --git a/pdfminer/cmap/83pv-RKSJ-H.pickle.gz b/pdfminer/cmap/83pv-RKSJ-H.pickle.gz new file mode 100644 index 00000000..3ffa70e4 Binary files /dev/null and b/pdfminer/cmap/83pv-RKSJ-H.pickle.gz differ diff --git 
a/pdfminer/cmap/83pv-RKSJ-V.pickle.gz b/pdfminer/cmap/83pv-RKSJ-V.pickle.gz new file mode 100644 index 00000000..9d8a54dd Binary files /dev/null and b/pdfminer/cmap/83pv-RKSJ-V.pickle.gz differ diff --git a/pdfminer/cmap/90ms-RKSJ-H.pickle.gz b/pdfminer/cmap/90ms-RKSJ-H.pickle.gz new file mode 100644 index 00000000..c7e2b3fe Binary files /dev/null and b/pdfminer/cmap/90ms-RKSJ-H.pickle.gz differ diff --git a/pdfminer/cmap/90ms-RKSJ-V.pickle.gz b/pdfminer/cmap/90ms-RKSJ-V.pickle.gz new file mode 100644 index 00000000..0240b623 Binary files /dev/null and b/pdfminer/cmap/90ms-RKSJ-V.pickle.gz differ diff --git a/pdfminer/cmap/90msp-RKSJ-H.pickle.gz b/pdfminer/cmap/90msp-RKSJ-H.pickle.gz new file mode 100644 index 00000000..ebfd8164 Binary files /dev/null and b/pdfminer/cmap/90msp-RKSJ-H.pickle.gz differ diff --git a/pdfminer/cmap/90msp-RKSJ-V.pickle.gz b/pdfminer/cmap/90msp-RKSJ-V.pickle.gz new file mode 100644 index 00000000..157e97f9 Binary files /dev/null and b/pdfminer/cmap/90msp-RKSJ-V.pickle.gz differ diff --git a/pdfminer/cmap/90pv-RKSJ-H.pickle.gz b/pdfminer/cmap/90pv-RKSJ-H.pickle.gz new file mode 100644 index 00000000..be960c17 Binary files /dev/null and b/pdfminer/cmap/90pv-RKSJ-H.pickle.gz differ diff --git a/pdfminer/cmap/90pv-RKSJ-V.pickle.gz b/pdfminer/cmap/90pv-RKSJ-V.pickle.gz new file mode 100644 index 00000000..58843757 Binary files /dev/null and b/pdfminer/cmap/90pv-RKSJ-V.pickle.gz differ diff --git a/pdfminer/cmap/Add-H.pickle.gz b/pdfminer/cmap/Add-H.pickle.gz new file mode 100644 index 00000000..503fb8be Binary files /dev/null and b/pdfminer/cmap/Add-H.pickle.gz differ diff --git a/pdfminer/cmap/Add-RKSJ-H.pickle.gz b/pdfminer/cmap/Add-RKSJ-H.pickle.gz new file mode 100644 index 00000000..3275daff Binary files /dev/null and b/pdfminer/cmap/Add-RKSJ-H.pickle.gz differ diff --git a/pdfminer/cmap/Add-RKSJ-V.pickle.gz b/pdfminer/cmap/Add-RKSJ-V.pickle.gz new file mode 100644 index 00000000..e670c24e Binary files /dev/null and 
b/pdfminer/cmap/Add-RKSJ-V.pickle.gz differ diff --git a/pdfminer/cmap/Add-V.pickle.gz b/pdfminer/cmap/Add-V.pickle.gz new file mode 100644 index 00000000..a7c71c5a Binary files /dev/null and b/pdfminer/cmap/Add-V.pickle.gz differ diff --git a/pdfminer/cmap/B5-H.pickle.gz b/pdfminer/cmap/B5-H.pickle.gz new file mode 100644 index 00000000..8ff5f632 Binary files /dev/null and b/pdfminer/cmap/B5-H.pickle.gz differ diff --git a/pdfminer/cmap/B5-V.pickle.gz b/pdfminer/cmap/B5-V.pickle.gz new file mode 100644 index 00000000..6b2c863a Binary files /dev/null and b/pdfminer/cmap/B5-V.pickle.gz differ diff --git a/pdfminer/cmap/B5pc-H.pickle.gz b/pdfminer/cmap/B5pc-H.pickle.gz new file mode 100644 index 00000000..4353af9d Binary files /dev/null and b/pdfminer/cmap/B5pc-H.pickle.gz differ diff --git a/pdfminer/cmap/B5pc-V.pickle.gz b/pdfminer/cmap/B5pc-V.pickle.gz new file mode 100644 index 00000000..544f454e Binary files /dev/null and b/pdfminer/cmap/B5pc-V.pickle.gz differ diff --git a/pdfminer/cmap/CNS-EUC-H.pickle.gz b/pdfminer/cmap/CNS-EUC-H.pickle.gz new file mode 100644 index 00000000..19a7e6b6 Binary files /dev/null and b/pdfminer/cmap/CNS-EUC-H.pickle.gz differ diff --git a/pdfminer/cmap/CNS-EUC-V.pickle.gz b/pdfminer/cmap/CNS-EUC-V.pickle.gz new file mode 100644 index 00000000..7f50ee95 Binary files /dev/null and b/pdfminer/cmap/CNS-EUC-V.pickle.gz differ diff --git a/pdfminer/cmap/CNS1-H.pickle.gz b/pdfminer/cmap/CNS1-H.pickle.gz new file mode 100644 index 00000000..72d301a9 Binary files /dev/null and b/pdfminer/cmap/CNS1-H.pickle.gz differ diff --git a/pdfminer/cmap/CNS1-V.pickle.gz b/pdfminer/cmap/CNS1-V.pickle.gz new file mode 100644 index 00000000..7cdbe4a0 Binary files /dev/null and b/pdfminer/cmap/CNS1-V.pickle.gz differ diff --git a/pdfminer/cmap/CNS2-H.pickle.gz b/pdfminer/cmap/CNS2-H.pickle.gz new file mode 100644 index 00000000..e8ae6d3d Binary files /dev/null and b/pdfminer/cmap/CNS2-H.pickle.gz differ diff --git a/pdfminer/cmap/CNS2-V.pickle.gz 
b/pdfminer/cmap/CNS2-V.pickle.gz new file mode 100644 index 00000000..c9cc1d77 Binary files /dev/null and b/pdfminer/cmap/CNS2-V.pickle.gz differ diff --git a/pdfminer/cmap/ETHK-B5-H.pickle.gz b/pdfminer/cmap/ETHK-B5-H.pickle.gz new file mode 100644 index 00000000..6ac5a7c1 Binary files /dev/null and b/pdfminer/cmap/ETHK-B5-H.pickle.gz differ diff --git a/pdfminer/cmap/ETHK-B5-V.pickle.gz b/pdfminer/cmap/ETHK-B5-V.pickle.gz new file mode 100644 index 00000000..f9965c1f Binary files /dev/null and b/pdfminer/cmap/ETHK-B5-V.pickle.gz differ diff --git a/pdfminer/cmap/ETen-B5-H.pickle.gz b/pdfminer/cmap/ETen-B5-H.pickle.gz new file mode 100644 index 00000000..3a26c52b Binary files /dev/null and b/pdfminer/cmap/ETen-B5-H.pickle.gz differ diff --git a/pdfminer/cmap/ETen-B5-V.pickle.gz b/pdfminer/cmap/ETen-B5-V.pickle.gz new file mode 100644 index 00000000..3d645d22 Binary files /dev/null and b/pdfminer/cmap/ETen-B5-V.pickle.gz differ diff --git a/pdfminer/cmap/ETenms-B5-H.pickle.gz b/pdfminer/cmap/ETenms-B5-H.pickle.gz new file mode 100644 index 00000000..c2be623c Binary files /dev/null and b/pdfminer/cmap/ETenms-B5-H.pickle.gz differ diff --git a/pdfminer/cmap/ETenms-B5-V.pickle.gz b/pdfminer/cmap/ETenms-B5-V.pickle.gz new file mode 100644 index 00000000..860d1991 Binary files /dev/null and b/pdfminer/cmap/ETenms-B5-V.pickle.gz differ diff --git a/pdfminer/cmap/EUC-H.pickle.gz b/pdfminer/cmap/EUC-H.pickle.gz new file mode 100644 index 00000000..d62b96c5 Binary files /dev/null and b/pdfminer/cmap/EUC-H.pickle.gz differ diff --git a/pdfminer/cmap/EUC-V.pickle.gz b/pdfminer/cmap/EUC-V.pickle.gz new file mode 100644 index 00000000..2a0ad994 Binary files /dev/null and b/pdfminer/cmap/EUC-V.pickle.gz differ diff --git a/pdfminer/cmap/Ext-H.pickle.gz b/pdfminer/cmap/Ext-H.pickle.gz new file mode 100644 index 00000000..64a9d5d0 Binary files /dev/null and b/pdfminer/cmap/Ext-H.pickle.gz differ diff --git a/pdfminer/cmap/Ext-RKSJ-H.pickle.gz b/pdfminer/cmap/Ext-RKSJ-H.pickle.gz 
new file mode 100644 index 00000000..1946dc60 Binary files /dev/null and b/pdfminer/cmap/Ext-RKSJ-H.pickle.gz differ diff --git a/pdfminer/cmap/Ext-RKSJ-V.pickle.gz b/pdfminer/cmap/Ext-RKSJ-V.pickle.gz new file mode 100644 index 00000000..dfa4770e Binary files /dev/null and b/pdfminer/cmap/Ext-RKSJ-V.pickle.gz differ diff --git a/pdfminer/cmap/Ext-V.pickle.gz b/pdfminer/cmap/Ext-V.pickle.gz new file mode 100644 index 00000000..67e4a2f0 Binary files /dev/null and b/pdfminer/cmap/Ext-V.pickle.gz differ diff --git a/pdfminer/cmap/GB-EUC-H.pickle.gz b/pdfminer/cmap/GB-EUC-H.pickle.gz new file mode 100644 index 00000000..2cf692a8 Binary files /dev/null and b/pdfminer/cmap/GB-EUC-H.pickle.gz differ diff --git a/pdfminer/cmap/GB-EUC-V.pickle.gz b/pdfminer/cmap/GB-EUC-V.pickle.gz new file mode 100644 index 00000000..09c23184 Binary files /dev/null and b/pdfminer/cmap/GB-EUC-V.pickle.gz differ diff --git a/pdfminer/cmap/GB-H.pickle.gz b/pdfminer/cmap/GB-H.pickle.gz new file mode 100644 index 00000000..9bb7d65a Binary files /dev/null and b/pdfminer/cmap/GB-H.pickle.gz differ diff --git a/pdfminer/cmap/GB-V.pickle.gz b/pdfminer/cmap/GB-V.pickle.gz new file mode 100644 index 00000000..000914cb Binary files /dev/null and b/pdfminer/cmap/GB-V.pickle.gz differ diff --git a/pdfminer/cmap/GBK-EUC-H.pickle.gz b/pdfminer/cmap/GBK-EUC-H.pickle.gz new file mode 100644 index 00000000..c3cc563d Binary files /dev/null and b/pdfminer/cmap/GBK-EUC-H.pickle.gz differ diff --git a/pdfminer/cmap/GBK-EUC-V.pickle.gz b/pdfminer/cmap/GBK-EUC-V.pickle.gz new file mode 100644 index 00000000..eec4febc Binary files /dev/null and b/pdfminer/cmap/GBK-EUC-V.pickle.gz differ diff --git a/pdfminer/cmap/GBK2K-H.pickle.gz b/pdfminer/cmap/GBK2K-H.pickle.gz new file mode 100644 index 00000000..742063ba Binary files /dev/null and b/pdfminer/cmap/GBK2K-H.pickle.gz differ diff --git a/pdfminer/cmap/GBK2K-V.pickle.gz b/pdfminer/cmap/GBK2K-V.pickle.gz new file mode 100644 index 00000000..cf933882 Binary files 
/dev/null and b/pdfminer/cmap/GBK2K-V.pickle.gz differ diff --git a/pdfminer/cmap/GBKp-EUC-H.pickle.gz b/pdfminer/cmap/GBKp-EUC-H.pickle.gz new file mode 100644 index 00000000..f79dad86 Binary files /dev/null and b/pdfminer/cmap/GBKp-EUC-H.pickle.gz differ diff --git a/pdfminer/cmap/GBKp-EUC-V.pickle.gz b/pdfminer/cmap/GBKp-EUC-V.pickle.gz new file mode 100644 index 00000000..76d148ae Binary files /dev/null and b/pdfminer/cmap/GBKp-EUC-V.pickle.gz differ diff --git a/pdfminer/cmap/GBT-EUC-H.pickle.gz b/pdfminer/cmap/GBT-EUC-H.pickle.gz new file mode 100644 index 00000000..5a1c7036 Binary files /dev/null and b/pdfminer/cmap/GBT-EUC-H.pickle.gz differ diff --git a/pdfminer/cmap/GBT-EUC-V.pickle.gz b/pdfminer/cmap/GBT-EUC-V.pickle.gz new file mode 100644 index 00000000..32323415 Binary files /dev/null and b/pdfminer/cmap/GBT-EUC-V.pickle.gz differ diff --git a/pdfminer/cmap/GBT-H.pickle.gz b/pdfminer/cmap/GBT-H.pickle.gz new file mode 100644 index 00000000..50b9e1f6 Binary files /dev/null and b/pdfminer/cmap/GBT-H.pickle.gz differ diff --git a/pdfminer/cmap/GBT-V.pickle.gz b/pdfminer/cmap/GBT-V.pickle.gz new file mode 100644 index 00000000..c2df2156 Binary files /dev/null and b/pdfminer/cmap/GBT-V.pickle.gz differ diff --git a/pdfminer/cmap/GBTpc-EUC-H.pickle.gz b/pdfminer/cmap/GBTpc-EUC-H.pickle.gz new file mode 100644 index 00000000..71f38021 Binary files /dev/null and b/pdfminer/cmap/GBTpc-EUC-H.pickle.gz differ diff --git a/pdfminer/cmap/GBTpc-EUC-V.pickle.gz b/pdfminer/cmap/GBTpc-EUC-V.pickle.gz new file mode 100644 index 00000000..a801e738 Binary files /dev/null and b/pdfminer/cmap/GBTpc-EUC-V.pickle.gz differ diff --git a/pdfminer/cmap/GBpc-EUC-H.pickle.gz b/pdfminer/cmap/GBpc-EUC-H.pickle.gz new file mode 100644 index 00000000..0b940d16 Binary files /dev/null and b/pdfminer/cmap/GBpc-EUC-H.pickle.gz differ diff --git a/pdfminer/cmap/GBpc-EUC-V.pickle.gz b/pdfminer/cmap/GBpc-EUC-V.pickle.gz new file mode 100644 index 00000000..3fd4a32a Binary files /dev/null 
and b/pdfminer/cmap/GBpc-EUC-V.pickle.gz differ diff --git a/pdfminer/cmap/H.pickle.gz b/pdfminer/cmap/H.pickle.gz new file mode 100644 index 00000000..43fa9f2f Binary files /dev/null and b/pdfminer/cmap/H.pickle.gz differ diff --git a/pdfminer/cmap/HKdla-B5-H.pickle.gz b/pdfminer/cmap/HKdla-B5-H.pickle.gz new file mode 100644 index 00000000..10286104 Binary files /dev/null and b/pdfminer/cmap/HKdla-B5-H.pickle.gz differ diff --git a/pdfminer/cmap/HKdla-B5-V.pickle.gz b/pdfminer/cmap/HKdla-B5-V.pickle.gz new file mode 100644 index 00000000..d9556e39 Binary files /dev/null and b/pdfminer/cmap/HKdla-B5-V.pickle.gz differ diff --git a/pdfminer/cmap/HKdlb-B5-H.pickle.gz b/pdfminer/cmap/HKdlb-B5-H.pickle.gz new file mode 100644 index 00000000..0594a74e Binary files /dev/null and b/pdfminer/cmap/HKdlb-B5-H.pickle.gz differ diff --git a/pdfminer/cmap/HKdlb-B5-V.pickle.gz b/pdfminer/cmap/HKdlb-B5-V.pickle.gz new file mode 100644 index 00000000..d405c7b9 Binary files /dev/null and b/pdfminer/cmap/HKdlb-B5-V.pickle.gz differ diff --git a/pdfminer/cmap/HKgccs-B5-H.pickle.gz b/pdfminer/cmap/HKgccs-B5-H.pickle.gz new file mode 100644 index 00000000..90b0b683 Binary files /dev/null and b/pdfminer/cmap/HKgccs-B5-H.pickle.gz differ diff --git a/pdfminer/cmap/HKgccs-B5-V.pickle.gz b/pdfminer/cmap/HKgccs-B5-V.pickle.gz new file mode 100644 index 00000000..91f08138 Binary files /dev/null and b/pdfminer/cmap/HKgccs-B5-V.pickle.gz differ diff --git a/pdfminer/cmap/HKm314-B5-H.pickle.gz b/pdfminer/cmap/HKm314-B5-H.pickle.gz new file mode 100644 index 00000000..8c17b605 Binary files /dev/null and b/pdfminer/cmap/HKm314-B5-H.pickle.gz differ diff --git a/pdfminer/cmap/HKm314-B5-V.pickle.gz b/pdfminer/cmap/HKm314-B5-V.pickle.gz new file mode 100644 index 00000000..7dc58afc Binary files /dev/null and b/pdfminer/cmap/HKm314-B5-V.pickle.gz differ diff --git a/pdfminer/cmap/HKm471-B5-H.pickle.gz b/pdfminer/cmap/HKm471-B5-H.pickle.gz new file mode 100644 index 00000000..0f1c9e99 Binary files 
/dev/null and b/pdfminer/cmap/HKm471-B5-H.pickle.gz differ diff --git a/pdfminer/cmap/HKm471-B5-V.pickle.gz b/pdfminer/cmap/HKm471-B5-V.pickle.gz new file mode 100644 index 00000000..b64bb1de Binary files /dev/null and b/pdfminer/cmap/HKm471-B5-V.pickle.gz differ diff --git a/pdfminer/cmap/HKscs-B5-H.pickle.gz b/pdfminer/cmap/HKscs-B5-H.pickle.gz new file mode 100644 index 00000000..9c918ace Binary files /dev/null and b/pdfminer/cmap/HKscs-B5-H.pickle.gz differ diff --git a/pdfminer/cmap/HKscs-B5-V.pickle.gz b/pdfminer/cmap/HKscs-B5-V.pickle.gz new file mode 100644 index 00000000..85b6d026 Binary files /dev/null and b/pdfminer/cmap/HKscs-B5-V.pickle.gz differ diff --git a/pdfminer/cmap/Hankaku-H.pickle.gz b/pdfminer/cmap/Hankaku-H.pickle.gz new file mode 100644 index 00000000..cad83d32 Binary files /dev/null and b/pdfminer/cmap/Hankaku-H.pickle.gz differ diff --git a/pdfminer/cmap/Hankaku-V.pickle.gz b/pdfminer/cmap/Hankaku-V.pickle.gz new file mode 100644 index 00000000..ce3ea517 Binary files /dev/null and b/pdfminer/cmap/Hankaku-V.pickle.gz differ diff --git a/pdfminer/cmap/Hiragana-H.pickle.gz b/pdfminer/cmap/Hiragana-H.pickle.gz new file mode 100644 index 00000000..11388035 Binary files /dev/null and b/pdfminer/cmap/Hiragana-H.pickle.gz differ diff --git a/pdfminer/cmap/Hiragana-V.pickle.gz b/pdfminer/cmap/Hiragana-V.pickle.gz new file mode 100644 index 00000000..82b094be Binary files /dev/null and b/pdfminer/cmap/Hiragana-V.pickle.gz differ diff --git a/pdfminer/cmap/KSC-EUC-H.pickle.gz b/pdfminer/cmap/KSC-EUC-H.pickle.gz new file mode 100644 index 00000000..d2515e66 Binary files /dev/null and b/pdfminer/cmap/KSC-EUC-H.pickle.gz differ diff --git a/pdfminer/cmap/KSC-EUC-V.pickle.gz b/pdfminer/cmap/KSC-EUC-V.pickle.gz new file mode 100644 index 00000000..b4c0ebe3 Binary files /dev/null and b/pdfminer/cmap/KSC-EUC-V.pickle.gz differ diff --git a/pdfminer/cmap/KSC-H.pickle.gz b/pdfminer/cmap/KSC-H.pickle.gz new file mode 100644 index 00000000..c2feb14d Binary 
files /dev/null and b/pdfminer/cmap/KSC-H.pickle.gz differ diff --git a/pdfminer/cmap/KSC-Johab-H.pickle.gz b/pdfminer/cmap/KSC-Johab-H.pickle.gz new file mode 100644 index 00000000..5051f444 Binary files /dev/null and b/pdfminer/cmap/KSC-Johab-H.pickle.gz differ diff --git a/pdfminer/cmap/KSC-Johab-V.pickle.gz b/pdfminer/cmap/KSC-Johab-V.pickle.gz new file mode 100644 index 00000000..e1436d89 Binary files /dev/null and b/pdfminer/cmap/KSC-Johab-V.pickle.gz differ diff --git a/pdfminer/cmap/KSC-V.pickle.gz b/pdfminer/cmap/KSC-V.pickle.gz new file mode 100644 index 00000000..e6cf409c Binary files /dev/null and b/pdfminer/cmap/KSC-V.pickle.gz differ diff --git a/pdfminer/cmap/KSCms-UHC-H.pickle.gz b/pdfminer/cmap/KSCms-UHC-H.pickle.gz new file mode 100644 index 00000000..8dd53848 Binary files /dev/null and b/pdfminer/cmap/KSCms-UHC-H.pickle.gz differ diff --git a/pdfminer/cmap/KSCms-UHC-HW-H.pickle.gz b/pdfminer/cmap/KSCms-UHC-HW-H.pickle.gz new file mode 100644 index 00000000..9aa322c9 Binary files /dev/null and b/pdfminer/cmap/KSCms-UHC-HW-H.pickle.gz differ diff --git a/pdfminer/cmap/KSCms-UHC-HW-V.pickle.gz b/pdfminer/cmap/KSCms-UHC-HW-V.pickle.gz new file mode 100644 index 00000000..9768d18f Binary files /dev/null and b/pdfminer/cmap/KSCms-UHC-HW-V.pickle.gz differ diff --git a/pdfminer/cmap/KSCms-UHC-V.pickle.gz b/pdfminer/cmap/KSCms-UHC-V.pickle.gz new file mode 100644 index 00000000..450f983d Binary files /dev/null and b/pdfminer/cmap/KSCms-UHC-V.pickle.gz differ diff --git a/pdfminer/cmap/KSCpc-EUC-H.pickle.gz b/pdfminer/cmap/KSCpc-EUC-H.pickle.gz new file mode 100644 index 00000000..576315aa Binary files /dev/null and b/pdfminer/cmap/KSCpc-EUC-H.pickle.gz differ diff --git a/pdfminer/cmap/KSCpc-EUC-V.pickle.gz b/pdfminer/cmap/KSCpc-EUC-V.pickle.gz new file mode 100644 index 00000000..0a0f7b47 Binary files /dev/null and b/pdfminer/cmap/KSCpc-EUC-V.pickle.gz differ diff --git a/pdfminer/cmap/Katakana-H.pickle.gz b/pdfminer/cmap/Katakana-H.pickle.gz new file 
mode 100644 index 00000000..270f0faf Binary files /dev/null and b/pdfminer/cmap/Katakana-H.pickle.gz differ diff --git a/pdfminer/cmap/Katakana-V.pickle.gz b/pdfminer/cmap/Katakana-V.pickle.gz new file mode 100644 index 00000000..236ae3b2 Binary files /dev/null and b/pdfminer/cmap/Katakana-V.pickle.gz differ diff --git a/pdfminer/cmap/NWP-H.pickle.gz b/pdfminer/cmap/NWP-H.pickle.gz new file mode 100644 index 00000000..b8ec7cce Binary files /dev/null and b/pdfminer/cmap/NWP-H.pickle.gz differ diff --git a/pdfminer/cmap/NWP-V.pickle.gz b/pdfminer/cmap/NWP-V.pickle.gz new file mode 100644 index 00000000..a39a5bff Binary files /dev/null and b/pdfminer/cmap/NWP-V.pickle.gz differ diff --git a/pdfminer/cmap/README.txt b/pdfminer/cmap/README.txt new file mode 100644 index 00000000..c7a7d0df --- /dev/null +++ b/pdfminer/cmap/README.txt @@ -0,0 +1,79 @@ +README.txt for cmap + +This directory contains *.pickle.gz files converted from Adobe CMap resources. +CMaps are required to decode text data written in CJK (Chinese, Japanese, +Korean) languages. CMap resources are now available freely from the Adobe web site: +http://opensource.adobe.com/wiki/display/cmap/CMap+Resources + +The following files were extracted from the downloadable tarballs: + +cid2code_Adobe_CNS1.txt: + http://download.macromedia.com/pub/opensource/cmap/cmapresources_cns1-6.tar.z + +cid2code_Adobe_GB1.txt: + http://download.macromedia.com/pub/opensource/cmap/cmapresources_gb1-5.tar.z + +cid2code_Adobe_Japan1.txt: + http://download.macromedia.com/pub/opensource/cmap/cmapresources_japan1-6.tar.z + +cid2code_Adobe_Korea1.txt: + http://download.macromedia.com/pub/opensource/cmap/cmapresources_korean1-2.tar.z + + +These *.pickle.gz files can be generated by running the following commands in the +top directory: + + $ make cmap + python tools/conv_cmap.py pdfminer/cmap Adobe-CNS1 cmaprsrc/cid2code_Adobe_CNS1.txt + reading 'cmaprsrc/cid2code_Adobe_CNS1.txt'... + writing 'CNS1_H.py'... + ... 
+ +On Windows machines which don't have the `make` command, +paste the following commands at a command-line prompt: + + mkdir pdfminer\cmap + python tools\conv_cmap.py -c B5=cp950 -c UniCNS-UTF8=utf-8 pdfminer\cmap Adobe-CNS1 cmaprsrc\cid2code_Adobe_CNS1.txt + python tools\conv_cmap.py -c GBK-EUC=cp936 -c UniGB-UTF8=utf-8 pdfminer\cmap Adobe-GB1 cmaprsrc\cid2code_Adobe_GB1.txt + python tools\conv_cmap.py -c RKSJ=cp932 -c EUC=euc-jp -c UniJIS-UTF8=utf-8 pdfminer\cmap Adobe-Japan1 cmaprsrc\cid2code_Adobe_Japan1.txt + python tools\conv_cmap.py -c KSC-EUC=euc-kr -c KSC-Johab=johab -c KSCms-UHC=cp949 -c UniKS-UTF8=utf-8 pdfminer\cmap Adobe-Korea1 cmaprsrc\cid2code_Adobe_Korea1.txt + + +Here is the license information in the original files: + +%%Copyright: ----------------------------------------------------------- +%%Copyright: Copyright 1990-20xx Adobe Systems Incorporated. +%%Copyright: All rights reserved. +%%Copyright: +%%Copyright: Redistribution and use in source and binary forms, with or +%%Copyright: without modification, are permitted provided that the +%%Copyright: following conditions are met: +%%Copyright: +%%Copyright: Redistributions of source code must retain the above +%%Copyright: copyright notice, this list of conditions and the following +%%Copyright: disclaimer. +%%Copyright: +%%Copyright: Redistributions in binary form must reproduce the above +%%Copyright: copyright notice, this list of conditions and the following +%%Copyright: disclaimer in the documentation and/or other materials +%%Copyright: provided with the distribution. +%%Copyright: +%%Copyright: Neither the name of Adobe Systems Incorporated nor the names +%%Copyright: of its contributors may be used to endorse or promote +%%Copyright: products derived from this software without specific prior +%%Copyright: written permission. 
+%%Copyright: +%%Copyright: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND +%%Copyright: CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, +%%Copyright: INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +%%Copyright: MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +%%Copyright: DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR +%%Copyright: CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +%%Copyright: SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +%%Copyright: NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +%%Copyright: LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +%%Copyright: HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +%%Copyright: CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +%%Copyright: OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +%%Copyright: SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +%%Copyright: ----------------------------------------------------------- diff --git a/pdfminer/cmap/RKSJ-H.pickle.gz b/pdfminer/cmap/RKSJ-H.pickle.gz new file mode 100644 index 00000000..744e530f Binary files /dev/null and b/pdfminer/cmap/RKSJ-H.pickle.gz differ diff --git a/pdfminer/cmap/RKSJ-V.pickle.gz b/pdfminer/cmap/RKSJ-V.pickle.gz new file mode 100644 index 00000000..8e90a9f6 Binary files /dev/null and b/pdfminer/cmap/RKSJ-V.pickle.gz differ diff --git a/pdfminer/cmap/Roman-H.pickle.gz b/pdfminer/cmap/Roman-H.pickle.gz new file mode 100644 index 00000000..74d955b1 Binary files /dev/null and b/pdfminer/cmap/Roman-H.pickle.gz differ diff --git a/pdfminer/cmap/Roman-V.pickle.gz b/pdfminer/cmap/Roman-V.pickle.gz new file mode 100644 index 00000000..ba034bc3 Binary files /dev/null and b/pdfminer/cmap/Roman-V.pickle.gz differ diff --git a/pdfminer/cmap/UniCNS-UCS2-H.pickle.gz b/pdfminer/cmap/UniCNS-UCS2-H.pickle.gz new file mode 100644 index 00000000..6b31ee01 Binary files /dev/null and b/pdfminer/cmap/UniCNS-UCS2-H.pickle.gz 
differ diff --git a/pdfminer/cmap/UniCNS-UCS2-V.pickle.gz b/pdfminer/cmap/UniCNS-UCS2-V.pickle.gz new file mode 100644 index 00000000..304d94c1 Binary files /dev/null and b/pdfminer/cmap/UniCNS-UCS2-V.pickle.gz differ diff --git a/pdfminer/cmap/UniCNS-UTF16-H.pickle.gz b/pdfminer/cmap/UniCNS-UTF16-H.pickle.gz new file mode 100644 index 00000000..847f4dee Binary files /dev/null and b/pdfminer/cmap/UniCNS-UTF16-H.pickle.gz differ diff --git a/pdfminer/cmap/UniCNS-UTF16-V.pickle.gz b/pdfminer/cmap/UniCNS-UTF16-V.pickle.gz new file mode 100644 index 00000000..bea6ef08 Binary files /dev/null and b/pdfminer/cmap/UniCNS-UTF16-V.pickle.gz differ diff --git a/pdfminer/cmap/UniCNS-UTF32-H.pickle.gz b/pdfminer/cmap/UniCNS-UTF32-H.pickle.gz new file mode 100644 index 00000000..ff115698 Binary files /dev/null and b/pdfminer/cmap/UniCNS-UTF32-H.pickle.gz differ diff --git a/pdfminer/cmap/UniCNS-UTF32-V.pickle.gz b/pdfminer/cmap/UniCNS-UTF32-V.pickle.gz new file mode 100644 index 00000000..b3ce4360 Binary files /dev/null and b/pdfminer/cmap/UniCNS-UTF32-V.pickle.gz differ diff --git a/pdfminer/cmap/UniCNS-UTF8-H.pickle.gz b/pdfminer/cmap/UniCNS-UTF8-H.pickle.gz new file mode 100644 index 00000000..edab5166 Binary files /dev/null and b/pdfminer/cmap/UniCNS-UTF8-H.pickle.gz differ diff --git a/pdfminer/cmap/UniCNS-UTF8-V.pickle.gz b/pdfminer/cmap/UniCNS-UTF8-V.pickle.gz new file mode 100644 index 00000000..9e8a2e64 Binary files /dev/null and b/pdfminer/cmap/UniCNS-UTF8-V.pickle.gz differ diff --git a/pdfminer/cmap/UniGB-UCS2-H.pickle.gz b/pdfminer/cmap/UniGB-UCS2-H.pickle.gz new file mode 100644 index 00000000..640bf7ec Binary files /dev/null and b/pdfminer/cmap/UniGB-UCS2-H.pickle.gz differ diff --git a/pdfminer/cmap/UniGB-UCS2-V.pickle.gz b/pdfminer/cmap/UniGB-UCS2-V.pickle.gz new file mode 100644 index 00000000..0ed0331d Binary files /dev/null and b/pdfminer/cmap/UniGB-UCS2-V.pickle.gz differ diff --git a/pdfminer/cmap/UniGB-UTF16-H.pickle.gz 
b/pdfminer/cmap/UniGB-UTF16-H.pickle.gz new file mode 100644 index 00000000..f8f71f86 Binary files /dev/null and b/pdfminer/cmap/UniGB-UTF16-H.pickle.gz differ diff --git a/pdfminer/cmap/UniGB-UTF16-V.pickle.gz b/pdfminer/cmap/UniGB-UTF16-V.pickle.gz new file mode 100644 index 00000000..6828864c Binary files /dev/null and b/pdfminer/cmap/UniGB-UTF16-V.pickle.gz differ diff --git a/pdfminer/cmap/UniGB-UTF32-H.pickle.gz b/pdfminer/cmap/UniGB-UTF32-H.pickle.gz new file mode 100644 index 00000000..842d286b Binary files /dev/null and b/pdfminer/cmap/UniGB-UTF32-H.pickle.gz differ diff --git a/pdfminer/cmap/UniGB-UTF32-V.pickle.gz b/pdfminer/cmap/UniGB-UTF32-V.pickle.gz new file mode 100644 index 00000000..d6f410a9 Binary files /dev/null and b/pdfminer/cmap/UniGB-UTF32-V.pickle.gz differ diff --git a/pdfminer/cmap/UniGB-UTF8-H.pickle.gz b/pdfminer/cmap/UniGB-UTF8-H.pickle.gz new file mode 100644 index 00000000..19eac074 Binary files /dev/null and b/pdfminer/cmap/UniGB-UTF8-H.pickle.gz differ diff --git a/pdfminer/cmap/UniGB-UTF8-V.pickle.gz b/pdfminer/cmap/UniGB-UTF8-V.pickle.gz new file mode 100644 index 00000000..911b0957 Binary files /dev/null and b/pdfminer/cmap/UniGB-UTF8-V.pickle.gz differ diff --git a/pdfminer/cmap/UniJIS-UCS2-H.pickle.gz b/pdfminer/cmap/UniJIS-UCS2-H.pickle.gz new file mode 100644 index 00000000..767cb037 Binary files /dev/null and b/pdfminer/cmap/UniJIS-UCS2-H.pickle.gz differ diff --git a/pdfminer/cmap/UniJIS-UCS2-HW-H.pickle.gz b/pdfminer/cmap/UniJIS-UCS2-HW-H.pickle.gz new file mode 100644 index 00000000..3d8d5319 Binary files /dev/null and b/pdfminer/cmap/UniJIS-UCS2-HW-H.pickle.gz differ diff --git a/pdfminer/cmap/UniJIS-UCS2-HW-V.pickle.gz b/pdfminer/cmap/UniJIS-UCS2-HW-V.pickle.gz new file mode 100644 index 00000000..1850dee9 Binary files /dev/null and b/pdfminer/cmap/UniJIS-UCS2-HW-V.pickle.gz differ diff --git a/pdfminer/cmap/UniJIS-UCS2-V.pickle.gz b/pdfminer/cmap/UniJIS-UCS2-V.pickle.gz new file mode 100644 index 00000000..1ed3c806 
Binary files /dev/null and b/pdfminer/cmap/UniJIS-UCS2-V.pickle.gz differ diff --git a/pdfminer/cmap/UniJIS-UTF16-H.pickle.gz b/pdfminer/cmap/UniJIS-UTF16-H.pickle.gz new file mode 100644 index 00000000..b8b65591 Binary files /dev/null and b/pdfminer/cmap/UniJIS-UTF16-H.pickle.gz differ diff --git a/pdfminer/cmap/UniJIS-UTF16-V.pickle.gz b/pdfminer/cmap/UniJIS-UTF16-V.pickle.gz new file mode 100644 index 00000000..5159409a Binary files /dev/null and b/pdfminer/cmap/UniJIS-UTF16-V.pickle.gz differ diff --git a/pdfminer/cmap/UniJIS-UTF32-H.pickle.gz b/pdfminer/cmap/UniJIS-UTF32-H.pickle.gz new file mode 100644 index 00000000..cba91aa2 Binary files /dev/null and b/pdfminer/cmap/UniJIS-UTF32-H.pickle.gz differ diff --git a/pdfminer/cmap/UniJIS-UTF32-V.pickle.gz b/pdfminer/cmap/UniJIS-UTF32-V.pickle.gz new file mode 100644 index 00000000..b35a7150 Binary files /dev/null and b/pdfminer/cmap/UniJIS-UTF32-V.pickle.gz differ diff --git a/pdfminer/cmap/UniJIS-UTF8-H.pickle.gz b/pdfminer/cmap/UniJIS-UTF8-H.pickle.gz new file mode 100644 index 00000000..a209232f Binary files /dev/null and b/pdfminer/cmap/UniJIS-UTF8-H.pickle.gz differ diff --git a/pdfminer/cmap/UniJIS-UTF8-V.pickle.gz b/pdfminer/cmap/UniJIS-UTF8-V.pickle.gz new file mode 100644 index 00000000..24752ad6 Binary files /dev/null and b/pdfminer/cmap/UniJIS-UTF8-V.pickle.gz differ diff --git a/pdfminer/cmap/UniJIS2004-UTF16-H.pickle.gz b/pdfminer/cmap/UniJIS2004-UTF16-H.pickle.gz new file mode 100644 index 00000000..ef4d1f25 Binary files /dev/null and b/pdfminer/cmap/UniJIS2004-UTF16-H.pickle.gz differ diff --git a/pdfminer/cmap/UniJIS2004-UTF16-V.pickle.gz b/pdfminer/cmap/UniJIS2004-UTF16-V.pickle.gz new file mode 100644 index 00000000..66731b56 Binary files /dev/null and b/pdfminer/cmap/UniJIS2004-UTF16-V.pickle.gz differ diff --git a/pdfminer/cmap/UniJIS2004-UTF32-H.pickle.gz b/pdfminer/cmap/UniJIS2004-UTF32-H.pickle.gz new file mode 100644 index 00000000..c6c9e9ab Binary files /dev/null and 
b/pdfminer/cmap/UniJIS2004-UTF32-H.pickle.gz differ diff --git a/pdfminer/cmap/UniJIS2004-UTF32-V.pickle.gz b/pdfminer/cmap/UniJIS2004-UTF32-V.pickle.gz new file mode 100644 index 00000000..5f8de3c8 Binary files /dev/null and b/pdfminer/cmap/UniJIS2004-UTF32-V.pickle.gz differ diff --git a/pdfminer/cmap/UniJIS2004-UTF8-H.pickle.gz b/pdfminer/cmap/UniJIS2004-UTF8-H.pickle.gz new file mode 100644 index 00000000..b4c7813a Binary files /dev/null and b/pdfminer/cmap/UniJIS2004-UTF8-H.pickle.gz differ diff --git a/pdfminer/cmap/UniJIS2004-UTF8-V.pickle.gz b/pdfminer/cmap/UniJIS2004-UTF8-V.pickle.gz new file mode 100644 index 00000000..fb29f82e Binary files /dev/null and b/pdfminer/cmap/UniJIS2004-UTF8-V.pickle.gz differ diff --git a/pdfminer/cmap/UniJISX0213-UTF32-H.pickle.gz b/pdfminer/cmap/UniJISX0213-UTF32-H.pickle.gz new file mode 100644 index 00000000..68788446 Binary files /dev/null and b/pdfminer/cmap/UniJISX0213-UTF32-H.pickle.gz differ diff --git a/pdfminer/cmap/UniJISX0213-UTF32-V.pickle.gz b/pdfminer/cmap/UniJISX0213-UTF32-V.pickle.gz new file mode 100644 index 00000000..3fb944df Binary files /dev/null and b/pdfminer/cmap/UniJISX0213-UTF32-V.pickle.gz differ diff --git a/pdfminer/cmap/UniJISX02132004-UTF32-H.pickle.gz b/pdfminer/cmap/UniJISX02132004-UTF32-H.pickle.gz new file mode 100644 index 00000000..27f596a0 Binary files /dev/null and b/pdfminer/cmap/UniJISX02132004-UTF32-H.pickle.gz differ diff --git a/pdfminer/cmap/UniJISX02132004-UTF32-V.pickle.gz b/pdfminer/cmap/UniJISX02132004-UTF32-V.pickle.gz new file mode 100644 index 00000000..adf1a125 Binary files /dev/null and b/pdfminer/cmap/UniJISX02132004-UTF32-V.pickle.gz differ diff --git a/pdfminer/cmap/UniKS-UCS2-H.pickle.gz b/pdfminer/cmap/UniKS-UCS2-H.pickle.gz new file mode 100644 index 00000000..65873fe1 Binary files /dev/null and b/pdfminer/cmap/UniKS-UCS2-H.pickle.gz differ diff --git a/pdfminer/cmap/UniKS-UCS2-V.pickle.gz b/pdfminer/cmap/UniKS-UCS2-V.pickle.gz new file mode 100644 index 
00000000..9c8eca08 Binary files /dev/null and b/pdfminer/cmap/UniKS-UCS2-V.pickle.gz differ diff --git a/pdfminer/cmap/UniKS-UTF16-H.pickle.gz b/pdfminer/cmap/UniKS-UTF16-H.pickle.gz new file mode 100644 index 00000000..4d528fd0 Binary files /dev/null and b/pdfminer/cmap/UniKS-UTF16-H.pickle.gz differ diff --git a/pdfminer/cmap/UniKS-UTF16-V.pickle.gz b/pdfminer/cmap/UniKS-UTF16-V.pickle.gz new file mode 100644 index 00000000..2649bf95 Binary files /dev/null and b/pdfminer/cmap/UniKS-UTF16-V.pickle.gz differ diff --git a/pdfminer/cmap/UniKS-UTF32-H.pickle.gz b/pdfminer/cmap/UniKS-UTF32-H.pickle.gz new file mode 100644 index 00000000..cc875813 Binary files /dev/null and b/pdfminer/cmap/UniKS-UTF32-H.pickle.gz differ diff --git a/pdfminer/cmap/UniKS-UTF32-V.pickle.gz b/pdfminer/cmap/UniKS-UTF32-V.pickle.gz new file mode 100644 index 00000000..5ec9064f Binary files /dev/null and b/pdfminer/cmap/UniKS-UTF32-V.pickle.gz differ diff --git a/pdfminer/cmap/UniKS-UTF8-H.pickle.gz b/pdfminer/cmap/UniKS-UTF8-H.pickle.gz new file mode 100644 index 00000000..d02a6088 Binary files /dev/null and b/pdfminer/cmap/UniKS-UTF8-H.pickle.gz differ diff --git a/pdfminer/cmap/UniKS-UTF8-V.pickle.gz b/pdfminer/cmap/UniKS-UTF8-V.pickle.gz new file mode 100644 index 00000000..0048952a Binary files /dev/null and b/pdfminer/cmap/UniKS-UTF8-V.pickle.gz differ diff --git a/pdfminer/cmap/V.pickle.gz b/pdfminer/cmap/V.pickle.gz new file mode 100644 index 00000000..c0c0a985 Binary files /dev/null and b/pdfminer/cmap/V.pickle.gz differ diff --git a/pdfminer/cmap/WP-Symbol-H.pickle.gz b/pdfminer/cmap/WP-Symbol-H.pickle.gz new file mode 100644 index 00000000..cac95506 Binary files /dev/null and b/pdfminer/cmap/WP-Symbol-H.pickle.gz differ diff --git a/pdfminer/cmap/WP-Symbol-V.pickle.gz b/pdfminer/cmap/WP-Symbol-V.pickle.gz new file mode 100644 index 00000000..4d222e3c Binary files /dev/null and b/pdfminer/cmap/WP-Symbol-V.pickle.gz differ diff --git a/pdfminer/cmap/to-unicode-Adobe-CNS1.pickle.gz 
b/pdfminer/cmap/to-unicode-Adobe-CNS1.pickle.gz new file mode 100644 index 00000000..8de6794a Binary files /dev/null and b/pdfminer/cmap/to-unicode-Adobe-CNS1.pickle.gz differ diff --git a/pdfminer/cmap/to-unicode-Adobe-GB1.pickle.gz b/pdfminer/cmap/to-unicode-Adobe-GB1.pickle.gz new file mode 100644 index 00000000..120cac28 Binary files /dev/null and b/pdfminer/cmap/to-unicode-Adobe-GB1.pickle.gz differ diff --git a/pdfminer/cmap/to-unicode-Adobe-Japan1.pickle.gz b/pdfminer/cmap/to-unicode-Adobe-Japan1.pickle.gz new file mode 100644 index 00000000..9eeb637d Binary files /dev/null and b/pdfminer/cmap/to-unicode-Adobe-Japan1.pickle.gz differ diff --git a/pdfminer/cmap/to-unicode-Adobe-Korea1.pickle.gz b/pdfminer/cmap/to-unicode-Adobe-Korea1.pickle.gz new file mode 100644 index 00000000..9ac933ee Binary files /dev/null and b/pdfminer/cmap/to-unicode-Adobe-Korea1.pickle.gz differ diff --git a/pdfminer/cmapdb.py b/pdfminer/cmapdb.py index dbe98717..4b508124 100644 --- a/pdfminer/cmapdb.py +++ b/pdfminer/cmapdb.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python + """ Adobe character mapping (CMap) support. 
@@ -31,6 +31,10 @@ from .utils import choplist from .utils import nunpack +import six #Python 2+3 compatibility + +log = logging.getLogger(__name__) + class CMapError(Exception): pass @@ -76,7 +80,7 @@ def __repr__(self): return '' % self.attrs.get('CMapName') def use_cmap(self, cmap): - assert isinstance(cmap, CMap) + assert isinstance(cmap, CMap), str(type(cmap)) def copy(dst, src): for (k, v) in src.iteritems(): @@ -90,13 +94,11 @@ def copy(dst, src): return def decode(self, code): - if self.debug: - logging.debug('decode: %r, %r' % (self, code)) + log.debug('decode: %r, %r', self, code) d = self.code2cid - for c in code: - c = ord(c) - if c in d: - d = d[c] + for i in six.iterbytes(code): + if i in d: + d = d[i] if isinstance(d, int): yield d d = self.code2cid @@ -142,8 +144,7 @@ def __repr__(self): return '' % self.attrs.get('CMapName') def get_unichr(self, cid): - if self.debug: - logging.debug('get_unichr: %r, %r' % (self, cid)) + log.debug('get_unichr: %r, %r', self, cid) return self.cid2unichr[cid] def dump(self, out=sys.stdout): @@ -157,7 +158,7 @@ def dump(self, out=sys.stdout): class FileCMap(CMap): def add_code2cid(self, code, cid): - assert isinstance(code, str) and isinstance(cid, int) + assert isinstance(code, str) and isinstance(cid, int), str((type(code), type(cid))) d = self.code2cid for c in code[:-1]: c = ord(c) @@ -177,15 +178,15 @@ def add_code2cid(self, code, cid): class FileUnicodeMap(UnicodeMap): def add_cid2unichr(self, cid, code): - assert isinstance(cid, int) + assert isinstance(cid, int), str(type(cid)) if isinstance(code, PSLiteral): # Interpret as an Adobe glyph name. self.cid2unichr[cid] = name2unicode(code.name) - elif isinstance(code, str): + elif isinstance(code, bytes): # Interpret as UTF-16BE. 
- self.cid2unichr[cid] = unicode(code, 'UTF-16BE', 'ignore') + self.cid2unichr[cid] = code.decode('UTF-16BE', 'ignore') elif isinstance(code, int): - self.cid2unichr[cid] = unichr(code) + self.cid2unichr[cid] = six.unichr(code) else: raise TypeError(code) return @@ -229,8 +230,9 @@ class CMapNotFound(CMapError): @classmethod def _load_data(klass, name): + name = name.replace("\0", "") filename = '%s.pickle.gz' % name - logging.info('loading: %r' % name) + log.info('loading: %r', name) cmap_paths = (os.environ.get('CMAP_PATH', '/usr/share/pdfminer/'), os.path.join(os.path.dirname(__file__), 'cmap'),) for directory in cmap_paths: @@ -303,7 +305,7 @@ def run(self): KEYWORD_ENDBFCHAR = KWD(b'endbfchar') KEYWORD_BEGINNOTDEFRANGE = KWD(b'beginnotdefrange') KEYWORD_ENDNOTDEFRANGE = KWD(b'endnotdefrange') - + def do_keyword(self, pos, token): if token is self.KEYWORD_BEGINCMAP: self._in_cmap = True @@ -358,8 +360,8 @@ def do_keyword(self, pos, token): s1 = nunpack(svar) e1 = nunpack(evar) vlen = len(svar) - #assert s1 <= e1 - for i in xrange(e1-s1+1): + #assert s1 <= e1, str((s1, e1)) + for i in range(e1-s1+1): x = sprefix+struct.pack('>L', s1+i)[-vlen:] self.cmap.add_code2cid(x, cid+i) return @@ -380,21 +382,21 @@ def do_keyword(self, pos, token): if token is self.KEYWORD_ENDBFRANGE: objs = [obj for (__, obj) in self.popall()] for (s, e, code) in choplist(3, objs): - if (not isinstance(s, str) or not isinstance(e, str) or + if (not isinstance(s, bytes) or not isinstance(e, bytes) or len(s) != len(e)): continue s1 = nunpack(s) e1 = nunpack(e) - #assert s1 <= e1 + #assert s1 <= e1, str((s1, e1)) if isinstance(code, list): - for i in xrange(e1-s1+1): + for i in range(e1-s1+1): self.cmap.add_cid2unichr(s1+i, code[i]) else: var = code[-4:] base = nunpack(var) prefix = code[:-4] vlen = len(var) - for i in xrange(e1-s1+1): + for i in range(e1-s1+1): x = prefix+struct.pack('>L', base+i)[-vlen:] self.cmap.add_cid2unichr(s1+i, x) return @@ -405,7 +407,7 @@ def do_keyword(self, pos, 
token): if token is self.KEYWORD_ENDBFCHAR: objs = [obj for (__, obj) in self.popall()] for (cid, code) in choplist(2, objs): - if isinstance(cid, str) and isinstance(code, str): + if isinstance(cid, bytes) and isinstance(code, bytes): self.cmap.add_cid2unichr(nunpack(cid), code) return diff --git a/pdfminer/converter.py b/pdfminer/converter.py index 30ceb226..12ee651a 100644 --- a/pdfminer/converter.py +++ b/pdfminer/converter.py @@ -1,4 +1,5 @@ -#!/usr/bin/env python + +# -*- coding: utf-8 -*- import logging import re from .pdfdevice import PDFTextDevice @@ -20,6 +21,11 @@ from .utils import mult_matrix from .utils import enc from .utils import bbox2str +from . import utils + +import six # Python 2+3 compatibility + +log = logging.getLogger(__name__) ## PDFLayoutAnalyzer @@ -42,8 +48,8 @@ def begin_page(self, page, ctm): return def end_page(self, page): - assert not self._stack - assert isinstance(self.cur_item, LTPage) + assert not self._stack, str(len(self._stack)) + assert isinstance(self.cur_item, LTPage), str(type(self.cur_item)) if self.laparams is not None: self.cur_item.analyze(self.laparams) self.pageno += 1 @@ -57,13 +63,13 @@ def begin_figure(self, name, bbox, matrix): def end_figure(self, _): fig = self.cur_item - assert isinstance(self.cur_item, LTFigure) + assert isinstance(self.cur_item, LTFigure), str(type(self.cur_item)) self.cur_item = self._stack.pop() self.cur_item.add(fig) return def render_image(self, name, stream): - assert isinstance(self.cur_item, LTFigure) + assert isinstance(self.cur_item, LTFigure), str(type(self.cur_item)) item = LTImage(name, stream, (self.cur_item.x0, self.cur_item.y0, self.cur_item.x1, self.cur_item.y1)) @@ -79,7 +85,8 @@ def paint_path(self, gstate, stroke, fill, evenodd, path): (x0, y0) = apply_matrix_pt(self.ctm, (x0, y0)) (x1, y1) = apply_matrix_pt(self.ctm, (x1, y1)) if x0 == x1 or y0 == y1: - self.cur_item.add(LTLine(gstate.linewidth, (x0, y0), (x1, y1))) + self.cur_item.add(LTLine(gstate.linewidth, (x0, y0), 
(x1, y1), + stroke, fill, evenodd, gstate.scolor, gstate.ncolor)) return if shape == 'mlllh': # rectangle @@ -93,30 +100,32 @@ def paint_path(self, gstate, stroke, fill, evenodd, path): (x3, y3) = apply_matrix_pt(self.ctm, (x3, y3)) if ((x0 == x1 and y1 == y2 and x2 == x3 and y3 == y0) or (y0 == y1 and x1 == x2 and y2 == y3 and x3 == x0)): - self.cur_item.add(LTRect(gstate.linewidth, (x0, y0, x2, y2))) + self.cur_item.add(LTRect(gstate.linewidth, (x0, y0, x2, y2), + stroke, fill, evenodd, gstate.scolor, gstate.ncolor)) return # other shapes pts = [] for p in path: - for i in xrange(1, len(p), 2): + for i in range(1, len(p), 2): pts.append(apply_matrix_pt(self.ctm, (p[i], p[i+1]))) - self.cur_item.add(LTCurve(gstate.linewidth, pts)) + self.cur_item.add(LTCurve(gstate.linewidth, pts, stroke, fill, + evenodd, gstate.scolor, gstate.ncolor)) return - def render_char(self, matrix, font, fontsize, scaling, rise, cid): + def render_char(self, matrix, font, fontsize, scaling, rise, cid, graphicstate): try: text = font.to_unichr(cid) - assert isinstance(text, unicode), text + assert isinstance(text, six.text_type), str(type(text)) except PDFUnicodeNotDefined: text = self.handle_undefined_char(font, cid) textwidth = font.char_width(cid) textdisp = font.char_disp(cid) - item = LTChar(matrix, font, fontsize, scaling, rise, text, textwidth, textdisp) + item = LTChar(matrix, font, fontsize, scaling, rise, text, textwidth, textdisp, graphicstate) self.cur_item.add(item) return item.adv def handle_undefined_char(self, font, cid): - logging.info('undefined: %r, %r' % (font, cid)) + log.info('undefined: %r, %r', font, cid) return '(cid:%d)' % cid def receive_layout(self, ltpage): @@ -148,6 +157,23 @@ def __init__(self, rsrcmgr, outfp, codec='utf-8', pageno=1, laparams=None): PDFLayoutAnalyzer.__init__(self, rsrcmgr, pageno=pageno, laparams=laparams) self.outfp = outfp self.codec = codec + if hasattr(self.outfp, 'mode'): + if 'b' in self.outfp.mode: + self.outfp_binary = True + else: 
+ self.outfp_binary = False + else: + import io + if isinstance(self.outfp, io.BytesIO): + self.outfp_binary = True + elif isinstance(self.outfp, io.StringIO): + self.outfp_binary = False + else: + try: + self.outfp.write(u"é") + self.outfp_binary = False + except TypeError: + self.outfp_binary = True return @@ -163,7 +189,10 @@ def __init__(self, rsrcmgr, outfp, codec='utf-8', pageno=1, laparams=None, return def write_text(self, text): - self.outfp.write(text.encode(self.codec, 'ignore')) + text = utils.compatible_encode_method(text, self.codec, 'ignore') + if six.PY3 and self.outfp_binary: + text = text.encode() + self.outfp.write(text) return def receive_layout(self, ltpage): @@ -240,23 +269,28 @@ def __init__(self, rsrcmgr, outfp, codec='utf-8', pageno=1, laparams=None, return def write(self, text): + if self.codec: + text = text.encode(self.codec) self.outfp.write(text) return def write_header(self): self.write('\n') - self.write('\n' % self.codec) + if self.codec: + self.write('\n' % self.codec) + else: + self.write('\n') self.write('\n') return def write_footer(self): self.write('
    Page: %s
    \n' % - ', '.join('%s' % (i, i) for i in xrange(1, self.pageno))) + ', '.join('%s' % (i, i) for i in range(1, self.pageno))) self.write('\n') return def write_text(self, text): - self.write(enc(text, self.codec)) + self.write(enc(text, None)) return def place_rect(self, color, borderwidth, x, y, w, h): @@ -278,7 +312,7 @@ def place_image(self, item, borderwidth, x, y, w, h): name = self.imagewriter.export_image(item) self.write('\n' % - (enc(name), borderwidth, + (enc(name, None), borderwidth, x*self.scale, (self._yoffset-y)*self.scale, w*self.scale, h*self.scale)) return @@ -398,7 +432,7 @@ def close(self): ## class XMLConverter(PDFConverter): - CONTROL = re.compile(ur'[\x00-\x08\x0b-\x0c\x0e-\x1f]') + CONTROL = re.compile(u'[\x00-\x08\x0b-\x0c\x0e-\x1f]') def __init__(self, rsrcmgr, outfp, codec='utf-8', pageno=1, laparams=None, imagewriter=None, stripcontrol=False): @@ -408,91 +442,100 @@ def __init__(self, rsrcmgr, outfp, codec='utf-8', pageno=1, self.write_header() return + def write(self, text): + if self.codec: + text = text.encode(self.codec) + self.outfp.write(text) + return + def write_header(self): - self.outfp.write('\n' % self.codec) - self.outfp.write('\n') + if self.codec: + self.write('\n' % self.codec) + else: + self.write('\n') + self.write('\n') return def write_footer(self): - self.outfp.write('\n') + self.write('\n') return def write_text(self, text): if self.stripcontrol: text = self.CONTROL.sub(u'', text) - self.outfp.write(enc(text, self.codec)) + self.write(enc(text, None)) return def receive_layout(self, ltpage): def show_group(item): if isinstance(item, LTTextBox): - self.outfp.write('\n' % + self.write('\n' % (item.index, bbox2str(item.bbox))) elif isinstance(item, LTTextGroup): - self.outfp.write('\n' % bbox2str(item.bbox)) + self.write('\n' % bbox2str(item.bbox)) for child in item: show_group(child) - self.outfp.write('\n') + self.write('\n') return def render(item): if isinstance(item, LTPage): - self.outfp.write('\n' % + 
self.write('\n' % (item.pageid, bbox2str(item.bbox), item.rotate)) for child in item: render(child) if item.groups is not None: - self.outfp.write('\n') + self.write('\n') for group in item.groups: show_group(group) - self.outfp.write('\n') - self.outfp.write('\n') + self.write('\n') + self.write('\n') elif isinstance(item, LTLine): - self.outfp.write('\n' % + self.write('\n' % (item.linewidth, bbox2str(item.bbox))) elif isinstance(item, LTRect): - self.outfp.write('\n' % + self.write('\n' % (item.linewidth, bbox2str(item.bbox))) elif isinstance(item, LTCurve): - self.outfp.write('\n' % + self.write('\n' % (item.linewidth, bbox2str(item.bbox), item.get_pts())) elif isinstance(item, LTFigure): - self.outfp.write('
    \n' % + self.write('
    \n' % (item.name, bbox2str(item.bbox))) for child in item: render(child) - self.outfp.write('
    \n') + self.write('
    \n') elif isinstance(item, LTTextLine): - self.outfp.write('\n' % bbox2str(item.bbox)) + self.write('\n' % bbox2str(item.bbox)) for child in item: render(child) - self.outfp.write('\n') + self.write('\n') elif isinstance(item, LTTextBox): wmode = '' if isinstance(item, LTTextBoxVertical): wmode = ' wmode="vertical"' - self.outfp.write('\n' % + self.write('\n' % (item.index, bbox2str(item.bbox), wmode)) for child in item: render(child) - self.outfp.write('\n') + self.write('\n') elif isinstance(item, LTChar): - self.outfp.write('' % - (enc(item.fontname), bbox2str(item.bbox), item.size)) + self.write('' % + (enc(item.fontname, None), bbox2str(item.bbox), item.size)) self.write_text(item.get_text()) - self.outfp.write('\n') + self.write('\n') elif isinstance(item, LTText): - self.outfp.write('%s\n' % item.get_text()) + self.write('%s\n' % item.get_text()) elif isinstance(item, LTImage): if self.imagewriter is not None: name = self.imagewriter.export_image(item) - self.outfp.write('\n' % - (enc(name), item.width, item.height)) + self.write('\n' % + (enc(name, None), item.width, item.height)) else: - self.outfp.write('\n' % + self.write('\n' % (item.width, item.height)) else: - assert 0, item + assert False, str(('Unhandled', item)) return render(ltpage) return diff --git a/pdfminer/encodingdb.py b/pdfminer/encodingdb.py index b3263bda..870bd28e 100644 --- a/pdfminer/encodingdb.py +++ b/pdfminer/encodingdb.py @@ -1,9 +1,10 @@ -#!/usr/bin/env python + import re from .psparser import PSLiteral from .glyphlist import glyphname2unicode from .latin_enc import ENCODING +import six # Python 2+3 compatibility STRIP_NAME = re.compile(r'[0-9]+') @@ -17,7 +18,7 @@ def name2unicode(name): m = STRIP_NAME.search(name) if not m: raise KeyError(name) - return unichr(int(m.group(0))) + return six.unichr(int(m.group(0))) ## EncodingDB diff --git a/pdfminer/fontmetrics.py b/pdfminer/fontmetrics.py index bf0be752..8b3779ac 100644 --- a/pdfminer/fontmetrics.py +++ 
b/pdfminer/fontmetrics.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python + """ Font metrics for the Adobe core 14 fonts. diff --git a/pdfminer/glyphlist.py b/pdfminer/glyphlist.py index 10e1008d..848b0971 100644 --- a/pdfminer/glyphlist.py +++ b/pdfminer/glyphlist.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python + """ Mappings from Adobe glyph names to Unicode characters. diff --git a/pdfminer/high_level.py b/pdfminer/high_level.py new file mode 100644 index 00000000..cdef1e7f --- /dev/null +++ b/pdfminer/high_level.py @@ -0,0 +1,84 @@ +# -*- coding: utf-8 -*- +""" +Functions that encapsulate "usual" use-cases for pdfminer, for use making +bundled scripts and for using pdfminer as a module for routine tasks. +""" + +import six +import sys + +from .pdfdocument import PDFDocument +from .pdfparser import PDFParser +from .pdfinterp import PDFResourceManager, PDFPageInterpreter +from .pdfdevice import PDFDevice, TagExtractor +from .pdfpage import PDFPage +from .converter import XMLConverter, HTMLConverter, TextConverter +from .cmapdb import CMapDB +from .image import ImageWriter + + +def extract_text_to_fp(inf, outfp, + _py2_no_more_posargs=None, # Bloody Python2 needs a shim + output_type='text', codec='utf-8', laparams = None, + maxpages=0, page_numbers=None, password="", scale=1.0, rotation=0, + layoutmode='normal', output_dir=None, strip_control=False, + debug=False, disable_caching=False, **other): + """ + Parses text from inf-file and writes to outfp file-like object. + Takes loads of optional arguments but the defaults are somewhat sane. + Beware laparams: Including an empty LAParams is not the same as passing None! + Returns nothing, acting as it does on two streams. Use StringIO to get strings. + + output_type: May be 'text', 'xml', 'html', 'tag'. Only 'text' works properly. + codec: Text decoding codec + laparams: An LAParams object from pdfminer.layout. + Default is None but may not layout correctly. 
+ maxpages: How many pages to stop parsing after + page_numbers: zero-indexed page numbers to operate on. + password: For encrypted PDFs, the password to decrypt. + scale: Scale factor + rotation: Rotation factor + layoutmode: Default is 'normal', see pdfminer.converter.HTMLConverter + output_dir: If given, creates an ImageWriter for extracted images. + strip_control: Does what it says on the tin + debug: Output more logging data + disable_caching: Does what it says on the tin + """ + if six.PY2 and sys.stdin.encoding: + password = password.decode(sys.stdin.encoding) + + imagewriter = None + if output_dir: + imagewriter = ImageWriter(output_dir) + + rsrcmgr = PDFResourceManager(caching=not disable_caching) + + if output_type == 'text': + device = TextConverter(rsrcmgr, outfp, codec=codec, laparams=laparams, + imagewriter=imagewriter) + + if six.PY3 and outfp == sys.stdout: + outfp = sys.stdout.buffer + + if output_type == 'xml': + device = XMLConverter(rsrcmgr, outfp, codec=codec, laparams=laparams, + imagewriter=imagewriter, + stripcontrol=strip_control) + elif output_type == 'html': + device = HTMLConverter(rsrcmgr, outfp, codec=codec, scale=scale, + layoutmode=layoutmode, laparams=laparams, + imagewriter=imagewriter) + elif output_type == 'tag': + device = TagExtractor(rsrcmgr, outfp, codec=codec) + + interpreter = PDFPageInterpreter(rsrcmgr, device) + for page in PDFPage.get_pages(inf, + page_numbers, + maxpages=maxpages, + password=password, + caching=not disable_caching, + check_extractable=True): + page.rotate = (page.rotate + rotation) % 360 + interpreter.process_page(page) + + device.close() diff --git a/pdfminer/image.py b/pdfminer/image.py index e7b98e8f..e85815c8 100644 --- a/pdfminer/image.py +++ b/pdfminer/image.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python + import struct import os import os.path @@ -34,9 +34,9 @@ def __init__(self, fp, bits, width, height): self.datasize = self.linesize * self.height headersize = 14+40+ncols*4 info = struct.pack('>> 
lzwdecode(b'\x80\x0b\x60\x50\x22\x0c\x0c\x85\x01') - '\x2d\x2d\x2d\x2d\x2d\x41\x2d\x2d\x2d\x42' - """ fp = BytesIO(data) - return b''.join(LZWDecoder(fp).run()) - -if __name__ == '__main__': - import doctest - doctest.testmod() + s=LZWDecoder(fp).run() + return b''.join(s) diff --git a/pdfminer/pdfcolor.py b/pdfminer/pdfcolor.py index 83843961..6fe6eaa2 100644 --- a/pdfminer/pdfcolor.py +++ b/pdfminer/pdfcolor.py @@ -1,6 +1,7 @@ -#!/usr/bin/env python + from .psparser import LIT +import six #Python 2+3 compatibility ## PDFColorSpace ## @@ -20,15 +21,17 @@ def __repr__(self): return '' % (self.name, self.ncomponents) -PREDEFINED_COLORSPACE = dict( - (name, PDFColorSpace(name, n)) for (name, n) in { - 'CalRGB': 3, - 'CalGray': 1, - 'Lab': 3, - 'DeviceRGB': 3, - 'DeviceCMYK': 4, - 'DeviceGray': 1, - 'Separation': 1, - 'Indexed': 1, - 'Pattern': 1, - }.iteritems()) +PREDEFINED_COLORSPACE = {} +for (name, n) in six.iteritems({ + 'CalRGB': 3, + 'CalGray': 1, + 'Lab': 3, + 'DeviceRGB': 3, + 'DeviceCMYK': 4, + 'DeviceGray': 1, + 'Separation': 1, + 'Indexed': 1, + 'Pattern': 1, +}) : + PREDEFINED_COLORSPACE[name]=PDFColorSpace(name, n) + \ No newline at end of file diff --git a/pdfminer/pdfdevice.py b/pdfminer/pdfdevice.py index 3efee9ed..c1f563ca 100644 --- a/pdfminer/pdfdevice.py +++ b/pdfminer/pdfdevice.py @@ -1,11 +1,7 @@ -#!/usr/bin/env python -from .utils import mult_matrix -from .utils import translate_matrix -from .utils import enc -from .utils import bbox2str -from .utils import isnumber + from .pdffont import PDFUnicodeNotDefined +from . 
import utils ## PDFDevice ## @@ -19,6 +15,12 @@ def __init__(self, rsrcmgr): def __repr__(self): return '' + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.close() + def close(self): return @@ -61,8 +63,8 @@ def render_string(self, textstate, seq): ## class PDFTextDevice(PDFDevice): - def render_string(self, textstate, seq): - matrix = mult_matrix(textstate.matrix, self.ctm) + def render_string(self, textstate, seq, graphicstate): + matrix = utils.mult_matrix(textstate.matrix, self.ctm) font = textstate.font fontsize = textstate.fontsize scaling = textstate.scaling * .01 @@ -75,52 +77,52 @@ def render_string(self, textstate, seq): if font.is_vertical(): textstate.linematrix = self.render_string_vertical( seq, matrix, textstate.linematrix, font, fontsize, - scaling, charspace, wordspace, rise, dxscale) + scaling, charspace, wordspace, rise, dxscale, graphicstate) else: textstate.linematrix = self.render_string_horizontal( seq, matrix, textstate.linematrix, font, fontsize, - scaling, charspace, wordspace, rise, dxscale) + scaling, charspace, wordspace, rise, dxscale, graphicstate) return def render_string_horizontal(self, seq, matrix, pos, - font, fontsize, scaling, charspace, wordspace, rise, dxscale): + font, fontsize, scaling, charspace, wordspace, rise, dxscale, graphicstate): (x, y) = pos needcharspace = False for obj in seq: - if isnumber(obj): + if utils.isnumber(obj): x -= obj*dxscale needcharspace = True else: for cid in font.decode(obj): if needcharspace: x += charspace - x += self.render_char(translate_matrix(matrix, (x, y)), - font, fontsize, scaling, rise, cid) + x += self.render_char(utils.translate_matrix(matrix, (x, y)), + font, fontsize, scaling, rise, cid, graphicstate) if cid == 32 and wordspace: x += wordspace needcharspace = True return (x, y) def render_string_vertical(self, seq, matrix, pos, - font, fontsize, scaling, charspace, wordspace, rise, dxscale): + font, fontsize, scaling, charspace, 
wordspace, rise, dxscale, graphicstate): (x, y) = pos needcharspace = False for obj in seq: - if isnumber(obj): + if utils.isnumber(obj): y -= obj*dxscale needcharspace = True else: for cid in font.decode(obj): if needcharspace: y += charspace - y += self.render_char(translate_matrix(matrix, (x, y)), - font, fontsize, scaling, rise, cid) + y += self.render_char(utils.translate_matrix(matrix, (x, y)), + font, fontsize, scaling, rise, cid, graphicstate) if cid == 32 and wordspace: y += wordspace needcharspace = True return (x, y) - def render_char(self, matrix, font, fontsize, scaling, rise, cid): + def render_char(self, matrix, font, fontsize, scaling, rise, cid, graphicstate): return 0 @@ -140,6 +142,7 @@ def render_string(self, textstate, seq): font = textstate.font text = '' for obj in seq: + obj = utils.make_compat_str(obj) if not isinstance(obj, str): continue chars = font.decode(obj) @@ -148,33 +151,36 @@ def render_string(self, textstate, seq): char = font.to_unichr(cid) text += char except PDFUnicodeNotDefined: + print(chars) pass - self.outfp.write(enc(text, self.codec)) + self.outfp.write(utils.enc(text, self.codec)) return def begin_page(self, page, ctm): - self.outfp.write('' % - (self.pageno, bbox2str(page.mediabox), page.rotate)) + output = '' % (self.pageno, utils.bbox2str(page.mediabox), page.rotate) + self.outfp.write(utils.make_compat_bytes(output)) return def end_page(self, page): - self.outfp.write('\n') + self.outfp.write(utils.make_compat_bytes('\n')) self.pageno += 1 return def begin_tag(self, tag, props=None): s = '' if isinstance(props, dict): - s = ''.join(' %s="%s"' % (enc(k), enc(str(v))) for (k, v) + s = ''.join(' %s="%s"' % (utils.enc(k), utils.enc(str(v))) for (k, v) in sorted(props.iteritems())) - self.outfp.write('<%s%s>' % (enc(tag.name), s)) + out_s = '<%s%s>' % (utils.enc(tag.name), s) + self.outfp.write(utils.make_compat_bytes(out_s)) self._stack.append(tag) return def end_tag(self): - assert self._stack + assert self._stack, 
str(self.pageno) tag = self._stack.pop(-1) - self.outfp.write('' % enc(tag.name)) + out_s = '' % utils.enc(tag.name) + self.outfp.write(utils.make_compat_bytes(out_s)) return def do_tag(self, tag, props=None): diff --git a/pdfminer/pdfdocument.py b/pdfminer/pdfdocument.py index 66b575ae..5fb9cce4 100644 --- a/pdfminer/pdfdocument.py +++ b/pdfminer/pdfdocument.py @@ -1,7 +1,9 @@ -#!/usr/bin/env python + import re import struct import logging + +import six # Python 2+3 compatibility try: import hashlib as md5 except ImportError: @@ -17,7 +19,7 @@ from .psparser import literal_name from .psparser import LIT from .psparser import KWD -from .psparser import STRICT +from . import settings from .pdftypes import PDFException from .pdftypes import PDFTypeError from .pdftypes import PDFStream @@ -35,6 +37,8 @@ from .utils import decode_text +log = logging.getLogger(__name__) + ## Exceptions ## class PDFNoValidXRef(PDFSyntaxError): @@ -65,8 +69,6 @@ class PDFTextExtractionNotAllowed(PDFEncryptionError): ## class PDFBaseXRef(object): - debug = False - def get_trailer(self): raise NotImplementedError @@ -93,7 +95,7 @@ def __repr__(self): return '' % (self.offsets.keys()) def load(self, parser): - while 1: + while True: try: (pos, line) = parser.nextline() if not line.strip(): @@ -109,10 +111,13 @@ def load(self, parser): if len(f) != 2: raise PDFNoValidXRef('Trailer not found: %r: line=%r' % (parser, line)) try: - (start, nobjs) = map(long, f) + if six.PY2: + (start, nobjs) = map(long, f) + else: + (start, nobjs) = map(int, f) except ValueError: raise PDFNoValidXRef('Invalid line: %r: line=%r' % (parser, line)) - for objid in xrange(start, start+nobjs): + for objid in range(start, start+nobjs): try: (_, line) = parser.nextline() except PSEOF: @@ -123,17 +128,15 @@ def load(self, parser): (pos, genno, use) = f if use != b'n': continue - self.offsets[objid] = (None, long(pos), int(genno)) - if self.debug: logging.info('xref objects: %r' % self.offsets) + self.offsets[objid] = 
(None, long(pos) if six.PY2 else int(pos), int(genno)) + log.info('xref objects: %r', self.offsets) self.load_trailer(parser) return - KEYWORD_TRAILER = KWD('trailer') - def load_trailer(self, parser): try: (_, kwd) = parser.nexttoken() - assert kwd is self.KEYWORD_TRAILER + assert kwd is KWD(b'trailer'), str(kwd) (_, dic) = parser.nextobject() except PSEOF: x = parser.pop(1) @@ -141,13 +144,14 @@ def load_trailer(self, parser): raise PDFNoValidXRef('Unexpected EOF - file corrupted') (_, dic) = x[0] self.trailer.update(dict_value(dic)) + log.debug('trailer=%r', self.trailer) return def get_trailer(self): return self.trailer def get_objids(self): - return self.offsets.iterkeys() + return six.iterkeys(self.offsets) def get_pos(self, objid): try: @@ -175,8 +179,10 @@ def load(self, parser): if line.startswith(b'trailer'): parser.seek(pos) self.load_trailer(parser) - if self.debug: logging.info('trailer: %r' % self.get_trailer()) + log.info('trailer: %r', self.trailer) break + if six.PY3: + line=line.decode('latin-1') #default pdf encoding m = self.PDFOBJ_CUE.match(line) if not m: continue @@ -192,7 +198,7 @@ def load(self, parser): try: n = stream['N'] except KeyError: - if STRICT: + if settings.STRICT: raise PDFSyntaxError('N is not defined: %r' % stream) n = 0 parser1 = PDFStreamParser(stream.get_data()) @@ -204,7 +210,7 @@ def load(self, parser): except PSEOF: pass n = min(n, len(objs)//2) - for index in xrange(n): + for index in range(n): objid1 = objs[index*2] self.offsets[objid1] = (objid, index, 0) return @@ -214,8 +220,6 @@ def load(self, parser): ## class PDFXRefStream(PDFBaseXRef): - debug = False - def __init__(self): self.data = None self.entlen = None @@ -242,10 +246,9 @@ def load(self, parser): self.data = stream.get_data() self.entlen = self.fl1+self.fl2+self.fl3 self.trailer = stream.attrs - if self.debug: - logging.info('xref stream: objid=%s, fields=%d,%d,%d' % - (', '.join(map(repr, self.ranges)), - self.fl1, self.fl2, self.fl3)) + log.info('xref 
stream: objid=%s, fields=%d,%d,%d', + ', '.join(map(repr, self.ranges)), + self.fl1, self.fl2, self.fl3) return def get_trailer(self): @@ -253,7 +256,7 @@ def get_trailer(self): def get_objids(self): for (start, nobjs) in self.ranges: - for i in xrange(nobjs): + for i in range(nobjs): offset = self.entlen * i ent = self.data[offset:offset+self.entlen] f1 = nunpack(ent[:self.fl1], 1) @@ -293,7 +296,7 @@ class PDFStandardSecurityHandler(object): b'..\x00\xb6\xd0h>\x80/\x0c\xa9\xfedSiz') supported_revisions = (2, 3) - def __init__(self, docid, param, password=b''): + def __init__(self, docid, param, password=''): self.docid = docid self.param = param self.password = password @@ -341,7 +344,7 @@ def compute_u(self, key): hash.update(self.docid[0]) # 3 result = ARC4.new(key).encrypt(hash.digest()) # 4 for i in range(1, 20): # 5 - k = b''.join(chr(ord(c) ^ i) for c in key) + k = b''.join(six.int2byte(c ^ i) for c in six.iterbytes(key)) result = ARC4.new(k).encrypt(result) result += result # 6 return result @@ -365,6 +368,7 @@ def compute_encryption_key(self, password): return result[:n] def authenticate(self, password): + password = password.encode("latin1") key = self.authenticate_user_password(password) if key is None: key = self.authenticate_owner_password(password) @@ -400,7 +404,7 @@ def authenticate_owner_password(self, password): else: user_password = self.o for i in range(19, -1, -1): - k = b''.join(chr(ord(c) ^ i) for c in key) + k = b''.join(six.int2byte(c ^ i) for c in six.iterbytes(key)) user_password = ARC4.new(k).decrypt(user_password) return self.authenticate_user_password(user_password) @@ -535,9 +539,7 @@ class PDFDocument(object): if SHA256 is not None: security_handler_registry[5] = PDFStandardSecurityHandlerV5 - debug = 0 - - def __init__(self, parser, password=b'', caching=True, fallback=True): + def __init__(self, parser, password='', caching=True, fallback=True): "Set the document to use a given PDFParser object." 
self.caching = caching self.xrefs = [] @@ -557,7 +559,7 @@ def __init__(self, parser, password=b'', caching=True, fallback=True): pos = self.find_xref(parser) self.read_xref_from(parser, pos, self.xrefs) except PDFNoValidXRef: - fallback = True + pass # fallback = True if fallback: parser.fallback = True xref = PDFXRefFallback() @@ -569,7 +571,7 @@ def __init__(self, parser, password=b'', caching=True, fallback=True): continue # If there's an encryption info, remember it. if 'Encrypt' in trailer: - #assert not self.encryption + #assert not self.encryption, str(self.encryption) self.encryption = (list_value(trailer['ID']), dict_value(trailer['Encrypt'])) self._initialize_password(password) @@ -582,13 +584,15 @@ def __init__(self, parser, password=b'', caching=True, fallback=True): else: raise PDFSyntaxError('No /Root object! - Is this really a PDF?') if self.catalog.get('Type') is not LITERAL_CATALOG: - if STRICT: + if settings.STRICT: raise PDFSyntaxError('Catalog not found!') return + + KEYWORD_OBJ = KWD(b'obj') # _initialize_password(password=b'') # Perform the initialization with a given password. 
- def _initialize_password(self, password=b''): + def _initialize_password(self, password=''): (docid, param) = self.encryption if literal_name(param.get('Filter')) != 'Standard': raise PDFEncryptionError('Unknown filter: param=%r' % param) @@ -620,12 +624,12 @@ def _getobj_objstm(self, stream, index, objid): def _get_objects(self, stream): if stream.get('Type') is not LITERAL_OBJSTM: - if STRICT: + if settings.STRICT: raise PDFSyntaxError('Not a stream object: %r' % stream) try: n = stream['N'] except KeyError: - if STRICT: + if settings.STRICT: raise PDFSyntaxError('N is not defined: %r' % stream) n = 0 parser = PDFStreamParser(stream.get_data()) @@ -639,16 +643,28 @@ def _get_objects(self, stream): pass return (objs, n) - KEYWORD_OBJ = KWD('obj') - def _getobj_parse(self, pos, objid): self._parser.seek(pos) (_, objid1) = self._parser.nexttoken() # objid - if objid1 != objid: - raise PDFSyntaxError('objid mismatch: %r=%r' % (objid1, objid)) (_, genno) = self._parser.nexttoken() # genno (_, kwd) = self._parser.nexttoken() - if kwd is not self.KEYWORD_OBJ: + # #### hack around malformed pdf files + # copied from https://github.com/jaepil/pdfminer3k/blob/master/pdfminer/pdfparser.py#L399 + #to solve https://github.com/pdfminer/pdfminer.six/issues/56 + #assert objid1 == objid, str((objid1, objid)) + if objid1 != objid: + x = [] + while kwd is not self.KEYWORD_OBJ: + (_,kwd) = self._parser.nexttoken() + x.append(kwd) + if x: + objid1 = x[-2] + genno = x[-1] + # #### end hack around malformed pdf files + if objid1 != objid: + raise PDFSyntaxError('objid mismatch: %r=%r' % (objid1, objid)) + + if kwd != KWD(b'obj'): raise PDFSyntaxError('Invalid object spec: offset=%r' % pos) (_, obj) = self._parser.nextobject() return obj @@ -658,8 +674,7 @@ def getobj(self, objid): assert objid != 0 if not self.xrefs: raise PDFException('PDFDocument is not initialized') - if self.debug: - logging.debug('getobj: objid=%r' % objid) + log.debug('getobj: objid=%r', objid) if objid in 
self._cached_objs: (obj, genno) = self._cached_objs[objid] else: @@ -684,8 +699,7 @@ def getobj(self, objid): continue else: raise PDFObjectNotFound(objid) - if self.debug: - logging.debug('register: objid=%r: %r' % (objid, obj)) + log.debug('register: objid=%r: %r', objid, obj) if self.caching: self._cached_objs[objid] = (obj, genno) return obj @@ -758,17 +772,15 @@ def find_xref(self, parser): prev = None for line in parser.revreadlines(): line = line.strip() - if self.debug: - logging.debug('find_xref: %r' % line) + log.debug('find_xref: %r', line) if line == b'startxref': break if line: prev = line else: raise PDFNoValidXRef('Unexpected EOF') - if self.debug: - logging.info('xref found: pos=%r' % prev) - return long(prev) + log.info('xref found: pos=%r', prev) + return long(prev) if six.PY2 else int(prev) # read xref table def read_xref_from(self, parser, start, xrefs): @@ -779,8 +791,7 @@ def read_xref_from(self, parser, start, xrefs): (pos, token) = parser.nexttoken() except PSEOF: raise PDFNoValidXRef('Unexpected EOF') - if self.debug: - logging.info('read_xref_from: start=%d, token=%r' % (start, token)) + log.info('read_xref_from: start=%d, token=%r', start, token) if isinstance(token, int): # XRefStream: PDF-1.5 parser.seek(pos) @@ -794,8 +805,7 @@ def read_xref_from(self, parser, start, xrefs): xref.load(parser) xrefs.append(xref) trailer = xref.get_trailer() - if self.debug: - logging.info('trailer: %r' % trailer) + log.info('trailer: %r', trailer) if 'XRefStm' in trailer: pos = int_value(trailer['XRefStm']) self.read_xref_from(parser, pos, xrefs) diff --git a/pdfminer/pdffont.py b/pdfminer/pdffont.py index 6f8c6191..2a1c5f32 100644 --- a/pdfminer/pdffont.py +++ b/pdfminer/pdffont.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python + import sys import struct from io import BytesIO @@ -12,7 +12,7 @@ from .psparser import PSEOF from .psparser import LIT from .psparser import KWD -from .psparser import STRICT +from . 
import settings from .psparser import PSLiteral from .psparser import literal_name from .pdftypes import PDFException @@ -28,6 +28,8 @@ from .utils import choplist from .utils import isnumber +import six #Python 2+3 compatibility + def get_widths(seq): widths = {} @@ -43,7 +45,7 @@ def get_widths(seq): r.append(v) if len(r) == 3: (char1, char2, w) = r - for i in xrange(char1, char2+1): + for i in range(char1, char2+1): widths[i] = w r = [] return widths @@ -66,7 +68,7 @@ def get_widths2(seq): r.append(v) if len(r) == 5: (char1, char2, w, vx, vy) = r - for i in xrange(char1, char2+1): + for i in range(char1, char2+1): widths[i] = (w, (vx, vy)) r = [] return widths @@ -264,7 +266,7 @@ def __init__(self, fp): self.fp = fp self.offsets = [] (count, offsize) = struct.unpack('>HB', self.fp.read(3)) - for i in xrange(count+1): + for i in range(count+1): self.offsets.append(nunpack(self.fp.read(offsize))) self.base = self.fp.tell()-1 self.fp.seek(self.base+self.offsets[-1]) @@ -281,7 +283,7 @@ def __getitem__(self, i): return self.fp.read(self.offsets[i+1]-self.offsets[i]) def __iter__(self): - return iter(self[i] for i in xrange(len(self))) + return iter(self[i] for i in range(len(self))) def __init__(self, name, fp): self.name = name @@ -321,9 +323,9 @@ def __init__(self, name, fp): # Format 1 (n,) = struct.unpack('B', self.fp.read(1)) code = 0 - for i in xrange(n): + for i in range(n): (first, nleft) = struct.unpack('BB', self.fp.read(2)) - for gid in xrange(first, first+nleft+1): + for gid in range(first, first+nleft+1): self.code2gid[code] = gid self.gid2code[gid] = code code += 1 @@ -346,16 +348,16 @@ def __init__(self, name, fp): # Format 1 (n,) = struct.unpack('B', self.fp.read(1)) sid = 0 - for i in xrange(n): + for i in range(n): (first, nleft) = struct.unpack('BB', self.fp.read(2)) - for gid in xrange(first, first+nleft+1): + for gid in range(first, first+nleft+1): name = self.getstr(sid) self.name2gid[name] = gid self.gid2name[gid] = name sid += 1 elif format 
== b'\x02': # Format 2 - assert 0 + assert False, str(('Unhandled', format)) else: raise ValueError('unsupported charset format: %r' % format) #print self.code2gid @@ -382,7 +384,7 @@ def __init__(self, name, fp): self.tables = {} self.fonttype = fp.read(4) (ntables, _1, _2, _3) = struct.unpack('>HHHH', fp.read(8)) - for _ in xrange(ntables): + for _ in range(ntables): (name, tsum, offset, length) = struct.unpack('>4sLLL', fp.read(16)) self.tables[name] = (offset, length) return @@ -395,7 +397,7 @@ def create_unicode_map(self): fp.seek(base_offset) (version, nsubtables) = struct.unpack('>HH', fp.read(4)) subtables = [] - for i in xrange(nsubtables): + for i in range(nsubtables): subtables.append(struct.unpack('>HHL', fp.read(8))) char2gid = {} # Only supports subtable type 0, 2 and 4. @@ -411,7 +413,7 @@ def create_unicode_map(self): firstbytes[k//8] = i nhdrs = max(subheaderkeys)//8 + 1 hdrs = [] - for i in xrange(nhdrs): + for i in range(nhdrs): (firstcode, entcount, delta, offset) = struct.unpack('>HHhH', fp.read(8)) hdrs.append((i, firstcode, entcount, delta, fp.tell()-2+offset)) for (i, firstcode, entcount, delta, pos) in hdrs: @@ -419,7 +421,7 @@ def create_unicode_map(self): continue first = firstcode + (firstbytes[i] << 8) fp.seek(pos) - for c in xrange(entcount): + for c in range(entcount): gid = struct.unpack('>H', fp.read(2)) if gid: gid += delta @@ -436,13 +438,13 @@ def create_unicode_map(self): for (ec, sc, idd, idr) in zip(ecs, scs, idds, idrs): if idr: fp.seek(pos+idr) - for c in xrange(sc, ec+1): + for c in range(sc, ec+1): char2gid[c] = (struct.unpack('>H', fp.read(2))[0] + idd) & 0xffff else: - for c in xrange(sc, ec+1): + for c in range(sc, ec+1): char2gid[c] = (c + idd) & 0xffff else: - assert 0 + assert False, str(('Unhandled', fmttype)) # create unicode map unicode_map = FileUnicodeMap() for (char, gid) in char2gid.iteritems(): @@ -492,7 +494,7 @@ def is_multibyte(self): return False def decode(self, bytes): - return map(ord, bytes) + return 
bytearray(bytes) # map(ord, bytes) def get_ascent(self): return self.ascent * self.vscale @@ -541,7 +543,7 @@ def __init__(self, descriptor, widths, spec): encoding = LITERAL_STANDARD_ENCODING if isinstance(encoding, dict): name = literal_name(encoding.get('BaseEncoding', LITERAL_STANDARD_ENCODING)) - diff = list_value(encoding.get('Differences', None)) + diff = list_value(encoding.get('Differences', [])) self.cid2unicode = EncodingDB.get_encoding(name, diff) else: self.cid2unicode = EncodingDB.get_encoding(literal_name(encoding)) @@ -572,7 +574,7 @@ def __init__(self, rsrcmgr, spec): try: self.basefont = literal_name(spec['BaseFont']) except KeyError: - if STRICT: + if settings.STRICT: raise PDFFontError('BaseFont is missing') self.basefont = 'unknown' try: @@ -630,32 +632,32 @@ def __repr__(self): # PDFCIDFont class PDFCIDFont(PDFFont): - def __init__(self, rsrcmgr, spec): + def __init__(self, rsrcmgr, spec, strict=settings.STRICT): try: self.basefont = literal_name(spec['BaseFont']) except KeyError: - if STRICT: + if strict: raise PDFFontError('BaseFont is missing') self.basefont = 'unknown' self.cidsysteminfo = dict_value(spec.get('CIDSystemInfo', {})) - self.cidcoding = '%s-%s' % (self.cidsysteminfo.get('Registry', 'unknown'), - self.cidsysteminfo.get('Ordering', 'unknown')) + self.cidcoding = '%s-%s' % (resolve1(self.cidsysteminfo.get('Registry', b'unknown')).decode("latin1"), + resolve1(self.cidsysteminfo.get('Ordering', b'unknown')).decode("latin1")) try: name = literal_name(spec['Encoding']) except KeyError: - if STRICT: + if strict: raise PDFFontError('Encoding is unspecified') name = 'unknown' try: self.cmap = CMapDB.get_cmap(name) except CMapDB.CMapNotFound as e: - if STRICT: + if strict: raise PDFFontError(e) self.cmap = CMap() try: descriptor = dict_value(spec['FontDescriptor']) except KeyError: - if STRICT: + if strict: raise PDFFontError('FontDescriptor is missing') descriptor = {} ttf = None @@ -684,10 +686,10 @@ def __init__(self, rsrcmgr, spec): 
if self.vertical: # writing mode: vertical widths = get_widths2(list_value(spec.get('W2', []))) - self.disps = dict((cid, (vx, vy)) for (cid, (_, (vx, vy))) in widths.iteritems()) + self.disps = dict((cid, (vx, vy)) for (cid, (_, (vx, vy))) in six.iteritems(widths)) (vy, w) = spec.get('DW2', [880, -1000]) self.default_disp = (None, vy) - widths = dict((cid, w) for (cid, (w, _)) in widths.iteritems()) + widths = dict((cid, w) for (cid, (w, _)) in six.iteritems(widths)) default_width = w else: # writing mode: horizontal @@ -726,7 +728,7 @@ def to_unichr(self, cid): # main def main(argv): for fname in argv[1:]: - fp = file(fname, 'rb') + fp = open(fname, 'rb') #font = TrueTypeFont(fname, fp) font = CFFFont(fname, fp) print (font) diff --git a/pdfminer/pdfinterp.py b/pdfminer/pdfinterp.py index 3f3f3934..8aa47b4d 100644 --- a/pdfminer/pdfinterp.py +++ b/pdfminer/pdfinterp.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python + import re import logging from io import BytesIO @@ -12,7 +12,7 @@ from .psparser import PSStackParser from .psparser import LIT from .psparser import KWD -from .psparser import STRICT +from . 
import settings from .pdftypes import PDFException from .pdftypes import PDFStream from .pdftypes import PDFObjRef @@ -31,6 +31,9 @@ from .utils import mult_matrix from .utils import MATRIX_IDENTITY +import six # Python 2+3 compatibility + +log = logging.getLogger(__name__) ## Exceptions ## @@ -40,7 +43,6 @@ class PDFResourceError(PDFException): class PDFInterpreterError(PDFException): pass - ## Constants ## LITERAL_PDF = LIT('PDF') @@ -49,7 +51,6 @@ class PDFInterpreterError(PDFException): LITERAL_FORM = LIT('Form') LITERAL_IMAGE = LIT('Image') - ## PDFTextState ## class PDFTextState(object): @@ -108,6 +109,14 @@ def __init__(self): self.dash = None self.intent = None self.flatness = None + + self.color = None + + # stroking color + self.scolor = None + + # non stroking color + self.ncolor = None return def copy(self): @@ -119,13 +128,18 @@ def copy(self): obj.dash = self.dash obj.intent = self.intent obj.flatness = self.flatness + obj.color = self.color + obj.scolor = self.scolor + obj.ncolor = self.ncolor return obj def __repr__(self): return ('' % + ' miterlimit=%r, dash=%r, intent=%r, flatness=%r, ' + ' stroking color=%r, non stroking color=%r>' % (self.linewidth, self.linecap, self.linejoin, - self.miterlimit, self.dash, self.intent, self.flatness)) + self.miterlimit, self.dash, self.intent, self.flatness, + self.scolor, self.ncolor)) ## Resource Manager @@ -139,8 +153,6 @@ class PDFResourceManager(object): allocated multiple times. """ - debug = False - def __init__(self, caching=True): self.caching = caching self._cached_fonts = {} @@ -169,16 +181,15 @@ def get_font(self, objid, spec): if objid and objid in self._cached_fonts: font = self._cached_fonts[objid] else: - if self.debug: - logging.info('get_font: create: objid=%r, spec=%r' % (objid, spec)) - if STRICT: + log.info('get_font: create: objid=%r, spec=%r', objid, spec) + if settings.STRICT: if spec['Type'] is not LITERAL_FONT: raise PDFFontError('Type is not /Font') # Create a Font object. 
if 'Subtype' in spec: subtype = literal_name(spec['Subtype']) else: - if STRICT: + if settings.STRICT: raise PDFFontError('Font Subtype is not specified.') subtype = 'Type1' if subtype in ('Type1', 'MMType1'): @@ -203,7 +214,7 @@ def get_font(self, objid, spec): subspec[k] = resolve1(spec[k]) font = self.get_font(None, subspec) else: - if STRICT: + if settings.STRICT: raise PDFFontError('Invalid Font spec: %r' % spec) font = PDFType1Font(self, spec) # this is so wrong! if objid and self.caching: @@ -237,8 +248,13 @@ def seek(self, pos): return def fillbuf(self): - if self.charpos < len(self.buf): - return + if isinstance(self.charpos, tuple): #somtimes charpos is tuple instead of int. if tuple, get correct pos data from tuple + if self.charpos[1] < len(self.buf): + return + else: + if self.charpos < len(self.buf): + return + #if self.charpos < len(self.buf): while 1: self.fillfp() self.bufpos = self.fp.tell() @@ -256,12 +272,13 @@ def get_inline_data(self, pos, target=b'EI'): while i <= len(target): self.fillbuf() if i: - c = self.buf[self.charpos] + c = six.indexbytes(self.buf,self.charpos) + c=six.int2byte(c) data += c self.charpos += 1 if len(target) <= i and c.isspace(): i += 1 - elif i < len(target) and c == target[i]: + elif i < len(target) and c == (six.int2byte(target[i]) if six.PY3 else target[i]): i += 1 else: i = 0 @@ -302,7 +319,7 @@ def do_keyword(self, pos, token): self.push((pos, obj)) self.push((pos, self.KEYWORD_EI)) except PSTypeError: - if STRICT: + if settings.STRICT: raise else: self.push((pos, token)) @@ -313,8 +330,6 @@ def do_keyword(self, pos, token): ## class PDFPageInterpreter(object): - debug = 0 - def __init__(self, rsrcmgr, device): self.rsrcmgr = rsrcmgr self.device = device @@ -344,23 +359,22 @@ def get_colorspace(spec): return PDFColorSpace(name, len(list_value(spec[1]))) else: return PREDEFINED_COLORSPACE.get(name) - for (k, v) in dict_value(resources).iteritems(): - if self.debug: - logging.debug('Resource: %r: %r' % (k, v)) + for 
(k, v) in six.iteritems(dict_value(resources)): + log.debug('Resource: %r: %r', k, v) if k == 'Font': - for (fontid, spec) in dict_value(v).iteritems(): + for (fontid, spec) in six.iteritems(dict_value(v)): objid = None if isinstance(spec, PDFObjRef): objid = spec.objid spec = dict_value(spec) self.fontmap[fontid] = self.rsrcmgr.get_font(objid, spec) elif k == 'ColorSpace': - for (csid, spec) in dict_value(v).iteritems(): + for (csid, spec) in six.iteritems(dict_value(v)): self.csmap[csid] = get_colorspace(resolve1(spec)) elif k == 'ProcSet': self.rsrcmgr.get_procset(list_value(v)) elif k == 'XObject': - for (xobjid, xobjstrm) in dict_value(v).iteritems(): + for (xobjid, xobjstrm) in six.iteritems(dict_value(v)): self.xobjmap[xobjid] = xobjstrm return @@ -379,7 +393,7 @@ def init_state(self, ctm): # set some global states. self.scs = self.ncs = None if self.csmap: - self.scs = self.ncs = self.csmap.values()[0] + self.scs = self.ncs = six.next(six.itervalues(self.csmap)) return def push(self, obj): @@ -565,7 +579,7 @@ def do_CS(self, name): try: self.scs = self.csmap[literal_name(name)] except KeyError: - if STRICT: + if settings.STRICT: raise PDFInterpreterError('Undefined ColorSpace: %r' % name) return @@ -574,37 +588,45 @@ def do_cs(self, name): try: self.ncs = self.csmap[literal_name(name)] except KeyError: - if STRICT: + if settings.STRICT: raise PDFInterpreterError('Undefined ColorSpace: %r' % name) return # setgray-stroking def do_G(self, gray): + self.graphicstate.color = gray + self.graphicstate.scolor = gray #self.do_CS(LITERAL_DEVICE_GRAY) return # setgray-non-stroking def do_g(self, gray): + self.graphicstate.color = gray + self.graphicstate.ncolor = gray #self.do_cs(LITERAL_DEVICE_GRAY) return # setrgb-stroking def do_RG(self, r, g, b): + self.graphicstate.color = (r, g, b) #self.do_CS(LITERAL_DEVICE_RGB) return # setrgb-non-stroking def do_rg(self, r, g, b): + self.graphicstate.color = (r, g, b) #self.do_cs(LITERAL_DEVICE_RGB) return # setcmyk-stroking 
def do_K(self, c, m, y, k): + self.graphicstate.color = (c, m, y, k) #self.do_CS(LITERAL_DEVICE_CMYK) return # setcmyk-non-stroking def do_k(self, c, m, y, k): + self.graphicstate.color = (c, m, y, k) #self.do_cs(LITERAL_DEVICE_CMYK) return @@ -613,20 +635,20 @@ def do_SCN(self): if self.scs: n = self.scs.ncomponents else: - if STRICT: + if settings.STRICT: raise PDFInterpreterError('No colorspace specified!') n = 1 - self.pop(n) + self.graphicstate.scolor = self.pop(n) return def do_scn(self): if self.ncs: n = self.ncs.ncomponents else: - if STRICT: + if settings.STRICT: raise PDFInterpreterError('No colorspace specified!') n = 1 - self.pop(n) + self.graphicstate.ncolor = self.pop(n) return def do_SC(self): @@ -704,7 +726,7 @@ def do_Tf(self, fontid, fontsize): try: self.textstate.font = self.fontmap[literal_name(fontid)] except KeyError: - if STRICT: + if settings.STRICT: raise PDFInterpreterError('Undefined Font id: %r' % fontid) self.textstate.font = self.rsrcmgr.get_font(None, {}) self.textstate.fontsize = fontsize @@ -754,10 +776,10 @@ def do_T_a(self): def do_TJ(self, seq): #print >>sys.stderr, 'TJ(%r): %r' % (seq, self.textstate) if self.textstate.font is None: - if STRICT: + if settings.STRICT: raise PDFInterpreterError('No font specified!') return - self.device.render_string(self.textstate, seq) + self.device.render_string(self.textstate, seq, self.graphicstate.copy()) return # show @@ -799,10 +821,10 @@ def do_Do(self, xobjid): try: xobj = stream_value(self.xobjmap[xobjid]) except KeyError: - if STRICT: + if settings.STRICT: raise PDFInterpreterError('Undefined xobject id: %r' % xobjid) return - if self.debug: logging.info('Processing xobj: %r' % xobj) + log.info('Processing xobj: %r', xobj) subtype = xobj.get('Subtype') if subtype is LITERAL_FORM and 'BBox' in xobj: interpreter = self.dup() @@ -811,7 +833,8 @@ def do_Do(self, xobjid): # According to PDF reference 1.7 section 4.9.1, XObjects in # earlier PDFs (prior to v1.2) use the page's Resources 
entry # instead of having their own Resources entry. - resources = dict_value(xobj.get('Resources')) or self.resources.copy() + xobjres = xobj.get('Resources') + resources = dict_value(xobjres) if xobjres else self.resources.copy() self.device.begin_figure(xobjid, bbox, matrix) interpreter.render_contents(resources, [xobj], ctm=mult_matrix(matrix, self.ctm)) self.device.end_figure(xobjid) @@ -825,7 +848,7 @@ def do_Do(self, xobjid): return def process_page(self, page): - if self.debug: logging.info('Processing page: %r' % page) + log.info('Processing page: %r', page) (x0, y0, x1, y1) = page.mediabox if page.rotate == 90: ctm = (0, -1, 1, 0, -y0, x1) @@ -844,9 +867,8 @@ def process_page(self, page): # Render the content streams. # This method may be called recursively. def render_contents(self, resources, streams, ctm=MATRIX_IDENTITY): - if self.debug: - logging.info('render_contents: resources=%r, streams=%r, ctm=%r' % - (resources, streams, ctm)) + log.info('render_contents: resources=%r, streams=%r, ctm=%r', + resources, streams, ctm) self.init_resources(resources) self.init_state(ctm) self.execute(list_value(streams)) @@ -868,19 +890,17 @@ def execute(self, streams): method = 'do_%s' % name.replace('*', '_a').replace('"', '_w').replace("'", '_q') if hasattr(self, method): func = getattr(self, method) - nargs = func.func_code.co_argcount-1 + nargs = six.get_function_code(func).co_argcount-1 if nargs: args = self.pop(nargs) - if self.debug: - logging.debug('exec: %s %r' % (name, args)) + log.debug('exec: %s %r', name, args) if len(args) == nargs: func(*args) else: - if self.debug: - logging.debug('exec: %s' % name) + log.debug('exec: %s', name) func() else: - if STRICT: + if settings.STRICT: raise PDFInterpreterError('Unknown operator: %r' % name) else: self.push(obj) diff --git a/pdfminer/pdfpage.py b/pdfminer/pdfpage.py index a48767c6..418aeb24 100644 --- a/pdfminer/pdfpage.py +++ b/pdfminer/pdfpage.py @@ -1,5 +1,6 @@ -#!/usr/bin/env python + import logging 
+from . import settings from .psparser import LIT from .pdftypes import PDFObjectNotFound from .pdftypes import resolve1 @@ -10,11 +11,14 @@ from .pdfdocument import PDFDocument from .pdfdocument import PDFTextExtractionNotAllowed +import six # Python 2+3 compatibility + +log = logging.getLogger(__name__) + # some predefined literals and keywords. LITERAL_PAGE = LIT('Page') LITERAL_PAGES = LIT('Pages') - ## PDFPage ## class PDFPage(object): @@ -39,8 +43,6 @@ class PDFPage(object): beads: a chain that represents natural reading order. """ - debug = False - def __init__(self, doc, pageid, attrs): """Initialize a page object. @@ -84,16 +86,21 @@ def search(obj, parent): else: objid = obj.objid tree = dict_value(obj).copy() - for (k, v) in parent.iteritems(): + for (k, v) in six.iteritems(parent): if k in klass.INHERITABLE_ATTRS and k not in tree: tree[k] = v - if tree.get('Type') is LITERAL_PAGES and 'Kids' in tree: - if klass.debug: logging.info('Pages: Kids=%r' % tree['Kids']) + + tree_type = tree.get('Type') + if tree_type is None and not settings.STRICT: # See #64 + tree_type = tree.get('type') + + if tree_type is LITERAL_PAGES and 'Kids' in tree: + log.info('Pages: Kids=%r', tree['Kids']) for c in list_value(tree['Kids']): for x in search(c, tree): yield x - elif tree.get('Type') is LITERAL_PAGE: - if klass.debug: logging.info('Page: %r' % tree) + elif tree_type is LITERAL_PAGE: + log.info('Page: %r', tree) yield (objid, tree) pages = False if 'Pages' in document.catalog: @@ -114,7 +121,7 @@ def search(obj, parent): @classmethod def get_pages(klass, fp, - pagenos=None, maxpages=0, password=b'', + pagenos=None, maxpages=0, password='', caching=True, check_extractable=True): # Create a PDF parser object associated with the file object. 
parser = PDFParser(fp) diff --git a/pdfminer/pdfparser.py b/pdfminer/pdfparser.py index 61eb1dc8..1dc17d7b 100644 --- a/pdfminer/pdfparser.py +++ b/pdfminer/pdfparser.py @@ -1,17 +1,19 @@ -#!/usr/bin/env python + import logging from io import BytesIO from .psparser import PSStackParser from .psparser import PSSyntaxError from .psparser import PSEOF from .psparser import KWD -from .psparser import STRICT +from . import settings from .pdftypes import PDFException from .pdftypes import PDFStream from .pdftypes import PDFObjRef from .pdftypes import int_value from .pdftypes import dict_value +log = logging.getLogger(__name__) + ## Exceptions ## @@ -89,13 +91,13 @@ def do_keyword(self, pos, token): try: objlen = int_value(dic['Length']) except KeyError: - if STRICT: + if settings.STRICT: raise PDFSyntaxError('/Length is undefined: %r' % dic) self.seek(pos) try: (_, line) = self.nextline() # 'stream' except PSEOF: - if STRICT: + if settings.STRICT: raise PDFSyntaxError('Unexpected EOF') return pos += len(line) @@ -106,7 +108,7 @@ def do_keyword(self, pos, token): try: (linepos, line) = self.nextline() except PSEOF: - if STRICT: + if settings.STRICT: raise PDFSyntaxError('Unexpected EOF') break if b'endstream' in line: @@ -120,9 +122,7 @@ def do_keyword(self, pos, token): data += line self.seek(pos+objlen) # XXX limit objlen not to exceed object boundary - if self.debug: - logging.debug('Stream: pos=%d, objlen=%d, dic=%r, data=%r...' % \ - (pos, objlen, dic, data[:10])) + log.debug('Stream: pos=%d, objlen=%d, dic=%r, data=%r...', pos, objlen, dic, data[:10]) obj = PDFStream(dic, data, self.doc.decipher) self.push((pos, obj)) @@ -166,7 +166,7 @@ def do_keyword(self, pos, token): pass return elif token in (self.KEYWORD_OBJ, self.KEYWORD_ENDOBJ): - if STRICT: + if settings.STRICT: # See PDF Spec 3.4.6: Only the object values are stored in the # stream; the obj and endobj keywords are not used. 
raise PDFSyntaxError('Keyword endobj found in stream') diff --git a/pdfminer/pdftypes.py b/pdfminer/pdftypes.py index 20d981dd..40cca46b 100644 --- a/pdfminer/pdftypes.py +++ b/pdfminer/pdftypes.py @@ -1,5 +1,6 @@ -#!/usr/bin/env python + import zlib +import logging from .lzw import lzwdecode from .ascii85 import ascii85decode from .ascii85 import asciihexdecode @@ -8,10 +9,13 @@ from .psparser import PSException from .psparser import PSObject from .psparser import LIT -from .psparser import STRICT +from . import settings from .utils import apply_png_predictor from .utils import isnumber +import six #Python 2+3 compatibility + +log = logging.getLogger(__name__) LITERAL_CRYPT = LIT('Crypt') @@ -52,7 +56,7 @@ class PDFObjRef(PDFObject): def __init__(self, doc, objid, _): if objid == 0: - if STRICT: + if settings.STRICT: raise PDFValueError('PDF object id cannot be 0.') self.doc = doc self.objid = objid @@ -100,21 +104,21 @@ def resolve_all(x, default=None): def decipher_all(decipher, objid, genno, x): """Recursively deciphers the given object. 
""" - if isinstance(x, str): + if isinstance(x, bytes): return decipher(objid, genno, x) if isinstance(x, list): x = [decipher_all(decipher, objid, genno, v) for v in x] elif isinstance(x, dict): - for (k, v) in x.iteritems(): + for (k, v) in six.iteritems(x): x[k] = decipher_all(decipher, objid, genno, v) return x -# Type checking +# Type cheking def int_value(x): x = resolve1(x) if not isinstance(x, int): - if STRICT: + if settings.STRICT: raise PDFTypeError('Integer required: %r' % x) return 0 return x @@ -123,7 +127,7 @@ def int_value(x): def float_value(x): x = resolve1(x) if not isinstance(x, float): - if STRICT: + if settings.STRICT: raise PDFTypeError('Float required: %r' % x) return 0.0 return x @@ -132,7 +136,7 @@ def float_value(x): def num_value(x): x = resolve1(x) if not isnumber(x): - if STRICT: + if settings.STRICT: raise PDFTypeError('Int or Float required: %r' % x) return 0 return x @@ -140,8 +144,8 @@ def num_value(x): def str_value(x): x = resolve1(x) - if not isinstance(x, str): - if STRICT: + if not isinstance(x, six.binary_type): + if settings.STRICT: raise PDFTypeError('String required: %r' % x) return '' return x @@ -150,7 +154,7 @@ def str_value(x): def list_value(x): x = resolve1(x) if not isinstance(x, (list, tuple)): - if STRICT: + if settings.STRICT: raise PDFTypeError('List required: %r' % x) return [] return x @@ -159,7 +163,8 @@ def list_value(x): def dict_value(x): x = resolve1(x) if not isinstance(x, dict): - if STRICT: + if settings.STRICT: + log.error('PDFTypeError : Dict required: %r', x) raise PDFTypeError('Dict required: %r' % x) return {} return x @@ -168,9 +173,9 @@ def dict_value(x): def stream_value(x): x = resolve1(x) if not isinstance(x, PDFStream): - if STRICT: + if settings.STRICT: raise PDFTypeError('PDFStream required: %r' % x) - return PDFStream({}, '') + return PDFStream({}, b'') return x @@ -179,7 +184,7 @@ def stream_value(x): class PDFStream(PDFObject): def __init__(self, attrs, rawdata, decipher=None): - assert 
isinstance(attrs, dict) + assert isinstance(attrs, dict), str(type(attrs)) self.attrs = attrs self.rawdata = rawdata self.decipher = decipher @@ -225,13 +230,19 @@ def get_filters(self): filters = [filters] if not isinstance(params, list): # Make sure the parameters list is the same as filters. - params = [params]*len(filters) - if STRICT and len(params) != len(filters): + params = [params] * len(filters) + if settings.STRICT and len(params) != len(filters): raise PDFException("Parameters len filter mismatch") - return zip(filters, params) + # resolve filter if possible + _filters = [] + for fltr in filters: + if hasattr(fltr, 'resolve'): + fltr = fltr.resolve()[0] + _filters.append(fltr) + return list(zip(_filters, params)) #solves https://github.com/pdfminer/pdfminer.six/issues/15 def decode(self): - assert self.data is None and self.rawdata is not None + assert self.data is None and self.rawdata is not None, str((self.data, self.rawdata)) data = self.rawdata if self.decipher: # Handle encryption @@ -247,7 +258,7 @@ def decode(self): try: data = zlib.decompress(data) except zlib.error as e: - if STRICT: + if settings.STRICT: raise PDFException('Invalid zlib bytes: %r, %r' % (e, data)) data = b'' elif f in LITERALS_LZW_DECODE: @@ -270,7 +281,7 @@ def decode(self): else: raise PDFNotImplementedError('Unsupported filter: %r' % f) # apply predictors - if 'Predictor' in params: + if params and 'Predictor' in params: pred = int_value(params['Predictor']) if pred == 1: # no predictor diff --git a/pdfminer/psparser.py b/pdfminer/psparser.py index 7270b454..9b214af0 100644 --- a/pdfminer/psparser.py +++ b/pdfminer/psparser.py @@ -1,9 +1,16 @@ #!/usr/bin/env python + +# -*- coding: utf-8 -*- + import re import logging + +import six # Python 2+3 compatibility + +from . 
import settings from .utils import choplist -STRICT = 0 +log = logging.getLogger(__name__) ## PS Exceptions @@ -57,10 +64,10 @@ class PSLiteral(PSObject): def __init__(self, name): self.name = name - return def __repr__(self): - return '/%r' % self.name + name=self.name + return '/%r' % name ## PSKeyword @@ -82,7 +89,8 @@ def __init__(self, name): return def __repr__(self): - return self.name + name=self.name + return '/%r' % name ## PSSymbolTable @@ -121,20 +129,30 @@ def intern(self, name): def literal_name(x): if not isinstance(x, PSLiteral): - if STRICT: + if settings.STRICT: raise PSTypeError('Literal required: %r' % (x,)) else: - return str(x) - return x.name - + name=x + else: + name=x.name + if six.PY3: + try: + name = str(name,'utf-8') + except: + pass + return name def keyword_name(x): if not isinstance(x, PSKeyword): - if STRICT: - raise PSTypeError('Keyword required: %r' % (x,)) + if settings.STRICT: + raise PSTypeError('Keyword required: %r' % x) else: - return str(x) - return x.name + name=x + else: + name=x.name + if six.PY3: + name = str(name,'utf-8','ignore') + return name ## PSBaseParser @@ -159,8 +177,6 @@ class PSBaseParser(object): """ BUFSIZ = 4096 - debug = 0 - def __init__(self, fp): self.fp = fp self.seek(0) @@ -184,15 +200,14 @@ def poll(self, pos=None, n=80): if not pos: pos = self.bufpos+self.charpos self.fp.seek(pos) - logging.info('poll(%d): %r' % (pos, self.fp.read(n))) + log.info('poll(%d): %r', pos, self.fp.read(n)) self.fp.seek(pos0) return def seek(self, pos): """Seeks the parser to the given position. 
""" - if self.debug: - logging.debug('seek: %r' % pos) + log.debug('seek: %r', pos) self.fp.seek(pos) # reset the status for nextline() self.bufpos = pos @@ -225,7 +240,7 @@ def nextline(self): while 1: self.fillbuf() if eol: - c = self.buf[self.charpos] + c = self.buf[self.charpos:self.charpos+1] # handle b'\r\n' if c == b'\n': linebuf += c @@ -235,19 +250,19 @@ def nextline(self): if m: linebuf += self.buf[self.charpos:m.end(0)] self.charpos = m.end(0) - if linebuf[-1] == b'\r': + if linebuf[-1:] == b'\r': eol = True else: break else: linebuf += self.buf[self.charpos:] self.charpos = len(self.buf) - if self.debug: - logging.debug('nextline: %r, %r' % (linepos, linebuf)) + log.debug('nextline: %r, %r', linepos, linebuf) + return (linepos, linebuf) def revreadlines(self): - """Fetches a next line backward. + """Fetches a next line backword. This is used to locate the trailers at the end of a file. """ @@ -266,7 +281,7 @@ def revreadlines(self): if n == -1: buf = s + buf break - yield s[n:]+buf + yield s[n:] + buf s = s[:n] buf = b'' return @@ -276,7 +291,7 @@ def _parse_main(self, s, i): if not m: return len(s) j = m.start(0) - c = s[j] + c = s[j:j+1] self._curtokenpos = self.bufpos+j if c == b'%': self._curtoken = b'%' @@ -338,33 +353,26 @@ def _parse_literal(self, s, i): return len(s) j = m.start(0) self._curtoken += s[i:j] - c = s[j] + c = s[j:j+1] if c == b'#': self.hex = b'' self._parse1 = self._parse_literal_hex return j+1 - try: - # Try to interpret the token as a utf-8 string - utoken = self._curtoken.decode('utf-8') - except UnicodeDecodeError: - # We failed, there is possibly a corrupt PDF here. 
- if STRICT: raise - utoken = "" - self._add_token(LIT(utoken)) + self._curtoken=str(self._curtoken,'utf-8') + except: + pass + self._add_token(LIT(self._curtoken)) self._parse1 = self._parse_main return j def _parse_literal_hex(self, s, i): - c = s[i] + c = s[i:i+1] if HEX.match(c) and len(self.hex) < 2: self.hex += c return i+1 if self.hex: - try: - self._curtoken += chr(int(self.hex, 16)) - except ValueError: - pass + self._curtoken += six.int2byte(int(self.hex, 16)) self._parse1 = self._parse_literal return i @@ -375,7 +383,7 @@ def _parse_number(self, s, i): return len(s) j = m.start(0) self._curtoken += s[i:j] - c = s[j] + c = s[j:j+1] if c == b'.': self._curtoken += c self._parse1 = self._parse_float @@ -425,7 +433,7 @@ def _parse_string(self, s, i): return len(s) j = m.start(0) self._curtoken += s[i:j] - c = s[j] + c = s[j:j+1] if c == b'\\': self.oct = b'' self._parse1 = self._parse_string_1 @@ -439,29 +447,26 @@ def _parse_string(self, s, i): if self.paren: # WTF, they said balanced parens need no special treatment. 
self._curtoken += c return j+1 - self._add_token(str(self._curtoken)) + self._add_token(self._curtoken) self._parse1 = self._parse_main return j+1 def _parse_string_1(self, s, i): - c = s[i] + c = s[i:i+1] if OCT_STRING.match(c) and len(self.oct) < 3: self.oct += c return i+1 if self.oct: - try: - self._curtoken += chr(int(self.oct, 8)) - except ValueError: - pass + self._curtoken += six.int2byte(int(self.oct, 8)) self._parse1 = self._parse_string return i if c in ESC_STRING: - self._curtoken += chr(ESC_STRING[c]) + self._curtoken += six.int2byte(ESC_STRING[c]) self._parse1 = self._parse_string return i+1 def _parse_wopen(self, s, i): - c = s[i] + c = s[i:i+1] if c == b'<': self._add_token(KEYWORD_DICT_BEGIN) self._parse1 = self._parse_main @@ -471,7 +476,7 @@ def _parse_wopen(self, s, i): return i def _parse_wclose(self, s, i): - c = s[i] + c = s[i:i+1] if c == b'>': self._add_token(KEYWORD_DICT_END) i += 1 @@ -485,12 +490,8 @@ def _parse_hexstring(self, s, i): return len(s) j = m.start(0) self._curtoken += s[i:j] - try: - token = HEX_PAIR.sub(lambda m: chr(int(m.group(0), 16)), - SPC.sub(b'', self._curtoken)) - self._add_token(token) - except ValueError: - pass + token = HEX_PAIR.sub(lambda m: six.int2byte(int(m.group(0), 16)),SPC.sub(b'', self._curtoken)) + self._add_token(token) self._parse1 = self._parse_main return j @@ -499,8 +500,7 @@ def nexttoken(self): self.fillbuf() self.charpos = self._parse1(self.buf, self.charpos) token = self._tokens.pop(0) - if self.debug: - logging.debug('nexttoken: %r' % (token,)) + log.debug('nexttoken: %r', token) return token @@ -540,16 +540,17 @@ def popall(self): return objs def add_results(self, *objs): - if self.debug: - logging.debug('add_results: %r' % (objs,)) + try: + log.debug('add_results: %r', objs) + except: + log.debug('add_results: (unprintable object)') self.results.extend(objs) return def start_type(self, pos, type): self.context.append((pos, self.curtype, self.curstack)) (self.curtype, self.curstack) = (type, 
[]) - if self.debug: - logging.debug('start_type: pos=%r, type=%r' % (pos, type)) + log.debug('start_type: pos=%r, type=%r', pos, type) return def end_type(self, type): @@ -557,8 +558,7 @@ def end_type(self, type): raise PSTypeError('Type mismatch: %r != %r' % (self.curtype, type)) objs = [obj for (_, obj) in self.curstack] (pos, self.curtype, self.curstack) = self.context.pop() - if self.debug: - logging.debug('end_type: pos=%r, type=%r, objs=%r' % (pos, type, objs)) + log.debug('end_type: pos=%r, type=%r, objs=%r', pos, type, objs) return (pos, objs) def do_keyword(self, pos, token): @@ -573,7 +573,7 @@ def nextobject(self): while not self.results: (pos, token) = self.nexttoken() #print (pos,token), (self.curtype, self.curstack) - if isinstance(token, (int, long, float, bool, str, PSLiteral)): + if isinstance(token, (six.integer_types, float, bool, six.string_types, six.binary_type, PSLiteral)): # normal token self.push((pos, token)) elif token == KEYWORD_ARRAY_BEGIN: @@ -584,7 +584,7 @@ def nextobject(self): try: self.push(self.end_type('a')) except PSTypeError: - if STRICT: + if settings.STRICT: raise elif token == KEYWORD_DICT_BEGIN: # begin dictionary @@ -594,12 +594,12 @@ def nextobject(self): try: (pos, objs) = self.end_type('d') if len(objs) % 2 != 0: - raise PSSyntaxError('Invalid dictionary construct: %r' % (objs,)) + raise PSSyntaxError('Invalid dictionary construct: %r' % objs) # construct a Python dictionary. 
d = dict((literal_name(k), v) for (k, v) in choplist(2, objs) if v is not None) self.push((pos, d)) except PSTypeError: - if STRICT: + if settings.STRICT: raise elif token == KEYWORD_PROC_BEGIN: # begin proc @@ -609,117 +609,22 @@ def nextobject(self): try: self.push(self.end_type('p')) except PSTypeError: - if STRICT: + if settings.STRICT: raise + elif isinstance(token,PSKeyword): + log.debug('do_keyword: pos=%r, token=%r, stack=%r', pos, token, self.curstack) + self.do_keyword(pos, token) else: - if self.debug: - logging.debug('do_keyword: pos=%r, token=%r, stack=%r' % \ - (pos, token, self.curstack)) + log.error('unknown token: pos=%r, token=%r, stack=%r', pos, token, self.curstack) self.do_keyword(pos, token) + raise if self.context: continue else: self.flush() obj = self.results.pop(0) - if self.debug: - logging.debug('nextobject: %r' % (obj,)) - return obj - - -import unittest - - -## Simplistic Test cases -## -class TestPSBaseParser(unittest.TestCase): - - TESTDATA = br'''%!PS -begin end - " @ # -/a/BCD /Some_Name /foo#5f#xbaa -0 +1 -2 .5 1.234 -(abc) () (abc ( def ) ghi) -(def\040\0\0404ghi) (bach\\slask) (foo\nbaa) -(this % is not a comment.) -(foo -baa) -(foo\ -baa) -<> <20> < 40 4020 > - -func/a/b{(c)do*}def -[ 1 (z) ! 
] -<< /foo (bar) >> -''' - - TOKENS = [ - (5, KWD(b'begin')), (11, KWD(b'end')), (16, KWD(b'"')), (19, KWD(b'@')), - (21, KWD(b'#')), (23, LIT('a')), (25, LIT('BCD')), (30, LIT('Some_Name')), - (41, LIT('foo_xbaa')), (54, 0), (56, 1), (59, -2), (62, 0.5), - (65, 1.234), (71, b'abc'), (77, b''), (80, b'abc ( def ) ghi'), - (98, b'def \x00 4ghi'), (118, b'bach\\slask'), (132, b'foo\nbaa'), - (143, b'this % is not a comment.'), (170, b'foo\nbaa'), (180, b'foobaa'), - (191, b''), (194, b' '), (199, b'@@ '), (211, b'\xab\xcd\x00\x124\x05'), - (226, KWD(b'func')), (230, LIT('a')), (232, LIT('b')), - (234, KWD(b'{')), (235, b'c'), (238, KWD(b'do*')), (241, KWD(b'}')), - (242, KWD(b'def')), (246, KWD(b'[')), (248, 1), (250, b'z'), (254, KWD(b'!')), - (256, KWD(b']')), (258, KWD(b'<<')), (261, LIT('foo')), (266, b'bar'), - (272, KWD(b'>>')) - ] - - OBJS = [ - (23, LIT('a')), (25, LIT('BCD')), (30, LIT('Some_Name')), - (41, LIT('foo_xbaa')), (54, 0), (56, 1), (59, -2), (62, 0.5), - (65, 1.234), (71, 'abc'), (77, ''), (80, 'abc ( def ) ghi'), - (98, 'def \x00 4ghi'), (118, 'bach\\slask'), (132, 'foo\nbaa'), - (143, 'this % is not a comment.'), (170, 'foo\nbaa'), (180, 'foobaa'), - (191, ''), (194, ' '), (199, '@@ '), (211, '\xab\xcd\x00\x124\x05'), - (230, LIT('a')), (232, LIT('b')), (234, ['c']), (246, [1, 'z']), - (258, {'foo': 'bar'}), - ] - - def get_tokens(self, s): - from io import BytesIO - - class MyParser(PSBaseParser): - def flush(self): - self.add_results(*self.popall()) - parser = MyParser(BytesIO(s)) - r = [] - try: - while 1: - r.append(parser.nexttoken()) - except PSEOF: - pass - return r - - def get_objects(self, s): - from io import BytesIO - - class MyParser(PSStackParser): - def flush(self): - self.add_results(*self.popall()) - parser = MyParser(BytesIO(s)) - r = [] try: - while 1: - r.append(parser.nextobject()) - except PSEOF: - pass - return r - - def test_1(self): - tokens = self.get_tokens(self.TESTDATA) - print (tokens) - self.assertEqual(tokens, 
self.TOKENS) - return - - def test_2(self): - objs = self.get_objects(self.TESTDATA) - print (objs) - self.assertEqual(objs, self.OBJS) - return - -if __name__ == '__main__': - unittest.main() + log.debug('nextobject: %r', obj) + except: + log.debug('nextobject: (unprintable object)') + return obj diff --git a/pdfminer/rijndael.py b/pdfminer/rijndael.py index 756dd467..2d3a7ab2 100644 --- a/pdfminer/rijndael.py +++ b/pdfminer/rijndael.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python + """ Python implementation of Rijndael encryption algorithm. @@ -25,673 +25,673 @@ def NROUNDS(keybits): return (keybits)//32+6 Te0 = [ - 0xc66363a5L, 0xf87c7c84L, 0xee777799L, 0xf67b7b8dL, - 0xfff2f20dL, 0xd66b6bbdL, 0xde6f6fb1L, 0x91c5c554L, - 0x60303050L, 0x02010103L, 0xce6767a9L, 0x562b2b7dL, - 0xe7fefe19L, 0xb5d7d762L, 0x4dababe6L, 0xec76769aL, - 0x8fcaca45L, 0x1f82829dL, 0x89c9c940L, 0xfa7d7d87L, - 0xeffafa15L, 0xb25959ebL, 0x8e4747c9L, 0xfbf0f00bL, - 0x41adadecL, 0xb3d4d467L, 0x5fa2a2fdL, 0x45afafeaL, - 0x239c9cbfL, 0x53a4a4f7L, 0xe4727296L, 0x9bc0c05bL, - 0x75b7b7c2L, 0xe1fdfd1cL, 0x3d9393aeL, 0x4c26266aL, - 0x6c36365aL, 0x7e3f3f41L, 0xf5f7f702L, 0x83cccc4fL, - 0x6834345cL, 0x51a5a5f4L, 0xd1e5e534L, 0xf9f1f108L, - 0xe2717193L, 0xabd8d873L, 0x62313153L, 0x2a15153fL, - 0x0804040cL, 0x95c7c752L, 0x46232365L, 0x9dc3c35eL, - 0x30181828L, 0x379696a1L, 0x0a05050fL, 0x2f9a9ab5L, - 0x0e070709L, 0x24121236L, 0x1b80809bL, 0xdfe2e23dL, - 0xcdebeb26L, 0x4e272769L, 0x7fb2b2cdL, 0xea75759fL, - 0x1209091bL, 0x1d83839eL, 0x582c2c74L, 0x341a1a2eL, - 0x361b1b2dL, 0xdc6e6eb2L, 0xb45a5aeeL, 0x5ba0a0fbL, - 0xa45252f6L, 0x763b3b4dL, 0xb7d6d661L, 0x7db3b3ceL, - 0x5229297bL, 0xdde3e33eL, 0x5e2f2f71L, 0x13848497L, - 0xa65353f5L, 0xb9d1d168L, 0x00000000L, 0xc1eded2cL, - 0x40202060L, 0xe3fcfc1fL, 0x79b1b1c8L, 0xb65b5bedL, - 0xd46a6abeL, 0x8dcbcb46L, 0x67bebed9L, 0x7239394bL, - 0x944a4adeL, 0x984c4cd4L, 0xb05858e8L, 0x85cfcf4aL, - 0xbbd0d06bL, 0xc5efef2aL, 0x4faaaae5L, 0xedfbfb16L, - 0x864343c5L, 0x9a4d4dd7L, 
0x66333355L, 0x11858594L, - 0x8a4545cfL, 0xe9f9f910L, 0x04020206L, 0xfe7f7f81L, - 0xa05050f0L, 0x783c3c44L, 0x259f9fbaL, 0x4ba8a8e3L, - 0xa25151f3L, 0x5da3a3feL, 0x804040c0L, 0x058f8f8aL, - 0x3f9292adL, 0x219d9dbcL, 0x70383848L, 0xf1f5f504L, - 0x63bcbcdfL, 0x77b6b6c1L, 0xafdada75L, 0x42212163L, - 0x20101030L, 0xe5ffff1aL, 0xfdf3f30eL, 0xbfd2d26dL, - 0x81cdcd4cL, 0x180c0c14L, 0x26131335L, 0xc3ecec2fL, - 0xbe5f5fe1L, 0x359797a2L, 0x884444ccL, 0x2e171739L, - 0x93c4c457L, 0x55a7a7f2L, 0xfc7e7e82L, 0x7a3d3d47L, - 0xc86464acL, 0xba5d5de7L, 0x3219192bL, 0xe6737395L, - 0xc06060a0L, 0x19818198L, 0x9e4f4fd1L, 0xa3dcdc7fL, - 0x44222266L, 0x542a2a7eL, 0x3b9090abL, 0x0b888883L, - 0x8c4646caL, 0xc7eeee29L, 0x6bb8b8d3L, 0x2814143cL, - 0xa7dede79L, 0xbc5e5ee2L, 0x160b0b1dL, 0xaddbdb76L, - 0xdbe0e03bL, 0x64323256L, 0x743a3a4eL, 0x140a0a1eL, - 0x924949dbL, 0x0c06060aL, 0x4824246cL, 0xb85c5ce4L, - 0x9fc2c25dL, 0xbdd3d36eL, 0x43acacefL, 0xc46262a6L, - 0x399191a8L, 0x319595a4L, 0xd3e4e437L, 0xf279798bL, - 0xd5e7e732L, 0x8bc8c843L, 0x6e373759L, 0xda6d6db7L, - 0x018d8d8cL, 0xb1d5d564L, 0x9c4e4ed2L, 0x49a9a9e0L, - 0xd86c6cb4L, 0xac5656faL, 0xf3f4f407L, 0xcfeaea25L, - 0xca6565afL, 0xf47a7a8eL, 0x47aeaee9L, 0x10080818L, - 0x6fbabad5L, 0xf0787888L, 0x4a25256fL, 0x5c2e2e72L, - 0x381c1c24L, 0x57a6a6f1L, 0x73b4b4c7L, 0x97c6c651L, - 0xcbe8e823L, 0xa1dddd7cL, 0xe874749cL, 0x3e1f1f21L, - 0x964b4bddL, 0x61bdbddcL, 0x0d8b8b86L, 0x0f8a8a85L, - 0xe0707090L, 0x7c3e3e42L, 0x71b5b5c4L, 0xcc6666aaL, - 0x904848d8L, 0x06030305L, 0xf7f6f601L, 0x1c0e0e12L, - 0xc26161a3L, 0x6a35355fL, 0xae5757f9L, 0x69b9b9d0L, - 0x17868691L, 0x99c1c158L, 0x3a1d1d27L, 0x279e9eb9L, - 0xd9e1e138L, 0xebf8f813L, 0x2b9898b3L, 0x22111133L, - 0xd26969bbL, 0xa9d9d970L, 0x078e8e89L, 0x339494a7L, - 0x2d9b9bb6L, 0x3c1e1e22L, 0x15878792L, 0xc9e9e920L, - 0x87cece49L, 0xaa5555ffL, 0x50282878L, 0xa5dfdf7aL, - 0x038c8c8fL, 0x59a1a1f8L, 0x09898980L, 0x1a0d0d17L, - 0x65bfbfdaL, 0xd7e6e631L, 0x844242c6L, 0xd06868b8L, - 0x824141c3L, 0x299999b0L, 
0x5a2d2d77L, 0x1e0f0f11L, - 0x7bb0b0cbL, 0xa85454fcL, 0x6dbbbbd6L, 0x2c16163aL, + 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d, + 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554, + 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d, + 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a, + 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87, + 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b, + 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea, + 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b, + 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a, + 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f, + 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108, + 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f, + 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e, + 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5, + 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d, + 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f, + 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e, + 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb, + 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce, + 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497, + 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c, + 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed, + 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b, + 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a, + 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16, + 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594, + 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81, + 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3, + 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a, + 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504, + 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163, + 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d, + 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f, + 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739, + 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47, + 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395, + 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f, + 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883, + 0x8c4646ca, 
0xc7eeee29, 0x6bb8b8d3, 0x2814143c, + 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76, + 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e, + 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4, + 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6, + 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b, + 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7, + 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0, + 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25, + 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818, + 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72, + 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651, + 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21, + 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85, + 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa, + 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12, + 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0, + 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9, + 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133, + 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7, + 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920, + 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a, + 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17, + 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8, + 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11, + 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a, ] Te1 = [ - 0xa5c66363L, 0x84f87c7cL, 0x99ee7777L, 0x8df67b7bL, - 0x0dfff2f2L, 0xbdd66b6bL, 0xb1de6f6fL, 0x5491c5c5L, - 0x50603030L, 0x03020101L, 0xa9ce6767L, 0x7d562b2bL, - 0x19e7fefeL, 0x62b5d7d7L, 0xe64dababL, 0x9aec7676L, - 0x458fcacaL, 0x9d1f8282L, 0x4089c9c9L, 0x87fa7d7dL, - 0x15effafaL, 0xebb25959L, 0xc98e4747L, 0x0bfbf0f0L, - 0xec41adadL, 0x67b3d4d4L, 0xfd5fa2a2L, 0xea45afafL, - 0xbf239c9cL, 0xf753a4a4L, 0x96e47272L, 0x5b9bc0c0L, - 0xc275b7b7L, 0x1ce1fdfdL, 0xae3d9393L, 0x6a4c2626L, - 0x5a6c3636L, 0x417e3f3fL, 0x02f5f7f7L, 0x4f83ccccL, - 0x5c683434L, 0xf451a5a5L, 0x34d1e5e5L, 0x08f9f1f1L, - 0x93e27171L, 0x73abd8d8L, 0x53623131L, 0x3f2a1515L, - 0x0c080404L, 0x5295c7c7L, 0x65462323L, 0x5e9dc3c3L, - 
0x28301818L, 0xa1379696L, 0x0f0a0505L, 0xb52f9a9aL, - 0x090e0707L, 0x36241212L, 0x9b1b8080L, 0x3ddfe2e2L, - 0x26cdebebL, 0x694e2727L, 0xcd7fb2b2L, 0x9fea7575L, - 0x1b120909L, 0x9e1d8383L, 0x74582c2cL, 0x2e341a1aL, - 0x2d361b1bL, 0xb2dc6e6eL, 0xeeb45a5aL, 0xfb5ba0a0L, - 0xf6a45252L, 0x4d763b3bL, 0x61b7d6d6L, 0xce7db3b3L, - 0x7b522929L, 0x3edde3e3L, 0x715e2f2fL, 0x97138484L, - 0xf5a65353L, 0x68b9d1d1L, 0x00000000L, 0x2cc1ededL, - 0x60402020L, 0x1fe3fcfcL, 0xc879b1b1L, 0xedb65b5bL, - 0xbed46a6aL, 0x468dcbcbL, 0xd967bebeL, 0x4b723939L, - 0xde944a4aL, 0xd4984c4cL, 0xe8b05858L, 0x4a85cfcfL, - 0x6bbbd0d0L, 0x2ac5efefL, 0xe54faaaaL, 0x16edfbfbL, - 0xc5864343L, 0xd79a4d4dL, 0x55663333L, 0x94118585L, - 0xcf8a4545L, 0x10e9f9f9L, 0x06040202L, 0x81fe7f7fL, - 0xf0a05050L, 0x44783c3cL, 0xba259f9fL, 0xe34ba8a8L, - 0xf3a25151L, 0xfe5da3a3L, 0xc0804040L, 0x8a058f8fL, - 0xad3f9292L, 0xbc219d9dL, 0x48703838L, 0x04f1f5f5L, - 0xdf63bcbcL, 0xc177b6b6L, 0x75afdadaL, 0x63422121L, - 0x30201010L, 0x1ae5ffffL, 0x0efdf3f3L, 0x6dbfd2d2L, - 0x4c81cdcdL, 0x14180c0cL, 0x35261313L, 0x2fc3ececL, - 0xe1be5f5fL, 0xa2359797L, 0xcc884444L, 0x392e1717L, - 0x5793c4c4L, 0xf255a7a7L, 0x82fc7e7eL, 0x477a3d3dL, - 0xacc86464L, 0xe7ba5d5dL, 0x2b321919L, 0x95e67373L, - 0xa0c06060L, 0x98198181L, 0xd19e4f4fL, 0x7fa3dcdcL, - 0x66442222L, 0x7e542a2aL, 0xab3b9090L, 0x830b8888L, - 0xca8c4646L, 0x29c7eeeeL, 0xd36bb8b8L, 0x3c281414L, - 0x79a7dedeL, 0xe2bc5e5eL, 0x1d160b0bL, 0x76addbdbL, - 0x3bdbe0e0L, 0x56643232L, 0x4e743a3aL, 0x1e140a0aL, - 0xdb924949L, 0x0a0c0606L, 0x6c482424L, 0xe4b85c5cL, - 0x5d9fc2c2L, 0x6ebdd3d3L, 0xef43acacL, 0xa6c46262L, - 0xa8399191L, 0xa4319595L, 0x37d3e4e4L, 0x8bf27979L, - 0x32d5e7e7L, 0x438bc8c8L, 0x596e3737L, 0xb7da6d6dL, - 0x8c018d8dL, 0x64b1d5d5L, 0xd29c4e4eL, 0xe049a9a9L, - 0xb4d86c6cL, 0xfaac5656L, 0x07f3f4f4L, 0x25cfeaeaL, - 0xafca6565L, 0x8ef47a7aL, 0xe947aeaeL, 0x18100808L, - 0xd56fbabaL, 0x88f07878L, 0x6f4a2525L, 0x725c2e2eL, - 0x24381c1cL, 0xf157a6a6L, 0xc773b4b4L, 0x5197c6c6L, - 
0x23cbe8e8L, 0x7ca1ddddL, 0x9ce87474L, 0x213e1f1fL, - 0xdd964b4bL, 0xdc61bdbdL, 0x860d8b8bL, 0x850f8a8aL, - 0x90e07070L, 0x427c3e3eL, 0xc471b5b5L, 0xaacc6666L, - 0xd8904848L, 0x05060303L, 0x01f7f6f6L, 0x121c0e0eL, - 0xa3c26161L, 0x5f6a3535L, 0xf9ae5757L, 0xd069b9b9L, - 0x91178686L, 0x5899c1c1L, 0x273a1d1dL, 0xb9279e9eL, - 0x38d9e1e1L, 0x13ebf8f8L, 0xb32b9898L, 0x33221111L, - 0xbbd26969L, 0x70a9d9d9L, 0x89078e8eL, 0xa7339494L, - 0xb62d9b9bL, 0x223c1e1eL, 0x92158787L, 0x20c9e9e9L, - 0x4987ceceL, 0xffaa5555L, 0x78502828L, 0x7aa5dfdfL, - 0x8f038c8cL, 0xf859a1a1L, 0x80098989L, 0x171a0d0dL, - 0xda65bfbfL, 0x31d7e6e6L, 0xc6844242L, 0xb8d06868L, - 0xc3824141L, 0xb0299999L, 0x775a2d2dL, 0x111e0f0fL, - 0xcb7bb0b0L, 0xfca85454L, 0xd66dbbbbL, 0x3a2c1616L, + 0xa5c66363, 0x84f87c7c, 0x99ee7777, 0x8df67b7b, + 0x0dfff2f2, 0xbdd66b6b, 0xb1de6f6f, 0x5491c5c5, + 0x50603030, 0x03020101, 0xa9ce6767, 0x7d562b2b, + 0x19e7fefe, 0x62b5d7d7, 0xe64dabab, 0x9aec7676, + 0x458fcaca, 0x9d1f8282, 0x4089c9c9, 0x87fa7d7d, + 0x15effafa, 0xebb25959, 0xc98e4747, 0x0bfbf0f0, + 0xec41adad, 0x67b3d4d4, 0xfd5fa2a2, 0xea45afaf, + 0xbf239c9c, 0xf753a4a4, 0x96e47272, 0x5b9bc0c0, + 0xc275b7b7, 0x1ce1fdfd, 0xae3d9393, 0x6a4c2626, + 0x5a6c3636, 0x417e3f3f, 0x02f5f7f7, 0x4f83cccc, + 0x5c683434, 0xf451a5a5, 0x34d1e5e5, 0x08f9f1f1, + 0x93e27171, 0x73abd8d8, 0x53623131, 0x3f2a1515, + 0x0c080404, 0x5295c7c7, 0x65462323, 0x5e9dc3c3, + 0x28301818, 0xa1379696, 0x0f0a0505, 0xb52f9a9a, + 0x090e0707, 0x36241212, 0x9b1b8080, 0x3ddfe2e2, + 0x26cdebeb, 0x694e2727, 0xcd7fb2b2, 0x9fea7575, + 0x1b120909, 0x9e1d8383, 0x74582c2c, 0x2e341a1a, + 0x2d361b1b, 0xb2dc6e6e, 0xeeb45a5a, 0xfb5ba0a0, + 0xf6a45252, 0x4d763b3b, 0x61b7d6d6, 0xce7db3b3, + 0x7b522929, 0x3edde3e3, 0x715e2f2f, 0x97138484, + 0xf5a65353, 0x68b9d1d1, 0x00000000, 0x2cc1eded, + 0x60402020, 0x1fe3fcfc, 0xc879b1b1, 0xedb65b5b, + 0xbed46a6a, 0x468dcbcb, 0xd967bebe, 0x4b723939, + 0xde944a4a, 0xd4984c4c, 0xe8b05858, 0x4a85cfcf, + 0x6bbbd0d0, 0x2ac5efef, 0xe54faaaa, 
0x16edfbfb, + 0xc5864343, 0xd79a4d4d, 0x55663333, 0x94118585, + 0xcf8a4545, 0x10e9f9f9, 0x06040202, 0x81fe7f7f, + 0xf0a05050, 0x44783c3c, 0xba259f9f, 0xe34ba8a8, + 0xf3a25151, 0xfe5da3a3, 0xc0804040, 0x8a058f8f, + 0xad3f9292, 0xbc219d9d, 0x48703838, 0x04f1f5f5, + 0xdf63bcbc, 0xc177b6b6, 0x75afdada, 0x63422121, + 0x30201010, 0x1ae5ffff, 0x0efdf3f3, 0x6dbfd2d2, + 0x4c81cdcd, 0x14180c0c, 0x35261313, 0x2fc3ecec, + 0xe1be5f5f, 0xa2359797, 0xcc884444, 0x392e1717, + 0x5793c4c4, 0xf255a7a7, 0x82fc7e7e, 0x477a3d3d, + 0xacc86464, 0xe7ba5d5d, 0x2b321919, 0x95e67373, + 0xa0c06060, 0x98198181, 0xd19e4f4f, 0x7fa3dcdc, + 0x66442222, 0x7e542a2a, 0xab3b9090, 0x830b8888, + 0xca8c4646, 0x29c7eeee, 0xd36bb8b8, 0x3c281414, + 0x79a7dede, 0xe2bc5e5e, 0x1d160b0b, 0x76addbdb, + 0x3bdbe0e0, 0x56643232, 0x4e743a3a, 0x1e140a0a, + 0xdb924949, 0x0a0c0606, 0x6c482424, 0xe4b85c5c, + 0x5d9fc2c2, 0x6ebdd3d3, 0xef43acac, 0xa6c46262, + 0xa8399191, 0xa4319595, 0x37d3e4e4, 0x8bf27979, + 0x32d5e7e7, 0x438bc8c8, 0x596e3737, 0xb7da6d6d, + 0x8c018d8d, 0x64b1d5d5, 0xd29c4e4e, 0xe049a9a9, + 0xb4d86c6c, 0xfaac5656, 0x07f3f4f4, 0x25cfeaea, + 0xafca6565, 0x8ef47a7a, 0xe947aeae, 0x18100808, + 0xd56fbaba, 0x88f07878, 0x6f4a2525, 0x725c2e2e, + 0x24381c1c, 0xf157a6a6, 0xc773b4b4, 0x5197c6c6, + 0x23cbe8e8, 0x7ca1dddd, 0x9ce87474, 0x213e1f1f, + 0xdd964b4b, 0xdc61bdbd, 0x860d8b8b, 0x850f8a8a, + 0x90e07070, 0x427c3e3e, 0xc471b5b5, 0xaacc6666, + 0xd8904848, 0x05060303, 0x01f7f6f6, 0x121c0e0e, + 0xa3c26161, 0x5f6a3535, 0xf9ae5757, 0xd069b9b9, + 0x91178686, 0x5899c1c1, 0x273a1d1d, 0xb9279e9e, + 0x38d9e1e1, 0x13ebf8f8, 0xb32b9898, 0x33221111, + 0xbbd26969, 0x70a9d9d9, 0x89078e8e, 0xa7339494, + 0xb62d9b9b, 0x223c1e1e, 0x92158787, 0x20c9e9e9, + 0x4987cece, 0xffaa5555, 0x78502828, 0x7aa5dfdf, + 0x8f038c8c, 0xf859a1a1, 0x80098989, 0x171a0d0d, + 0xda65bfbf, 0x31d7e6e6, 0xc6844242, 0xb8d06868, + 0xc3824141, 0xb0299999, 0x775a2d2d, 0x111e0f0f, + 0xcb7bb0b0, 0xfca85454, 0xd66dbbbb, 0x3a2c1616, ] Te2 = [ - 0x63a5c663L, 0x7c84f87cL, 
0x7799ee77L, 0x7b8df67bL, - 0xf20dfff2L, 0x6bbdd66bL, 0x6fb1de6fL, 0xc55491c5L, - 0x30506030L, 0x01030201L, 0x67a9ce67L, 0x2b7d562bL, - 0xfe19e7feL, 0xd762b5d7L, 0xabe64dabL, 0x769aec76L, - 0xca458fcaL, 0x829d1f82L, 0xc94089c9L, 0x7d87fa7dL, - 0xfa15effaL, 0x59ebb259L, 0x47c98e47L, 0xf00bfbf0L, - 0xadec41adL, 0xd467b3d4L, 0xa2fd5fa2L, 0xafea45afL, - 0x9cbf239cL, 0xa4f753a4L, 0x7296e472L, 0xc05b9bc0L, - 0xb7c275b7L, 0xfd1ce1fdL, 0x93ae3d93L, 0x266a4c26L, - 0x365a6c36L, 0x3f417e3fL, 0xf702f5f7L, 0xcc4f83ccL, - 0x345c6834L, 0xa5f451a5L, 0xe534d1e5L, 0xf108f9f1L, - 0x7193e271L, 0xd873abd8L, 0x31536231L, 0x153f2a15L, - 0x040c0804L, 0xc75295c7L, 0x23654623L, 0xc35e9dc3L, - 0x18283018L, 0x96a13796L, 0x050f0a05L, 0x9ab52f9aL, - 0x07090e07L, 0x12362412L, 0x809b1b80L, 0xe23ddfe2L, - 0xeb26cdebL, 0x27694e27L, 0xb2cd7fb2L, 0x759fea75L, - 0x091b1209L, 0x839e1d83L, 0x2c74582cL, 0x1a2e341aL, - 0x1b2d361bL, 0x6eb2dc6eL, 0x5aeeb45aL, 0xa0fb5ba0L, - 0x52f6a452L, 0x3b4d763bL, 0xd661b7d6L, 0xb3ce7db3L, - 0x297b5229L, 0xe33edde3L, 0x2f715e2fL, 0x84971384L, - 0x53f5a653L, 0xd168b9d1L, 0x00000000L, 0xed2cc1edL, - 0x20604020L, 0xfc1fe3fcL, 0xb1c879b1L, 0x5bedb65bL, - 0x6abed46aL, 0xcb468dcbL, 0xbed967beL, 0x394b7239L, - 0x4ade944aL, 0x4cd4984cL, 0x58e8b058L, 0xcf4a85cfL, - 0xd06bbbd0L, 0xef2ac5efL, 0xaae54faaL, 0xfb16edfbL, - 0x43c58643L, 0x4dd79a4dL, 0x33556633L, 0x85941185L, - 0x45cf8a45L, 0xf910e9f9L, 0x02060402L, 0x7f81fe7fL, - 0x50f0a050L, 0x3c44783cL, 0x9fba259fL, 0xa8e34ba8L, - 0x51f3a251L, 0xa3fe5da3L, 0x40c08040L, 0x8f8a058fL, - 0x92ad3f92L, 0x9dbc219dL, 0x38487038L, 0xf504f1f5L, - 0xbcdf63bcL, 0xb6c177b6L, 0xda75afdaL, 0x21634221L, - 0x10302010L, 0xff1ae5ffL, 0xf30efdf3L, 0xd26dbfd2L, - 0xcd4c81cdL, 0x0c14180cL, 0x13352613L, 0xec2fc3ecL, - 0x5fe1be5fL, 0x97a23597L, 0x44cc8844L, 0x17392e17L, - 0xc45793c4L, 0xa7f255a7L, 0x7e82fc7eL, 0x3d477a3dL, - 0x64acc864L, 0x5de7ba5dL, 0x192b3219L, 0x7395e673L, - 0x60a0c060L, 0x81981981L, 0x4fd19e4fL, 0xdc7fa3dcL, - 0x22664422L, 0x2a7e542aL, 
0x90ab3b90L, 0x88830b88L, - 0x46ca8c46L, 0xee29c7eeL, 0xb8d36bb8L, 0x143c2814L, - 0xde79a7deL, 0x5ee2bc5eL, 0x0b1d160bL, 0xdb76addbL, - 0xe03bdbe0L, 0x32566432L, 0x3a4e743aL, 0x0a1e140aL, - 0x49db9249L, 0x060a0c06L, 0x246c4824L, 0x5ce4b85cL, - 0xc25d9fc2L, 0xd36ebdd3L, 0xacef43acL, 0x62a6c462L, - 0x91a83991L, 0x95a43195L, 0xe437d3e4L, 0x798bf279L, - 0xe732d5e7L, 0xc8438bc8L, 0x37596e37L, 0x6db7da6dL, - 0x8d8c018dL, 0xd564b1d5L, 0x4ed29c4eL, 0xa9e049a9L, - 0x6cb4d86cL, 0x56faac56L, 0xf407f3f4L, 0xea25cfeaL, - 0x65afca65L, 0x7a8ef47aL, 0xaee947aeL, 0x08181008L, - 0xbad56fbaL, 0x7888f078L, 0x256f4a25L, 0x2e725c2eL, - 0x1c24381cL, 0xa6f157a6L, 0xb4c773b4L, 0xc65197c6L, - 0xe823cbe8L, 0xdd7ca1ddL, 0x749ce874L, 0x1f213e1fL, - 0x4bdd964bL, 0xbddc61bdL, 0x8b860d8bL, 0x8a850f8aL, - 0x7090e070L, 0x3e427c3eL, 0xb5c471b5L, 0x66aacc66L, - 0x48d89048L, 0x03050603L, 0xf601f7f6L, 0x0e121c0eL, - 0x61a3c261L, 0x355f6a35L, 0x57f9ae57L, 0xb9d069b9L, - 0x86911786L, 0xc15899c1L, 0x1d273a1dL, 0x9eb9279eL, - 0xe138d9e1L, 0xf813ebf8L, 0x98b32b98L, 0x11332211L, - 0x69bbd269L, 0xd970a9d9L, 0x8e89078eL, 0x94a73394L, - 0x9bb62d9bL, 0x1e223c1eL, 0x87921587L, 0xe920c9e9L, - 0xce4987ceL, 0x55ffaa55L, 0x28785028L, 0xdf7aa5dfL, - 0x8c8f038cL, 0xa1f859a1L, 0x89800989L, 0x0d171a0dL, - 0xbfda65bfL, 0xe631d7e6L, 0x42c68442L, 0x68b8d068L, - 0x41c38241L, 0x99b02999L, 0x2d775a2dL, 0x0f111e0fL, - 0xb0cb7bb0L, 0x54fca854L, 0xbbd66dbbL, 0x163a2c16L, + 0x63a5c663, 0x7c84f87c, 0x7799ee77, 0x7b8df67b, + 0xf20dfff2, 0x6bbdd66b, 0x6fb1de6f, 0xc55491c5, + 0x30506030, 0x01030201, 0x67a9ce67, 0x2b7d562b, + 0xfe19e7fe, 0xd762b5d7, 0xabe64dab, 0x769aec76, + 0xca458fca, 0x829d1f82, 0xc94089c9, 0x7d87fa7d, + 0xfa15effa, 0x59ebb259, 0x47c98e47, 0xf00bfbf0, + 0xadec41ad, 0xd467b3d4, 0xa2fd5fa2, 0xafea45af, + 0x9cbf239c, 0xa4f753a4, 0x7296e472, 0xc05b9bc0, + 0xb7c275b7, 0xfd1ce1fd, 0x93ae3d93, 0x266a4c26, + 0x365a6c36, 0x3f417e3f, 0xf702f5f7, 0xcc4f83cc, + 0x345c6834, 0xa5f451a5, 0xe534d1e5, 0xf108f9f1, + 0x7193e271, 
0xd873abd8, 0x31536231, 0x153f2a15, + 0x040c0804, 0xc75295c7, 0x23654623, 0xc35e9dc3, + 0x18283018, 0x96a13796, 0x050f0a05, 0x9ab52f9a, + 0x07090e07, 0x12362412, 0x809b1b80, 0xe23ddfe2, + 0xeb26cdeb, 0x27694e27, 0xb2cd7fb2, 0x759fea75, + 0x091b1209, 0x839e1d83, 0x2c74582c, 0x1a2e341a, + 0x1b2d361b, 0x6eb2dc6e, 0x5aeeb45a, 0xa0fb5ba0, + 0x52f6a452, 0x3b4d763b, 0xd661b7d6, 0xb3ce7db3, + 0x297b5229, 0xe33edde3, 0x2f715e2f, 0x84971384, + 0x53f5a653, 0xd168b9d1, 0x00000000, 0xed2cc1ed, + 0x20604020, 0xfc1fe3fc, 0xb1c879b1, 0x5bedb65b, + 0x6abed46a, 0xcb468dcb, 0xbed967be, 0x394b7239, + 0x4ade944a, 0x4cd4984c, 0x58e8b058, 0xcf4a85cf, + 0xd06bbbd0, 0xef2ac5ef, 0xaae54faa, 0xfb16edfb, + 0x43c58643, 0x4dd79a4d, 0x33556633, 0x85941185, + 0x45cf8a45, 0xf910e9f9, 0x02060402, 0x7f81fe7f, + 0x50f0a050, 0x3c44783c, 0x9fba259f, 0xa8e34ba8, + 0x51f3a251, 0xa3fe5da3, 0x40c08040, 0x8f8a058f, + 0x92ad3f92, 0x9dbc219d, 0x38487038, 0xf504f1f5, + 0xbcdf63bc, 0xb6c177b6, 0xda75afda, 0x21634221, + 0x10302010, 0xff1ae5ff, 0xf30efdf3, 0xd26dbfd2, + 0xcd4c81cd, 0x0c14180c, 0x13352613, 0xec2fc3ec, + 0x5fe1be5f, 0x97a23597, 0x44cc8844, 0x17392e17, + 0xc45793c4, 0xa7f255a7, 0x7e82fc7e, 0x3d477a3d, + 0x64acc864, 0x5de7ba5d, 0x192b3219, 0x7395e673, + 0x60a0c060, 0x81981981, 0x4fd19e4f, 0xdc7fa3dc, + 0x22664422, 0x2a7e542a, 0x90ab3b90, 0x88830b88, + 0x46ca8c46, 0xee29c7ee, 0xb8d36bb8, 0x143c2814, + 0xde79a7de, 0x5ee2bc5e, 0x0b1d160b, 0xdb76addb, + 0xe03bdbe0, 0x32566432, 0x3a4e743a, 0x0a1e140a, + 0x49db9249, 0x060a0c06, 0x246c4824, 0x5ce4b85c, + 0xc25d9fc2, 0xd36ebdd3, 0xacef43ac, 0x62a6c462, + 0x91a83991, 0x95a43195, 0xe437d3e4, 0x798bf279, + 0xe732d5e7, 0xc8438bc8, 0x37596e37, 0x6db7da6d, + 0x8d8c018d, 0xd564b1d5, 0x4ed29c4e, 0xa9e049a9, + 0x6cb4d86c, 0x56faac56, 0xf407f3f4, 0xea25cfea, + 0x65afca65, 0x7a8ef47a, 0xaee947ae, 0x08181008, + 0xbad56fba, 0x7888f078, 0x256f4a25, 0x2e725c2e, + 0x1c24381c, 0xa6f157a6, 0xb4c773b4, 0xc65197c6, + 0xe823cbe8, 0xdd7ca1dd, 0x749ce874, 0x1f213e1f, + 0x4bdd964b, 
0xbddc61bd, 0x8b860d8b, 0x8a850f8a, + 0x7090e070, 0x3e427c3e, 0xb5c471b5, 0x66aacc66, + 0x48d89048, 0x03050603, 0xf601f7f6, 0x0e121c0e, + 0x61a3c261, 0x355f6a35, 0x57f9ae57, 0xb9d069b9, + 0x86911786, 0xc15899c1, 0x1d273a1d, 0x9eb9279e, + 0xe138d9e1, 0xf813ebf8, 0x98b32b98, 0x11332211, + 0x69bbd269, 0xd970a9d9, 0x8e89078e, 0x94a73394, + 0x9bb62d9b, 0x1e223c1e, 0x87921587, 0xe920c9e9, + 0xce4987ce, 0x55ffaa55, 0x28785028, 0xdf7aa5df, + 0x8c8f038c, 0xa1f859a1, 0x89800989, 0x0d171a0d, + 0xbfda65bf, 0xe631d7e6, 0x42c68442, 0x68b8d068, + 0x41c38241, 0x99b02999, 0x2d775a2d, 0x0f111e0f, + 0xb0cb7bb0, 0x54fca854, 0xbbd66dbb, 0x163a2c16, ] Te3 = [ - 0x6363a5c6L, 0x7c7c84f8L, 0x777799eeL, 0x7b7b8df6L, - 0xf2f20dffL, 0x6b6bbdd6L, 0x6f6fb1deL, 0xc5c55491L, - 0x30305060L, 0x01010302L, 0x6767a9ceL, 0x2b2b7d56L, - 0xfefe19e7L, 0xd7d762b5L, 0xababe64dL, 0x76769aecL, - 0xcaca458fL, 0x82829d1fL, 0xc9c94089L, 0x7d7d87faL, - 0xfafa15efL, 0x5959ebb2L, 0x4747c98eL, 0xf0f00bfbL, - 0xadadec41L, 0xd4d467b3L, 0xa2a2fd5fL, 0xafafea45L, - 0x9c9cbf23L, 0xa4a4f753L, 0x727296e4L, 0xc0c05b9bL, - 0xb7b7c275L, 0xfdfd1ce1L, 0x9393ae3dL, 0x26266a4cL, - 0x36365a6cL, 0x3f3f417eL, 0xf7f702f5L, 0xcccc4f83L, - 0x34345c68L, 0xa5a5f451L, 0xe5e534d1L, 0xf1f108f9L, - 0x717193e2L, 0xd8d873abL, 0x31315362L, 0x15153f2aL, - 0x04040c08L, 0xc7c75295L, 0x23236546L, 0xc3c35e9dL, - 0x18182830L, 0x9696a137L, 0x05050f0aL, 0x9a9ab52fL, - 0x0707090eL, 0x12123624L, 0x80809b1bL, 0xe2e23ddfL, - 0xebeb26cdL, 0x2727694eL, 0xb2b2cd7fL, 0x75759feaL, - 0x09091b12L, 0x83839e1dL, 0x2c2c7458L, 0x1a1a2e34L, - 0x1b1b2d36L, 0x6e6eb2dcL, 0x5a5aeeb4L, 0xa0a0fb5bL, - 0x5252f6a4L, 0x3b3b4d76L, 0xd6d661b7L, 0xb3b3ce7dL, - 0x29297b52L, 0xe3e33eddL, 0x2f2f715eL, 0x84849713L, - 0x5353f5a6L, 0xd1d168b9L, 0x00000000L, 0xeded2cc1L, - 0x20206040L, 0xfcfc1fe3L, 0xb1b1c879L, 0x5b5bedb6L, - 0x6a6abed4L, 0xcbcb468dL, 0xbebed967L, 0x39394b72L, - 0x4a4ade94L, 0x4c4cd498L, 0x5858e8b0L, 0xcfcf4a85L, - 0xd0d06bbbL, 0xefef2ac5L, 0xaaaae54fL, 0xfbfb16edL, - 
0x4343c586L, 0x4d4dd79aL, 0x33335566L, 0x85859411L, - 0x4545cf8aL, 0xf9f910e9L, 0x02020604L, 0x7f7f81feL, - 0x5050f0a0L, 0x3c3c4478L, 0x9f9fba25L, 0xa8a8e34bL, - 0x5151f3a2L, 0xa3a3fe5dL, 0x4040c080L, 0x8f8f8a05L, - 0x9292ad3fL, 0x9d9dbc21L, 0x38384870L, 0xf5f504f1L, - 0xbcbcdf63L, 0xb6b6c177L, 0xdada75afL, 0x21216342L, - 0x10103020L, 0xffff1ae5L, 0xf3f30efdL, 0xd2d26dbfL, - 0xcdcd4c81L, 0x0c0c1418L, 0x13133526L, 0xecec2fc3L, - 0x5f5fe1beL, 0x9797a235L, 0x4444cc88L, 0x1717392eL, - 0xc4c45793L, 0xa7a7f255L, 0x7e7e82fcL, 0x3d3d477aL, - 0x6464acc8L, 0x5d5de7baL, 0x19192b32L, 0x737395e6L, - 0x6060a0c0L, 0x81819819L, 0x4f4fd19eL, 0xdcdc7fa3L, - 0x22226644L, 0x2a2a7e54L, 0x9090ab3bL, 0x8888830bL, - 0x4646ca8cL, 0xeeee29c7L, 0xb8b8d36bL, 0x14143c28L, - 0xdede79a7L, 0x5e5ee2bcL, 0x0b0b1d16L, 0xdbdb76adL, - 0xe0e03bdbL, 0x32325664L, 0x3a3a4e74L, 0x0a0a1e14L, - 0x4949db92L, 0x06060a0cL, 0x24246c48L, 0x5c5ce4b8L, - 0xc2c25d9fL, 0xd3d36ebdL, 0xacacef43L, 0x6262a6c4L, - 0x9191a839L, 0x9595a431L, 0xe4e437d3L, 0x79798bf2L, - 0xe7e732d5L, 0xc8c8438bL, 0x3737596eL, 0x6d6db7daL, - 0x8d8d8c01L, 0xd5d564b1L, 0x4e4ed29cL, 0xa9a9e049L, - 0x6c6cb4d8L, 0x5656faacL, 0xf4f407f3L, 0xeaea25cfL, - 0x6565afcaL, 0x7a7a8ef4L, 0xaeaee947L, 0x08081810L, - 0xbabad56fL, 0x787888f0L, 0x25256f4aL, 0x2e2e725cL, - 0x1c1c2438L, 0xa6a6f157L, 0xb4b4c773L, 0xc6c65197L, - 0xe8e823cbL, 0xdddd7ca1L, 0x74749ce8L, 0x1f1f213eL, - 0x4b4bdd96L, 0xbdbddc61L, 0x8b8b860dL, 0x8a8a850fL, - 0x707090e0L, 0x3e3e427cL, 0xb5b5c471L, 0x6666aaccL, - 0x4848d890L, 0x03030506L, 0xf6f601f7L, 0x0e0e121cL, - 0x6161a3c2L, 0x35355f6aL, 0x5757f9aeL, 0xb9b9d069L, - 0x86869117L, 0xc1c15899L, 0x1d1d273aL, 0x9e9eb927L, - 0xe1e138d9L, 0xf8f813ebL, 0x9898b32bL, 0x11113322L, - 0x6969bbd2L, 0xd9d970a9L, 0x8e8e8907L, 0x9494a733L, - 0x9b9bb62dL, 0x1e1e223cL, 0x87879215L, 0xe9e920c9L, - 0xcece4987L, 0x5555ffaaL, 0x28287850L, 0xdfdf7aa5L, - 0x8c8c8f03L, 0xa1a1f859L, 0x89898009L, 0x0d0d171aL, - 0xbfbfda65L, 0xe6e631d7L, 0x4242c684L, 0x6868b8d0L, - 
0x4141c382L, 0x9999b029L, 0x2d2d775aL, 0x0f0f111eL, - 0xb0b0cb7bL, 0x5454fca8L, 0xbbbbd66dL, 0x16163a2cL, + 0x6363a5c6, 0x7c7c84f8, 0x777799ee, 0x7b7b8df6, + 0xf2f20dff, 0x6b6bbdd6, 0x6f6fb1de, 0xc5c55491, + 0x30305060, 0x01010302, 0x6767a9ce, 0x2b2b7d56, + 0xfefe19e7, 0xd7d762b5, 0xababe64d, 0x76769aec, + 0xcaca458f, 0x82829d1f, 0xc9c94089, 0x7d7d87fa, + 0xfafa15ef, 0x5959ebb2, 0x4747c98e, 0xf0f00bfb, + 0xadadec41, 0xd4d467b3, 0xa2a2fd5f, 0xafafea45, + 0x9c9cbf23, 0xa4a4f753, 0x727296e4, 0xc0c05b9b, + 0xb7b7c275, 0xfdfd1ce1, 0x9393ae3d, 0x26266a4c, + 0x36365a6c, 0x3f3f417e, 0xf7f702f5, 0xcccc4f83, + 0x34345c68, 0xa5a5f451, 0xe5e534d1, 0xf1f108f9, + 0x717193e2, 0xd8d873ab, 0x31315362, 0x15153f2a, + 0x04040c08, 0xc7c75295, 0x23236546, 0xc3c35e9d, + 0x18182830, 0x9696a137, 0x05050f0a, 0x9a9ab52f, + 0x0707090e, 0x12123624, 0x80809b1b, 0xe2e23ddf, + 0xebeb26cd, 0x2727694e, 0xb2b2cd7f, 0x75759fea, + 0x09091b12, 0x83839e1d, 0x2c2c7458, 0x1a1a2e34, + 0x1b1b2d36, 0x6e6eb2dc, 0x5a5aeeb4, 0xa0a0fb5b, + 0x5252f6a4, 0x3b3b4d76, 0xd6d661b7, 0xb3b3ce7d, + 0x29297b52, 0xe3e33edd, 0x2f2f715e, 0x84849713, + 0x5353f5a6, 0xd1d168b9, 0x00000000, 0xeded2cc1, + 0x20206040, 0xfcfc1fe3, 0xb1b1c879, 0x5b5bedb6, + 0x6a6abed4, 0xcbcb468d, 0xbebed967, 0x39394b72, + 0x4a4ade94, 0x4c4cd498, 0x5858e8b0, 0xcfcf4a85, + 0xd0d06bbb, 0xefef2ac5, 0xaaaae54f, 0xfbfb16ed, + 0x4343c586, 0x4d4dd79a, 0x33335566, 0x85859411, + 0x4545cf8a, 0xf9f910e9, 0x02020604, 0x7f7f81fe, + 0x5050f0a0, 0x3c3c4478, 0x9f9fba25, 0xa8a8e34b, + 0x5151f3a2, 0xa3a3fe5d, 0x4040c080, 0x8f8f8a05, + 0x9292ad3f, 0x9d9dbc21, 0x38384870, 0xf5f504f1, + 0xbcbcdf63, 0xb6b6c177, 0xdada75af, 0x21216342, + 0x10103020, 0xffff1ae5, 0xf3f30efd, 0xd2d26dbf, + 0xcdcd4c81, 0x0c0c1418, 0x13133526, 0xecec2fc3, + 0x5f5fe1be, 0x9797a235, 0x4444cc88, 0x1717392e, + 0xc4c45793, 0xa7a7f255, 0x7e7e82fc, 0x3d3d477a, + 0x6464acc8, 0x5d5de7ba, 0x19192b32, 0x737395e6, + 0x6060a0c0, 0x81819819, 0x4f4fd19e, 0xdcdc7fa3, + 0x22226644, 0x2a2a7e54, 0x9090ab3b, 
0x8888830b, + 0x4646ca8c, 0xeeee29c7, 0xb8b8d36b, 0x14143c28, + 0xdede79a7, 0x5e5ee2bc, 0x0b0b1d16, 0xdbdb76ad, + 0xe0e03bdb, 0x32325664, 0x3a3a4e74, 0x0a0a1e14, + 0x4949db92, 0x06060a0c, 0x24246c48, 0x5c5ce4b8, + 0xc2c25d9f, 0xd3d36ebd, 0xacacef43, 0x6262a6c4, + 0x9191a839, 0x9595a431, 0xe4e437d3, 0x79798bf2, + 0xe7e732d5, 0xc8c8438b, 0x3737596e, 0x6d6db7da, + 0x8d8d8c01, 0xd5d564b1, 0x4e4ed29c, 0xa9a9e049, + 0x6c6cb4d8, 0x5656faac, 0xf4f407f3, 0xeaea25cf, + 0x6565afca, 0x7a7a8ef4, 0xaeaee947, 0x08081810, + 0xbabad56f, 0x787888f0, 0x25256f4a, 0x2e2e725c, + 0x1c1c2438, 0xa6a6f157, 0xb4b4c773, 0xc6c65197, + 0xe8e823cb, 0xdddd7ca1, 0x74749ce8, 0x1f1f213e, + 0x4b4bdd96, 0xbdbddc61, 0x8b8b860d, 0x8a8a850f, + 0x707090e0, 0x3e3e427c, 0xb5b5c471, 0x6666aacc, + 0x4848d890, 0x03030506, 0xf6f601f7, 0x0e0e121c, + 0x6161a3c2, 0x35355f6a, 0x5757f9ae, 0xb9b9d069, + 0x86869117, 0xc1c15899, 0x1d1d273a, 0x9e9eb927, + 0xe1e138d9, 0xf8f813eb, 0x9898b32b, 0x11113322, + 0x6969bbd2, 0xd9d970a9, 0x8e8e8907, 0x9494a733, + 0x9b9bb62d, 0x1e1e223c, 0x87879215, 0xe9e920c9, + 0xcece4987, 0x5555ffaa, 0x28287850, 0xdfdf7aa5, + 0x8c8c8f03, 0xa1a1f859, 0x89898009, 0x0d0d171a, + 0xbfbfda65, 0xe6e631d7, 0x4242c684, 0x6868b8d0, + 0x4141c382, 0x9999b029, 0x2d2d775a, 0x0f0f111e, + 0xb0b0cb7b, 0x5454fca8, 0xbbbbd66d, 0x16163a2c, ] Te4 = [ - 0x63636363L, 0x7c7c7c7cL, 0x77777777L, 0x7b7b7b7bL, - 0xf2f2f2f2L, 0x6b6b6b6bL, 0x6f6f6f6fL, 0xc5c5c5c5L, - 0x30303030L, 0x01010101L, 0x67676767L, 0x2b2b2b2bL, - 0xfefefefeL, 0xd7d7d7d7L, 0xababababL, 0x76767676L, - 0xcacacacaL, 0x82828282L, 0xc9c9c9c9L, 0x7d7d7d7dL, - 0xfafafafaL, 0x59595959L, 0x47474747L, 0xf0f0f0f0L, - 0xadadadadL, 0xd4d4d4d4L, 0xa2a2a2a2L, 0xafafafafL, - 0x9c9c9c9cL, 0xa4a4a4a4L, 0x72727272L, 0xc0c0c0c0L, - 0xb7b7b7b7L, 0xfdfdfdfdL, 0x93939393L, 0x26262626L, - 0x36363636L, 0x3f3f3f3fL, 0xf7f7f7f7L, 0xccccccccL, - 0x34343434L, 0xa5a5a5a5L, 0xe5e5e5e5L, 0xf1f1f1f1L, - 0x71717171L, 0xd8d8d8d8L, 0x31313131L, 0x15151515L, - 0x04040404L, 0xc7c7c7c7L, 
0x23232323L, 0xc3c3c3c3L, - 0x18181818L, 0x96969696L, 0x05050505L, 0x9a9a9a9aL, - 0x07070707L, 0x12121212L, 0x80808080L, 0xe2e2e2e2L, - 0xebebebebL, 0x27272727L, 0xb2b2b2b2L, 0x75757575L, - 0x09090909L, 0x83838383L, 0x2c2c2c2cL, 0x1a1a1a1aL, - 0x1b1b1b1bL, 0x6e6e6e6eL, 0x5a5a5a5aL, 0xa0a0a0a0L, - 0x52525252L, 0x3b3b3b3bL, 0xd6d6d6d6L, 0xb3b3b3b3L, - 0x29292929L, 0xe3e3e3e3L, 0x2f2f2f2fL, 0x84848484L, - 0x53535353L, 0xd1d1d1d1L, 0x00000000L, 0xededededL, - 0x20202020L, 0xfcfcfcfcL, 0xb1b1b1b1L, 0x5b5b5b5bL, - 0x6a6a6a6aL, 0xcbcbcbcbL, 0xbebebebeL, 0x39393939L, - 0x4a4a4a4aL, 0x4c4c4c4cL, 0x58585858L, 0xcfcfcfcfL, - 0xd0d0d0d0L, 0xefefefefL, 0xaaaaaaaaL, 0xfbfbfbfbL, - 0x43434343L, 0x4d4d4d4dL, 0x33333333L, 0x85858585L, - 0x45454545L, 0xf9f9f9f9L, 0x02020202L, 0x7f7f7f7fL, - 0x50505050L, 0x3c3c3c3cL, 0x9f9f9f9fL, 0xa8a8a8a8L, - 0x51515151L, 0xa3a3a3a3L, 0x40404040L, 0x8f8f8f8fL, - 0x92929292L, 0x9d9d9d9dL, 0x38383838L, 0xf5f5f5f5L, - 0xbcbcbcbcL, 0xb6b6b6b6L, 0xdadadadaL, 0x21212121L, - 0x10101010L, 0xffffffffL, 0xf3f3f3f3L, 0xd2d2d2d2L, - 0xcdcdcdcdL, 0x0c0c0c0cL, 0x13131313L, 0xececececL, - 0x5f5f5f5fL, 0x97979797L, 0x44444444L, 0x17171717L, - 0xc4c4c4c4L, 0xa7a7a7a7L, 0x7e7e7e7eL, 0x3d3d3d3dL, - 0x64646464L, 0x5d5d5d5dL, 0x19191919L, 0x73737373L, - 0x60606060L, 0x81818181L, 0x4f4f4f4fL, 0xdcdcdcdcL, - 0x22222222L, 0x2a2a2a2aL, 0x90909090L, 0x88888888L, - 0x46464646L, 0xeeeeeeeeL, 0xb8b8b8b8L, 0x14141414L, - 0xdedededeL, 0x5e5e5e5eL, 0x0b0b0b0bL, 0xdbdbdbdbL, - 0xe0e0e0e0L, 0x32323232L, 0x3a3a3a3aL, 0x0a0a0a0aL, - 0x49494949L, 0x06060606L, 0x24242424L, 0x5c5c5c5cL, - 0xc2c2c2c2L, 0xd3d3d3d3L, 0xacacacacL, 0x62626262L, - 0x91919191L, 0x95959595L, 0xe4e4e4e4L, 0x79797979L, - 0xe7e7e7e7L, 0xc8c8c8c8L, 0x37373737L, 0x6d6d6d6dL, - 0x8d8d8d8dL, 0xd5d5d5d5L, 0x4e4e4e4eL, 0xa9a9a9a9L, - 0x6c6c6c6cL, 0x56565656L, 0xf4f4f4f4L, 0xeaeaeaeaL, - 0x65656565L, 0x7a7a7a7aL, 0xaeaeaeaeL, 0x08080808L, - 0xbabababaL, 0x78787878L, 0x25252525L, 0x2e2e2e2eL, - 0x1c1c1c1cL, 0xa6a6a6a6L, 
0xb4b4b4b4L, 0xc6c6c6c6L, - 0xe8e8e8e8L, 0xddddddddL, 0x74747474L, 0x1f1f1f1fL, - 0x4b4b4b4bL, 0xbdbdbdbdL, 0x8b8b8b8bL, 0x8a8a8a8aL, - 0x70707070L, 0x3e3e3e3eL, 0xb5b5b5b5L, 0x66666666L, - 0x48484848L, 0x03030303L, 0xf6f6f6f6L, 0x0e0e0e0eL, - 0x61616161L, 0x35353535L, 0x57575757L, 0xb9b9b9b9L, - 0x86868686L, 0xc1c1c1c1L, 0x1d1d1d1dL, 0x9e9e9e9eL, - 0xe1e1e1e1L, 0xf8f8f8f8L, 0x98989898L, 0x11111111L, - 0x69696969L, 0xd9d9d9d9L, 0x8e8e8e8eL, 0x94949494L, - 0x9b9b9b9bL, 0x1e1e1e1eL, 0x87878787L, 0xe9e9e9e9L, - 0xcecececeL, 0x55555555L, 0x28282828L, 0xdfdfdfdfL, - 0x8c8c8c8cL, 0xa1a1a1a1L, 0x89898989L, 0x0d0d0d0dL, - 0xbfbfbfbfL, 0xe6e6e6e6L, 0x42424242L, 0x68686868L, - 0x41414141L, 0x99999999L, 0x2d2d2d2dL, 0x0f0f0f0fL, - 0xb0b0b0b0L, 0x54545454L, 0xbbbbbbbbL, 0x16161616L, + 0x63636363, 0x7c7c7c7c, 0x77777777, 0x7b7b7b7b, + 0xf2f2f2f2, 0x6b6b6b6b, 0x6f6f6f6f, 0xc5c5c5c5, + 0x30303030, 0x01010101, 0x67676767, 0x2b2b2b2b, + 0xfefefefe, 0xd7d7d7d7, 0xabababab, 0x76767676, + 0xcacacaca, 0x82828282, 0xc9c9c9c9, 0x7d7d7d7d, + 0xfafafafa, 0x59595959, 0x47474747, 0xf0f0f0f0, + 0xadadadad, 0xd4d4d4d4, 0xa2a2a2a2, 0xafafafaf, + 0x9c9c9c9c, 0xa4a4a4a4, 0x72727272, 0xc0c0c0c0, + 0xb7b7b7b7, 0xfdfdfdfd, 0x93939393, 0x26262626, + 0x36363636, 0x3f3f3f3f, 0xf7f7f7f7, 0xcccccccc, + 0x34343434, 0xa5a5a5a5, 0xe5e5e5e5, 0xf1f1f1f1, + 0x71717171, 0xd8d8d8d8, 0x31313131, 0x15151515, + 0x04040404, 0xc7c7c7c7, 0x23232323, 0xc3c3c3c3, + 0x18181818, 0x96969696, 0x05050505, 0x9a9a9a9a, + 0x07070707, 0x12121212, 0x80808080, 0xe2e2e2e2, + 0xebebebeb, 0x27272727, 0xb2b2b2b2, 0x75757575, + 0x09090909, 0x83838383, 0x2c2c2c2c, 0x1a1a1a1a, + 0x1b1b1b1b, 0x6e6e6e6e, 0x5a5a5a5a, 0xa0a0a0a0, + 0x52525252, 0x3b3b3b3b, 0xd6d6d6d6, 0xb3b3b3b3, + 0x29292929, 0xe3e3e3e3, 0x2f2f2f2f, 0x84848484, + 0x53535353, 0xd1d1d1d1, 0x00000000, 0xedededed, + 0x20202020, 0xfcfcfcfc, 0xb1b1b1b1, 0x5b5b5b5b, + 0x6a6a6a6a, 0xcbcbcbcb, 0xbebebebe, 0x39393939, + 0x4a4a4a4a, 0x4c4c4c4c, 0x58585858, 0xcfcfcfcf, + 0xd0d0d0d0, 
0xefefefef, 0xaaaaaaaa, 0xfbfbfbfb, + 0x43434343, 0x4d4d4d4d, 0x33333333, 0x85858585, + 0x45454545, 0xf9f9f9f9, 0x02020202, 0x7f7f7f7f, + 0x50505050, 0x3c3c3c3c, 0x9f9f9f9f, 0xa8a8a8a8, + 0x51515151, 0xa3a3a3a3, 0x40404040, 0x8f8f8f8f, + 0x92929292, 0x9d9d9d9d, 0x38383838, 0xf5f5f5f5, + 0xbcbcbcbc, 0xb6b6b6b6, 0xdadadada, 0x21212121, + 0x10101010, 0xffffffff, 0xf3f3f3f3, 0xd2d2d2d2, + 0xcdcdcdcd, 0x0c0c0c0c, 0x13131313, 0xecececec, + 0x5f5f5f5f, 0x97979797, 0x44444444, 0x17171717, + 0xc4c4c4c4, 0xa7a7a7a7, 0x7e7e7e7e, 0x3d3d3d3d, + 0x64646464, 0x5d5d5d5d, 0x19191919, 0x73737373, + 0x60606060, 0x81818181, 0x4f4f4f4f, 0xdcdcdcdc, + 0x22222222, 0x2a2a2a2a, 0x90909090, 0x88888888, + 0x46464646, 0xeeeeeeee, 0xb8b8b8b8, 0x14141414, + 0xdededede, 0x5e5e5e5e, 0x0b0b0b0b, 0xdbdbdbdb, + 0xe0e0e0e0, 0x32323232, 0x3a3a3a3a, 0x0a0a0a0a, + 0x49494949, 0x06060606, 0x24242424, 0x5c5c5c5c, + 0xc2c2c2c2, 0xd3d3d3d3, 0xacacacac, 0x62626262, + 0x91919191, 0x95959595, 0xe4e4e4e4, 0x79797979, + 0xe7e7e7e7, 0xc8c8c8c8, 0x37373737, 0x6d6d6d6d, + 0x8d8d8d8d, 0xd5d5d5d5, 0x4e4e4e4e, 0xa9a9a9a9, + 0x6c6c6c6c, 0x56565656, 0xf4f4f4f4, 0xeaeaeaea, + 0x65656565, 0x7a7a7a7a, 0xaeaeaeae, 0x08080808, + 0xbabababa, 0x78787878, 0x25252525, 0x2e2e2e2e, + 0x1c1c1c1c, 0xa6a6a6a6, 0xb4b4b4b4, 0xc6c6c6c6, + 0xe8e8e8e8, 0xdddddddd, 0x74747474, 0x1f1f1f1f, + 0x4b4b4b4b, 0xbdbdbdbd, 0x8b8b8b8b, 0x8a8a8a8a, + 0x70707070, 0x3e3e3e3e, 0xb5b5b5b5, 0x66666666, + 0x48484848, 0x03030303, 0xf6f6f6f6, 0x0e0e0e0e, + 0x61616161, 0x35353535, 0x57575757, 0xb9b9b9b9, + 0x86868686, 0xc1c1c1c1, 0x1d1d1d1d, 0x9e9e9e9e, + 0xe1e1e1e1, 0xf8f8f8f8, 0x98989898, 0x11111111, + 0x69696969, 0xd9d9d9d9, 0x8e8e8e8e, 0x94949494, + 0x9b9b9b9b, 0x1e1e1e1e, 0x87878787, 0xe9e9e9e9, + 0xcececece, 0x55555555, 0x28282828, 0xdfdfdfdf, + 0x8c8c8c8c, 0xa1a1a1a1, 0x89898989, 0x0d0d0d0d, + 0xbfbfbfbf, 0xe6e6e6e6, 0x42424242, 0x68686868, + 0x41414141, 0x99999999, 0x2d2d2d2d, 0x0f0f0f0f, + 0xb0b0b0b0, 0x54545454, 0xbbbbbbbb, 0x16161616, ] Td0 = [ - 
0x51f4a750L, 0x7e416553L, 0x1a17a4c3L, 0x3a275e96L, - 0x3bab6bcbL, 0x1f9d45f1L, 0xacfa58abL, 0x4be30393L, - 0x2030fa55L, 0xad766df6L, 0x88cc7691L, 0xf5024c25L, - 0x4fe5d7fcL, 0xc52acbd7L, 0x26354480L, 0xb562a38fL, - 0xdeb15a49L, 0x25ba1b67L, 0x45ea0e98L, 0x5dfec0e1L, - 0xc32f7502L, 0x814cf012L, 0x8d4697a3L, 0x6bd3f9c6L, - 0x038f5fe7L, 0x15929c95L, 0xbf6d7aebL, 0x955259daL, - 0xd4be832dL, 0x587421d3L, 0x49e06929L, 0x8ec9c844L, - 0x75c2896aL, 0xf48e7978L, 0x99583e6bL, 0x27b971ddL, - 0xbee14fb6L, 0xf088ad17L, 0xc920ac66L, 0x7dce3ab4L, - 0x63df4a18L, 0xe51a3182L, 0x97513360L, 0x62537f45L, - 0xb16477e0L, 0xbb6bae84L, 0xfe81a01cL, 0xf9082b94L, - 0x70486858L, 0x8f45fd19L, 0x94de6c87L, 0x527bf8b7L, - 0xab73d323L, 0x724b02e2L, 0xe31f8f57L, 0x6655ab2aL, - 0xb2eb2807L, 0x2fb5c203L, 0x86c57b9aL, 0xd33708a5L, - 0x302887f2L, 0x23bfa5b2L, 0x02036abaL, 0xed16825cL, - 0x8acf1c2bL, 0xa779b492L, 0xf307f2f0L, 0x4e69e2a1L, - 0x65daf4cdL, 0x0605bed5L, 0xd134621fL, 0xc4a6fe8aL, - 0x342e539dL, 0xa2f355a0L, 0x058ae132L, 0xa4f6eb75L, - 0x0b83ec39L, 0x4060efaaL, 0x5e719f06L, 0xbd6e1051L, - 0x3e218af9L, 0x96dd063dL, 0xdd3e05aeL, 0x4de6bd46L, - 0x91548db5L, 0x71c45d05L, 0x0406d46fL, 0x605015ffL, - 0x1998fb24L, 0xd6bde997L, 0x894043ccL, 0x67d99e77L, - 0xb0e842bdL, 0x07898b88L, 0xe7195b38L, 0x79c8eedbL, - 0xa17c0a47L, 0x7c420fe9L, 0xf8841ec9L, 0x00000000L, - 0x09808683L, 0x322bed48L, 0x1e1170acL, 0x6c5a724eL, - 0xfd0efffbL, 0x0f853856L, 0x3daed51eL, 0x362d3927L, - 0x0a0fd964L, 0x685ca621L, 0x9b5b54d1L, 0x24362e3aL, - 0x0c0a67b1L, 0x9357e70fL, 0xb4ee96d2L, 0x1b9b919eL, - 0x80c0c54fL, 0x61dc20a2L, 0x5a774b69L, 0x1c121a16L, - 0xe293ba0aL, 0xc0a02ae5L, 0x3c22e043L, 0x121b171dL, - 0x0e090d0bL, 0xf28bc7adL, 0x2db6a8b9L, 0x141ea9c8L, - 0x57f11985L, 0xaf75074cL, 0xee99ddbbL, 0xa37f60fdL, - 0xf701269fL, 0x5c72f5bcL, 0x44663bc5L, 0x5bfb7e34L, - 0x8b432976L, 0xcb23c6dcL, 0xb6edfc68L, 0xb8e4f163L, - 0xd731dccaL, 0x42638510L, 0x13972240L, 0x84c61120L, - 0x854a247dL, 0xd2bb3df8L, 0xaef93211L, 0xc729a16dL, - 
0x1d9e2f4bL, 0xdcb230f3L, 0x0d8652ecL, 0x77c1e3d0L, - 0x2bb3166cL, 0xa970b999L, 0x119448faL, 0x47e96422L, - 0xa8fc8cc4L, 0xa0f03f1aL, 0x567d2cd8L, 0x223390efL, - 0x87494ec7L, 0xd938d1c1L, 0x8ccaa2feL, 0x98d40b36L, - 0xa6f581cfL, 0xa57ade28L, 0xdab78e26L, 0x3fadbfa4L, - 0x2c3a9de4L, 0x5078920dL, 0x6a5fcc9bL, 0x547e4662L, - 0xf68d13c2L, 0x90d8b8e8L, 0x2e39f75eL, 0x82c3aff5L, - 0x9f5d80beL, 0x69d0937cL, 0x6fd52da9L, 0xcf2512b3L, - 0xc8ac993bL, 0x10187da7L, 0xe89c636eL, 0xdb3bbb7bL, - 0xcd267809L, 0x6e5918f4L, 0xec9ab701L, 0x834f9aa8L, - 0xe6956e65L, 0xaaffe67eL, 0x21bccf08L, 0xef15e8e6L, - 0xbae79bd9L, 0x4a6f36ceL, 0xea9f09d4L, 0x29b07cd6L, - 0x31a4b2afL, 0x2a3f2331L, 0xc6a59430L, 0x35a266c0L, - 0x744ebc37L, 0xfc82caa6L, 0xe090d0b0L, 0x33a7d815L, - 0xf104984aL, 0x41ecdaf7L, 0x7fcd500eL, 0x1791f62fL, - 0x764dd68dL, 0x43efb04dL, 0xccaa4d54L, 0xe49604dfL, - 0x9ed1b5e3L, 0x4c6a881bL, 0xc12c1fb8L, 0x4665517fL, - 0x9d5eea04L, 0x018c355dL, 0xfa877473L, 0xfb0b412eL, - 0xb3671d5aL, 0x92dbd252L, 0xe9105633L, 0x6dd64713L, - 0x9ad7618cL, 0x37a10c7aL, 0x59f8148eL, 0xeb133c89L, - 0xcea927eeL, 0xb761c935L, 0xe11ce5edL, 0x7a47b13cL, - 0x9cd2df59L, 0x55f2733fL, 0x1814ce79L, 0x73c737bfL, - 0x53f7cdeaL, 0x5ffdaa5bL, 0xdf3d6f14L, 0x7844db86L, - 0xcaaff381L, 0xb968c43eL, 0x3824342cL, 0xc2a3405fL, - 0x161dc372L, 0xbce2250cL, 0x283c498bL, 0xff0d9541L, - 0x39a80171L, 0x080cb3deL, 0xd8b4e49cL, 0x6456c190L, - 0x7bcb8461L, 0xd532b670L, 0x486c5c74L, 0xd0b85742L, + 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96, + 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393, + 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25, + 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f, + 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1, + 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6, + 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da, + 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844, + 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd, + 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4, + 0x63df4a18, 0xe51a3182, 0x97513360, 
0x62537f45, + 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94, + 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7, + 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a, + 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5, + 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c, + 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1, + 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a, + 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75, + 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051, + 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46, + 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff, + 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77, + 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb, + 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000, + 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e, + 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927, + 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a, + 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e, + 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16, + 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d, + 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8, + 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd, + 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34, + 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163, + 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120, + 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d, + 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0, + 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422, + 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef, + 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36, + 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4, + 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662, + 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5, + 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3, + 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b, + 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8, + 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6, + 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6, + 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0, + 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 
0x33a7d815, + 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f, + 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df, + 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f, + 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e, + 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713, + 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89, + 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c, + 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf, + 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86, + 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f, + 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541, + 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190, + 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742, ] Td1 = [ - 0x5051f4a7L, 0x537e4165L, 0xc31a17a4L, 0x963a275eL, - 0xcb3bab6bL, 0xf11f9d45L, 0xabacfa58L, 0x934be303L, - 0x552030faL, 0xf6ad766dL, 0x9188cc76L, 0x25f5024cL, - 0xfc4fe5d7L, 0xd7c52acbL, 0x80263544L, 0x8fb562a3L, - 0x49deb15aL, 0x6725ba1bL, 0x9845ea0eL, 0xe15dfec0L, - 0x02c32f75L, 0x12814cf0L, 0xa38d4697L, 0xc66bd3f9L, - 0xe7038f5fL, 0x9515929cL, 0xebbf6d7aL, 0xda955259L, - 0x2dd4be83L, 0xd3587421L, 0x2949e069L, 0x448ec9c8L, - 0x6a75c289L, 0x78f48e79L, 0x6b99583eL, 0xdd27b971L, - 0xb6bee14fL, 0x17f088adL, 0x66c920acL, 0xb47dce3aL, - 0x1863df4aL, 0x82e51a31L, 0x60975133L, 0x4562537fL, - 0xe0b16477L, 0x84bb6baeL, 0x1cfe81a0L, 0x94f9082bL, - 0x58704868L, 0x198f45fdL, 0x8794de6cL, 0xb7527bf8L, - 0x23ab73d3L, 0xe2724b02L, 0x57e31f8fL, 0x2a6655abL, - 0x07b2eb28L, 0x032fb5c2L, 0x9a86c57bL, 0xa5d33708L, - 0xf2302887L, 0xb223bfa5L, 0xba02036aL, 0x5ced1682L, - 0x2b8acf1cL, 0x92a779b4L, 0xf0f307f2L, 0xa14e69e2L, - 0xcd65daf4L, 0xd50605beL, 0x1fd13462L, 0x8ac4a6feL, - 0x9d342e53L, 0xa0a2f355L, 0x32058ae1L, 0x75a4f6ebL, - 0x390b83ecL, 0xaa4060efL, 0x065e719fL, 0x51bd6e10L, - 0xf93e218aL, 0x3d96dd06L, 0xaedd3e05L, 0x464de6bdL, - 0xb591548dL, 0x0571c45dL, 0x6f0406d4L, 0xff605015L, - 0x241998fbL, 0x97d6bde9L, 0xcc894043L, 0x7767d99eL, - 0xbdb0e842L, 0x8807898bL, 0x38e7195bL, 0xdb79c8eeL, - 0x47a17c0aL, 0xe97c420fL, 
0xc9f8841eL, 0x00000000L, - 0x83098086L, 0x48322bedL, 0xac1e1170L, 0x4e6c5a72L, - 0xfbfd0effL, 0x560f8538L, 0x1e3daed5L, 0x27362d39L, - 0x640a0fd9L, 0x21685ca6L, 0xd19b5b54L, 0x3a24362eL, - 0xb10c0a67L, 0x0f9357e7L, 0xd2b4ee96L, 0x9e1b9b91L, - 0x4f80c0c5L, 0xa261dc20L, 0x695a774bL, 0x161c121aL, - 0x0ae293baL, 0xe5c0a02aL, 0x433c22e0L, 0x1d121b17L, - 0x0b0e090dL, 0xadf28bc7L, 0xb92db6a8L, 0xc8141ea9L, - 0x8557f119L, 0x4caf7507L, 0xbbee99ddL, 0xfda37f60L, - 0x9ff70126L, 0xbc5c72f5L, 0xc544663bL, 0x345bfb7eL, - 0x768b4329L, 0xdccb23c6L, 0x68b6edfcL, 0x63b8e4f1L, - 0xcad731dcL, 0x10426385L, 0x40139722L, 0x2084c611L, - 0x7d854a24L, 0xf8d2bb3dL, 0x11aef932L, 0x6dc729a1L, - 0x4b1d9e2fL, 0xf3dcb230L, 0xec0d8652L, 0xd077c1e3L, - 0x6c2bb316L, 0x99a970b9L, 0xfa119448L, 0x2247e964L, - 0xc4a8fc8cL, 0x1aa0f03fL, 0xd8567d2cL, 0xef223390L, - 0xc787494eL, 0xc1d938d1L, 0xfe8ccaa2L, 0x3698d40bL, - 0xcfa6f581L, 0x28a57adeL, 0x26dab78eL, 0xa43fadbfL, - 0xe42c3a9dL, 0x0d507892L, 0x9b6a5fccL, 0x62547e46L, - 0xc2f68d13L, 0xe890d8b8L, 0x5e2e39f7L, 0xf582c3afL, - 0xbe9f5d80L, 0x7c69d093L, 0xa96fd52dL, 0xb3cf2512L, - 0x3bc8ac99L, 0xa710187dL, 0x6ee89c63L, 0x7bdb3bbbL, - 0x09cd2678L, 0xf46e5918L, 0x01ec9ab7L, 0xa8834f9aL, - 0x65e6956eL, 0x7eaaffe6L, 0x0821bccfL, 0xe6ef15e8L, - 0xd9bae79bL, 0xce4a6f36L, 0xd4ea9f09L, 0xd629b07cL, - 0xaf31a4b2L, 0x312a3f23L, 0x30c6a594L, 0xc035a266L, - 0x37744ebcL, 0xa6fc82caL, 0xb0e090d0L, 0x1533a7d8L, - 0x4af10498L, 0xf741ecdaL, 0x0e7fcd50L, 0x2f1791f6L, - 0x8d764dd6L, 0x4d43efb0L, 0x54ccaa4dL, 0xdfe49604L, - 0xe39ed1b5L, 0x1b4c6a88L, 0xb8c12c1fL, 0x7f466551L, - 0x049d5eeaL, 0x5d018c35L, 0x73fa8774L, 0x2efb0b41L, - 0x5ab3671dL, 0x5292dbd2L, 0x33e91056L, 0x136dd647L, - 0x8c9ad761L, 0x7a37a10cL, 0x8e59f814L, 0x89eb133cL, - 0xeecea927L, 0x35b761c9L, 0xede11ce5L, 0x3c7a47b1L, - 0x599cd2dfL, 0x3f55f273L, 0x791814ceL, 0xbf73c737L, - 0xea53f7cdL, 0x5b5ffdaaL, 0x14df3d6fL, 0x867844dbL, - 0x81caaff3L, 0x3eb968c4L, 0x2c382434L, 0x5fc2a340L, - 0x72161dc3L, 0x0cbce225L, 
0x8b283c49L, 0x41ff0d95L, - 0x7139a801L, 0xde080cb3L, 0x9cd8b4e4L, 0x906456c1L, - 0x617bcb84L, 0x70d532b6L, 0x74486c5cL, 0x42d0b857L, + 0x5051f4a7, 0x537e4165, 0xc31a17a4, 0x963a275e, + 0xcb3bab6b, 0xf11f9d45, 0xabacfa58, 0x934be303, + 0x552030fa, 0xf6ad766d, 0x9188cc76, 0x25f5024c, + 0xfc4fe5d7, 0xd7c52acb, 0x80263544, 0x8fb562a3, + 0x49deb15a, 0x6725ba1b, 0x9845ea0e, 0xe15dfec0, + 0x02c32f75, 0x12814cf0, 0xa38d4697, 0xc66bd3f9, + 0xe7038f5f, 0x9515929c, 0xebbf6d7a, 0xda955259, + 0x2dd4be83, 0xd3587421, 0x2949e069, 0x448ec9c8, + 0x6a75c289, 0x78f48e79, 0x6b99583e, 0xdd27b971, + 0xb6bee14f, 0x17f088ad, 0x66c920ac, 0xb47dce3a, + 0x1863df4a, 0x82e51a31, 0x60975133, 0x4562537f, + 0xe0b16477, 0x84bb6bae, 0x1cfe81a0, 0x94f9082b, + 0x58704868, 0x198f45fd, 0x8794de6c, 0xb7527bf8, + 0x23ab73d3, 0xe2724b02, 0x57e31f8f, 0x2a6655ab, + 0x07b2eb28, 0x032fb5c2, 0x9a86c57b, 0xa5d33708, + 0xf2302887, 0xb223bfa5, 0xba02036a, 0x5ced1682, + 0x2b8acf1c, 0x92a779b4, 0xf0f307f2, 0xa14e69e2, + 0xcd65daf4, 0xd50605be, 0x1fd13462, 0x8ac4a6fe, + 0x9d342e53, 0xa0a2f355, 0x32058ae1, 0x75a4f6eb, + 0x390b83ec, 0xaa4060ef, 0x065e719f, 0x51bd6e10, + 0xf93e218a, 0x3d96dd06, 0xaedd3e05, 0x464de6bd, + 0xb591548d, 0x0571c45d, 0x6f0406d4, 0xff605015, + 0x241998fb, 0x97d6bde9, 0xcc894043, 0x7767d99e, + 0xbdb0e842, 0x8807898b, 0x38e7195b, 0xdb79c8ee, + 0x47a17c0a, 0xe97c420f, 0xc9f8841e, 0x00000000, + 0x83098086, 0x48322bed, 0xac1e1170, 0x4e6c5a72, + 0xfbfd0eff, 0x560f8538, 0x1e3daed5, 0x27362d39, + 0x640a0fd9, 0x21685ca6, 0xd19b5b54, 0x3a24362e, + 0xb10c0a67, 0x0f9357e7, 0xd2b4ee96, 0x9e1b9b91, + 0x4f80c0c5, 0xa261dc20, 0x695a774b, 0x161c121a, + 0x0ae293ba, 0xe5c0a02a, 0x433c22e0, 0x1d121b17, + 0x0b0e090d, 0xadf28bc7, 0xb92db6a8, 0xc8141ea9, + 0x8557f119, 0x4caf7507, 0xbbee99dd, 0xfda37f60, + 0x9ff70126, 0xbc5c72f5, 0xc544663b, 0x345bfb7e, + 0x768b4329, 0xdccb23c6, 0x68b6edfc, 0x63b8e4f1, + 0xcad731dc, 0x10426385, 0x40139722, 0x2084c611, + 0x7d854a24, 0xf8d2bb3d, 0x11aef932, 0x6dc729a1, + 0x4b1d9e2f, 
0xf3dcb230, 0xec0d8652, 0xd077c1e3, + 0x6c2bb316, 0x99a970b9, 0xfa119448, 0x2247e964, + 0xc4a8fc8c, 0x1aa0f03f, 0xd8567d2c, 0xef223390, + 0xc787494e, 0xc1d938d1, 0xfe8ccaa2, 0x3698d40b, + 0xcfa6f581, 0x28a57ade, 0x26dab78e, 0xa43fadbf, + 0xe42c3a9d, 0x0d507892, 0x9b6a5fcc, 0x62547e46, + 0xc2f68d13, 0xe890d8b8, 0x5e2e39f7, 0xf582c3af, + 0xbe9f5d80, 0x7c69d093, 0xa96fd52d, 0xb3cf2512, + 0x3bc8ac99, 0xa710187d, 0x6ee89c63, 0x7bdb3bbb, + 0x09cd2678, 0xf46e5918, 0x01ec9ab7, 0xa8834f9a, + 0x65e6956e, 0x7eaaffe6, 0x0821bccf, 0xe6ef15e8, + 0xd9bae79b, 0xce4a6f36, 0xd4ea9f09, 0xd629b07c, + 0xaf31a4b2, 0x312a3f23, 0x30c6a594, 0xc035a266, + 0x37744ebc, 0xa6fc82ca, 0xb0e090d0, 0x1533a7d8, + 0x4af10498, 0xf741ecda, 0x0e7fcd50, 0x2f1791f6, + 0x8d764dd6, 0x4d43efb0, 0x54ccaa4d, 0xdfe49604, + 0xe39ed1b5, 0x1b4c6a88, 0xb8c12c1f, 0x7f466551, + 0x049d5eea, 0x5d018c35, 0x73fa8774, 0x2efb0b41, + 0x5ab3671d, 0x5292dbd2, 0x33e91056, 0x136dd647, + 0x8c9ad761, 0x7a37a10c, 0x8e59f814, 0x89eb133c, + 0xeecea927, 0x35b761c9, 0xede11ce5, 0x3c7a47b1, + 0x599cd2df, 0x3f55f273, 0x791814ce, 0xbf73c737, + 0xea53f7cd, 0x5b5ffdaa, 0x14df3d6f, 0x867844db, + 0x81caaff3, 0x3eb968c4, 0x2c382434, 0x5fc2a340, + 0x72161dc3, 0x0cbce225, 0x8b283c49, 0x41ff0d95, + 0x7139a801, 0xde080cb3, 0x9cd8b4e4, 0x906456c1, + 0x617bcb84, 0x70d532b6, 0x74486c5c, 0x42d0b857, ] Td2 = [ - 0xa75051f4L, 0x65537e41L, 0xa4c31a17L, 0x5e963a27L, - 0x6bcb3babL, 0x45f11f9dL, 0x58abacfaL, 0x03934be3L, - 0xfa552030L, 0x6df6ad76L, 0x769188ccL, 0x4c25f502L, - 0xd7fc4fe5L, 0xcbd7c52aL, 0x44802635L, 0xa38fb562L, - 0x5a49deb1L, 0x1b6725baL, 0x0e9845eaL, 0xc0e15dfeL, - 0x7502c32fL, 0xf012814cL, 0x97a38d46L, 0xf9c66bd3L, - 0x5fe7038fL, 0x9c951592L, 0x7aebbf6dL, 0x59da9552L, - 0x832dd4beL, 0x21d35874L, 0x692949e0L, 0xc8448ec9L, - 0x896a75c2L, 0x7978f48eL, 0x3e6b9958L, 0x71dd27b9L, - 0x4fb6bee1L, 0xad17f088L, 0xac66c920L, 0x3ab47dceL, - 0x4a1863dfL, 0x3182e51aL, 0x33609751L, 0x7f456253L, - 0x77e0b164L, 0xae84bb6bL, 0xa01cfe81L, 0x2b94f908L, - 
0x68587048L, 0xfd198f45L, 0x6c8794deL, 0xf8b7527bL, - 0xd323ab73L, 0x02e2724bL, 0x8f57e31fL, 0xab2a6655L, - 0x2807b2ebL, 0xc2032fb5L, 0x7b9a86c5L, 0x08a5d337L, - 0x87f23028L, 0xa5b223bfL, 0x6aba0203L, 0x825ced16L, - 0x1c2b8acfL, 0xb492a779L, 0xf2f0f307L, 0xe2a14e69L, - 0xf4cd65daL, 0xbed50605L, 0x621fd134L, 0xfe8ac4a6L, - 0x539d342eL, 0x55a0a2f3L, 0xe132058aL, 0xeb75a4f6L, - 0xec390b83L, 0xefaa4060L, 0x9f065e71L, 0x1051bd6eL, - 0x8af93e21L, 0x063d96ddL, 0x05aedd3eL, 0xbd464de6L, - 0x8db59154L, 0x5d0571c4L, 0xd46f0406L, 0x15ff6050L, - 0xfb241998L, 0xe997d6bdL, 0x43cc8940L, 0x9e7767d9L, - 0x42bdb0e8L, 0x8b880789L, 0x5b38e719L, 0xeedb79c8L, - 0x0a47a17cL, 0x0fe97c42L, 0x1ec9f884L, 0x00000000L, - 0x86830980L, 0xed48322bL, 0x70ac1e11L, 0x724e6c5aL, - 0xfffbfd0eL, 0x38560f85L, 0xd51e3daeL, 0x3927362dL, - 0xd9640a0fL, 0xa621685cL, 0x54d19b5bL, 0x2e3a2436L, - 0x67b10c0aL, 0xe70f9357L, 0x96d2b4eeL, 0x919e1b9bL, - 0xc54f80c0L, 0x20a261dcL, 0x4b695a77L, 0x1a161c12L, - 0xba0ae293L, 0x2ae5c0a0L, 0xe0433c22L, 0x171d121bL, - 0x0d0b0e09L, 0xc7adf28bL, 0xa8b92db6L, 0xa9c8141eL, - 0x198557f1L, 0x074caf75L, 0xddbbee99L, 0x60fda37fL, - 0x269ff701L, 0xf5bc5c72L, 0x3bc54466L, 0x7e345bfbL, - 0x29768b43L, 0xc6dccb23L, 0xfc68b6edL, 0xf163b8e4L, - 0xdccad731L, 0x85104263L, 0x22401397L, 0x112084c6L, - 0x247d854aL, 0x3df8d2bbL, 0x3211aef9L, 0xa16dc729L, - 0x2f4b1d9eL, 0x30f3dcb2L, 0x52ec0d86L, 0xe3d077c1L, - 0x166c2bb3L, 0xb999a970L, 0x48fa1194L, 0x642247e9L, - 0x8cc4a8fcL, 0x3f1aa0f0L, 0x2cd8567dL, 0x90ef2233L, - 0x4ec78749L, 0xd1c1d938L, 0xa2fe8ccaL, 0x0b3698d4L, - 0x81cfa6f5L, 0xde28a57aL, 0x8e26dab7L, 0xbfa43fadL, - 0x9de42c3aL, 0x920d5078L, 0xcc9b6a5fL, 0x4662547eL, - 0x13c2f68dL, 0xb8e890d8L, 0xf75e2e39L, 0xaff582c3L, - 0x80be9f5dL, 0x937c69d0L, 0x2da96fd5L, 0x12b3cf25L, - 0x993bc8acL, 0x7da71018L, 0x636ee89cL, 0xbb7bdb3bL, - 0x7809cd26L, 0x18f46e59L, 0xb701ec9aL, 0x9aa8834fL, - 0x6e65e695L, 0xe67eaaffL, 0xcf0821bcL, 0xe8e6ef15L, - 0x9bd9bae7L, 0x36ce4a6fL, 0x09d4ea9fL, 0x7cd629b0L, - 
0xb2af31a4L, 0x23312a3fL, 0x9430c6a5L, 0x66c035a2L, - 0xbc37744eL, 0xcaa6fc82L, 0xd0b0e090L, 0xd81533a7L, - 0x984af104L, 0xdaf741ecL, 0x500e7fcdL, 0xf62f1791L, - 0xd68d764dL, 0xb04d43efL, 0x4d54ccaaL, 0x04dfe496L, - 0xb5e39ed1L, 0x881b4c6aL, 0x1fb8c12cL, 0x517f4665L, - 0xea049d5eL, 0x355d018cL, 0x7473fa87L, 0x412efb0bL, - 0x1d5ab367L, 0xd25292dbL, 0x5633e910L, 0x47136dd6L, - 0x618c9ad7L, 0x0c7a37a1L, 0x148e59f8L, 0x3c89eb13L, - 0x27eecea9L, 0xc935b761L, 0xe5ede11cL, 0xb13c7a47L, - 0xdf599cd2L, 0x733f55f2L, 0xce791814L, 0x37bf73c7L, - 0xcdea53f7L, 0xaa5b5ffdL, 0x6f14df3dL, 0xdb867844L, - 0xf381caafL, 0xc43eb968L, 0x342c3824L, 0x405fc2a3L, - 0xc372161dL, 0x250cbce2L, 0x498b283cL, 0x9541ff0dL, - 0x017139a8L, 0xb3de080cL, 0xe49cd8b4L, 0xc1906456L, - 0x84617bcbL, 0xb670d532L, 0x5c74486cL, 0x5742d0b8L, + 0xa75051f4, 0x65537e41, 0xa4c31a17, 0x5e963a27, + 0x6bcb3bab, 0x45f11f9d, 0x58abacfa, 0x03934be3, + 0xfa552030, 0x6df6ad76, 0x769188cc, 0x4c25f502, + 0xd7fc4fe5, 0xcbd7c52a, 0x44802635, 0xa38fb562, + 0x5a49deb1, 0x1b6725ba, 0x0e9845ea, 0xc0e15dfe, + 0x7502c32f, 0xf012814c, 0x97a38d46, 0xf9c66bd3, + 0x5fe7038f, 0x9c951592, 0x7aebbf6d, 0x59da9552, + 0x832dd4be, 0x21d35874, 0x692949e0, 0xc8448ec9, + 0x896a75c2, 0x7978f48e, 0x3e6b9958, 0x71dd27b9, + 0x4fb6bee1, 0xad17f088, 0xac66c920, 0x3ab47dce, + 0x4a1863df, 0x3182e51a, 0x33609751, 0x7f456253, + 0x77e0b164, 0xae84bb6b, 0xa01cfe81, 0x2b94f908, + 0x68587048, 0xfd198f45, 0x6c8794de, 0xf8b7527b, + 0xd323ab73, 0x02e2724b, 0x8f57e31f, 0xab2a6655, + 0x2807b2eb, 0xc2032fb5, 0x7b9a86c5, 0x08a5d337, + 0x87f23028, 0xa5b223bf, 0x6aba0203, 0x825ced16, + 0x1c2b8acf, 0xb492a779, 0xf2f0f307, 0xe2a14e69, + 0xf4cd65da, 0xbed50605, 0x621fd134, 0xfe8ac4a6, + 0x539d342e, 0x55a0a2f3, 0xe132058a, 0xeb75a4f6, + 0xec390b83, 0xefaa4060, 0x9f065e71, 0x1051bd6e, + 0x8af93e21, 0x063d96dd, 0x05aedd3e, 0xbd464de6, + 0x8db59154, 0x5d0571c4, 0xd46f0406, 0x15ff6050, + 0xfb241998, 0xe997d6bd, 0x43cc8940, 0x9e7767d9, + 0x42bdb0e8, 0x8b880789, 0x5b38e719, 
0xeedb79c8, + 0x0a47a17c, 0x0fe97c42, 0x1ec9f884, 0x00000000, + 0x86830980, 0xed48322b, 0x70ac1e11, 0x724e6c5a, + 0xfffbfd0e, 0x38560f85, 0xd51e3dae, 0x3927362d, + 0xd9640a0f, 0xa621685c, 0x54d19b5b, 0x2e3a2436, + 0x67b10c0a, 0xe70f9357, 0x96d2b4ee, 0x919e1b9b, + 0xc54f80c0, 0x20a261dc, 0x4b695a77, 0x1a161c12, + 0xba0ae293, 0x2ae5c0a0, 0xe0433c22, 0x171d121b, + 0x0d0b0e09, 0xc7adf28b, 0xa8b92db6, 0xa9c8141e, + 0x198557f1, 0x074caf75, 0xddbbee99, 0x60fda37f, + 0x269ff701, 0xf5bc5c72, 0x3bc54466, 0x7e345bfb, + 0x29768b43, 0xc6dccb23, 0xfc68b6ed, 0xf163b8e4, + 0xdccad731, 0x85104263, 0x22401397, 0x112084c6, + 0x247d854a, 0x3df8d2bb, 0x3211aef9, 0xa16dc729, + 0x2f4b1d9e, 0x30f3dcb2, 0x52ec0d86, 0xe3d077c1, + 0x166c2bb3, 0xb999a970, 0x48fa1194, 0x642247e9, + 0x8cc4a8fc, 0x3f1aa0f0, 0x2cd8567d, 0x90ef2233, + 0x4ec78749, 0xd1c1d938, 0xa2fe8cca, 0x0b3698d4, + 0x81cfa6f5, 0xde28a57a, 0x8e26dab7, 0xbfa43fad, + 0x9de42c3a, 0x920d5078, 0xcc9b6a5f, 0x4662547e, + 0x13c2f68d, 0xb8e890d8, 0xf75e2e39, 0xaff582c3, + 0x80be9f5d, 0x937c69d0, 0x2da96fd5, 0x12b3cf25, + 0x993bc8ac, 0x7da71018, 0x636ee89c, 0xbb7bdb3b, + 0x7809cd26, 0x18f46e59, 0xb701ec9a, 0x9aa8834f, + 0x6e65e695, 0xe67eaaff, 0xcf0821bc, 0xe8e6ef15, + 0x9bd9bae7, 0x36ce4a6f, 0x09d4ea9f, 0x7cd629b0, + 0xb2af31a4, 0x23312a3f, 0x9430c6a5, 0x66c035a2, + 0xbc37744e, 0xcaa6fc82, 0xd0b0e090, 0xd81533a7, + 0x984af104, 0xdaf741ec, 0x500e7fcd, 0xf62f1791, + 0xd68d764d, 0xb04d43ef, 0x4d54ccaa, 0x04dfe496, + 0xb5e39ed1, 0x881b4c6a, 0x1fb8c12c, 0x517f4665, + 0xea049d5e, 0x355d018c, 0x7473fa87, 0x412efb0b, + 0x1d5ab367, 0xd25292db, 0x5633e910, 0x47136dd6, + 0x618c9ad7, 0x0c7a37a1, 0x148e59f8, 0x3c89eb13, + 0x27eecea9, 0xc935b761, 0xe5ede11c, 0xb13c7a47, + 0xdf599cd2, 0x733f55f2, 0xce791814, 0x37bf73c7, + 0xcdea53f7, 0xaa5b5ffd, 0x6f14df3d, 0xdb867844, + 0xf381caaf, 0xc43eb968, 0x342c3824, 0x405fc2a3, + 0xc372161d, 0x250cbce2, 0x498b283c, 0x9541ff0d, + 0x017139a8, 0xb3de080c, 0xe49cd8b4, 0xc1906456, + 0x84617bcb, 0xb670d532, 0x5c74486c, 
0x5742d0b8, ] Td3 = [ - 0xf4a75051L, 0x4165537eL, 0x17a4c31aL, 0x275e963aL, - 0xab6bcb3bL, 0x9d45f11fL, 0xfa58abacL, 0xe303934bL, - 0x30fa5520L, 0x766df6adL, 0xcc769188L, 0x024c25f5L, - 0xe5d7fc4fL, 0x2acbd7c5L, 0x35448026L, 0x62a38fb5L, - 0xb15a49deL, 0xba1b6725L, 0xea0e9845L, 0xfec0e15dL, - 0x2f7502c3L, 0x4cf01281L, 0x4697a38dL, 0xd3f9c66bL, - 0x8f5fe703L, 0x929c9515L, 0x6d7aebbfL, 0x5259da95L, - 0xbe832dd4L, 0x7421d358L, 0xe0692949L, 0xc9c8448eL, - 0xc2896a75L, 0x8e7978f4L, 0x583e6b99L, 0xb971dd27L, - 0xe14fb6beL, 0x88ad17f0L, 0x20ac66c9L, 0xce3ab47dL, - 0xdf4a1863L, 0x1a3182e5L, 0x51336097L, 0x537f4562L, - 0x6477e0b1L, 0x6bae84bbL, 0x81a01cfeL, 0x082b94f9L, - 0x48685870L, 0x45fd198fL, 0xde6c8794L, 0x7bf8b752L, - 0x73d323abL, 0x4b02e272L, 0x1f8f57e3L, 0x55ab2a66L, - 0xeb2807b2L, 0xb5c2032fL, 0xc57b9a86L, 0x3708a5d3L, - 0x2887f230L, 0xbfa5b223L, 0x036aba02L, 0x16825cedL, - 0xcf1c2b8aL, 0x79b492a7L, 0x07f2f0f3L, 0x69e2a14eL, - 0xdaf4cd65L, 0x05bed506L, 0x34621fd1L, 0xa6fe8ac4L, - 0x2e539d34L, 0xf355a0a2L, 0x8ae13205L, 0xf6eb75a4L, - 0x83ec390bL, 0x60efaa40L, 0x719f065eL, 0x6e1051bdL, - 0x218af93eL, 0xdd063d96L, 0x3e05aeddL, 0xe6bd464dL, - 0x548db591L, 0xc45d0571L, 0x06d46f04L, 0x5015ff60L, - 0x98fb2419L, 0xbde997d6L, 0x4043cc89L, 0xd99e7767L, - 0xe842bdb0L, 0x898b8807L, 0x195b38e7L, 0xc8eedb79L, - 0x7c0a47a1L, 0x420fe97cL, 0x841ec9f8L, 0x00000000L, - 0x80868309L, 0x2bed4832L, 0x1170ac1eL, 0x5a724e6cL, - 0x0efffbfdL, 0x8538560fL, 0xaed51e3dL, 0x2d392736L, - 0x0fd9640aL, 0x5ca62168L, 0x5b54d19bL, 0x362e3a24L, - 0x0a67b10cL, 0x57e70f93L, 0xee96d2b4L, 0x9b919e1bL, - 0xc0c54f80L, 0xdc20a261L, 0x774b695aL, 0x121a161cL, - 0x93ba0ae2L, 0xa02ae5c0L, 0x22e0433cL, 0x1b171d12L, - 0x090d0b0eL, 0x8bc7adf2L, 0xb6a8b92dL, 0x1ea9c814L, - 0xf1198557L, 0x75074cafL, 0x99ddbbeeL, 0x7f60fda3L, - 0x01269ff7L, 0x72f5bc5cL, 0x663bc544L, 0xfb7e345bL, - 0x4329768bL, 0x23c6dccbL, 0xedfc68b6L, 0xe4f163b8L, - 0x31dccad7L, 0x63851042L, 0x97224013L, 0xc6112084L, - 0x4a247d85L, 0xbb3df8d2L, 
0xf93211aeL, 0x29a16dc7L, - 0x9e2f4b1dL, 0xb230f3dcL, 0x8652ec0dL, 0xc1e3d077L, - 0xb3166c2bL, 0x70b999a9L, 0x9448fa11L, 0xe9642247L, - 0xfc8cc4a8L, 0xf03f1aa0L, 0x7d2cd856L, 0x3390ef22L, - 0x494ec787L, 0x38d1c1d9L, 0xcaa2fe8cL, 0xd40b3698L, - 0xf581cfa6L, 0x7ade28a5L, 0xb78e26daL, 0xadbfa43fL, - 0x3a9de42cL, 0x78920d50L, 0x5fcc9b6aL, 0x7e466254L, - 0x8d13c2f6L, 0xd8b8e890L, 0x39f75e2eL, 0xc3aff582L, - 0x5d80be9fL, 0xd0937c69L, 0xd52da96fL, 0x2512b3cfL, - 0xac993bc8L, 0x187da710L, 0x9c636ee8L, 0x3bbb7bdbL, - 0x267809cdL, 0x5918f46eL, 0x9ab701ecL, 0x4f9aa883L, - 0x956e65e6L, 0xffe67eaaL, 0xbccf0821L, 0x15e8e6efL, - 0xe79bd9baL, 0x6f36ce4aL, 0x9f09d4eaL, 0xb07cd629L, - 0xa4b2af31L, 0x3f23312aL, 0xa59430c6L, 0xa266c035L, - 0x4ebc3774L, 0x82caa6fcL, 0x90d0b0e0L, 0xa7d81533L, - 0x04984af1L, 0xecdaf741L, 0xcd500e7fL, 0x91f62f17L, - 0x4dd68d76L, 0xefb04d43L, 0xaa4d54ccL, 0x9604dfe4L, - 0xd1b5e39eL, 0x6a881b4cL, 0x2c1fb8c1L, 0x65517f46L, - 0x5eea049dL, 0x8c355d01L, 0x877473faL, 0x0b412efbL, - 0x671d5ab3L, 0xdbd25292L, 0x105633e9L, 0xd647136dL, - 0xd7618c9aL, 0xa10c7a37L, 0xf8148e59L, 0x133c89ebL, - 0xa927eeceL, 0x61c935b7L, 0x1ce5ede1L, 0x47b13c7aL, - 0xd2df599cL, 0xf2733f55L, 0x14ce7918L, 0xc737bf73L, - 0xf7cdea53L, 0xfdaa5b5fL, 0x3d6f14dfL, 0x44db8678L, - 0xaff381caL, 0x68c43eb9L, 0x24342c38L, 0xa3405fc2L, - 0x1dc37216L, 0xe2250cbcL, 0x3c498b28L, 0x0d9541ffL, - 0xa8017139L, 0x0cb3de08L, 0xb4e49cd8L, 0x56c19064L, - 0xcb84617bL, 0x32b670d5L, 0x6c5c7448L, 0xb85742d0L, + 0xf4a75051, 0x4165537e, 0x17a4c31a, 0x275e963a, + 0xab6bcb3b, 0x9d45f11f, 0xfa58abac, 0xe303934b, + 0x30fa5520, 0x766df6ad, 0xcc769188, 0x024c25f5, + 0xe5d7fc4f, 0x2acbd7c5, 0x35448026, 0x62a38fb5, + 0xb15a49de, 0xba1b6725, 0xea0e9845, 0xfec0e15d, + 0x2f7502c3, 0x4cf01281, 0x4697a38d, 0xd3f9c66b, + 0x8f5fe703, 0x929c9515, 0x6d7aebbf, 0x5259da95, + 0xbe832dd4, 0x7421d358, 0xe0692949, 0xc9c8448e, + 0xc2896a75, 0x8e7978f4, 0x583e6b99, 0xb971dd27, + 0xe14fb6be, 0x88ad17f0, 0x20ac66c9, 0xce3ab47d, + 0xdf4a1863, 
0x1a3182e5, 0x51336097, 0x537f4562, + 0x6477e0b1, 0x6bae84bb, 0x81a01cfe, 0x082b94f9, + 0x48685870, 0x45fd198f, 0xde6c8794, 0x7bf8b752, + 0x73d323ab, 0x4b02e272, 0x1f8f57e3, 0x55ab2a66, + 0xeb2807b2, 0xb5c2032f, 0xc57b9a86, 0x3708a5d3, + 0x2887f230, 0xbfa5b223, 0x036aba02, 0x16825ced, + 0xcf1c2b8a, 0x79b492a7, 0x07f2f0f3, 0x69e2a14e, + 0xdaf4cd65, 0x05bed506, 0x34621fd1, 0xa6fe8ac4, + 0x2e539d34, 0xf355a0a2, 0x8ae13205, 0xf6eb75a4, + 0x83ec390b, 0x60efaa40, 0x719f065e, 0x6e1051bd, + 0x218af93e, 0xdd063d96, 0x3e05aedd, 0xe6bd464d, + 0x548db591, 0xc45d0571, 0x06d46f04, 0x5015ff60, + 0x98fb2419, 0xbde997d6, 0x4043cc89, 0xd99e7767, + 0xe842bdb0, 0x898b8807, 0x195b38e7, 0xc8eedb79, + 0x7c0a47a1, 0x420fe97c, 0x841ec9f8, 0x00000000, + 0x80868309, 0x2bed4832, 0x1170ac1e, 0x5a724e6c, + 0x0efffbfd, 0x8538560f, 0xaed51e3d, 0x2d392736, + 0x0fd9640a, 0x5ca62168, 0x5b54d19b, 0x362e3a24, + 0x0a67b10c, 0x57e70f93, 0xee96d2b4, 0x9b919e1b, + 0xc0c54f80, 0xdc20a261, 0x774b695a, 0x121a161c, + 0x93ba0ae2, 0xa02ae5c0, 0x22e0433c, 0x1b171d12, + 0x090d0b0e, 0x8bc7adf2, 0xb6a8b92d, 0x1ea9c814, + 0xf1198557, 0x75074caf, 0x99ddbbee, 0x7f60fda3, + 0x01269ff7, 0x72f5bc5c, 0x663bc544, 0xfb7e345b, + 0x4329768b, 0x23c6dccb, 0xedfc68b6, 0xe4f163b8, + 0x31dccad7, 0x63851042, 0x97224013, 0xc6112084, + 0x4a247d85, 0xbb3df8d2, 0xf93211ae, 0x29a16dc7, + 0x9e2f4b1d, 0xb230f3dc, 0x8652ec0d, 0xc1e3d077, + 0xb3166c2b, 0x70b999a9, 0x9448fa11, 0xe9642247, + 0xfc8cc4a8, 0xf03f1aa0, 0x7d2cd856, 0x3390ef22, + 0x494ec787, 0x38d1c1d9, 0xcaa2fe8c, 0xd40b3698, + 0xf581cfa6, 0x7ade28a5, 0xb78e26da, 0xadbfa43f, + 0x3a9de42c, 0x78920d50, 0x5fcc9b6a, 0x7e466254, + 0x8d13c2f6, 0xd8b8e890, 0x39f75e2e, 0xc3aff582, + 0x5d80be9f, 0xd0937c69, 0xd52da96f, 0x2512b3cf, + 0xac993bc8, 0x187da710, 0x9c636ee8, 0x3bbb7bdb, + 0x267809cd, 0x5918f46e, 0x9ab701ec, 0x4f9aa883, + 0x956e65e6, 0xffe67eaa, 0xbccf0821, 0x15e8e6ef, + 0xe79bd9ba, 0x6f36ce4a, 0x9f09d4ea, 0xb07cd629, + 0xa4b2af31, 0x3f23312a, 0xa59430c6, 0xa266c035, + 0x4ebc3774, 
0x82caa6fc, 0x90d0b0e0, 0xa7d81533, + 0x04984af1, 0xecdaf741, 0xcd500e7f, 0x91f62f17, + 0x4dd68d76, 0xefb04d43, 0xaa4d54cc, 0x9604dfe4, + 0xd1b5e39e, 0x6a881b4c, 0x2c1fb8c1, 0x65517f46, + 0x5eea049d, 0x8c355d01, 0x877473fa, 0x0b412efb, + 0x671d5ab3, 0xdbd25292, 0x105633e9, 0xd647136d, + 0xd7618c9a, 0xa10c7a37, 0xf8148e59, 0x133c89eb, + 0xa927eece, 0x61c935b7, 0x1ce5ede1, 0x47b13c7a, + 0xd2df599c, 0xf2733f55, 0x14ce7918, 0xc737bf73, + 0xf7cdea53, 0xfdaa5b5f, 0x3d6f14df, 0x44db8678, + 0xaff381ca, 0x68c43eb9, 0x24342c38, 0xa3405fc2, + 0x1dc37216, 0xe2250cbc, 0x3c498b28, 0x0d9541ff, + 0xa8017139, 0x0cb3de08, 0xb4e49cd8, 0x56c19064, + 0xcb84617b, 0x32b670d5, 0x6c5c7448, 0xb85742d0, ] Td4 = [ - 0x52525252L, 0x09090909L, 0x6a6a6a6aL, 0xd5d5d5d5L, - 0x30303030L, 0x36363636L, 0xa5a5a5a5L, 0x38383838L, - 0xbfbfbfbfL, 0x40404040L, 0xa3a3a3a3L, 0x9e9e9e9eL, - 0x81818181L, 0xf3f3f3f3L, 0xd7d7d7d7L, 0xfbfbfbfbL, - 0x7c7c7c7cL, 0xe3e3e3e3L, 0x39393939L, 0x82828282L, - 0x9b9b9b9bL, 0x2f2f2f2fL, 0xffffffffL, 0x87878787L, - 0x34343434L, 0x8e8e8e8eL, 0x43434343L, 0x44444444L, - 0xc4c4c4c4L, 0xdedededeL, 0xe9e9e9e9L, 0xcbcbcbcbL, - 0x54545454L, 0x7b7b7b7bL, 0x94949494L, 0x32323232L, - 0xa6a6a6a6L, 0xc2c2c2c2L, 0x23232323L, 0x3d3d3d3dL, - 0xeeeeeeeeL, 0x4c4c4c4cL, 0x95959595L, 0x0b0b0b0bL, - 0x42424242L, 0xfafafafaL, 0xc3c3c3c3L, 0x4e4e4e4eL, - 0x08080808L, 0x2e2e2e2eL, 0xa1a1a1a1L, 0x66666666L, - 0x28282828L, 0xd9d9d9d9L, 0x24242424L, 0xb2b2b2b2L, - 0x76767676L, 0x5b5b5b5bL, 0xa2a2a2a2L, 0x49494949L, - 0x6d6d6d6dL, 0x8b8b8b8bL, 0xd1d1d1d1L, 0x25252525L, - 0x72727272L, 0xf8f8f8f8L, 0xf6f6f6f6L, 0x64646464L, - 0x86868686L, 0x68686868L, 0x98989898L, 0x16161616L, - 0xd4d4d4d4L, 0xa4a4a4a4L, 0x5c5c5c5cL, 0xccccccccL, - 0x5d5d5d5dL, 0x65656565L, 0xb6b6b6b6L, 0x92929292L, - 0x6c6c6c6cL, 0x70707070L, 0x48484848L, 0x50505050L, - 0xfdfdfdfdL, 0xededededL, 0xb9b9b9b9L, 0xdadadadaL, - 0x5e5e5e5eL, 0x15151515L, 0x46464646L, 0x57575757L, - 0xa7a7a7a7L, 0x8d8d8d8dL, 0x9d9d9d9dL, 0x84848484L, - 
0x90909090L, 0xd8d8d8d8L, 0xababababL, 0x00000000L, - 0x8c8c8c8cL, 0xbcbcbcbcL, 0xd3d3d3d3L, 0x0a0a0a0aL, - 0xf7f7f7f7L, 0xe4e4e4e4L, 0x58585858L, 0x05050505L, - 0xb8b8b8b8L, 0xb3b3b3b3L, 0x45454545L, 0x06060606L, - 0xd0d0d0d0L, 0x2c2c2c2cL, 0x1e1e1e1eL, 0x8f8f8f8fL, - 0xcacacacaL, 0x3f3f3f3fL, 0x0f0f0f0fL, 0x02020202L, - 0xc1c1c1c1L, 0xafafafafL, 0xbdbdbdbdL, 0x03030303L, - 0x01010101L, 0x13131313L, 0x8a8a8a8aL, 0x6b6b6b6bL, - 0x3a3a3a3aL, 0x91919191L, 0x11111111L, 0x41414141L, - 0x4f4f4f4fL, 0x67676767L, 0xdcdcdcdcL, 0xeaeaeaeaL, - 0x97979797L, 0xf2f2f2f2L, 0xcfcfcfcfL, 0xcecececeL, - 0xf0f0f0f0L, 0xb4b4b4b4L, 0xe6e6e6e6L, 0x73737373L, - 0x96969696L, 0xacacacacL, 0x74747474L, 0x22222222L, - 0xe7e7e7e7L, 0xadadadadL, 0x35353535L, 0x85858585L, - 0xe2e2e2e2L, 0xf9f9f9f9L, 0x37373737L, 0xe8e8e8e8L, - 0x1c1c1c1cL, 0x75757575L, 0xdfdfdfdfL, 0x6e6e6e6eL, - 0x47474747L, 0xf1f1f1f1L, 0x1a1a1a1aL, 0x71717171L, - 0x1d1d1d1dL, 0x29292929L, 0xc5c5c5c5L, 0x89898989L, - 0x6f6f6f6fL, 0xb7b7b7b7L, 0x62626262L, 0x0e0e0e0eL, - 0xaaaaaaaaL, 0x18181818L, 0xbebebebeL, 0x1b1b1b1bL, - 0xfcfcfcfcL, 0x56565656L, 0x3e3e3e3eL, 0x4b4b4b4bL, - 0xc6c6c6c6L, 0xd2d2d2d2L, 0x79797979L, 0x20202020L, - 0x9a9a9a9aL, 0xdbdbdbdbL, 0xc0c0c0c0L, 0xfefefefeL, - 0x78787878L, 0xcdcdcdcdL, 0x5a5a5a5aL, 0xf4f4f4f4L, - 0x1f1f1f1fL, 0xddddddddL, 0xa8a8a8a8L, 0x33333333L, - 0x88888888L, 0x07070707L, 0xc7c7c7c7L, 0x31313131L, - 0xb1b1b1b1L, 0x12121212L, 0x10101010L, 0x59595959L, - 0x27272727L, 0x80808080L, 0xececececL, 0x5f5f5f5fL, - 0x60606060L, 0x51515151L, 0x7f7f7f7fL, 0xa9a9a9a9L, - 0x19191919L, 0xb5b5b5b5L, 0x4a4a4a4aL, 0x0d0d0d0dL, - 0x2d2d2d2dL, 0xe5e5e5e5L, 0x7a7a7a7aL, 0x9f9f9f9fL, - 0x93939393L, 0xc9c9c9c9L, 0x9c9c9c9cL, 0xefefefefL, - 0xa0a0a0a0L, 0xe0e0e0e0L, 0x3b3b3b3bL, 0x4d4d4d4dL, - 0xaeaeaeaeL, 0x2a2a2a2aL, 0xf5f5f5f5L, 0xb0b0b0b0L, - 0xc8c8c8c8L, 0xebebebebL, 0xbbbbbbbbL, 0x3c3c3c3cL, - 0x83838383L, 0x53535353L, 0x99999999L, 0x61616161L, - 0x17171717L, 0x2b2b2b2bL, 0x04040404L, 0x7e7e7e7eL, - 
0xbabababaL, 0x77777777L, 0xd6d6d6d6L, 0x26262626L, - 0xe1e1e1e1L, 0x69696969L, 0x14141414L, 0x63636363L, - 0x55555555L, 0x21212121L, 0x0c0c0c0cL, 0x7d7d7d7dL, + 0x52525252, 0x09090909, 0x6a6a6a6a, 0xd5d5d5d5, + 0x30303030, 0x36363636, 0xa5a5a5a5, 0x38383838, + 0xbfbfbfbf, 0x40404040, 0xa3a3a3a3, 0x9e9e9e9e, + 0x81818181, 0xf3f3f3f3, 0xd7d7d7d7, 0xfbfbfbfb, + 0x7c7c7c7c, 0xe3e3e3e3, 0x39393939, 0x82828282, + 0x9b9b9b9b, 0x2f2f2f2f, 0xffffffff, 0x87878787, + 0x34343434, 0x8e8e8e8e, 0x43434343, 0x44444444, + 0xc4c4c4c4, 0xdededede, 0xe9e9e9e9, 0xcbcbcbcb, + 0x54545454, 0x7b7b7b7b, 0x94949494, 0x32323232, + 0xa6a6a6a6, 0xc2c2c2c2, 0x23232323, 0x3d3d3d3d, + 0xeeeeeeee, 0x4c4c4c4c, 0x95959595, 0x0b0b0b0b, + 0x42424242, 0xfafafafa, 0xc3c3c3c3, 0x4e4e4e4e, + 0x08080808, 0x2e2e2e2e, 0xa1a1a1a1, 0x66666666, + 0x28282828, 0xd9d9d9d9, 0x24242424, 0xb2b2b2b2, + 0x76767676, 0x5b5b5b5b, 0xa2a2a2a2, 0x49494949, + 0x6d6d6d6d, 0x8b8b8b8b, 0xd1d1d1d1, 0x25252525, + 0x72727272, 0xf8f8f8f8, 0xf6f6f6f6, 0x64646464, + 0x86868686, 0x68686868, 0x98989898, 0x16161616, + 0xd4d4d4d4, 0xa4a4a4a4, 0x5c5c5c5c, 0xcccccccc, + 0x5d5d5d5d, 0x65656565, 0xb6b6b6b6, 0x92929292, + 0x6c6c6c6c, 0x70707070, 0x48484848, 0x50505050, + 0xfdfdfdfd, 0xedededed, 0xb9b9b9b9, 0xdadadada, + 0x5e5e5e5e, 0x15151515, 0x46464646, 0x57575757, + 0xa7a7a7a7, 0x8d8d8d8d, 0x9d9d9d9d, 0x84848484, + 0x90909090, 0xd8d8d8d8, 0xabababab, 0x00000000, + 0x8c8c8c8c, 0xbcbcbcbc, 0xd3d3d3d3, 0x0a0a0a0a, + 0xf7f7f7f7, 0xe4e4e4e4, 0x58585858, 0x05050505, + 0xb8b8b8b8, 0xb3b3b3b3, 0x45454545, 0x06060606, + 0xd0d0d0d0, 0x2c2c2c2c, 0x1e1e1e1e, 0x8f8f8f8f, + 0xcacacaca, 0x3f3f3f3f, 0x0f0f0f0f, 0x02020202, + 0xc1c1c1c1, 0xafafafaf, 0xbdbdbdbd, 0x03030303, + 0x01010101, 0x13131313, 0x8a8a8a8a, 0x6b6b6b6b, + 0x3a3a3a3a, 0x91919191, 0x11111111, 0x41414141, + 0x4f4f4f4f, 0x67676767, 0xdcdcdcdc, 0xeaeaeaea, + 0x97979797, 0xf2f2f2f2, 0xcfcfcfcf, 0xcececece, + 0xf0f0f0f0, 0xb4b4b4b4, 0xe6e6e6e6, 0x73737373, + 0x96969696, 0xacacacac, 0x74747474, 
0x22222222, + 0xe7e7e7e7, 0xadadadad, 0x35353535, 0x85858585, + 0xe2e2e2e2, 0xf9f9f9f9, 0x37373737, 0xe8e8e8e8, + 0x1c1c1c1c, 0x75757575, 0xdfdfdfdf, 0x6e6e6e6e, + 0x47474747, 0xf1f1f1f1, 0x1a1a1a1a, 0x71717171, + 0x1d1d1d1d, 0x29292929, 0xc5c5c5c5, 0x89898989, + 0x6f6f6f6f, 0xb7b7b7b7, 0x62626262, 0x0e0e0e0e, + 0xaaaaaaaa, 0x18181818, 0xbebebebe, 0x1b1b1b1b, + 0xfcfcfcfc, 0x56565656, 0x3e3e3e3e, 0x4b4b4b4b, + 0xc6c6c6c6, 0xd2d2d2d2, 0x79797979, 0x20202020, + 0x9a9a9a9a, 0xdbdbdbdb, 0xc0c0c0c0, 0xfefefefe, + 0x78787878, 0xcdcdcdcd, 0x5a5a5a5a, 0xf4f4f4f4, + 0x1f1f1f1f, 0xdddddddd, 0xa8a8a8a8, 0x33333333, + 0x88888888, 0x07070707, 0xc7c7c7c7, 0x31313131, + 0xb1b1b1b1, 0x12121212, 0x10101010, 0x59595959, + 0x27272727, 0x80808080, 0xecececec, 0x5f5f5f5f, + 0x60606060, 0x51515151, 0x7f7f7f7f, 0xa9a9a9a9, + 0x19191919, 0xb5b5b5b5, 0x4a4a4a4a, 0x0d0d0d0d, + 0x2d2d2d2d, 0xe5e5e5e5, 0x7a7a7a7a, 0x9f9f9f9f, + 0x93939393, 0xc9c9c9c9, 0x9c9c9c9c, 0xefefefef, + 0xa0a0a0a0, 0xe0e0e0e0, 0x3b3b3b3b, 0x4d4d4d4d, + 0xaeaeaeae, 0x2a2a2a2a, 0xf5f5f5f5, 0xb0b0b0b0, + 0xc8c8c8c8, 0xebebebeb, 0xbbbbbbbb, 0x3c3c3c3c, + 0x83838383, 0x53535353, 0x99999999, 0x61616161, + 0x17171717, 0x2b2b2b2b, 0x04040404, 0x7e7e7e7e, + 0xbabababa, 0x77777777, 0xd6d6d6d6, 0x26262626, + 0xe1e1e1e1, 0x69696969, 0x14141414, 0x63636363, + 0x55555555, 0x21212121, 0x0c0c0c0c, 0x7d7d7d7d, ] rcon = [ @@ -806,7 +806,7 @@ def rijndaelSetupDecrypt(key, keybits): j -= 4 # apply the inverse MixColumn transform to all round keys but the first and the last: p = 0 - for i in xrange(1, nrounds): + for i in range(1, nrounds): p += 4 rk[p+0] = ( Td0[Te4[(rk[p+0] >> 24) ] & 0xff] ^ @@ -833,7 +833,7 @@ def rijndaelSetupDecrypt(key, keybits): def rijndaelEncrypt(rk, nrounds, plaintext): - assert len(plaintext) == 16 + assert len(plaintext) == 16, str(len(plaintext)) # map byte array block to cipher state # and add initial round key: @@ -931,12 +931,12 @@ def rijndaelEncrypt(rk, nrounds, plaintext): rk[p+3]) ciphertext += 
PUTU32(s3) - assert len(ciphertext) == 16 + assert len(ciphertext) == 16, str(len(ciphertext)) return ciphertext def rijndaelDecrypt(rk, nrounds, ciphertext): - assert len(ciphertext) == 16 + assert len(ciphertext) == 16, str(len(ciphertext)) # map byte array block to cipher state # and add initial round key: @@ -1034,7 +1034,7 @@ def rijndaelDecrypt(rk, nrounds, ciphertext): rk[p+3]) plaintext += PUTU32(s3) - assert len(plaintext) == 16 + assert len(plaintext) == 16, str(len(plaintext)) return plaintext @@ -1049,39 +1049,27 @@ class RijndaelDecryptor(object): """ def __init__(self, key, keybits=256): - assert len(key) == KEYLENGTH(keybits) + assert len(key) == KEYLENGTH(keybits), str((len(key), KEYLENGTH(keybits))) (self.rk, self.nrounds) = rijndaelSetupDecrypt(key, keybits) - assert len(self.rk) == RKLENGTH(keybits) - assert self.nrounds == NROUNDS(keybits) + assert len(self.rk) == RKLENGTH(keybits), str((len(self.rk), RKLENGTH(keybits))) + assert self.nrounds == NROUNDS(keybits), str((self.nrounds, NROUNDS(keybits))) return def decrypt(self, ciphertext): - assert len(ciphertext) == 16 + assert len(ciphertext) == 16, str(len(ciphertext)) return rijndaelDecrypt(self.rk, self.nrounds, ciphertext) # encrypt(key, fin, fout, keybits=256) class RijndaelEncryptor(object): - """ - >>> key = b'00010203050607080a0b0c0d0f101112'.decode('hex') - >>> plaintext = b'506812a45f08c889b97f5980038b8359'.decode('hex') - >>> RijndaelEncryptor(key, 128).encrypt(plaintext).encode('hex') - 'd8f532538289ef7d06b506a4fd5be9c9' - """ - def __init__(self, key, keybits=256): - assert len(key) == KEYLENGTH(keybits) + assert len(key) == KEYLENGTH(keybits), str((len(key), KEYLENGTH(keybits))) (self.rk, self.nrounds) = rijndaelSetupEncrypt(key, keybits) - assert len(self.rk) == RKLENGTH(keybits) - assert self.nrounds == NROUNDS(keybits) + assert len(self.rk) == RKLENGTH(keybits), str((len(self.rk), RKLENGTH(keybits))) + assert self.nrounds == NROUNDS(keybits), str((self.nrounds, 
NROUNDS(keybits))) return def encrypt(self, plaintext): - assert len(plaintext) == 16 + assert len(plaintext) == 16, str(len(plaintext)) return rijndaelEncrypt(self.rk, self.nrounds, plaintext) - - -if __name__ == '__main__': - import doctest - doctest.testmod() diff --git a/pdfminer/runlength.py b/pdfminer/runlength.py index ba7b7421..54bc7691 100644 --- a/pdfminer/runlength.py +++ b/pdfminer/runlength.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python + # # RunLength decoder (Adobe version) implementation based on PDF Reference # version 1.4 section 3.3.4. @@ -6,6 +6,8 @@ # * public domain * # +import six #Python 2+3 compatibility + def rldecode(data): """ RunLength decoder (Adobe version) implementation based on PDF Reference @@ -19,30 +21,24 @@ def rldecode(data): 129 to 255, the following single byte is to be copied 257 - length (2 to 128) times during decompression. A length value of 128 denotes EOD. - >>> s = b'\x05123456\xfa7\x04abcde\x80junk' - >>> rldecode(s) - '1234567777777abcde' """ - decoded = [] + decoded = b'' i = 0 while i < len(data): #print 'data[%d]=:%d:' % (i,ord(data[i])) - length = ord(data[i]) + length = six.indexbytes(data,i) if length == 128: break if length >= 0 and length < 128: - run = data[i+1:(i+1)+(length+1)] + for j in range(i+1,(i+1)+(length+1)): + decoded+=six.int2byte(six.indexbytes(data,j)) #print 'length=%d, run=%s' % (length+1,run) - decoded.append(run) + i = (i+1) + (length+1) if length > 128: - run = data[i+1]*(257-length) + run = six.int2byte(six.indexbytes(data,i+1))*(257-length) #print 'length=%d, run=%s' % (257-length,run) - decoded.append(run) + decoded+=run i = (i+1) + 1 - return b''.join(decoded) - + return decoded -if __name__ == '__main__': - import doctest - doctest.testmod() diff --git a/pdfminer/settings.py b/pdfminer/settings.py new file mode 100644 index 00000000..2dd99c05 --- /dev/null +++ b/pdfminer/settings.py @@ -0,0 +1,8 @@ +STRICT = False + +try: + from django.conf import settings + STRICT = getattr(settings, 
'PDF_MINER_IS_STRICT', STRICT) +except Exception: + # in case it's not a django project + pass diff --git a/pdfminer/utils.py b/pdfminer/utils.py index 307c5e79..339759a1 100644 --- a/pdfminer/utils.py +++ b/pdfminer/utils.py @@ -1,49 +1,91 @@ -#!/usr/bin/env python + """ Miscellaneous Routines. """ import struct -from sys import maxint as INF +# from sys import maxint as INF #doesn't work anymore under Python3, +# but PDF still uses 32 bits ints +INF = (1<<31) - 1 + +import six #Python 2+3 compatibility +if six.PY3: + import chardet # For str encoding detection in Py3 + unicode = str + +def make_compat_bytes(in_str): + "In Py2, does nothing. In Py3, converts to bytes, encoding to unicode." + assert isinstance(in_str, str), str(type(in_str)) + if six.PY2: + return in_str + else: + return in_str.encode() + +def make_compat_str(in_str): + "In Py2, does nothing. In Py3, converts to string, guessing encoding." + assert isinstance(in_str, (bytes, str, unicode)), str(type(in_str)) + if six.PY3 and isinstance(in_str, bytes): + enc = chardet.detect(in_str) + in_str = in_str.decode(enc['encoding']) + return in_str + +def compatible_encode_method(bytesorstring, encoding='utf-8', erraction='ignore'): + "When Py2 str.encode is called, it often means bytes.encode in Py3. This does either." 
+ if six.PY2: + assert isinstance(bytesorstring, (str, unicode)), str(type(bytesorstring)) + return bytesorstring.encode(encoding, erraction) + if six.PY3: + if isinstance(bytesorstring, str): return bytesorstring + assert isinstance(bytesorstring, bytes), str(type(bytesorstring)) + return bytesorstring.decode(encoding, erraction) ## PNG Predictor ## def apply_png_predictor(pred, colors, columns, bitspercomponent, data): if bitspercomponent != 8: # unsupported - raise ValueError("Unsupported `bitspercomponent': %d"%bitspercomponent) - nbytes = colors*columns*bitspercomponent//8 + raise ValueError("Unsupported `bitspercomponent': %d" % + bitspercomponent) + nbytes = colors * columns * bitspercomponent // 8 i = 0 buf = b'' line0 = b'\x00' * columns - for i in xrange(0, len(data), nbytes+1): + for i in range(0, len(data), nbytes+1): ft = data[i] + if six.PY2: + ft = six.byte2int(ft) i += 1 line1 = data[i:i+nbytes] line2 = b'' - if ft == b'\x00': + if ft == 0: # PNG none line2 += line1 - elif ft == b'\x01': + elif ft == 1: # PNG sub (UNTESTED) c = 0 for b in line1: - c = (c+ord(b)) & 255 - line2 += chr(c) - elif ft == b'\x02': + if six.PY2: + b = six.byte2int(b) + c = (c+b) & 255 + line2 += six.int2byte(c) + elif ft == 2: # PNG up for (a, b) in zip(line0, line1): - c = (ord(a)+ord(b)) & 255 - line2 += chr(c) - elif ft == b'\x03': + if six.PY2: + a, b = six.byte2int(a), six.byte2int(b) + c = (a+b) & 255 + line2 += six.int2byte(c) + elif ft == 3: # PNG average (UNTESTED) c = 0 for (a, b) in zip(line0, line1): - c = ((c+ord(a)+ord(b))//2) & 255 - line2 += chr(c) + if six.PY2: + a, b = six.byte2int(a), six.byte2int(b) + c = ((c+a+b)//2) & 255 + line2 += six.int2byte(c) else: # unsupported - raise ValueError("Unsupported predictor value: %d"%ft) + raise ValueError("Unsupported predictor value: %d" % ft) buf += line2 line0 = line2 return buf @@ -89,7 +131,7 @@ def apply_matrix_norm(m, v): # isnumber def isnumber(x): - return isinstance(x, (int, long, float)) + return 
isinstance(x, (six.integer_types, float)) # uniq def uniq(objs): @@ -126,8 +168,8 @@ def fsplit(pred, objs): # drange def drange(v0, v1, d): """Returns a discrete range.""" - assert v0 < v1 - return xrange(int(v0)//d, int(v1+d)//d) + assert v0 < v1, str((v0, v1, d)) + return range(int(v0)//d, int(v1+d)//d) # get_bound @@ -167,7 +209,7 @@ def choplist(n, seq): # nunpack def nunpack(s, default=0): - """Unpacks 1 to 4 byte integers (big endian).""" + """Unpacks 1 to 4 or 8 byte integers (big endian).""" l = len(s) if not l: return default @@ -179,12 +221,14 @@ def nunpack(s, default=0): return struct.unpack('>L', b'\x00'+s)[0] elif l == 4: return struct.unpack('>L', s)[0] + elif l == 8: + return struct.unpack('>Q', s)[0] else: raise TypeError('invalid length: %d' % l) # decode_text -PDFDocEncoding = ''.join(unichr(x) for x in ( +PDFDocEncoding = ''.join(six.unichr(x) for x in ( 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f, 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0017, 0x0017, @@ -223,16 +267,20 @@ def nunpack(s, default=0): def decode_text(s): """Decodes a PDFDocEncoding string to Unicode.""" if s.startswith(b'\xfe\xff'): - return unicode(s[2:], 'utf-16be', 'ignore') + return six.text_type(s[2:], 'utf-16be', 'ignore') else: - return ''.join(PDFDocEncoding[ord(c)] for c in s) + return ''.join(PDFDocEncoding[c] for c in s) # enc def enc(x, codec='ascii'): """Encodes a string for SGML/XML/HTML""" + if isinstance(x, bytes): + return '' x = x.replace('&', '&amp;').replace('>', '&gt;').replace('<', '&lt;').replace('"', '&quot;') - return x.encode(codec, 'xmlcharrefreplace') + if codec: + x = x.encode(codec, 'xmlcharrefreplace') + return x def bbox2str(bbox): diff --git a/samples/Makefile b/samples/Makefile index ed848651..4d76cb6c 100644 --- a/samples/Makefile +++ b/samples/Makefile @@ -4,8 +4,10 @@ RM=rm -f CMP=: ECHO=echo PYTHON=python2 +PYTHON3=python3 PDF2TXT=PYTHONPATH=..
$(PYTHON) ../tools/pdf2txt.py -p1 -V +PDF2TXT3=PYTHONPATH=.. $(PYTHON3) ../tools/pdf2txt.py -p1 -V FREE= \ simple1 \ @@ -45,6 +47,12 @@ tests: $(CMP) $$i.xml $$i.xml.ref || exit 1; \ $(PDF2TXT) -t text -o $$i.txt $$i.pdf || exit 1; \ $(CMP) $$i.txt $$i.txt.ref || exit 1; \ + $(PDF2TXT3) -t html -o $$i.html $$i.pdf || exit 1; \ + $(CMP) $$i.html $$i.html.ref || exit 1; \ + $(PDF2TXT3) -t xml -o $$i.xml $$i.pdf || exit 1; \ + $(CMP) $$i.xml $$i.xml.ref || exit 1; \ + $(PDF2TXT3) -t text -o $$i.txt $$i.pdf || exit 1; \ + $(CMP) $$i.txt $$i.txt.ref || exit 1; \ done crypts: @@ -54,6 +62,10 @@ crypts: $(CMP) $$i.1.xml $(CRYPT_BASE).xml || exit 1; \ $(PDF2TXT) -t xml -P $(CRYPT_PASS2) -o $$i.2.xml $$i.pdf || exit 1; \ $(CMP) $$i.2.xml $(CRYPT_BASE).xml || exit 1; \ + $(PDF2TXT3) -t xml -P $(CRYPT_PASS1) -o $$i.1.xml $$i.pdf || exit 1; \ + $(CMP) $$i.1.xml $(CRYPT_BASE).xml || exit 1; \ + $(PDF2TXT3) -t xml -P $(CRYPT_PASS2) -o $$i.2.xml $$i.pdf || exit 1; \ + $(CMP) $$i.2.xml $(CRYPT_BASE).xml || exit 1; \ done test: diff --git a/samples/contrib/2b.pdf b/samples/contrib/2b.pdf new file mode 100644 index 00000000..6a806760 Binary files /dev/null and b/samples/contrib/2b.pdf differ diff --git a/samples/contrib/stamp-no.pdf b/samples/contrib/stamp-no.pdf new file mode 100644 index 00000000..2dfb7a15 Binary files /dev/null and b/samples/contrib/stamp-no.pdf differ diff --git a/samples/nonfree/175.pdf b/samples/nonfree/175.pdf new file mode 100644 index 00000000..d5df9230 Binary files /dev/null and b/samples/nonfree/175.pdf differ diff --git a/setup.py b/setup.py index 51779e78..8eab391c 100644 --- a/setup.py +++ b/setup.py @@ -1,44 +1,47 @@ -#!/usr/bin/env python -from distutils.core import setup -from pdfminer import __version__ +from setuptools import setup +import sys + +import pdfminer as package + +requires = ['six', 'pycryptodome'] +if sys.version_info >= (3, 0): + requires.append('chardet') setup( - name='pdfminer', - version=__version__, + name='pdfminer.six', + 
version=package.__version__, + packages=['pdfminer'], + package_data={'pdfminer': ['cmap/*.pickle.gz']}, + install_requires=requires, description='PDF parser and analyzer', - long_description='''PDFMiner is a tool for extracting information from PDF documents. -Unlike other PDF-related tools, it focuses entirely on getting -and analyzing text data. PDFMiner allows to obtain -the exact location of texts in a page, as well as -other information such as fonts or lines. -It includes a PDF converter that can transform PDF files -into other text formats (such as HTML). It has an extensible -PDF parser that can be used for other purposes instead of text analysis.''', + long_description=package.__doc__, license='MIT/X', - author='Yusuke Shinyama', - author_email='yusuke at cs dot nyu dot edu', - url='http://euske.github.io/pdfminer/index.html', - install_requires=[ - 'pycrypto', - ], - packages=[ - 'pdfminer', - ], - package_data={ - 'pdfminer': ['cmap/*.pickle.gz'] - }, + author='Yusuke Shinyama + Philippe Guglielmetti', + author_email='pdfminer@goulu.net', + url='https://github.com/pdfminer/pdfminer.six', scripts=[ - 'tools/pdf2txt.py', - 'tools/dumppdf.py', - 'tools/latin2ascii.py', + 'tools/pdf2txt.py', + 'tools/dumppdf.py', + 'tools/latin2ascii.py', + ], + keywords=[ + 'pdf parser', + 'pdf converter', + 'layout analysis', + 'text mining', ], - keywords=['pdf parser', 'pdf converter', 'layout analysis', 'text mining'], classifiers=[ - 'Development Status :: 4 - Beta', - 'Environment :: Console', - 'Intended Audience :: Developers', - 'Intended Audience :: Science/Research', - 'License :: OSI Approved :: MIT License', - 'Topic :: Text Processing', + 'Programming Language :: Python', + 'Programming Language :: Python :: 2.6', + 'Programming Language :: Python :: 2.7', + 'Programming Language :: Python :: 3.4', + 'Programming Language :: Python :: 3.5', + 'Programming Language :: Python :: 3.6', + 'Development Status :: 5 - Production/Stable', + 'Environment :: Console', 
+ 'Intended Audience :: Developers', + 'Intended Audience :: Science/Research', + 'License :: OSI Approved :: MIT License', + 'Topic :: Text Processing', ], - ) +) diff --git a/tests/test_pdfminer_ccitt.py b/tests/test_pdfminer_ccitt.py new file mode 100644 index 00000000..9f27833e --- /dev/null +++ b/tests/test_pdfminer_ccitt.py @@ -0,0 +1,168 @@ +#!/usr/bin/env python + +# -*- coding: utf-8 -*- + +from nose.tools import assert_equal, assert_true, assert_false +from nose import SkipTest +import nose + +import logging + +from pdfminer.ccitt import * + +## Test cases +## +class TestCCITTG4Parser(): + + def get_parser(self, bits): + parser = CCITTG4Parser(len(bits)) + parser._curline = [int(c) for c in bits] + parser._reset_line() + return parser + + def test_b1(self): + parser = self.get_parser('00000') + parser._do_vertical(0) + assert_equal(parser._curpos, 0) + return + + def test_b2(self): + parser = self.get_parser('10000') + parser._do_vertical(-1) + assert_equal(parser._curpos, 0) + return + + def test_b3(self): + parser = self.get_parser('000111') + parser._do_pass() + assert_equal(parser._curpos, 3) + assert_equal(parser._get_bits(), '111') + return + + def test_b4(self): + parser = self.get_parser('00000') + parser._do_vertical(+2) + assert_equal(parser._curpos, 2) + assert_equal(parser._get_bits(), '11') + return + + def test_b5(self): + parser = self.get_parser('11111111100') + parser._do_horizontal(0, 3) + assert_equal(parser._curpos, 3) + parser._do_vertical(1) + assert_equal(parser._curpos, 10) + assert_equal(parser._get_bits(), '0001111111') + return + + def test_e1(self): + parser = self.get_parser('10000') + parser._do_vertical(0) + assert_equal(parser._curpos, 1) + parser._do_vertical(0) + assert_equal(parser._curpos, 5) + assert_equal(parser._get_bits(), '10000') + return + + def test_e2(self): + parser = self.get_parser('10011') + parser._do_vertical(0) + assert_equal(parser._curpos, 1) + parser._do_vertical(2) + assert_equal(parser._curpos, 5) + 
assert_equal(parser._get_bits(), '10000') + return + + def test_e3(self): + parser = self.get_parser('011111') + parser._color = 0 + parser._do_vertical(0) + assert_equal(parser._color, 1) + assert_equal(parser._curpos, 1) + parser._do_vertical(-2) + assert_equal(parser._color, 0) + assert_equal(parser._curpos, 4) + parser._do_vertical(0) + assert_equal(parser._curpos, 6) + assert_equal(parser._get_bits(), '011100') + return + + def test_e4(self): + parser = self.get_parser('10000') + parser._do_vertical(0) + assert_equal(parser._curpos, 1) + parser._do_vertical(-2) + assert_equal(parser._curpos, 3) + parser._do_vertical(0) + assert_equal(parser._curpos, 5) + assert_equal(parser._get_bits(), '10011') + return + + def test_e5(self): + parser = self.get_parser('011000') + parser._color = 0 + parser._do_vertical(0) + assert_equal(parser._curpos, 1) + parser._do_vertical(3) + assert_equal(parser._curpos, 6) + assert_equal(parser._get_bits(), '011111') + return + + def test_e6(self): + parser = self.get_parser('11001') + parser._do_pass() + assert_equal(parser._curpos, 4) + parser._do_vertical(0) + assert_equal(parser._curpos, 5) + assert_equal(parser._get_bits(), '11111') + return + + def test_e7(self): + parser = self.get_parser('0000000000') + parser._curpos = 2 + parser._color = 1 + parser._do_horizontal(2, 6) + assert_equal(parser._curpos, 10) + assert_equal(parser._get_bits(), '1111000000') + return + + def test_e8(self): + parser = self.get_parser('001100000') + parser._curpos = 1 + parser._color = 0 + parser._do_vertical(0) + assert_equal(parser._curpos, 2) + parser._do_horizontal(7, 0) + assert_equal(parser._curpos, 9) + assert_equal(parser._get_bits(), '101111111') + return + + def test_m1(self): + parser = self.get_parser('10101') + parser._do_pass() + assert_equal(parser._curpos, 2) + parser._do_pass() + assert_equal(parser._curpos, 4) + assert_equal(parser._get_bits(), '1111') + return + + def test_m2(self): + parser = self.get_parser('101011') + 
parser._do_vertical(-1) + parser._do_vertical(-1) + parser._do_vertical(1) + parser._do_horizontal(1, 1) + assert_equal(parser._get_bits(), '011101') + return + + def test_m3(self): + parser = self.get_parser('10111011') + parser._do_vertical(-1) + parser._do_pass() + parser._do_vertical(1) + parser._do_vertical(1) + assert_equal(parser._get_bits(), '00000001') + return + +if __name__ == '__main__': + nose.runmodule() \ No newline at end of file diff --git a/tests/test_pdfminer_crypto.py b/tests/test_pdfminer_crypto.py new file mode 100644 index 00000000..fdabd006 --- /dev/null +++ b/tests/test_pdfminer_crypto.py @@ -0,0 +1,52 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +from nose.tools import assert_equal +from nose import SkipTest +import nose + +#test of various compression/encoding modules (previously in doctests): +from pdfminer.ascii85 import * +from pdfminer.arcfour import * +from pdfminer.lzw import * +from pdfminer.runlength import * +from pdfminer.rijndael import * + +import binascii +def hex(b): return binascii.hexlify(b) #encode('hex') +def dehex(b): return binascii.unhexlify(b) #decode('hex') + +class TestAscii85(): + def test_ascii85decode(self): + #The sample string is taken from: http://en.wikipedia.org/w/index.php?title=Ascii85 + assert_equal(ascii85decode(b'9jqo^BlbD-BleB1DJ+*+F(f,q'),b'Man is distinguished') + assert_equal(ascii85decode(b'E,9)oF*2M7/c~>'),b'pleasure.') + + def test_asciihexdecode(self): + assert_equal(asciihexdecode(b'61 62 2e6364 65'),b'ab.cde') + assert_equal(asciihexdecode(b'61 62 2e6364 657>'),b'ab.cdep') + assert_equal(asciihexdecode(b'7>'),b'p') + +class TestArcfour(): + def test(self): + + assert_equal(hex(Arcfour(b'Key').process(b'Plaintext')),b'bbf316e8d940af0ad3') + assert_equal(hex(Arcfour(b'Wiki').process(b'pedia')),b'1021bf0420') + assert_equal(hex(Arcfour(b'Secret').process(b'Attack at dawn')),b'45a01f645fc35b383552544b9bf5') + +class TestLzw(): + def test_lzwdecode(self): + 
assert_equal(lzwdecode(b'\x80\x0b\x60\x50\x22\x0c\x0c\x85\x01'),b'\x2d\x2d\x2d\x2d\x2d\x41\x2d\x2d\x2d\x42') + +class TestRunlength(): + def test_rldecode(self): + assert_equal(rldecode(b'\x05123456\xfa7\x04abcde\x80junk'),b'1234567777777abcde') + +class TestRijndaelEncryptor(): + def test_RijndaelEncryptor(self): + key = dehex(b'00010203050607080a0b0c0d0f101112') + plaintext = dehex(b'506812a45f08c889b97f5980038b8359') + assert_equal(hex(RijndaelEncryptor(key, 128).encrypt(plaintext)),b'd8f532538289ef7d06b506a4fd5be9c9') + +if __name__ == '__main__': + nose.runmodule() \ No newline at end of file diff --git a/tests/test_pdfminer_psparser.py b/tests/test_pdfminer_psparser.py new file mode 100644 index 00000000..2fbae8eb --- /dev/null +++ b/tests/test_pdfminer_psparser.py @@ -0,0 +1,108 @@ +#!/usr/bin/env python + +# -*- coding: utf-8 -*- + +from nose.tools import assert_equal, assert_true, assert_false +from nose import SkipTest +import nose + +import logging + +from pdfminer.psparser import * + +## Simplistic Test cases +## +class TestPSBaseParser: + + TESTDATA = br'''%!PS +begin end + " @ # +/a/BCD /Some_Name /foo#5f#xbaa +0 +1 -2 .5 1.234 +(abc) () (abc ( def ) ghi) +(def\040\0\0404ghi) (bach\\slask) (foo\nbaa) +(this % is not a comment.) +(foo +baa) +(foo\ +baa) +<> <20> < 40 4020 > + +func/a/b{(c)do*}def +[ 1 (z) ! 
] +<< /foo (bar) >> +''' + + TOKENS = [ + (5, KWD(b'begin')), (11, KWD(b'end')), (16, KWD(b'"')), (19, KWD(b'@')), + (21, KWD(b'#')), (23, LIT('a')), (25, LIT('BCD')), (30, LIT('Some_Name')), + (41, LIT('foo_xbaa')), (54, 0), (56, 1), (59, -2), (62, 0.5), + (65, 1.234), (71, b'abc'), (77, b''), (80, b'abc ( def ) ghi'), + (98, b'def \x00 4ghi'), (118, b'bach\\slask'), (132, b'foo\nbaa'), + (143, b'this % is not a comment.'), (170, b'foo\nbaa'), (180, b'foobaa'), + (191, b''), (194, b' '), (199, b'@@ '), (211, b'\xab\xcd\x00\x124\x05'), + (226, KWD(b'func')), (230, LIT('a')), (232, LIT('b')), + (234, KWD(b'{')), (235, b'c'), (238, KWD(b'do*')), (241, KWD(b'}')), + (242, KWD(b'def')), (246, KWD(b'[')), (248, 1), (250, b'z'), (254, KWD(b'!')), + (256, KWD(b']')), (258, KWD(b'<<')), (261, LIT('foo')), (266, b'bar'), + (272, KWD(b'>>')) + ] + + OBJS = [ + (23, LIT('a')), (25, LIT('BCD')), (30, LIT('Some_Name')), + (41, LIT('foo_xbaa')), (54, 0), (56, 1), (59, -2), (62, 0.5), + (65, 1.234), (71, b'abc'), (77, b''), (80, b'abc ( def ) ghi'), + (98, b'def \x00 4ghi'), (118, b'bach\\slask'), (132, b'foo\nbaa'), + (143, b'this % is not a comment.'), (170, b'foo\nbaa'), (180, b'foobaa'), + (191, b''), (194, b' '), (199, b'@@ '), (211, b'\xab\xcd\x00\x124\x05'), + (230, LIT('a')), (232, LIT('b')), (234, [b'c']), (246, [1, b'z']), + (258, {'foo': b'bar'}), + ] + + def get_tokens(self, s): + from io import BytesIO + + class MyParser(PSBaseParser): + def flush(self): + self.add_results(*self.popall()) + parser = MyParser(BytesIO(s)) + r = [] + try: + while True: + r.append(parser.nexttoken()) + except PSEOF: + pass + return r + + def get_objects(self, s): + from io import BytesIO + + class MyParser(PSStackParser): + def flush(self): + self.add_results(*self.popall()) + parser = MyParser(BytesIO(s)) + r = [] + try: + while True: + r.append(parser.nextobject()) + except PSEOF: + pass + return r + + def test_1(self): + tokens = self.get_tokens(self.TESTDATA) + logging.info(tokens) + 
assert_equal(tokens, self.TOKENS) + return + + def test_2(self): + objs = self.get_objects(self.TESTDATA) + logging.info(objs) + assert_equal(objs, self.OBJS) + return + +if __name__ == '__main__': + #import logging,sys,os,six + #logging.basicConfig(level=logging.DEBUG, filename='%s_%d.%d.log'%(os.path.basename(__file__),sys.version_info[0],sys.version_info[1])) + nose.runmodule() \ No newline at end of file diff --git a/tests/test_tools_dumppdf.py b/tests/test_tools_dumppdf.py new file mode 100644 index 00000000..87d74d2d --- /dev/null +++ b/tests/test_tools_dumppdf.py @@ -0,0 +1,53 @@ +#!/usr/bin/env python + +# -*- coding: utf-8 -*- +import six + +import nose, logging, os + +if six.PY3: + from tools import dumppdf +elif six.PY2: + import os, sys + sys.path.append(os.path.abspath(os.path.curdir)) + import tools.dumppdf as dumppdf + +path=os.path.dirname(os.path.abspath(__file__))+'/' + +def run(datapath,filename,options=None): + i=path+datapath+filename+'.pdf' + o=path+filename+'.xml' + if options: + s='dumppdf -o%s %s %s'%(o,options,i) + else: + s='dumppdf -o%s %s'%(o,i) + dumppdf.main(s.split(' ')) + +class TestDumpPDF(): + + + def test_1(self): + run('../samples/','jo','-t -a') + run('../samples/','simple1','-t -a') + run('../samples/','simple2','-t -a') + run('../samples/','simple3','-t -a') + + def test_2(self): + run('../samples/nonfree/','dmca','-t -a') + + def test_3(self): + run('../samples/nonfree/','f1040nr') + + def test_4(self): + run('../samples/nonfree/','i1040nr') + + def test_5(self): + run('../samples/nonfree/','kampo','-t -a') + + def test_6(self): + run('../samples/nonfree/','naacl06-shinyama','-t -a') + +if __name__ == '__main__': + #import logging,sys,os,six + #logging.basicConfig(level=logging.DEBUG, filename='%s_%d.%d.log'%(os.path.basename(__file__),sys.version_info[0],sys.version_info[1])) + nose.runmodule() diff --git a/tests/test_tools_pdf2txt.py b/tests/test_tools_pdf2txt.py new file mode 100644 index 00000000..70e6cf9c --- /dev/null 
+++ b/tests/test_tools_pdf2txt.py @@ -0,0 +1,57 @@ +#!/usr/bin/env python + +# -*- coding: utf-8 -*- + +import nose, logging, os + +import tools.pdf2txt as pdf2txt + +path=os.path.dirname(os.path.abspath(__file__))+'/' + +def run(datapath,filename,options=None): + i=path+datapath+filename+'.pdf' + o=path+filename+'.txt' + if options: + s='pdf2txt -o%s %s %s'%(o,options,i) + else: + s='pdf2txt -o%s %s'%(o,i) + pdf2txt.main(s.split(' ')[1:]) + +class TestDumpPDF(): + + def test_1(self): + run('../samples/','jo') + run('../samples/','simple1') + run('../samples/','simple2') + run('../samples/','simple3') + + def test_2(self): + run('../samples/nonfree/','dmca') + + def test_3(self): + run('../samples/nonfree/','f1040nr') + + def test_4(self): + run('../samples/nonfree/','i1040nr') + + def test_5(self): + run('../samples/nonfree/','kampo') + + def test_6(self): + run('../samples/nonfree/','naacl06-shinyama') + + # this test works on Windows but on Linux & Travis-CI it says + # PDFSyntaxError: No /Root object! - Is this really a PDF? 
+ # TODO: Find why + """ + def test_7(self): + run('../samples/contrib/','stamp-no') + """ + + def test_8(self): + run('../samples/contrib/','2b','-A -t xml') + + def test_9(self): + run('../samples/nonfree/','175') # https://github.com/pdfminer/pdfminer.six/issues/65 +if __name__ == '__main__': + nose.runmodule() diff --git a/tools/__init__.py b/tools/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tools/conv_afm.py b/tools/conv_afm.py index 846e2553..2402a8ea 100755 --- a/tools/conv_afm.py +++ b/tools/conv_afm.py @@ -1,4 +1,5 @@ #!/usr/bin/env python + import sys import fileinput diff --git a/tools/conv_cmap.py b/tools/conv_cmap.py index 88cab575..dea750a4 100755 --- a/tools/conv_cmap.py +++ b/tools/conv_cmap.py @@ -1,9 +1,12 @@ #!/usr/bin/env python + import sys try: import cPickle as pickle except ImportError: import pickle as pickle +import codecs +import six ## CMapConverter @@ -50,20 +53,23 @@ def load(self, fp): if not line: continue values = line.split('\t') if encs is None: - assert values[0] == 'CID' + assert values[0] == 'CID', str(values) encs = values continue def put(dmap, code, cid, force=False): for b in code[:-1]: - b = ord(b) + if six.PY2: + b = ord(b) if b in dmap: dmap = dmap[b] else: d = {} dmap[b] = d dmap = d - b = ord(code[-1]) + b = code[-1] + if six.PY2: + b = ord(b) if force or ((b not in dmap) or dmap[b] == cid): dmap[b] = cid return @@ -83,8 +89,8 @@ def add(unimap, enc, code): return def pick(unimap): - chars = unimap.items() - chars.sort(key=(lambda (c,n):(n,-ord(c))), reverse=True) + chars = list(unimap.items()) + chars.sort(key=(lambda x:(x[1],-ord(x[0]))), reverse=True) (c,_) = chars[0] return c @@ -103,7 +109,7 @@ def pick(unimap): if vertical: code = code[:-1] try: - code = code.decode('hex') + code = codecs.decode(code, 'hex_codec') except: code = chr(int(code, 16)) if vertical: @@ -138,7 +144,7 @@ def dump_cmap(self, fp, enc): IS_VERTICAL=self.is_vertical.get(enc, False), CODE2CID=self.code2cid.get(enc), 
) - fp.write(pickle.dumps(data)) + fp.write(pickle.dumps(data, 2)) return def dump_unicodemap(self, fp): @@ -146,7 +152,7 @@ def dump_unicodemap(self, fp): CID2UNICHR_H=self.cid2unichr_h, CID2UNICHR_V=self.cid2unichr_v, ) - fp.write(pickle.dumps(data)) + fp.write(pickle.dumps(data, 2)) return # main @@ -175,7 +181,7 @@ def usage(): converter = CMapConverter(enc2codec) for path in args: print ('reading: %r...' % path) - fp = file(path) + fp = open(path) converter.load(fp) fp.close() diff --git a/tools/conv_glyphlist.py b/tools/conv_glyphlist.py index 93179adb..2a704388 100755 --- a/tools/conv_glyphlist.py +++ b/tools/conv_glyphlist.py @@ -1,4 +1,5 @@ #!/usr/bin/env python + import sys import fileinput diff --git a/tools/dumppdf.py b/tools/dumppdf.py index 29a11449..110f196a 100755 --- a/tools/dumppdf.py +++ b/tools/dumppdf.py @@ -1,4 +1,5 @@ #!/usr/bin/env python + # # dumppdf.py - dump pdf contents in XML format. # @@ -6,7 +7,7 @@ # options: # -i objid : object id # -import sys, os.path, re +import sys, os.path, re, logging from pdfminer.psparser import PSKeyword, PSLiteral, LIT from pdfminer.pdfparser import PDFParser from pdfminer.pdfdocument import PDFDocument, PDFNoOutlines @@ -18,8 +19,12 @@ ESC_PAT = re.compile(r'[\000-\037&<>()"\042\047\134\177-\377]') def e(s): + if six.PY3 and isinstance(s,six.binary_type): + s=str(s,'latin-1') return ESC_PAT.sub(lambda m:'&#%d;' % ord(m.group(0)), s) +import six # Python 2+3 compatibility + # dumpxml def dumpxml(out, obj, codec=None): @@ -29,7 +34,7 @@ def dumpxml(out, obj, codec=None): if isinstance(obj, dict): out.write('\n' % len(obj)) - for (k,v) in obj.iteritems(): + for (k,v) in six.iteritems(obj): out.write('%s\n' % k) out.write('') dumpxml(out, v) @@ -45,7 +50,7 @@ def dumpxml(out, obj, codec=None): out.write('') return - if isinstance(obj, str): + if isinstance(obj, (six.string_types, six.binary_type)): out.write('%s' % (len(obj), e(obj))) return @@ -113,11 +118,11 @@ def dumpallobjs(out, doc, codec=None): # 
dumpoutline def dumpoutline(outfp, fname, objids, pagenos, password='', dumpall=False, codec=None, extractdir=None): - fp = file(fname, 'rb') + fp = open(fname, 'rb') parser = PDFParser(fp) doc = PDFDocument(parser, password) pages = dict( (page.pageid, pageno) for (pageno,page) - in enumerate(PDFPage.create_pages(doc)) ) + in enumerate(PDFPage.create_pages(doc), 1) ) def resolve_dest(dest): if isinstance(dest, str): dest = resolve1(doc.get_dest(dest)) @@ -125,6 +130,8 @@ def resolve_dest(dest): dest = resolve1(doc.get_dest(dest.name)) if isinstance(dest, dict): dest = dest['D'] + if isinstance(dest, PDFObjRef): + dest = dest.resolve() return dest try: outlines = doc.get_outlines() @@ -135,10 +142,10 @@ def resolve_dest(dest): dest = resolve_dest(dest) pageno = pages[dest[0].objid] elif a: - action = a.resolve() + action = a if isinstance(action, dict): subtype = action.get('S') - if subtype and repr(subtype) == '/GoTo' and action.get('D'): + if subtype and repr(subtype) == '/\'GoTo\'' and action.get('D'): dest = resolve_dest(action['D']) pageno = pages[dest[0].objid] s = e(title).encode('utf-8', 'xmlcharrefreplace') @@ -183,7 +190,7 @@ def extract1(obj): out.close() return - fp = file(fname, 'rb') + fp = open(fname, 'rb') parser = PDFParser(fp) doc = PDFDocument(parser, password) for xref in doc.xrefs: @@ -191,12 +198,13 @@ def extract1(obj): obj = doc.getobj(objid) if isinstance(obj, dict) and obj.get('Type') is LITERAL_FILESPEC: extract1(obj) + fp.close() return # dumppdf def dumppdf(outfp, fname, objids, pagenos, password='', dumpall=False, codec=None, extractdir=None): - fp = file(fname, 'rb') + fp = open(fname, 'rb') parser = PDFParser(fp) doc = PDFDocument(parser, password) if objids: @@ -229,11 +237,10 @@ def usage(): print ('usage: %s [-d] [-a] [-p pageid] [-P password] [-r|-b|-t] [-T] [-E directory] [-i objid] file ...' 
% argv[0]) return 100 try: - (opts, args) = getopt.getopt(argv[1:], 'dap:P:rbtTE:i:') + (opts, args) = getopt.getopt(argv[1:], 'dap:P:rbtTE:i:o:') except getopt.GetoptError: return usage() if not args: return usage() - debug = 0 objids = [] pagenos = set() codec = None @@ -243,8 +250,8 @@ def usage(): outfp = sys.stdout extractdir = None for (k, v) in opts: - if k == '-d': debug += 1 - elif k == '-o': outfp = file(v, 'wb') + if k == '-d': logging.getLogger().setLevel(logging.DEBUG) + elif k == '-o': outfp = open(v, 'w') elif k == '-i': objids.extend( int(x) for x in v.split(',') ) elif k == '-p': pagenos.update( int(x)-1 for x in v.split(',') ) elif k == '-P': password = v @@ -256,13 +263,13 @@ def usage(): elif k == '-E': extractdir = v proc = extractembedded - # - PDFDocument.debug = debug - PDFParser.debug = debug - # + + if six.PY2 and sys.stdin.encoding: + password = password.decode(sys.stdin.encoding) + for fname in args: proc(outfp, fname, objids, pagenos, password=password, dumpall=dumpall, codec=codec, extractdir=extractdir) - return + outfp.close() if __name__ == '__main__': sys.exit(main(sys.argv)) diff --git a/tools/latin2ascii.py b/tools/latin2ascii.py index aa89e1de..4caf1c72 100755 --- a/tools/latin2ascii.py +++ b/tools/latin2ascii.py @@ -1,4 +1,5 @@ #!/usr/bin/env python + # # latin2ascii.py - converts latin1 characters into ascii. # diff --git a/tools/pdf2html.cgi b/tools/pdf2html.cgi index 4ddba6dc..265866cd 100755 --- a/tools/pdf2html.cgi +++ b/tools/pdf2html.cgi @@ -1,4 +1,4 @@ -#!/usr/bin/python -O +#!/usr/bin/env python -O # # pdf2html.cgi - Gateway script for converting PDF into HTML. # diff --git a/tools/pdf2txt.py b/tools/pdf2txt.py index 5eb24bfd..1e8ec0b4 100755 --- a/tools/pdf2txt.py +++ b/tools/pdf2txt.py @@ -1,116 +1,127 @@ #!/usr/bin/env python + +""" +Converts PDF text content (though not images containing text) to plain text, html, xml or "tags". 
+""" import sys -from pdfminer.pdfdocument import PDFDocument -from pdfminer.pdfparser import PDFParser -from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter -from pdfminer.pdfdevice import PDFDevice, TagExtractor -from pdfminer.pdfpage import PDFPage -from pdfminer.converter import XMLConverter, HTMLConverter, TextConverter -from pdfminer.cmapdb import CMapDB -from pdfminer.layout import LAParams +import logging +import six +import pdfminer.settings +pdfminer.settings.STRICT = False +import pdfminer.high_level +import pdfminer.layout from pdfminer.image import ImageWriter -# main -def main(argv): - import getopt - def usage(): - print ('usage: %s [-d] [-p pagenos] [-m maxpages] [-P password] [-o output]' - ' [-C] [-n] [-A] [-V] [-M char_margin] [-L line_margin] [-W word_margin]' - ' [-F boxes_flow] [-Y layout_mode] [-O output_dir] [-R rotation] [-S]' - ' [-t text|html|xml|tag] [-c codec] [-s scale]' - ' file ...' % argv[0]) - return 100 - try: - (opts, args) = getopt.getopt(argv[1:], 'dp:m:P:o:CnAVM:L:W:F:Y:O:R:St:c:s:') - except getopt.GetoptError: - return usage() - if not args: return usage() - # debug option - debug = 0 - # input option - password = '' - pagenos = set() - maxpages = 0 - # output option - outfile = None - outtype = None + +def extract_text(files=[], outfile='-', + _py2_no_more_posargs=None, # Bloody Python2 needs a shim + no_laparams=False, all_texts=None, detect_vertical=None, # LAParams + word_margin=None, char_margin=None, line_margin=None, boxes_flow=None, # LAParams + output_type='text', codec='utf-8', strip_control=False, + maxpages=0, page_numbers=None, password="", scale=1.0, rotation=0, + layoutmode='normal', output_dir=None, debug=False, + disable_caching=False, **other): + if _py2_no_more_posargs is not None: + raise ValueError("Too many positional arguments passed.") + if not files: + raise ValueError("Must provide files to work upon!") + + # If any LAParams group arguments were passed, create an LAParams object and 
+ # populate with given args. Otherwise, set it to None. + if not no_laparams: + laparams = pdfminer.layout.LAParams() + for param in ("all_texts", "detect_vertical", "word_margin", "char_margin", "line_margin", "boxes_flow"): + paramv = locals().get(param, None) + if paramv is not None: + setattr(laparams, param, paramv) + else: + laparams = None + imagewriter = None - rotation = 0 - stripcontrol = False - layoutmode = 'normal' - codec = 'utf-8' - pageno = 1 - scale = 1 - caching = True - showpageno = True - laparams = LAParams() - for (k, v) in opts: - if k == '-d': debug += 1 - elif k == '-p': pagenos.update( int(x)-1 for x in v.split(',') ) - elif k == '-m': maxpages = int(v) - elif k == '-P': password = v - elif k == '-o': outfile = v - elif k == '-C': caching = False - elif k == '-n': laparams = None - elif k == '-A': laparams.all_texts = True - elif k == '-V': laparams.detect_vertical = True - elif k == '-M': laparams.char_margin = float(v) - elif k == '-L': laparams.line_margin = float(v) - elif k == '-W': laparams.word_margin = float(v) - elif k == '-F': laparams.boxes_flow = float(v) - elif k == '-Y': layoutmode = v - elif k == '-O': imagewriter = ImageWriter(v) - elif k == '-R': rotation = int(v) - elif k == '-S': stripcontrol = True - elif k == '-t': outtype = v - elif k == '-c': codec = v - elif k == '-s': scale = float(v) - # - PDFDocument.debug = debug - PDFParser.debug = debug - CMapDB.debug = debug - PDFPageInterpreter.debug = debug - # - rsrcmgr = PDFResourceManager(caching=caching) - if not outtype: - outtype = 'text' - if outfile: - if outfile.endswith('.htm') or outfile.endswith('.html'): - outtype = 'html' - elif outfile.endswith('.xml'): - outtype = 'xml' - elif outfile.endswith('.tag'): - outtype = 'tag' - if outfile: - outfp = file(outfile, 'w') + if output_dir: + imagewriter = ImageWriter(output_dir) + + if output_type == "text" and outfile != "-": + for override, alttype in ( (".htm", "html"), + (".html", "html"), + (".xml", "xml"), + 
(".tag", "tag") ): + if outfile.endswith(override): + output_type = alttype + + if outfile == "-": + outfp = sys.stdout + if outfp.encoding is not None: + codec = 'utf-8' else: + outfp = open(outfile, "wb") + + + for fname in files: + with open(fname, "rb") as fp: + pdfminer.high_level.extract_text_to_fp(fp, **locals()) + return outfp + +# main +def main(args=None): + import argparse + P = argparse.ArgumentParser(description=__doc__) + P.add_argument("files", type=str, default=None, nargs="+", help="Files to process.") + P.add_argument("-d", "--debug", default=False, action="store_true", help="Debug output.") + P.add_argument("-p", "--pagenos", type=str, help="Comma-separated list of page numbers to parse. Included for legacy applications, use -P/--page-numbers for more idiomatic argument entry.") + P.add_argument("--page-numbers", type=int, default=None, nargs="+", help="Alternative to --pagenos with space-separated numbers; supercedes --pagenos where it is used.") + P.add_argument("-m", "--maxpages", type=int, default=0, help = "Maximum pages to parse") + P.add_argument("-P", "--password", type=str, default="", help = "Decryption password for PDF") + P.add_argument("-o", "--outfile", type=str, default="-", help="Output file (default/'-' is stdout)") + P.add_argument("-t", "--output_type", type=str, default="text", help = "Output type: text|html|xml|tag (default is text)") + P.add_argument("-c", "--codec", type=str, default="utf-8", help = "Text encoding") + P.add_argument("-s", "--scale", type=float, default=1.0, help = "Scale") + P.add_argument("-A", "--all-texts", default=None, action="store_true", help="LAParams all texts") + P.add_argument("-V", "--detect-vertical", default=None, action="store_true", help="LAParams detect vertical") + P.add_argument("-W", "--word-margin", type=float, default=None, help = "LAParams word margin") + P.add_argument("-M", "--char-margin", type=float, default=None, help = "LAParams char margin") + P.add_argument("-L", 
"--line-margin", type=float, default=None, help = "LAParams line margin") + P.add_argument("-F", "--boxes-flow", type=float, default=None, help = "LAParams boxes flow") + P.add_argument("-Y", "--layoutmode", default="normal", type=str, help="HTML Layout Mode") + P.add_argument("-n", "--no-laparams", default=False, action="store_true", help = "Pass None as LAParams") + P.add_argument("-R", "--rotation", default=0, type=int, help = "Rotation") + P.add_argument("-O", "--output-dir", default=None, help="Output directory for images") + P.add_argument("-C", "--disable-caching", default=False, action="store_true", help="Disable caching") + P.add_argument("-S", "--strip-control", default=False, action="store_true", help="Strip control in XML mode") + A = P.parse_args(args=args) + + if A.page_numbers: + A.page_numbers = set([x-1 for x in A.page_numbers]) + if A.pagenos: + A.page_numbers = set([int(x)-1 for x in A.pagenos.split(",")]) + + imagewriter = None + if A.output_dir: + imagewriter = ImageWriter(A.output_dir) + + if six.PY2 and sys.stdin.encoding: + A.password = A.password.decode(sys.stdin.encoding) + + if A.output_type == "text" and A.outfile != "-": + for override, alttype in ( (".htm", "html"), + (".html", "html"), + (".xml", "xml" ), + (".tag", "tag" ) ): + if A.outfile.endswith(override): + A.output_type = alttype + + if A.outfile == "-": outfp = sys.stdout - if outtype == 'text': - device = TextConverter(rsrcmgr, outfp, codec=codec, laparams=laparams, - imagewriter=imagewriter) - elif outtype == 'xml': - device = XMLConverter(rsrcmgr, outfp, codec=codec, laparams=laparams, - imagewriter=imagewriter, - stripcontrol=stripcontrol) - elif outtype == 'html': - device = HTMLConverter(rsrcmgr, outfp, codec=codec, scale=scale, - layoutmode=layoutmode, laparams=laparams, - imagewriter=imagewriter, debug=debug) - elif outtype == 'tag': - device = TagExtractor(rsrcmgr, outfp, codec=codec) + if outfp.encoding is not None: + # Why ignore outfp.encoding? :-/ stupid cathal? 
+ A.codec = 'utf-8' else: - return usage() - for fname in args: - fp = file(fname, 'rb') - interpreter = PDFPageInterpreter(rsrcmgr, device) - for page in PDFPage.get_pages(fp, pagenos, - maxpages=maxpages, password=password, - caching=caching, check_extractable=True): - page.rotate = (page.rotate+rotation) % 360 - interpreter.process_page(page) - fp.close() - device.close() + outfp = open(A.outfile, "wb") + + ## Test Code + outfp = extract_text(**vars(A)) outfp.close() - return + return 0 + -if __name__ == '__main__': sys.exit(main(sys.argv)) +if __name__ == '__main__': sys.exit(main()) diff --git a/tools/pdf2txt.spec b/tools/pdf2txt.spec new file mode 100644 index 00000000..8baeb77f --- /dev/null +++ b/tools/pdf2txt.spec @@ -0,0 +1,30 @@ +# -*- mode: python -*- + +block_cipher = None + + +a = Analysis(['pdf2txt.py'], + pathex=['C:\\Dev\\Python\\pdfminer.six\\tools'], + binaries=[], + datas=[], + hiddenimports=[], + hookspath=[], + runtime_hooks=[], + excludes=['django','matplotlib','PIL','numpy','qt5'], + win_no_prefer_redirects=False, + win_private_assemblies=False, + cipher=block_cipher) + +pyz = PYZ(a.pure, a.zipped_data, + cipher=block_cipher) +exe = EXE(pyz, + a.scripts, + a.binaries, + a.zipfiles, + a.datas, + name='pdf2txt', + debug=False, + strip=False, + upx=True, + runtime_tmpdir=None, + console=True ) diff --git a/tools/pdfdiff.py b/tools/pdfdiff.py new file mode 100644 index 00000000..17e41e89 --- /dev/null +++ b/tools/pdfdiff.py @@ -0,0 +1,117 @@ +#!/usr/bin/env python + +""" +compares rwo pdf files. +""" +import sys +import logging +import six +import pdfminer.settings +pdfminer.settings.STRICT = False +import pdfminer.high_level +import pdfminer.layout + +def compare(file1,file2,**args): + if args.get('_py2_no_more_posargs',None) is not None: + raise ValueError("Too many positional arguments passed.") + + + # If any LAParams group arguments were passed, create an LAParams object and + # populate with given args. Otherwise, set it to None. 
+ if args.get('laparams',None) is None: + laparams = pdfminer.layout.LAParams() + for param in ("all_texts", "detect_vertical", "word_margin", "char_margin", "line_margin", "boxes_flow"): + paramv = args.get(param, None) + if paramv is not None: + laparams[param]=paramv + args['laparams']=laparams + + s1=six.StringIO() + with open(file1, "rb") as fp: + pdfminer.high_level.extract_text_to_fp(fp,s1, **args) + + s2=six.StringIO() + with open(file2, "rb") as fp: + pdfminer.high_level.extract_text_to_fp(fp,s2, **args) + + import difflib + s1.seek(0) + s2.seek(0) + s1,s2=s1.readlines(), s2.readlines() + + import os.path + try: + extension = os.path.splitext(args['outfile'])[1][1:4] + if extension.lower()=='htm': + return difflib.HtmlDiff().make_file(s1,s2) + except KeyError: + pass + return difflib.unified_diff(s1,s2,n=args['context_lines']) + + +# main +def main(args=None): + import argparse + P = argparse.ArgumentParser(description=__doc__) + P.add_argument("file1", type=str, default=None, help="File 1 to compare.") + P.add_argument("file2", type=str, default=None, help="File 2 to compare.") + P.add_argument("-o", "--outfile", type=str, default="-", + help="Output file (default/'-' is stdout) \ + if .htm or .html, create an HTML table (or a complete HTML file containing the table) \ + showing a side by side, line by line comparison of text with inter-line \ + and intra-line change highlights. \ + The table can be generated in either full or contextual difference mode." + ) + P.add_argument("-N", "--context-lines", default=3, type=int, help = "context lines shown") + P.add_argument("-d", "--debug", default=False, action="store_true", help="Debug output.") + + # params for pdf2txt + P.add_argument("-p", "--pagenos", type=str, help="Comma-separated list of page numbers to parse. 
Included for legacy applications, use -P/--page-numbers for more idiomatic argument entry.") + P.add_argument("--page-numbers", type=int, default=None, nargs="+", help="Alternative to --pagenos with space-separated numbers; supercedes --pagenos where it is used.") + P.add_argument("-m", "--maxpages", type=int, default=0, help = "Maximum pages to parse") + P.add_argument("-P", "--password", type=str, default="", help = "Decryption password for both PDFs") + P.add_argument("-t", "--output_type", type=str, default="text", help = "pdf2txt type: text|html|xml|tag (default is text)") + P.add_argument("-c", "--codec", type=str, default="utf-8", help = "Text encoding") + P.add_argument("-s", "--scale", type=float, default=1.0, help = "Scale") + P.add_argument("-A", "--all-texts", default=None, action="store_true", help="LAParams all texts") + P.add_argument("-V", "--detect-vertical", default=None, action="store_true", help="LAParams detect vertical") + P.add_argument("-W", "--word-margin", type=float, default=None, help = "LAParams word margin") + P.add_argument("-M", "--char-margin", type=float, default=None, help = "LAParams char margin") + P.add_argument("-L", "--line-margin", type=float, default=None, help = "LAParams line margin") + P.add_argument("-F", "--boxes-flow", type=float, default=None, help = "LAParams boxes flow") + P.add_argument("-Y", "--layoutmode", default="normal", type=str, help="HTML Layout Mode") + P.add_argument("-n", "--no-laparams", default=False, action="store_true", help = "Pass None as LAParams") + P.add_argument("-R", "--rotation", default=0, type=int, help = "Rotation") + P.add_argument("-O", "--output-dir", default=None, help="Output directory for images") + P.add_argument("-C", "--disable-caching", default=False, action="store_true", help="Disable caching") + P.add_argument("-S", "--strip-control", default=False, action="store_true", help="Strip control in XML mode") + + + A = P.parse_args(args=args) + + if A.page_numbers: + A.page_numbers 
= set([x-1 for x in A.page_numbers]) + if A.pagenos: + A.page_numbers = set([int(x)-1 for x in A.pagenos.split(",")]) + + if six.PY2 and sys.stdin.encoding: + A.password = A.password.decode(sys.stdin.encoding) + + if A.output_type == "text" and A.outfile != "-": + for override, alttype in ( (".htm", "html"), + (".html", "html"), + (".xml", "xml" ), + (".tag", "tag" ) ): + if A.outfile.endswith(override): + A.output_type = alttype + + if A.outfile == "-": + outfp = sys.stdout + else: + outfp = open(A.outfile, "w", encoding='utf-8') + outfp.writelines(compare(**vars(A))) + outfp.close() + return 0 + + +if __name__ == '__main__': sys.exit(main()) diff --git a/tools/pdfdiff.spec b/tools/pdfdiff.spec new file mode 100644 index 00000000..e90a37f5 --- /dev/null +++ b/tools/pdfdiff.spec @@ -0,0 +1,29 @@ +# -*- mode: python -*- + +block_cipher = None + + +a = Analysis(['pdfdiff.py'], + pathex=['C:\\Dev\\Python\\pdfminer.six\\tools'], + binaries=[], + datas=[], + hiddenimports=[], + hookspath=[], + runtime_hooks=[], + excludes=['django','matplotlib','PIL','numpy','qt5'], + win_no_prefer_redirects=False, + win_private_assemblies=False, + cipher=block_cipher) +pyz = PYZ(a.pure, a.zipped_data, + cipher=block_cipher) +exe = EXE(pyz, + a.scripts, + a.binaries, + a.zipfiles, + a.datas, + name='pdfdiff', + debug=False, + strip=False, + upx=True, + runtime_tmpdir=None, + console=True ) diff --git a/tools/pdfstats.py b/tools/pdfstats.py new file mode 100755 index 00000000..f3ecbbe7 --- /dev/null +++ b/tools/pdfstats.py @@ -0,0 +1,80 @@ +#!/usr/bin/env python3 + +# Exercise pdfminer, looking deeply into a PDF document, print some stats to stdout +# Usage: pdfstats.py + +import sys, os +import collections + +from pdfminer.pdfparser import PDFParser +from pdfminer.pdfdocument import PDFDocument +from pdfminer.pdfpage import PDFPage, PDFTextExtractionNotAllowed +from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter +from pdfminer.pdfdevice import PDFDevice +from 
pdfminer.converter import PDFPageAggregator +from pdfminer.layout import LAParams, LTContainer + + +_, SCRIPT = os.path.split(__file__) + +def msg(*args, **kwargs): + print(' '.join(map(str, args)), file=sys.stdout, **kwargs) + +def flat_iter(obj): + yield obj + if isinstance(obj, LTContainer): + for ob in obj: + yield from flat_iter(ob) + +def main(args): + msg(SCRIPT, args) + + if len(args) != 1: + msg('Parse a PDF file and print some pdfminer-specific stats') + msg('Usage:', SCRIPT, '') + return 1 + + infilename, = args + + lt_types = collections.Counter() + + with open(infilename, 'rb') as pdf_file: + + # Create a PDF parser object associated with the file object. + parser = PDFParser(pdf_file) + + # Create a PDF document object that stores the document structure. + # Supply the password for initialization. + password = '' + document = PDFDocument(parser, password) + # Check if the document allows text extraction. + if not document.is_extractable: + raise PDFTextExtractionNotAllowed(filename) + + # Make a page iterator + pages = PDFPage.create_pages(document) + + + # Set up for some analysis + rsrcmgr = PDFResourceManager() + laparams = LAParams( + detect_vertical=True, + all_texts=True, + ) + #device = PDFDevice(rsrcmgr) + device = PDFPageAggregator(rsrcmgr, laparams=laparams) + interpreter = PDFPageInterpreter(rsrcmgr, device) + + # Look at all (nested) objects on each page + for page_count, page in enumerate(pages, 1): + # oh so stateful + interpreter.process_page(page) + layout = device.get_result() + + lt_types.update(type(item).__name__ for item in flat_iter(layout)) + + msg('page_count', page_count) + msg('lt_types:', ' '.join('{}:{}'.format(*tc) for tc in lt_types.items())) + +if __name__ == '__main__': + sys.exit(main(sys.argv[1:])) diff --git a/tools/prof.py b/tools/prof.py index 5402de45..aaf6712b 100644 --- a/tools/prof.py +++ b/tools/prof.py @@ -1,4 +1,5 @@ #!/usr/bin/env python + import sys def prof_main(argv): diff --git a/tools/runapp.py 
b/tools/runapp.py index 6bdff8be..6b953bec 100755 --- a/tools/runapp.py +++ b/tools/runapp.py @@ -1,4 +1,5 @@ #!/usr/bin/env python + ## ## WebApp class runner ## @@ -8,9 +9,9 @@ import sys import urllib -from httplib import responses -from BaseHTTPServer import HTTPServer -from SimpleHTTPServer import SimpleHTTPRequestHandler +from six.moves.http_client import responses +from six.moves.BaseHTTPServer import HTTPServer +from six.moves.SimpleHTTPServer import SimpleHTTPRequestHandler ## WebAppHandler ## diff --git a/tox.ini b/tox.ini new file mode 100644 index 00000000..cba10979 --- /dev/null +++ b/tox.ini @@ -0,0 +1,10 @@ +[tox] +envlist = py26,py27,py34,py35,py36 + +[testenv] +commands = nosetests --nologcapture +deps = + six + pycryptodome + chardet + nose