From 63520efe6fcfd2c87bb1c7ecbd2be41455c5aa74 Mon Sep 17 00:00:00 2001 From: dkgitdev <36101416+dkgitdev@users.noreply.github.com> Date: Sat, 3 Feb 2018 17:32:06 +0700 Subject: [PATCH] Fixed bug where some files could not be extracted Some files can still be extracted even though an exception was raised (I tried commenting out the `if` check completely, and the text was extracted just fine). Here's the file I tested with: https://drive.google.com/file/d/1DerQHjbuxrCI3RkivBwH5WJef0qYP9-B/view?usp=sharing --- pdfminer/pdfpage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pdfminer/pdfpage.py b/pdfminer/pdfpage.py index a48767c6..297c84b1 100644 --- a/pdfminer/pdfpage.py +++ b/pdfminer/pdfpage.py @@ -121,7 +121,7 @@ def get_pages(klass, fp, # Create a PDF document object that stores the document structure. doc = PDFDocument(parser, password=password, caching=caching) # Check if the document allows text extraction. If not, abort. - if check_extractable and not doc.is_extractable: + if not check_extractable and not doc.is_extractable: raise PDFTextExtractionNotAllowed('Text extraction is not allowed: %r' % fp) # Process each page contained in the document. for (pageno, page) in enumerate(klass.create_pages(doc)):