Skip to content

Commit

Permalink
"upgrade" from PyPDF3 to pypdf
Browse files Browse the repository at this point in the history
I picked the wrong fork (PyPDF3 instead of PyPDF2).
PyPDF2 was a fork of the original pyPdf.
After several years, PyPDF2 was merged back into pypdf (now all lowercase).
PyPDF3 is now unmaintained.

pypdf meanwhile has had a lot of interesting updates, which I should
look at.
  • Loading branch information
captn3m0 committed Aug 12, 2024
1 parent 16e054f commit b22459f
Show file tree
Hide file tree
Showing 6 changed files with 34 additions and 49 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ __pycache__/*
.idea
.vscode
tags
src/pystitcher/_version.py

# Package files
*.egg
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
pystitcher stitches your PDF files together, generating nice
customizable bookmarks for you using a declarative input in the form of
a markdown file. It is written in pure python and uses
[PyPDF3](https://pypi.org/project/PyPDF3/) for reading and writing PDF
[pypdf](https://pypi.org/project/pypdf/) for reading and writing PDF
files.

## Installation
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ dependencies = [
"html5lib>=1.1",
"importlib-metadata; python_version<\"3.8\"",
"Markdown>=3.6",
"PyPDF3>=1.0.6",
"pypdf>=4.3.1",
"validators>=0.33.0",
]

Expand Down
16 changes: 0 additions & 16 deletions src/pystitcher/_version.py

This file was deleted.

30 changes: 15 additions & 15 deletions src/pystitcher/stitcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@
import html5lib
import markdown

from PyPDF3 import PdfFileWriter, PdfFileReader
from PyPDF3.generic import FloatObject
from pypdf import PdfWriter, PdfReader
from pypdf.generic import Fit
from pystitcher import __version__
from .bookmark import Bookmark

Expand Down Expand Up @@ -70,8 +70,8 @@ def _cacheURL(self, url):
def _get_pdf_number_of_pages(self, filename):
assert os.path.isfile(filename) and os.access(filename, os.R_OK), \
"File {} doesn't exist or isn't readable".format(filename)
pdf_reader = PdfFileReader(open(filename, "rb"))
return pdf_reader.numPages
pdf_reader = PdfReader(open(filename, "rb"))
return pdf_reader.get_num_pages()

"""
Return an attribute with a default value of None
Expand Down Expand Up @@ -186,7 +186,7 @@ def _iterate_old_bookmarks(self, pdf, startPage, bookmarks, level = 1):
for inner_bookmark in bookmarks:
self._iterate_old_bookmarks(pdf, startPage, inner_bookmark, level+1)
else:
localPageNumber = pdf.getDestinationPageNumber(bookmarks)
localPageNumber = pdf.get_destination_page_number(bookmarks)
globalPageNumber = startPage + localPageNumber - 1
b = Bookmark(globalPageNumber, bookmarks.title, level, self.defaultFit)
self.oldBookmarks.append(b)
Expand All @@ -198,9 +198,9 @@ def _iterate_old_bookmarks(self, pdf, startPage, bookmarks, level = 1):
"""
def _insert_bookmarks(self, old_filename, outputFilename):
stack = []
pdfInput = PdfFileReader(open(old_filename, 'rb'))
pdfOutput = PdfFileWriter()
pdfOutput.cloneDocumentFromReader(pdfInput)
pdfInput = PdfReader(open(old_filename, 'rb'))
pdfOutput = PdfWriter()
pdfOutput.clone_document_from_reader(pdfInput)
for b in self.bookmarks:
existingRef = None
# Trim the stack till the top is useful (stack.level < b.level)
Expand All @@ -209,9 +209,9 @@ def _insert_bookmarks(self, old_filename, outputFilename):
# If stack has something, use it
if (len(stack) > 0):
existingRef = stack[len(stack) - 1][1]
bookmargArgs = [b.title, b.page-1, existingRef, None, False, False, b.fit] + b.cords
stack.append((b, pdfOutput.addBookmark(*bookmargArgs)))
pdfOutput.addMetadata(self._getMetadata())
bookmargArgs = [b.title, b.page-1, existingRef, None, False, False, Fit(b.fit)] + b.cords
stack.append((b, pdfOutput.add_outline_item(*bookmargArgs)))
pdfOutput.add_metadata(self._getMetadata())
pdfOutput.write(open(outputFilename, 'wb'))

"""
Expand All @@ -220,15 +220,15 @@ def _insert_bookmarks(self, old_filename, outputFilename):
as we're reading them
"""
def _merge(self, output):
writer = PdfFileWriter()
writer = PdfWriter()
for (inputFile,startPage,filters) in self.files:
assert os.path.isfile(inputFile), ERROR_PATH.format(inputFile)
reader = PdfFileReader(open(inputFile, 'rb'))
reader = PdfReader(open(inputFile, 'rb'))
# Recursively iterate through the old bookmarks
self._iterate_old_bookmarks(reader, startPage, reader.getOutlines())
self._iterate_old_bookmarks(reader, startPage, reader.outline)
rotate, start, end = filters
for page in range(start, end + 1):
writer.addPage(reader.getPage(page - 1).rotateClockwise(rotate))
writer.add_page(reader.get_page(page - 1).rotate(rotate))

writer.write(output)
output.close()
Expand Down
32 changes: 16 additions & 16 deletions tests/test_integration.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import os
import io

import PyPDF3
import pypdf
from pystitcher.stitcher import Stitcher
from pystitcher import __version__

Expand Down Expand Up @@ -55,16 +55,16 @@ def flatten_bookmarks(bookmarks, level=0):

def get_all_bookmarks(pdf):
""" Returns a list of all bookmarks with title, page number, and level in a PDF file"""
bookmarks = flatten_bookmarks(pdf.getOutlines())
return [(d[0]['/Title'], pdf.getDestinationPageNumber(d[0]), d[1]) for d in bookmarks]
bookmarks = flatten_bookmarks(pdf.outline)
return [(d[0]['/Title'], pdf.get_destination_page_number(d[0]), d[1]) for d in bookmarks]

@pytest.mark.parametrize("name,pages,metadata,bookmarks", TEST_DATA)
def test_book(name, pages, metadata, bookmarks):
output_file = render(name)
pdf = PyPDF3.PdfFileReader(output_file)
assert pages == pdf.getNumPages()
pdf = pypdf.PdfReader(output_file)
assert pages == pdf.get_num_pages()
assert bookmarks == get_all_bookmarks(pdf)
info = pdf.getDocumentInfo()
info = pdf.metadata
identity = "pystitcher/%s" % __version__
assert identity == info['/Producer']
assert identity == info['/Creator']
Expand All @@ -74,23 +74,23 @@ def test_book(name, pages, metadata, bookmarks):
def test_rotation():
""" Validates the book-rotate.pdf with pages rotated."""
output_file = render("rotate")
pdf = PyPDF3.PdfFileReader(output_file)
pdf = pypdf.PdfReader(output_file)
# Note that inputs to get_page are 0-indexed
assert 90 == pdf.getPage(3)['/Rotate']
assert 90 == pdf.getPage(4)['/Rotate']
assert 90 == pdf.getPage(5)['/Rotate']
assert 180 == pdf.getPage(6)['/Rotate']
assert 180 == pdf.getPage(7)['/Rotate']
assert 180 == pdf.getPage(8)['/Rotate']
assert 90 == pdf.get_page(3)['/Rotate']
assert 90 == pdf.get_page(4)['/Rotate']
assert 90 == pdf.get_page(5)['/Rotate']
assert 180 == pdf.get_page(6)['/Rotate']
assert 180 == pdf.get_page(7)['/Rotate']
assert 180 == pdf.get_page(8)['/Rotate']

def test_cleanup_disabled():
f = io.StringIO()
with redirect_stdout(f):
output_file = render("min", False)
temp_filename = f.getvalue()[29:-1]
assert os.path.exists(temp_filename)
pdf = PyPDF3.PdfFileReader(temp_filename)
assert 3 == pdf.getNumPages()
assert [] == pdf.getOutlines()
pdf = pypdf.PdfReader(temp_filename)
assert 3 == pdf.get_num_pages()
assert [] == pdf.outline
# Clean it up manually to avoid cluttering
os.remove(temp_filename)

0 comments on commit b22459f

Please sign in to comment.