Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fillable forms #89

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,6 @@ build
htmlcov/
.coverage
MANIFEST
.venv
.vscode

20 changes: 19 additions & 1 deletion pypdf/pdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -526,7 +526,7 @@ def appendPagesFromReader(self, reader, afterPageAppend=None):
if callable(afterPageAppend):
afterPageAppend(writerPage)

def updatePageFormFieldValues(self, page, fields):
def updatePageFormFieldValues(self, page, fields, read_only = False):
"""
Update the form field values for a given page from a fields dictionary.
Copy field texts and values from fields to page.
Expand All @@ -545,6 +545,24 @@ def updatePageFormFieldValues(self, page, fields):
writer_annot.update(
{NameObject("/V"): TextStringObject(fields[field])}
)
if read_only:
writer_annot.update({NameObject("/Ff"): NumberObject(1)})

def have_viewer_render_fields(self):
    """
    Ask PDF viewers to render the values stored in form fields.

    Some PDF viewers need to be coaxed into rendering field values.
    This does so by setting the `/NeedAppearances` entry of the catalog's
    `/AcroForm` dictionary to True (which adds to the processing time
    slightly). An `/AcroForm` dictionary is created when the catalog does
    not have one yet.

    Credit for figuring this out: https://stackoverflow.com/users/8382028/viatech

    This is a best-effort operation: any failure is reported through
    ``warnings.warn`` instead of being raised.
    """
    try:
        catalog = self._rootObject
        if "/AcroForm" not in catalog:
            # Register a new, empty dictionary and point /AcroForm at it.
            # Building the reference from the current object count instead
            # would alias whatever object happened to be registered last,
            # and the flag below would end up on that unrelated object.
            # NOTE(review): relies on self._addObject() returning the
            # indirect reference of the object it registers -- confirm.
            catalog[NameObject("/AcroForm")] = self._addObject(DictionaryObject())
        catalog["/AcroForm"][NameObject("/NeedAppearances")] = BooleanObject(True)
    except Exception as e:
        warnings.warn("Unable to set the /NeedAppearances flag. Filled-in field values may not render correctly. [{}]".format(repr(e)))

def cloneReaderDocumentRoot(self, reader):
"""
Expand Down
31 changes: 27 additions & 4 deletions samplecode/README.md
Original file line number Diff line number Diff line change
@@ -1,15 +1,38 @@
# PyPDF4 Sample Code Folder
This will contain demonstrations of the many features PyPDF4 is capable of.
Example code should make it easy for users to know how to use all aspects of
PyPDF4.
This folder contains demonstrations of just a few of PyPDF4's many features.


## How to run
Invoke the Python interpreter you prefer by specifying the script you wish to
run, e.g.:
```
python2 ./samplecode/basic_features.py
python3 ./samplecode/basic_features.py
```
```

## `basic_features.py`

Sample code that demonstrates:

* Getting metadata from a PDF.
* Copying a PDF, one page at a time, and performing different operations on each page (resize, rotate, add a watermark).
* Encrypting a PDF.
* Adding JavaScript that runs when the PDF is opened.


## `basic_merging.py`

Sample code that demonstrates merging together three PDFs into one, picking and choosing which pages appear in which order.
Selected pages can be added to the end of the output PDF being built, or inserted in the middle.


## `fillable_fields.py`

Sample code that copies a PDF, changing field values along the way (i.e. using
a PDF with fillable fields as a template).

FYI: The fillable_form.pdf used in this demo was created via LibreOffice.


## Contributing to `samplecode`
Feel free to add any type of PDF file or sample code, either by:
Expand Down
5 changes: 5 additions & 0 deletions samplecode/basic_features.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
#!/usr/bin/env python
"""
Showcases basic features of PyPDF.

* Getting metadata from a PDF.
* Copying a PDF, one page at a time, and performing different operations on each page (resize, rotate, add a watermark).
* Encrypting a PDF.
* Adding javascript that runs when the PDF is opened.
"""
from __future__ import print_function

Expand Down
4 changes: 3 additions & 1 deletion samplecode/basic_merging.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
#!/usr/bin/env python
"""
Merges three PDF documents input from the command line.
Sample code that demonstrates merging together three PDFs into one, picking and choosing which pages appear in which order.
Selected pages can be added to the end of the output PDF being built, or inserted in the middle.
This example takes input from the command line.
"""
from __future__ import print_function

Expand Down
61 changes: 61 additions & 0 deletions samplecode/fillable_fields.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
"""
Sample code that copies a PDF, changing field values along the way (i.e. using
a PDF with fillable fields as a template).

FYI: The fillable_form.pdf used in this demo was created via LibreOffice.
"""
import sys
from pypdf import PdfFileWriter, PdfFileReader

# Folder that holds the template PDF; main() also writes the generated PDFs
# into it.
# NOTE(review): this is a relative path, so the script presumably has to be
# launched from the repository root -- confirm.
root_folder = "samplecode/"
# File name of the fillable-form template, looked up inside root_folder.
template_name = "fillable_form.pdf"

def discover_fields(template_pdf):
    """
    Print the names of the fields reported by ``template_pdf.getFields()``.

    When no fields are reported, an error naming the template is printed
    and the script exits with status 1.
    """
    field_map = template_pdf.getFields()

    # Guard clause: nothing to list means the template is unusable.
    if not field_map:
        print("ERROR: '" + template_name + "' has no text fields.")
        sys.exit(1)

    print("Available fields:")
    for name in field_map:
        print(" %s" % name)

def fill_in_pdf(template_pdf, field_values, filename):
    """
    Copy every page of ``template_pdf`` into a new PDF, filling in its fields.

    :param template_pdf: reader for the template; its ``numPages`` and
        ``getPage()`` are used to walk the pages.
    :param field_values: dictionary mapping field names to the text to store
        in them.
    :param filename: path of the PDF file to create.

    The fields are written with ``read_only=True``.
    """
    # Use the writer as a context manager, as the project's tests do, so its
    # cleanup does not depend on garbage collection.
    # NOTE(review): presumably the writer closes its output file on exit --
    # confirm.
    with PdfFileWriter(filename) as output:
        output.have_viewer_render_fields()
        for page_no in range(template_pdf.numPages):
            output.addPage(template_pdf.getPage(page_no))
            # Update the writer's own copy of the page, not the template's.
            page = output.getPage(page_no)
            output.updatePageFormFieldValues(page, field_values, read_only=True)
        output.write()
    print("Created '%s'" % (filename))


def main():
    """
    Fill in the sample template once per demo employee.

    Opens the template found at ``root_folder + template_name``, lists its
    fields, then writes one filled-in PDF per employee into ``root_folder``.
    """
    employee_john = {
        "employee_name": "John Hardworker",
        "employee_id": "0123",
        "department": "Human Resources",
        "manager_name": "Doris Stickler",
        "manager_id": "0072"
    }
    employee_cyndi = {
        "employee_name": "Cyndi Smartworker",
        "employee_id": "0199",
        "department": "Engineering",
        "manager_name": "Ida Wright",
        "manager_id": "0051"
    }

    # Keep the template open only while it is being read; the ``with`` block
    # also closes it when discover_fields() exits the script early.
    with open(root_folder + template_name, "rb") as template_file:
        template_pdf = PdfFileReader(template_file, strict=False)

        discover_fields(template_pdf)
        fill_in_pdf(template_pdf, employee_john, root_folder +
                    "JohnHardworker.pdf")
        fill_in_pdf(template_pdf, employee_cyndi, root_folder +
                    "CyndiSmartworker.pdf")


if __name__ == "__main__":
    main()
Binary file added samplecode/fillable_form.odt
Binary file not shown.
Binary file added samplecode/fillable_form.pdf
Binary file not shown.
Binary file not shown.
88 changes: 80 additions & 8 deletions tests/test_pdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import binascii
from io import BytesIO
import os
from os import remove
from os.path import abspath, basename, dirname, join, pardir
import sys
import tempfile
Expand Down Expand Up @@ -438,6 +439,75 @@ def testProperties(self):
"%s.%s() is not callable" % (PdfFileReader.__name__, m),
)

def testHave_viewer_render_fields(self):
    """
    Checks that a PDF written after calling
    PdfFileWriter.have_viewer_render_fields() has an /AcroForm entry in its
    catalog, and that this entry contains /NeedAppearances.
    """

    tmp_fd, tmp_path = tempfile.mkstemp()
    try:
        source_pdf = join(TEST_DATA_ROOT, "testUpdatePageFormFieldValues/fillable_form.pdf")

        # Write a one-page copy of the form with the flag requested.
        with PdfFileReader(source_pdf) as reader, PdfFileWriter(tmp_path) as writer:
            writer.have_viewer_render_fields()
            writer.addPage(reader.getPage(0))
            writer.write()

        # Read the copy back and inspect its catalog.
        with PdfFileReader(tmp_path) as written:
            root = written._trailer["/Root"].getObject()
            self.assertIn("/AcroForm", root)
            self.assertIn("/NeedAppearances", root["/AcroForm"])

    finally:
        os.close(tmp_fd)
        os.remove(tmp_path)

def testUpdatePageFormFieldValues(self):
    """
    Tests that PdfFileWriter.updatePageFormFieldValues() populates fields
    (annotations) with corresponding values, and that ``read_only=True``
    stores a /Ff value of 1 on them.
    """

    testfile_handle, testfile_name = tempfile.mkstemp()

    field_values = {
        "employee_name": "John Hardworker",
        "employee_id": "0123",
        "department": "Human Resources",
        "manager_name": "Doris Stickler",
        "manager_id": "0072"
    }
    try:
        # Copy fillable_form.pdf, filling in the fields along the way.
        with PdfFileReader(join(TEST_DATA_ROOT, "testUpdatePageFormFieldValues/fillable_form.pdf")) as reader:
            with PdfFileWriter(testfile_name) as writer:
                writer.have_viewer_render_fields()
                template_page = reader.getPage(0)
                writer.addPage(template_page)
                page = writer.getPage(0)
                writer.updatePageFormFieldValues(page, field_values, read_only=True)
                writer.write()

        # Check the results by depleting entries from field_values_sought
        # until it is empty. Work on a copy so the expected values in
        # field_values are left untouched.
        field_values_sought = dict(field_values)
        with PdfFileReader(testfile_name) as pdf:
            for page_no in range(pdf.numPages):
                page = pdf.getPage(page_no)
                for annotation_ref in page["/Annots"]:
                    annotation = annotation_ref.getObject()
                    field = annotation.get("/T")
                    # Skip annotations that are not (or are no longer)
                    # sought instead of failing with a KeyError.
                    if field not in field_values_sought:
                        continue
                    if (field_values_sought[field] == annotation.get("/V")
                            and annotation.get("/Ff") == 1):
                        del field_values_sought[field]

        # Comparing against {} makes a failure list the fields that were
        # not filled in as expected.
        self.assertEqual(field_values_sought, {})
    finally:
        os.close(testfile_handle)
        os.remove(testfile_name)

def testAddAttachment(self):
"""
Tests the addAttachment function for attaching a single file.
Expand All @@ -447,12 +517,12 @@ def testAddAttachment(self):
to check for two entries per attached file.
"""

_, testfile = tempfile.mkstemp()
testfile_handle, testfile_name = tempfile.mkstemp()

try:
# Make PDF with attachment
with PdfFileReader(join(TEST_DATA_ROOT, "jpeg.pdf")) as reader:
with PdfFileWriter(testfile) as writer:
with PdfFileWriter(testfile_name) as writer:
writer.appendPagesFromReader(reader)
with open(
join( # pylint: disable=bad-continuation
Expand All @@ -465,7 +535,7 @@ def testAddAttachment(self):
writer.write()

# Check for attachment entries
with PdfFileReader(testfile) as pdf:
with PdfFileReader(testfile_name) as pdf:
# For caching _cachedObjects data
pdf.numPages # pylint: disable=pointless-statement
for _k, v in pdf._cachedObjects.items():
Expand All @@ -475,7 +545,8 @@ def testAddAttachment(self):
real = len(v["/Names"]["/EmbeddedFiles"]["/Names"])
self.assertEqual(2, real)
finally:
os.remove(testfile)
os.close(testfile_handle)
os.remove(testfile_name)

def testAttachFiles(self):
"""
Expand All @@ -487,12 +558,12 @@ def testAttachFiles(self):
"""

numAttachments = 3
_, testfile = tempfile.mkstemp()
testfile_handle, testfile_name = tempfile.mkstemp()

try:
# Make PDF with attachment
with PdfFileReader(join(TEST_DATA_ROOT, "jpeg.pdf")) as reader:
with PdfFileWriter(testfile) as writer:
with PdfFileWriter(testfile_name) as writer:
writer.appendPagesFromReader(reader)

writer.attachFiles(
Expand All @@ -501,7 +572,7 @@ def testAttachFiles(self):
writer.write()

# Check for attachment entries
with PdfFileReader(testfile) as pdf:
with PdfFileReader(testfile_name) as pdf:
# For caching _cachedObjects data
pdf.numPages # pylint: disable=pointless-statement
for _k, v in pdf._cachedObjects.items():
Expand All @@ -511,7 +582,8 @@ def testAttachFiles(self):
real = len(v["/Names"]["/EmbeddedFiles"]["/Names"])
self.assertEqual(numAttachments * 2, real)
finally:
os.remove(testfile)
os.close(testfile_handle)
os.remove(testfile_name)


class AddJsTestCase(unittest.TestCase):
Expand Down