claird · polyglot-jones · Aug 11, 2020 · Aug 11, 2020 · Aug 11, 2020 · Aug 11, 2020
diff --git a/.gitignore b/.gitignore
@@ -8,3 +8,6 @@ build
 htmlcov/
 .coverage
 MANIFEST
+.venv
+.vscode
+
diff --git a/pypdf/pdf.py b/pypdf/pdf.py
@@ -526,7 +526,7 @@ def appendPagesFromReader(self, reader, afterPageAppend=None):
             if callable(afterPageAppend):
                 afterPageAppend(writerPage)
 
-    def updatePageFormFieldValues(self, page, fields):
+    def updatePageFormFieldValues(self, page, fields, read_only = False):
         """
         Update the form field values for a given page from a fields dictionary.
         Copy field texts and values from fields to page.
@@ -545,6 +545,24 @@ def updatePageFormFieldValues(self, page, fields):
                     writer_annot.update(
                         {NameObject("/V"): TextStringObject(fields[field])}
                     )
+                    if read_only:
+                        writer_annot.update({NameObject("/Ff"): NumberObject(1)})
+
+    def have_viewer_render_fields(self):
+        """
+        Ask PDF viewers to generate the appearance of form fields.
+
+        Some PDF viewers need to be coaxed into rendering field values.
+        This does so by setting the `/NeedAppearances` entry of the catalog's
+        `/AcroForm` dictionary to True (which adds to the processing time
+        slightly). If the catalog has no `/AcroForm` entry yet, an empty
+        dictionary is registered for it first.
+
+        This is best effort: any failure is reported with ``warnings.warn``
+        instead of being raised to the caller.
+
+        Credit for figuring this out: https://stackoverflow.com/users/8382028/viatech
+        """
+        try:
+            catalog = self._rootObject
+            if "/AcroForm" not in catalog:
+                # Register a brand-new dictionary for the form. Referencing
+                # len(self._objects) without first adding an object points at
+                # an object that is already in the writer, so the flag would
+                # end up on that unrelated object instead of on a form.
+                catalog[NameObject("/AcroForm")] = self._addObject(DictionaryObject())
+            catalog["/AcroForm"][NameObject("/NeedAppearances")] = BooleanObject(True)
+        except Exception as e:  # deliberate catch-all: never fail the caller
+            warnings.warn("Unable to set the /NeedAppearances flag. Filled-in field values may not render correctly. [{}]".format(repr(e)))
 
     def cloneReaderDocumentRoot(self, reader):
         """

diff --git a/samplecode/README.md b/samplecode/README.md
@@ -1,15 +1,38 @@
 # PyPDF4 Sample Code Folder
-This will contain demonstrations of the many features PyPDF4 is capable of.
-Example code should make it easy for users to know how to use all aspects of
-PyPDF4.
+This folder contains demonstrations of just a few of PyPDF4's many features.
+
 
 ## How to run
 Invoke the Python interpreter you prefer by specifying the script you wish to
 run, e.g.:
 ```
 python2 ./samplecode/basic_features.py
 python3 ./samplecode/basic_features.py
-``` 
+```
+
+## `basic_features.py`
+
+Sample code that demonstrates:
+
+* Getting metadata from a PDF.
+* Copying a PDF, one page at a time, and performing different operations on each page (resize, rotate, add a watermark).
+* Encrypting a PDF.
+* Adding javascript that runs when the PDF is opened.
+
+
+## `basic_merging.py`
+
+Sample code that demonstrates merging together three PDFs into one, picking and choosing which pages appear in which order.
+Selected pages can be added to the end of the output PDF being built, or inserted in the middle.
+
+
+## `fillable_fields.py`
+
+Sample code that copies a PDF, changing field values along the way (i.e. using
+a PDF with fillable fields as a template).
+
+FYI: The fillable_form.pdf used in this demo was created via LibreOffice.
+
 
 ## Contributing to `samplecode`
 Feel free to add any type of PDF file or sample code, either by:

diff --git a/samplecode/basic_features.py b/samplecode/basic_features.py
@@ -1,6 +1,11 @@
 #!/usr/bin/env python
 """
 Showcases basic features of PyPDF.
+
+* Getting metadata from a PDF.
+* Copying a PDF, one page at a time, and performing different operations on each page (resize, rotate, add a watermark).
+* Encrypting a PDF.
+* Adding javascript that runs when the PDF is opened.
 """
 from __future__ import print_function
 

diff --git a/samplecode/basic_merging.py b/samplecode/basic_merging.py
@@ -1,6 +1,8 @@
 #!/usr/bin/env python
 """
-Merges three PDF documents input from the command line.
+Sample code that demonstrates merging together three PDFs into one, picking and choosing which pages appear in which order.
+Selected pages can be added to the end of the output PDF being built, or inserted in the middle.
+This example takes input from the command line.
 """
 from __future__ import print_function
 

diff --git a/samplecode/fillable_fields.py b/samplecode/fillable_fields.py
@@ -0,0 +1,61 @@
+"""
+Sample code that copies a PDF, changing field values along the way (i.e. using
+a PDF with fillable fields as a template).
+
+FYI: The fillable_form.pdf used in this demo was created via LibreOffice.
+"""
+import sys
+from pypdf import PdfFileWriter, PdfFileReader
+
+# Paths are relative to the repository root, which is where the README's
+# "How to run" example invokes the sample scripts from.
+root_folder = "samplecode/"
+template_name = "fillable_form.pdf"
+
+def discover_fields(template_pdf):
+    """
+    Print the name of every field reported by ``template_pdf.getFields()``.
+
+    When no fields are reported, an error message is printed and the script
+    exits with status 1.
+    """
+    field_names = template_pdf.getFields()
+    if not field_names:
+        print("ERROR: '" + template_name + "' has no text fields.")
+        sys.exit(1)
+
+    print("Available fields:")
+    for name in field_names:
+        print("    %s" % name)
+
+def fill_in_pdf(template_pdf, field_values, filename):
+    """
+    Write a copy of ``template_pdf`` to ``filename`` with its fields filled in.
+
+    :param template_pdf: an open ``PdfFileReader`` for the template PDF.
+    :param dict field_values: maps field names to the text to put in them.
+    :param str filename: path of the PDF file to create.
+    """
+    # Use the writer as a context manager (as the test suite does) so that it
+    # is cleaned up even if copying a page fails part-way through.
+    with PdfFileWriter(filename) as output:
+        output.have_viewer_render_fields()
+        for page_no in range(template_pdf.numPages):
+            output.addPage(template_pdf.getPage(page_no))
+            # Fill in the writer's own copy of the page, not the template's.
+            page = output.getPage(page_no)
+            output.updatePageFormFieldValues(page, field_values, read_only=True)
+        output.write()
+    print("Created '%s'" % (filename))
+
+
+def main():
+    """
+    Fill in the sample template once for each of two example employees.
+
+    Lists the template's field names, then writes one filled-in PDF per
+    employee into ``root_folder``.
+    """
+    employee_john = {
+        "employee_name": "John Hardworker",
+        "employee_id": "0123",
+        "department": "Human Resources",
+        "manager_name": "Doris Stickler",
+        "manager_id": "0072",
+    }
+    employee_cyndi = {
+        "employee_name": "Cyndi Smartworker",
+        "employee_id": "0199",
+        "department": "Engineering",
+        "manager_name": "Ida Wright",
+        "manager_id": "0051",
+    }
+
+    # The template must stay open while pages are read from it; the ``with``
+    # block closes it afterwards, including when discover_fields() exits.
+    with open(root_folder + template_name, "rb") as template_file:
+        template_pdf = PdfFileReader(template_file, strict=False)
+
+        discover_fields(template_pdf)
+        fill_in_pdf(template_pdf, employee_john, root_folder +
+            "JohnHardworker.pdf")
+        fill_in_pdf(template_pdf, employee_cyndi, root_folder +
+            "CyndiSmartworker.pdf")
+
+# Run the demo only when this file is executed as a script.
+if __name__ == "__main__":
+    main()
diff --git a/samplecode/fillable_form.odt b/samplecode/fillable_form.odt
diff --git a/samplecode/fillable_form.pdf b/samplecode/fillable_form.pdf
diff --git a/tests/fixture_data/testUpdatePageFormFieldValues/fillable_form.pdf b/tests/fixture_data/testUpdatePageFormFieldValues/fillable_form.pdf
diff --git a/tests/test_pdf.py b/tests/test_pdf.py
@@ -10,6 +10,7 @@
 import binascii
 from io import BytesIO
 import os
+from os import remove
 from os.path import abspath, basename, dirname, join, pardir
 import sys
 import tempfile
@@ -438,6 +439,75 @@ def testProperties(self):
                 "%s.%s() is not callable" % (PdfFileReader.__name__, m),
             )
 
+    def testHave_viewer_render_fields(self):
+        """
+        Checks that a document written after calling
+        PdfFileWriter.have_viewer_render_fields() has a catalog containing
+        /AcroForm with a /NeedAppearances entry.
+        """
+
+        fixture_path = join(
+            TEST_DATA_ROOT, "testUpdatePageFormFieldValues/fillable_form.pdf"
+        )
+        fd, output_path = tempfile.mkstemp()
+
+        try:
+            # Write a one-page copy of the fixture with the flag requested
+            with PdfFileReader(fixture_path) as source:
+                with PdfFileWriter(output_path) as sink:
+                    sink.have_viewer_render_fields()
+                    sink.addPage(source.getPage(0))
+                    sink.write()
+
+            # Read the copy back and inspect its catalog
+            with PdfFileReader(output_path) as written:
+                root = written._trailer["/Root"].getObject()
+                self.assertIn("/AcroForm", root)
+                self.assertIn("/NeedAppearances", root["/AcroForm"])
+        finally:
+            os.close(fd)
+            os.remove(output_path)
+
+
+    def testUpdatePageFormFieldValues(self):
+        """
+        Tests that PdfFileWriter.updatePageFormFieldValues() populates fields
+        (annotations) with corresponding values and, when called with
+        read_only=True, sets their /Ff entry to 1.
+        """
+
+        testfile_handle, testfile_name = tempfile.mkstemp()
+
+        field_values = {
+            "employee_name": "John Hardworker",
+            "employee_id": "0123",
+            "department": "Human Resources",
+            "manager_name": "Doris Stickler",
+            "manager_id": "0072"
+        }
+        try:
+            # copy fillable_form.pdf, filling in the fields along the way
+            with PdfFileReader(join(TEST_DATA_ROOT, "testUpdatePageFormFieldValues/fillable_form.pdf")) as reader:
+                with PdfFileWriter(testfile_name) as writer:
+                    writer.have_viewer_render_fields()
+                    template_page = reader.getPage(0)
+                    writer.addPage(template_page)
+                    page = writer.getPage(0)
+                    writer.updatePageFormFieldValues(page, field_values, read_only=True)
+                    writer.write()
+
+            # check the results by depleting entries from a copy of
+            # field_values until it's empty
+            field_values_sought = dict(field_values)
+            with PdfFileReader(testfile_name) as pdf:
+                for page_no in range(pdf.numPages):
+                    page = pdf.getPage(page_no)
+                    if "/Annots" not in page:
+                        continue
+                    for annotation_ref in page["/Annots"]:
+                        annotation = annotation_ref.getObject()
+                        field = annotation.get("/T")
+                        # skip annotations that are unnamed, unexpected or
+                        # already matched, rather than failing on a KeyError
+                        if not field or field not in field_values_sought:
+                            continue
+                        if (field_values_sought[field] == annotation.get("/V")
+                                and annotation.get("/Ff") == 1):
+                            field_values_sought.pop(field)
+
+            # anything left over was not filled in and flagged as expected
+            self.assertEqual(field_values_sought, {})
+        finally:
+            os.close(testfile_handle)
+            os.remove(testfile_name)
+
+
     def testAddAttachment(self):
         """
         Tests the addAttachment function for attaching a single file.
@@ -447,12 +517,12 @@ def testAddAttachment(self):
         to check for two entries per attached file.
         """
 
-        _, testfile = tempfile.mkstemp()
+        testfile_handle, testfile_name = tempfile.mkstemp()
 
         try:
             # Make PDF with attachment
             with PdfFileReader(join(TEST_DATA_ROOT, "jpeg.pdf")) as reader:
-                with PdfFileWriter(testfile) as writer:
+                with PdfFileWriter(testfile_name) as writer:
                     writer.appendPagesFromReader(reader)
                     with open(
                         join(  # pylint: disable=bad-continuation
@@ -465,7 +535,7 @@ def testAddAttachment(self):
                     writer.write()
 
             # Check for attachment entries
-            with PdfFileReader(testfile) as pdf:
+            with PdfFileReader(testfile_name) as pdf:
                 # For caching _cachedObjects data
                 pdf.numPages  # pylint: disable=pointless-statement
                 for _k, v in pdf._cachedObjects.items():
@@ -475,7 +545,8 @@ def testAddAttachment(self):
                             real = len(v["/Names"]["/EmbeddedFiles"]["/Names"])
                             self.assertEqual(2, real)
         finally:
-            os.remove(testfile)
+            os.close(testfile_handle)
+            os.remove(testfile_name)
 
     def testAttachFiles(self):
         """
@@ -487,12 +558,12 @@ def testAttachFiles(self):
         """
 
         numAttachments = 3
-        _, testfile = tempfile.mkstemp()
+        testfile_handle, testfile_name = tempfile.mkstemp()
 
         try:
             # Make PDF with attachment
             with PdfFileReader(join(TEST_DATA_ROOT, "jpeg.pdf")) as reader:
-                with PdfFileWriter(testfile) as writer:
+                with PdfFileWriter(testfile_name) as writer:
                     writer.appendPagesFromReader(reader)
 
                     writer.attachFiles(
@@ -501,7 +572,7 @@ def testAttachFiles(self):
                     writer.write()
 
             # Check for attachment entries
-            with PdfFileReader(testfile) as pdf:
+            with PdfFileReader(testfile_name) as pdf:
                 # For caching _cachedObjects data
                 pdf.numPages  # pylint: disable=pointless-statement
                 for _k, v in pdf._cachedObjects.items():
@@ -511,7 +582,8 @@ def testAttachFiles(self):
                             real = len(v["/Names"]["/EmbeddedFiles"]["/Names"])
                             self.assertEqual(numAttachments * 2, real)
         finally:
-            os.remove(testfile)
+            os.close(testfile_handle)
+            os.remove(testfile_name)
 
 
 class AddJsTestCase(unittest.TestCase):