Skip to content

Commit

Permalink
BUG: Fix fields update where annotations are kids of field (#2570)
Browse files Browse the repository at this point in the history
Closes #2234.
Closes #2512.

Replaces #2333.
  • Loading branch information
pubpub-zz authored Apr 2, 2024
1 parent 987cfc9 commit 50bb7f2
Show file tree
Hide file tree
Showing 3 changed files with 192 additions and 67 deletions.
139 changes: 72 additions & 67 deletions pypdf/_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -747,13 +747,24 @@ def append_pages_from_reader(
if callable(after_page_append):
after_page_append(writer_page)

def _update_text_field(self, field: DictionaryObject) -> None:
def _update_field_annotation(
self, field: DictionaryObject, anno: DictionaryObject
) -> None:
# Calculate rectangle dimensions
_rct = cast(RectangleObject, field[AA.Rect])
_rct = cast(RectangleObject, anno[AA.Rect])
rct = RectangleObject((0, 0, _rct[2] - _rct[0], _rct[3] - _rct[1]))

# Extract font information
da = cast(str, field[AA.DA])
da = anno.get_inherited(
AA.DA,
cast(DictionaryObject, self.root_object[CatalogDictionary.ACRO_FORM]).get(
AA.DA, None
),
)
if da is None:
da = TextStringObject("/Helv 0 Tf 0 g")
else:
da = da.get_object()
font_properties = da.replace("\n", " ").replace("\r", " ").split(" ")
font_properties = [x for x in font_properties if x != ""]
font_name = font_properties[font_properties.index("Tf") - 2]
Expand All @@ -767,19 +778,27 @@ def _update_text_field(self, field: DictionaryObject) -> None:
# Retrieve font information from local DR ...
dr: Any = cast(
DictionaryObject,
cast(DictionaryObject, field.get("/DR", DictionaryObject())).get_object(),
cast(
DictionaryObject,
anno.get_inherited(
"/DR",
cast(
DictionaryObject, self.root_object[CatalogDictionary.ACRO_FORM]
).get("/DR", DictionaryObject()),
),
).get_object(),
)
dr = dr.get("/Font", DictionaryObject()).get_object()
if font_name not in dr:
# ...or AcroForm dictionary
dr = cast(
Dict[Any, Any],
cast(DictionaryObject, self._root_object["/AcroForm"]).get("/DR", {}),
cast(
DictionaryObject, self.root_object[CatalogDictionary.ACRO_FORM]
).get("/DR", {}),
)
if isinstance(dr, IndirectObject): # pragma: no cover
dr = dr.get_object()
dr = dr.get("/Font", DictionaryObject()).get_object()
font_res = dr.get(font_name)
dr = dr.get_object().get("/Font", DictionaryObject()).get_object()
font_res = dr.get(font_name, None)
if font_res is not None:
font_res = cast(DictionaryObject, font_res.get_object())
font_subtype, _, font_encoding, font_map = build_char_map_from_dict(
Expand All @@ -806,7 +825,7 @@ def _update_text_field(self, field: DictionaryObject) -> None:
# Retrieve field text and selected values
field_flags = field.get(FA.Ff, 0)
if field.get(FA.FT, "/Tx") == "/Ch" and field_flags & FA.FfBits.Combo == 0:
txt = "\n".join(field.get(FA.Opt, {}))
txt = "\n".join(anno.get_inherited(FA.Opt, []))
sel = field.get("/V", [])
if not isinstance(sel, list):
sel = [sel]
Expand All @@ -822,7 +841,7 @@ def _update_text_field(self, field: DictionaryObject) -> None:
# could be improved, but we could not find how to get the fill working => replaced with a lined box
ap_stream += (
f"1 {y_offset - (line_number * font_height * 1.4) - 1} {rct.width - 2} {font_height + 2} re\n"
f"0.5 0.5 0.5 rg s\n{field[AA.DA]}\n"
f"0.5 0.5 0.5 rg s\n{da}\n"
).encode()
if line_number == 0:
ap_stream += f"2 {y_offset} Td\n".encode()
Expand Down Expand Up @@ -862,16 +881,16 @@ def _update_text_field(self, field: DictionaryObject) -> None:
)
}
)
if AA.AP not in field:
field[NameObject(AA.AP)] = DictionaryObject(
if AA.AP not in anno:
anno[NameObject(AA.AP)] = DictionaryObject(
{NameObject("/N"): self._add_object(dct)}
)
elif "/N" not in cast(DictionaryObject, field[AA.AP]):
cast(DictionaryObject, field[NameObject(AA.AP)])[
elif "/N" not in cast(DictionaryObject, anno[AA.AP]):
cast(DictionaryObject, anno[NameObject(AA.AP)])[
NameObject("/N")
] = self._add_object(dct)
else: # [/AP][/N] exists
n = field[AA.AP]["/N"].indirect_reference.idnum # type: ignore
n = anno[AA.AP]["/N"].indirect_reference.idnum # type: ignore
self._objects[n - 1] = dct
dct.indirect_reference = IndirectObject(n, 0, self)

Expand Down Expand Up @@ -906,65 +925,51 @@ def update_page_form_field_values(
raise PyPdfError("No /Fields dictionary in Pdf in PdfWriter Object")
if isinstance(auto_regenerate, bool):
self.set_need_appearances_writer(auto_regenerate)
# Iterate through pages, update field values
if PG.ANNOTS not in page:
logger_warning("No fields to update on this page", __name__)
return
# /Helvetica is just in case of but this is normally insufficient as we miss the font resource
default_da = af.get(
InteractiveFormDictEntries.DA, TextStringObject("/Helvetica 0 Tf 0 g")
)
for writer_annot in page[PG.ANNOTS]: # type: ignore
writer_annot = cast(DictionaryObject, writer_annot.get_object())
# retrieve parent field values, if present
writer_parent_annot = writer_annot.get(
PG.PARENT, DictionaryObject()
).get_object()
if writer_annot.get("/Subtype", "") != "/Widget":
continue
if "/FT" in writer_annot and "/T" in writer_annot:
writer_parent_annot = writer_annot
else:
writer_parent_annot = writer_annot.get(
PG.PARENT, DictionaryObject()
).get_object()

for field, value in fields.items():
if (
writer_annot.get(FA.T) == field
or self._get_qualified_field_name(writer_annot) == field
if not (
self._get_qualified_field_name(writer_parent_annot) == field
or writer_parent_annot.get("/T", None) == field
):
if isinstance(value, list):
lst = ArrayObject(TextStringObject(v) for v in value)
writer_annot[NameObject(FA.V)] = lst
else:
writer_annot[NameObject(FA.V)] = TextStringObject(value)
if writer_annot.get(FA.FT) in ("/Btn"):
# case of Checkbox button (no /FT found in Radio widgets)
writer_annot[NameObject(AA.AS)] = NameObject(value)
elif (
writer_annot.get(FA.FT) == "/Tx"
or writer_annot.get(FA.FT) == "/Ch"
):
# textbox
if AA.DA not in writer_annot:
f = writer_annot
da = default_da
while AA.DA not in f:
f = f.get("/Parent")
if f is None:
break
f = f.get_object()
if AA.DA in f:
da = f[AA.DA]
writer_annot[NameObject(AA.DA)] = da
self._update_text_field(writer_annot)
elif writer_annot.get(FA.FT) == "/Sig":
# signature
logger_warning("Signature forms not implemented yet", __name__)
if flags:
writer_annot[NameObject(FA.Ff)] = NumberObject(flags)
continue
if flags:
writer_annot[NameObject(FA.Ff)] = NumberObject(flags)
if isinstance(value, list):
lst = ArrayObject(TextStringObject(v) for v in value)
writer_parent_annot[NameObject(FA.V)] = lst
else:
writer_parent_annot[NameObject(FA.V)] = TextStringObject(value)
if writer_parent_annot.get(FA.FT) in ("/Btn"):
# case of Checkbox button (no /FT found in Radio widgets)
v = NameObject(value)
if v not in writer_annot[NameObject(AA.AP)][NameObject("/N")]:
v = NameObject("/Off")
# other cases will be updated through the for loop
writer_annot[NameObject(AA.AS)] = v
elif (
writer_parent_annot.get(FA.T) == field
or self._get_qualified_field_name(writer_parent_annot) == field
writer_parent_annot.get(FA.FT) == "/Tx"
or writer_parent_annot.get(FA.FT) == "/Ch"
):
writer_parent_annot[NameObject(FA.V)] = TextStringObject(value)
for k in writer_parent_annot[NameObject(FA.Kids)]:
k = k.get_object()
k[NameObject(AA.AS)] = NameObject(
value if value in k[AA.AP]["/N"] else "/Off"
)
# textbox
self._update_field_annotation(writer_parent_annot, writer_annot)
elif (
writer_annot.get(FA.FT) == "/Sig"
): # deprecated # not implemented yet
# signature
logger_warning("Signature forms not implemented yet", __name__)

def reattach_fields(
self, page: Optional[PageObject] = None
Expand Down Expand Up @@ -2328,7 +2333,7 @@ def merge(
Raises:
TypeError: The pages attribute is not configured properly
"""
if isinstance(fileobj, PdfReader):
if isinstance(fileobj, PdfDocCommon):
reader = fileobj
else:
stream, encryption_obj = self._create_stream(fileobj)
Expand Down
24 changes: 24 additions & 0 deletions pypdf/generic/_data_structures.py
Original file line number Diff line number Diff line change
Expand Up @@ -367,6 +367,30 @@ def _clone(
def raw_get(self, key: Any) -> Any:
    """
    Return the entry stored under ``key``.

    The lookup calls ``dict.__getitem__`` directly instead of going
    through ``self[key]``.

    Args:
        key: key of the entry to fetch

    Returns:
        The value stored in the underlying ``dict`` for ``key``.

    Raises:
        KeyError: if ``key`` is not present.
    """
    stored = dict.__getitem__(self, key)
    return stored

def get_inherited(self, key: str, default: Any = None) -> Any:
    """
    Return the value of a key, taken from an ancestor if not found locally.

    The lookup starts with this dictionary and then follows the
    ``/Parent`` entries upwards until a dictionary holding ``key`` is
    found.

    Args:
        key: string identifying the field to return
        default: value to return when neither this dictionary nor any of
            its ancestors holds ``key``

    Returns:
        Current key or inherited one, otherwise default value.
    """
    node: Any = self
    # Keep references to the dictionaries already inspected so that a
    # malformed, cyclic /Parent chain ends the search instead of looping.
    seen: list = []
    while True:
        if key in node:
            return node[key]
        if "/Parent" not in node:
            return default
        seen.append(node)
        node = cast("DictionaryObject", node["/Parent"].get_object())
        if any(node is visited for visited in seen):
            return default

def __setitem__(self, key: Any, value: Any) -> Any:
if not isinstance(key, PdfObject):
raise ValueError("key must be PdfObject")
Expand Down
96 changes: 96 additions & 0 deletions tests/test_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -1499,9 +1499,105 @@ def test_update_form_fields(tmp_path):
assert all(x in flds["RadioGroup1"]["/_States_"] for x in ["/1", "/2", "/3"])
assert all(x in flds["Liste1"]["/_States_"] for x in ["Liste1", "Liste2", "Liste3"])

writer = PdfWriter(clone_from=RESOURCE_ROOT / "FormTestFromOo.pdf")
writer.add_annotation(
page_number=0,
annotation=Link(target_page_index=1, rect=RectangleObject([0, 0, 100, 100])),
)
del writer.root_object["/AcroForm"]["/Fields"][1].get_object()["/DA"]
del writer.root_object["/AcroForm"]["/Fields"][1].get_object()["/DR"]["/Font"]
writer.update_page_form_field_values(
writer.pages[0],
{"Text1": "my Text1", "Text2": "ligne1\nligne2\nligne3"},
auto_regenerate=False,
)
assert b"/Helv " in writer.pages[0]["/Annots"][1]["/AP"]["/N"].get_data()

Path(write_data_here).unlink()


@pytest.mark.enable_socket()
def test_update_form_fields2():
    """
    Fill two downloaded forms, merge them and check the resulting text fields.

    Each form gets a top-level name (the key of ``form_specs``) before it is
    cloned into a writer, filled page by page and appended to the merger.
    """
    form_specs = {
        "test1": {
            "name": "Test1 Form",
            "url": "https://github.com/py-pdf/pypdf/files/14817365/test1.pdf",
            "path": "iss2234a.pdf",
            "usage": {
                # "I DO NOT Agree" and "Last Name" are intentionally not set
                "fields": {
                    "First Name": "Reed",
                    "Middle Name": "R",
                    "MM": "04",
                    "DD": "21",
                    "YY": "24",
                    "Initial": "RRG",
                },
            },
        },
        "test2": {
            "name": "Test2 Form",
            "url": "https://github.com/py-pdf/pypdf/files/14817366/test2.pdf",
            "path": "iss2234b.pdf",
            "usage": {
                # "p2 I DO NOT Agree" is intentionally not set
                "fields": {
                    "p2 First Name": "Joe",
                    "p2 Middle Name": "S",
                    "p2 MM": "03",
                    "p2 DD": "31",
                    "p2 YY": "24",
                    "Initial": "JSS",
                    "p2 Last Name": "Smith",
                    "p3 First Name": "John",
                    "p3 Middle Name": "R",
                    "p3 MM": "01",
                    "p3 DD": "25",
                    "p3 YY": "21",
                },
            },
        },
    }
    merger = PdfWriter()

    for topname, spec in form_specs.items():
        pdf_bytes = get_data_from_url(spec["url"], name=spec["path"])
        reader = PdfReader(BytesIO(pdf_bytes))
        reader.add_form_topname(topname)
        writer = PdfWriter(clone_from=reader)

        values = spec["usage"]["fields"]
        for page in writer.pages:
            writer.update_page_form_field_values(
                page, values, auto_regenerate=True
            )
        merger.append(writer)

    expected = {
        "test1.First Name": "Reed",
        "test1.Middle Name": "R",
        "test1.MM": "04",
        "test1.DD": "21",
        "test1.YY": "24",
        "test1.Initial": "RRG",
        "test1.I DO NOT Agree": None,
        "test1.Last Name": None,
        "test2.p2 First Name": "Joe",
        "test2.p2 Middle Name": "S",
        "test2.p2 MM": "03",
        "test2.p2 DD": "31",
        "test2.p2 YY": "24",
        "test2.Initial": "JSS",
        "test2.p2 I DO NOT Agree": None,
        "test2.p2 Last Name": "Smith",
        "test2.p3 First Name": "John",
        "test2.p3 Middle Name": "R",
        "test2.p3 MM": "01",
        "test2.p3 DD": "25",
        "test2.p3 YY": "21",
    }
    assert merger.get_form_text_fields(True) == expected


@pytest.mark.enable_socket()
def test_iss1862():
# The file here has "/B" entry to define the font in a object below the page
Expand Down

0 comments on commit 50bb7f2

Please sign in to comment.