diff --git a/pypdf/_writer.py b/pypdf/_writer.py index c7569e31e..88e280ee5 100644 --- a/pypdf/_writer.py +++ b/pypdf/_writer.py @@ -747,13 +747,24 @@ def append_pages_from_reader( if callable(after_page_append): after_page_append(writer_page) - def _update_text_field(self, field: DictionaryObject) -> None: + def _update_field_annotation( + self, field: DictionaryObject, anno: DictionaryObject + ) -> None: # Calculate rectangle dimensions - _rct = cast(RectangleObject, field[AA.Rect]) + _rct = cast(RectangleObject, anno[AA.Rect]) rct = RectangleObject((0, 0, _rct[2] - _rct[0], _rct[3] - _rct[1])) # Extract font information - da = cast(str, field[AA.DA]) + da = anno.get_inherited( + AA.DA, + cast(DictionaryObject, self.root_object[CatalogDictionary.ACRO_FORM]).get( + AA.DA, None + ), + ) + if da is None: + da = TextStringObject("/Helv 0 Tf 0 g") + else: + da = da.get_object() font_properties = da.replace("\n", " ").replace("\r", " ").split(" ") font_properties = [x for x in font_properties if x != ""] font_name = font_properties[font_properties.index("Tf") - 2] @@ -767,19 +778,27 @@ def _update_text_field(self, field: DictionaryObject) -> None: # Retrieve font information from local DR ... dr: Any = cast( DictionaryObject, - cast(DictionaryObject, field.get("/DR", DictionaryObject())).get_object(), + cast( + DictionaryObject, + anno.get_inherited( + "/DR", + cast( + DictionaryObject, self.root_object[CatalogDictionary.ACRO_FORM] + ).get("/DR", DictionaryObject()), + ), + ).get_object(), ) dr = dr.get("/Font", DictionaryObject()).get_object() if font_name not in dr: # ...or AcroForm dictionary dr = cast( Dict[Any, Any], - cast(DictionaryObject, self._root_object["/AcroForm"]).get("/DR", {}), + cast( + DictionaryObject, self.root_object[CatalogDictionary.ACRO_FORM] + ).get("/DR", {}), ) - if isinstance(dr, IndirectObject): # pragma: no cover - dr = dr.get_object() - dr = dr.get("/Font", DictionaryObject()).get_object() - font_res = dr.get(font_name) + dr = dr.get_object().get("/Font", DictionaryObject()).get_object() + font_res = dr.get(font_name, None) if font_res is not None: font_res = cast(DictionaryObject, font_res.get_object()) font_subtype, _, font_encoding, font_map = build_char_map_from_dict( @@ -806,7 +825,7 @@ def _update_text_field(self, field: DictionaryObject) -> None: # Retrieve field text and selected values field_flags = field.get(FA.Ff, 0) if field.get(FA.FT, "/Tx") == "/Ch" and field_flags & FA.FfBits.Combo == 0: - txt = "\n".join(field.get(FA.Opt, {})) + txt = "\n".join(anno.get_inherited(FA.Opt, [])) sel = field.get("/V", []) if not isinstance(sel, list): sel = [sel] @@ -822,7 +841,7 @@ def _update_text_field(self, field: DictionaryObject) -> None: # may be improved but can not find how get fill working => replaced with lined box ap_stream += ( f"1 {y_offset - (line_number * font_height * 1.4) - 1} {rct.width - 2} {font_height + 2} re\n" - f"0.5 0.5 0.5 rg s\n{field[AA.DA]}\n" + f"0.5 0.5 0.5 rg s\n{da}\n" ).encode() if line_number == 0: ap_stream += f"2 {y_offset} Td\n".encode() @@ -862,16 +881,16 @@ def _update_text_field(self, field: DictionaryObject) -> None: ) } ) - if AA.AP not in field: - field[NameObject(AA.AP)] = DictionaryObject( + if AA.AP not in anno: + anno[NameObject(AA.AP)] = DictionaryObject( {NameObject("/N"): self._add_object(dct)} ) - elif "/N" not in cast(DictionaryObject, field[AA.AP]): - cast(DictionaryObject, field[NameObject(AA.AP)])[ + elif "/N" not in cast(DictionaryObject, anno[AA.AP]): + cast(DictionaryObject, anno[NameObject(AA.AP)])[ NameObject("/N") ] = self._add_object(dct) else: # [/AP][/N] exists - n = field[AA.AP]["/N"].indirect_reference.idnum # type: ignore + n = anno[AA.AP]["/N"].indirect_reference.idnum # type: ignore self._objects[n - 1] = dct dct.indirect_reference = IndirectObject(n, 0, self) @@ -906,65 +925,51 @@ def update_page_form_field_values( raise PyPdfError("No /Fields dictionary in Pdf in PdfWriter Object") if isinstance(auto_regenerate, bool): self.set_need_appearances_writer(auto_regenerate) - # Iterate through pages, update field values if PG.ANNOTS not in page: logger_warning("No fields to update on this page", __name__) return - # /Helvetica is just in case of but this is normally insufficient as we miss the font resource - default_da = af.get( - InteractiveFormDictEntries.DA, TextStringObject("/Helvetica 0 Tf 0 g") - ) for writer_annot in page[PG.ANNOTS]: # type: ignore writer_annot = cast(DictionaryObject, writer_annot.get_object()) - # retrieve parent field values, if present - writer_parent_annot = writer_annot.get( - PG.PARENT, DictionaryObject() - ).get_object() + if writer_annot.get("/Subtype", "") != "/Widget": + continue + if "/FT" in writer_annot and "/T" in writer_annot: + writer_parent_annot = writer_annot + else: + writer_parent_annot = writer_annot.get( + PG.PARENT, DictionaryObject() + ).get_object() + for field, value in fields.items(): - if ( - writer_annot.get(FA.T) == field - or self._get_qualified_field_name(writer_annot) == field + if not ( + self._get_qualified_field_name(writer_parent_annot) == field + or writer_parent_annot.get("/T", None) == field ): - if isinstance(value, list): - lst = ArrayObject(TextStringObject(v) for v in value) - writer_annot[NameObject(FA.V)] = lst - else: - writer_annot[NameObject(FA.V)] = TextStringObject(value) - if writer_annot.get(FA.FT) in ("/Btn"): - # case of Checkbox button (no /FT found in Radio widgets - writer_annot[NameObject(AA.AS)] = NameObject(value) - elif ( - writer_annot.get(FA.FT) == "/Tx" - or writer_annot.get(FA.FT) == "/Ch" - ): - # textbox - if AA.DA not in writer_annot: - f = writer_annot - da = default_da - while AA.DA not in f: - f = f.get("/Parent") - if f is None: - break - f = f.get_object() - if AA.DA in f: - da = f[AA.DA] - writer_annot[NameObject(AA.DA)] = da - self._update_text_field(writer_annot) - elif writer_annot.get(FA.FT) == "/Sig": - # signature - logger_warning("Signature forms not implemented yet", __name__) - if flags: - writer_annot[NameObject(FA.Ff)] = NumberObject(flags) + continue + if flags: + writer_annot[NameObject(FA.Ff)] = NumberObject(flags) + if isinstance(value, list): + lst = ArrayObject(TextStringObject(v) for v in value) + writer_parent_annot[NameObject(FA.V)] = lst + else: + writer_parent_annot[NameObject(FA.V)] = TextStringObject(value) + if writer_parent_annot.get(FA.FT) in ("/Btn"): + # case of Checkbox button (no /FT found in Radio widgets + v = NameObject(value) + if v not in writer_annot[NameObject(AA.AP)][NameObject("/N")]: + v = NameObject("/Off") + # other cases will be updated through the for loop + writer_annot[NameObject(AA.AS)] = v elif ( - writer_parent_annot.get(FA.T) == field - or self._get_qualified_field_name(writer_parent_annot) == field + writer_parent_annot.get(FA.FT) == "/Tx" + or writer_parent_annot.get(FA.FT) == "/Ch" ): - writer_parent_annot[NameObject(FA.V)] = TextStringObject(value) - for k in writer_parent_annot[NameObject(FA.Kids)]: - k = k.get_object() - k[NameObject(AA.AS)] = NameObject( - value if value in k[AA.AP]["/N"] else "/Off" - ) + # textbox + self._update_field_annotation(writer_parent_annot, writer_annot) + elif ( + writer_annot.get(FA.FT) == "/Sig" + ): # deprecated # not implemented yet + # signature + logger_warning("Signature forms not implemented yet", __name__) def reattach_fields( self, page: Optional[PageObject] = None @@ -2328,7 +2333,7 @@ def merge( Raises: TypeError: The pages attribute is not configured properly """ - if isinstance(fileobj, PdfReader): + if isinstance(fileobj, PdfDocCommon): reader = fileobj else: stream, encryption_obj = self._create_stream(fileobj) diff --git a/pypdf/generic/_data_structures.py b/pypdf/generic/_data_structures.py index 838336a16..d7d9facc9 100644 --- a/pypdf/generic/_data_structures.py +++ b/pypdf/generic/_data_structures.py @@ -367,6 +367,30 @@ def _clone( def raw_get(self, key: Any) -> Any: return dict.__getitem__(self, key) + def get_inherited(self, key: str, default: Any = None) -> Any: + """ + Returns the value of a key or from the parent if not found. + If not found returns default. + + Args: + key: string identifying the field to return + + default: default value to return + + Returns: + Current key or inherited one, otherwise default value. + """ + if key in self: + return self[key] + try: + if "/Parent" not in self: + return default + raise KeyError("not present") + except KeyError: + return cast("DictionaryObject", self["/Parent"].get_object()).get_inherited( + key, default + ) + def __setitem__(self, key: Any, value: Any) -> Any: if not isinstance(key, PdfObject): raise ValueError("key must be PdfObject") diff --git a/tests/test_writer.py b/tests/test_writer.py index c4ecd5fec..baf0134e2 100644 --- a/tests/test_writer.py +++ b/tests/test_writer.py @@ -1499,9 +1499,105 @@ def test_update_form_fields(tmp_path): assert all(x in flds["RadioGroup1"]["/_States_"] for x in ["/1", "/2", "/3"]) assert all(x in flds["Liste1"]["/_States_"] for x in ["Liste1", "Liste2", "Liste3"]) + writer = PdfWriter(clone_from=RESOURCE_ROOT / "FormTestFromOo.pdf") + writer.add_annotation( + page_number=0, + annotation=Link(target_page_index=1, rect=RectangleObject([0, 0, 100, 100])), + ) + del writer.root_object["/AcroForm"]["/Fields"][1].get_object()["/DA"] + del writer.root_object["/AcroForm"]["/Fields"][1].get_object()["/DR"]["/Font"] + writer.update_page_form_field_values( + writer.pages[0], + {"Text1": "my Text1", "Text2": "ligne1\nligne2\nligne3"}, + auto_regenerate=False, + ) + assert b"/Helv " in writer.pages[0]["/Annots"][1]["/AP"]["/N"].get_data() + Path(write_data_here).unlink() +@pytest.mark.enable_socket() +def test_update_form_fields2(): + myFiles = { + "test1": { + "name": "Test1 Form", + "url": "https://github.com/py-pdf/pypdf/files/14817365/test1.pdf", + "path": "iss2234a.pdf", + "usage": { + "fields": { + "First Name": "Reed", + "Middle Name": "R", + "MM": "04", + "DD": "21", + "YY": "24", + "Initial": "RRG", + # "I DO NOT Agree": null, + # "Last Name": null + }, + }, + }, + "test2": { + "name": "Test2 Form", + "url": "https://github.com/py-pdf/pypdf/files/14817366/test2.pdf", + "path": "iss2234b.pdf", + "usage": { + "fields": { + "p2 First Name": "Joe", + "p2 Middle Name": "S", + "p2 MM": "03", + "p2 DD": "31", + "p2 YY": "24", + "Initial": "JSS", + # "p2 I DO NOT Agree": "null", + "p2 Last Name": "Smith", + "p3 First Name": "John", + "p3 Middle Name": "R", + "p3 MM": "01", + "p3 DD": "25", + "p3 YY": "21", + }, + }, + }, + } + merger = PdfWriter() + + for file in myFiles: + reader = PdfReader( + BytesIO(get_data_from_url(myFiles[file]["url"], name=myFiles[file]["path"])) + ) + reader.add_form_topname(file) + writer = PdfWriter(clone_from=reader) + + for page in writer.pages: + writer.update_page_form_field_values( + page, myFiles[file]["usage"]["fields"], auto_regenerate=True + ) + merger.append(writer) + assert merger.get_form_text_fields(True) == { + "test1.First Name": "Reed", + "test1.Middle Name": "R", + "test1.MM": "04", + "test1.DD": "21", + "test1.YY": "24", + "test1.Initial": "RRG", + "test1.I DO NOT Agree": None, + "test1.Last Name": None, + "test2.p2 First Name": "Joe", + "test2.p2 Middle Name": "S", + "test2.p2 MM": "03", + "test2.p2 DD": "31", + "test2.p2 YY": "24", + "test2.Initial": "JSS", + "test2.p2 I DO NOT Agree": None, + "test2.p2 Last Name": "Smith", + "test2.p3 First Name": "John", + "test2.p3 Middle Name": "R", + "test2.p3 MM": "01", + "test2.p3 DD": "25", + "test2.p3 YY": "21", + } + + @pytest.mark.enable_socket() def test_iss1862(): # The file here has "/B" entry to define the font in a object below the page