From b289394fd887f1e3babe676643bd733057d532ff Mon Sep 17 00:00:00 2001
From: "Cimon Lucas (LCM)"
Date: Tue, 12 Nov 2024 11:46:58 +0100
Subject: [PATCH] Adding support for reading .metadata.keywords

---
 pypdf/_doc_common.py | 15 +++++++++++++++
 tests/test_reader.py |  2 ++
 2 files changed, 17 insertions(+)

diff --git a/pypdf/_doc_common.py b/pypdf/_doc_common.py
index 69789d712..ba71e144d 100644
--- a/pypdf/_doc_common.py
+++ b/pypdf/_doc_common.py
@@ -243,6 +243,21 @@ def modification_date_raw(self) -> Optional[str]:
         """
         return self.get(DI.MOD_DATE)
 
+    @property
+    def keywords(self) -> Optional[str]:
+        """
+        Read-only property accessing the document's keywords.
+
+        Returns a ``TextStringObject`` or ``None`` if keywords are not
+        specified.
+        """
+        return self._get_text(DI.KEYWORDS)
+
+    @property
+    def keywords_raw(self) -> Optional[str]:
+        """The "raw" version of keywords; can return a ``ByteStringObject``."""
+        return self.get(DI.KEYWORDS)
+
 
 class PdfDocCommon:
     """
diff --git a/tests/test_reader.py b/tests/test_reader.py
index b01dc1add..bcc8dcb39 100644
--- a/tests/test_reader.py
+++ b/tests/test_reader.py
@@ -107,6 +107,8 @@ def test_read_metadata(pdf_path, expected):
         docinfo.creation_date_raw
         docinfo.modification_date
         docinfo.modification_date_raw
+        docinfo.keywords
+        docinfo.keywords_raw
         if "/Title" in metadict:
             assert isinstance(docinfo.title, str)
             assert metadict["/Title"] == docinfo.title