From d7cd2c8dc50a61411451bb9d870c99bb85d24781 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20H=C3=A4rtl?= Date: Fri, 8 Jul 2022 19:39:54 +0200 Subject: [PATCH] Issue #291 Fix updateInfo() to accept any updateable data --- src/InfoFile.php | 169 ++++++++++++++++++++++++++++++----- src/Pdf.php | 2 +- tests/InfoFileTest.php | 88 +++++++++++++++++- tests/PdfTest.php | 24 +++++ tests/files/InfoFileTest.txt | 21 +++++ 5 files changed, 281 insertions(+), 23 deletions(-) diff --git a/src/InfoFile.php b/src/InfoFile.php index d07b86c..0325dfa 100644 --- a/src/InfoFile.php +++ b/src/InfoFile.php @@ -1,6 +1,7 @@ value + * @param array|InfoFields $data the data in this format: + * ``` + * [ + * 'Info' => [ + * 'Title' => '...', + * 'Author' => '...', + * 'Subject' => '...', + * 'Keywords' => '...', + * 'Creator' => '...', + * 'Producer' => '...', + * 'CreationDate' => '...', + * 'ModDate' => '...', + * 'Trapped' => '...', + * ], + * 'Bookmark' => [ + * [ + * 'Title' => '...', + * 'Level' => ..., + * 'PageNumber' => ..., + * ], + * ], + * 'PageMedia' => [ ... ], + * 'PageLabel' => [ ... ], + * // ... + * ] + * ``` + * This is the same format as the InfoFields object that is returned + * by `getData()` if you cast it to an array. You can also pass such an + * (optionally modified) object as input. Some fields like 'NumberOfPages' + * or 'PdfID0' are ignored as those are not part of the PDF's metadata. + * All array elements are optional. * @param string|null $suffix the optional suffix for the tmp file - * @param string|null $suffix the optional prefix for the tmp file. If null 'php_tmpfile_' is used. - * @param string|null $directory directory where the file should be created. Autodetected if not provided. - * @param string|null $encoding of the data. Default is 'UTF-8'. + * @param string|null $prefix the optional prefix for the tmp file. If null + * 'php_pdftk_info_' is used. + * @param string|null $directory directory where the file should be + * created. 
Autodetected if not provided. + * @param string|null $encoding of the data. Default is 'UTF-8'. If the + * data has another encoding it will be converted to UTF-8. This requires + * the mbstring extension to be installed. + * @throws Exception on invalid data format or if mbstring extension is + * missing and data must be converted */ public function __construct($data, $suffix = null, $prefix = null, $directory = null, $encoding = 'UTF-8') { + if ($suffix === null) { + $suffix = '.txt'; + } + if ($prefix === null) { + $prefix = 'php_pdftk_info_'; + } if ($directory === null) { $directory = self::getTempDir(); } - $suffix = '.txt'; - $prefix = 'php_pdftk_info_'; - $this->_fileName = tempnam($directory, $prefix); - $newName = $this->_fileName . $suffix; - rename($this->_fileName, $newName); + $tempName = tempnam($directory, $prefix); + $newName = $tempName . $suffix; + rename($tempName, $newName); $this->_fileName = $newName; - if (!function_exists('mb_convert_encoding')) { - throw new \Exception('MB extension required.'); + if ($encoding !== 'UTF-8' && !function_exists('mb_convert_encoding')) { + throw new Exception('mbstring extension required.'); } $fields = ''; - foreach ($data as $key => $value) { - // Always convert to UTF-8 - if ($encoding !== 'UTF-8' && function_exists('mb_convert_encoding')) { - $value = mb_convert_encoding($value, 'UTF-8', $encoding); - $key = mb_convert_encoding($key, 'UTF-8', $encoding); - $value = defined('ENT_XML1') ? htmlspecialchars($key, ENT_XML1, 'UTF-8') : htmlspecialchars($key); - $key = defined('ENT_XML1') ? 
htmlspecialchars($value, ENT_XML1, 'UTF-8') : htmlspecialchars($value); - } - $fields .= "InfoBegin\nInfoKey: $key\nInfoValue: $value\n"; + $normalizedData = self::normalize($data); + + foreach ($normalizedData as $block => $items) { + $fields .= self::renderBlock($block, $items, $encoding); } // Use fwrite, since file_put_contents() messes around with character encoding @@ -58,4 +112,79 @@ public function __construct($data, $suffix = null, $prefix = null, $directory = fwrite($fp, $fields); fclose($fp); } + + /** + * Normalize the input data + * + * This also converts data from the legacy format (<0.13.0) to the new + * input format described in the constructor. + * + * @param array $data the data to normalize + * @return array a normalized array in the format described in the constructor + */ + private static function normalize($data) + { + $normalized = array(); + foreach ($data as $key => $value) { + if (in_array($key, self::$documentInfoFields)) { + $normalized['Info'][$key] = $value; + } elseif (is_array($value)) { + if (!isset($normalized[$key])) { + $normalized[$key] = array(); + } + $normalized[$key] = array_merge($normalized[$key], $value); + } + } + return $normalized; + } + + /** + * Render a set of block fields + * + * @param string $block like 'Info', 'Bookmark', etc. 
+ * @param array $items the field items to render + * @param string $encoding the encoding of the item data + * @return string the rendered fields + */ + private static function renderBlock($block, $items, $encoding) + { + $fields = ''; + foreach ($items as $key => $value) { + if ($block === 'Info') { + $fields .= self::renderField($block, $key, $value, $encoding, true); + } else { + $fields .= "{$block}Begin\n"; + foreach ($value as $subKey => $subValue) { + $fields .= self::renderField($block, $subKey, $subValue, $encoding, false); + } + } + } + return $fields; + } + + /** + * Render a field in a given input block + * + * @param string $prefix the prefix to use for the field + * @param string $key the field key + * @param string $value the field value + * @param string $encoding the encoding of key and value + * @param bool $isInfo whether it's an 'Info' field + * @return string the rendered field + */ + private static function renderField($prefix, $key, $value, $encoding, $isInfo) + { + if ($encoding !== 'UTF-8') { + $value = mb_convert_encoding($value, 'UTF-8', $encoding); + $key = mb_convert_encoding($key, 'UTF-8', $encoding); + $value = defined('ENT_XML1') ? htmlspecialchars($value, ENT_XML1, 'UTF-8') : htmlspecialchars($value); + $key = defined('ENT_XML1') ? 
htmlspecialchars($key, ENT_XML1, 'UTF-8') : htmlspecialchars($key); + } + if ($isInfo) { + return "InfoBegin\nInfoKey: $key\nInfoValue: $value\n"; + } else { + return "{$prefix}{$key}: $value\n"; + } + + } } diff --git a/src/Pdf.php b/src/Pdf.php index 5a83fc8..f50ed8a 100644 --- a/src/Pdf.php +++ b/src/Pdf.php @@ -321,7 +321,7 @@ public function fillForm($data, $encoding = 'UTF-8', $dropXfa = true, $format = public function updateInfo($data, $encoding = 'UTF-8') { $this->constrainSingleFile(); - if (is_array($data)) { + if (is_array($data) || $data instanceof InfoFields) { $data = new InfoFile($data, null, null, $this->tempDir, $encoding); } $this->getCommand() diff --git a/tests/InfoFileTest.php b/tests/InfoFileTest.php index b9236bc..27872af 100644 --- a/tests/InfoFileTest.php +++ b/tests/InfoFileTest.php @@ -7,16 +7,100 @@ class InfoFileTest extends TestCase { public function testInfoFileCreation() + { + $data = array( + 'Info' => array( + 'Creator' => 'php-pdftk', + 'Subject' => 'öäüÖÄÜ', + 'Title' => 'Title x', + ), + 'PdfID0' => '8b93f76a0b28b720d0dee9a6eb2a780a', + 'PdfID1' => '8b93f76a0b28b720d0dee9a6eb2a780a', + 'NumberOfPages' => '5', + 'Bookmark' => array( + array( + 'Title' => 'Title 1', + 'Level' => 1, + 'PageNumber' => 1, + ), + array( + 'Title' => 'Title 2', + 'Level' => 2, + 'PageNumber' => 10, + ), + ), + 'PageMedia' => array( + array( + 'Number' => '1', + 'Rotation' => '0', + 'Rect' => '0 0 595 842', + 'Dimensions' => '595 842' + ), + ), + 'PageLabel' => array( + array( + 'NewIndex' => '1', + 'Start' => '1', + 'Prefix' => 'some name 1', + 'NumStyle' => 'NoNumber', + ), + ), + ); + + $oInfoFile = new InfoFile($data, null, null, __DIR__); + $sInfoFilename = $oInfoFile->getFileName(); + + $this->assertFileExists($sInfoFilename); + $this->assertFileEquals(__DIR__ . 
'/files/InfoFileTest.txt', $sInfoFilename); + } + + public function testInfoFileCreationFromLegacyFormat() { $data = array( 'Creator' => 'php-pdftk', - 'Subject' => "öäüÖÄÜ", + 'Subject' => 'öäüÖÄÜ', + 'NumberOfPages' => 17, + 'PdfID0' => '8b93f76a0b28b720d0dee9a6eb2a780a', + 'PdfID1' => '8b93f76a0b28b720d0dee9a6eb2a780a', + 'NumberOfPages' => '5', + // Mix-in new format + 'Info' => array( + 'Title' => 'Title x', + ), + 'Bookmark' => array( + array( + 'Title' => 'Title 1', + 'Level' => 1, + 'PageNumber' => 1, + ), + array( + 'Title' => 'Title 2', + 'Level' => 2, + 'PageNumber' => 10, + ), + ), + 'PageMedia' => array( + array( + 'Number' => '1', + 'Rotation' => '0', + 'Rect' => '0 0 595 842', + 'Dimensions' => '595 842' + ), + ), + 'PageLabel' => array( + array( + 'NewIndex' => '1', + 'Start' => '1', + 'Prefix' => 'some name 1', + 'NumStyle' => 'NoNumber', + ), + ), ); $oInfoFile = new InfoFile($data, null, null, __DIR__); $sInfoFilename = $oInfoFile->getFileName(); $this->assertFileExists($sInfoFilename); - $this->assertFileEquals(__DIR__ . "/files/InfoFileTest.txt", $sInfoFilename); + $this->assertFileEquals(__DIR__ . 
'/files/InfoFileTest.txt', $sInfoFilename); } } diff --git a/tests/PdfTest.php b/tests/PdfTest.php index ce99913..28f515b 100644 --- a/tests/PdfTest.php +++ b/tests/PdfTest.php @@ -606,6 +606,30 @@ public function testCanGetData() $this->assertEquals($this->formDataArray, (array)$data); } + public function testCanUpdateInfoFromDumpedData() + { + $document1 = $this->getDocument1(); + $file = $this->getOutFile(); + + $pdf = new Pdf($document1); + $data = $pdf->getData(); + $this->assertInstanceOf('\mikehaertl\pdftk\InfoFields', $data); + $this->assertEquals($this->formDataArray, (array)$data); + + $data['Info']['Creator'] = 'php-pdftk'; + + $pdf = new Pdf($document1); + $this->assertInstanceOf('mikehaertl\pdftk\Pdf', $pdf->updateInfo($data)); + + $this->assertTrue($pdf->saveAs($file)); + + $this->assertFileExists($file); + + $pdf = new Pdf($file); + $data = $pdf->getData(); + $this->assertEquals('php-pdftk', $data['Info']['Creator']); + } + public function testCanGetDataFields() { $form = $this->getForm(); diff --git a/tests/files/InfoFileTest.txt b/tests/files/InfoFileTest.txt index dfa392b..fa3d7b6 100644 --- a/tests/files/InfoFileTest.txt +++ b/tests/files/InfoFileTest.txt @@ -4,3 +4,24 @@ InfoValue: php-pdftk InfoBegin InfoKey: Subject InfoValue: öäüÖÄÜ +InfoBegin +InfoKey: Title +InfoValue: Title x +BookmarkBegin +BookmarkTitle: Title 1 +BookmarkLevel: 1 +BookmarkPageNumber: 1 +BookmarkBegin +BookmarkTitle: Title 2 +BookmarkLevel: 2 +BookmarkPageNumber: 10 +PageMediaBegin +PageMediaNumber: 1 +PageMediaRotation: 0 +PageMediaRect: 0 0 595 842 +PageMediaDimensions: 595 842 +PageLabelBegin +PageLabelNewIndex: 1 +PageLabelStart: 1 +PageLabelPrefix: some name 1 +PageLabelNumStyle: NoNumber