Skip to content

Commit

Permalink
Merge pull request #297 from mikehaertl/291-fix-update-info
Browse files Browse the repository at this point in the history
Issue #291 Fix updateInfo() to accept any updateable data
  • Loading branch information
mikehaertl authored Dec 21, 2022
2 parents c856d2d + d7cd2c8 commit bfe3d05
Show file tree
Hide file tree
Showing 5 changed files with 281 additions and 23 deletions.
169 changes: 149 additions & 20 deletions src/InfoFile.php
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
<?php
namespace mikehaertl\pdftk;

use Exception;
use mikehaertl\tmp\File;

/**
Expand All @@ -14,48 +15,176 @@
*/
class InfoFile extends File
{
/**
* @var string[] list of valid keys for the document information dictionary of
* the PDF. These will be converted into `InfoBegin... InfoKey... InfoValue`
* blocks on the output.
*
* See section 14.3.3 in https://opensource.adobe.com/dc-acrobat-sdk-docs/pdfstandards/PDF32000_2008.pdf
*/
public static $documentInfoFields = array(
'Title',
'Author',
'Subject',
'Keywords',
'Creator',
'Producer',
'CreationDate',
'ModDate',
'Trapped',
);

/**
 * Constructor
 *
 * Creates a temporary file and writes the given data to it as a sequence
 * of `InfoBegin...` / `<Block>Begin...` text blocks.
 *
 * @param array|InfoFields $data the data in this format:
 * ```
 * [
 *     'Info' => [
 *         'Title' => '...',
 *         'Author' => '...',
 *         'Subject' => '...',
 *         'Keywords' => '...',
 *         'Creator' => '...',
 *         'Producer' => '...',
 *         'CreationDate' => '...',
 *         'ModDate' => '...',
 *         'Trapped' => '...',
 *     ],
 *     'Bookmark' => [
 *         [
 *             'Title' => '...',
 *             'Level' => ...,
 *             'PageNumber' => ...,
 *         ],
 *     ],
 *     'PageMedia' => [ ... ],
 *     'PageLabel' => [ ... ],
 *     // ...
 * ]
 * ```
 * This is the same format as the InfoFields object that is returned
 * by `getData()` if you cast it to an array. You can also pass such an
 * (optionally modified) object as input. Some fields like 'NumberOfPages'
 * or 'PdfID0' are ignored as those are not part of the PDF's metadata.
 * All array elements are optional.
 * @param string|null $suffix the optional suffix for the tmp file. If null
 * '.txt' is used.
 * @param string|null $prefix the optional prefix for the tmp file. If null
 * 'php_pdftk_info_' is used.
 * @param string|null $directory directory where the file should be
 * created. Autodetected if not provided.
 * @param string|null $encoding of the data. Default is 'UTF-8'. If the
 * data has another encoding it will be converted to UTF-8. This requires
 * the mbstring extension to be installed.
 * @throws Exception if the mbstring extension is missing and the data
 * must be converted
 */
public function __construct($data, $suffix = null, $prefix = null, $directory = null, $encoding = 'UTF-8')
{
    if ($suffix === null) {
        $suffix = '.txt';
    }
    if ($prefix === null) {
        $prefix = 'php_pdftk_info_';
    }
    if ($directory === null) {
        $directory = self::getTempDir();
    }

    // tempnam() cannot add a suffix, so the created file is renamed
    // afterwards to carry the requested extension.
    $tempName = tempnam($directory, $prefix);
    $newName = $tempName . $suffix;
    rename($tempName, $newName);
    $this->_fileName = $newName;

    // mbstring is only needed when the data actually has to be converted
    if ($encoding !== 'UTF-8' && !function_exists('mb_convert_encoding')) {
        throw new Exception('mbstring extension required.');
    }

    $fields = '';
    $normalizedData = self::normalize($data);

    foreach ($normalizedData as $block => $items) {
        $fields .= self::renderBlock($block, $items, $encoding);
    }

    // Use fwrite, since file_put_contents() messes around with character encoding
    $fp = fopen($this->_fileName, 'w');
    fwrite($fp, $fields);
    fclose($fp);
}

/**
 * Normalize the input data
 *
 * This also converts data from the legacy format (<0.13.0) to the new
 * input format described in the constructor:
 *
 *  - Top-level keys listed in `$documentInfoFields` (legacy format) are
 *    moved below the 'Info' key.
 *  - Any other key whose value is an array is kept as a block; if the
 *    block already exists the values are combined with `array_merge()`.
 *  - Any other key with a non-array value (e.g. 'NumberOfPages') is
 *    dropped.
 *
 * @param array|InfoFields $data the data to normalize
 * @return array a normalized array in the format described in the constructor
 */
private static function normalize($data)
{
    $normalized = array();
    foreach ($data as $key => $value) {
        // Strict comparison: with a loose in_array() an integer key like 0
        // compares equal to any non-numeric string on PHP < 8 and would be
        // mistaken for a document info field.
        if (in_array($key, self::$documentInfoFields, true)) {
            $normalized['Info'][$key] = $value;
        } elseif (is_array($value)) {
            if (!isset($normalized[$key])) {
                $normalized[$key] = array();
            }
            $normalized[$key] = array_merge($normalized[$key], $value);
        }
    }
    return $normalized;
}

/**
 * Render all items of one block type
 *
 * For the 'Info' block every item is a single key/value field. For any
 * other block every item is a set of fields that is introduced by a
 * `<Block>Begin` line.
 *
 * @param string $block like 'Info', 'Bookmark', etc.
 * @param array $items the field items to render
 * @param string $encoding the encoding of the item data
 * @return string the rendered fields
 */
private static function renderBlock($block, $items, $encoding)
{
    $isInfo = $block === 'Info';
    $parts = array();
    foreach ($items as $name => $item) {
        if ($isInfo) {
            $parts[] = self::renderField($block, $name, $item, $encoding, true);
            continue;
        }
        $parts[] = "{$block}Begin\n";
        foreach ($item as $subName => $subItem) {
            $parts[] = self::renderField($block, $subName, $subItem, $encoding, false);
        }
    }
    return implode('', $parts);
}

/**
 * Render a field in a given input block
 *
 * If the encoding is not 'UTF-8', key and value are converted to UTF-8
 * and then escaped with `htmlspecialchars()`. UTF-8 input is written
 * unchanged.
 *
 * @param string $prefix the prefix to use for the field
 * @param string $key the field key
 * @param string $value the field value
 * @param string $encoding the encoding of key and value
 * @param bool $isInfo whether it's an 'Info' field
 * @return string the rendered field
 */
private static function renderField($prefix, $key, $value, $encoding, $isInfo)
{
    if ($encoding !== 'UTF-8') {
        $key = mb_convert_encoding($key, 'UTF-8', $encoding);
        $value = mb_convert_encoding($value, 'UTF-8', $encoding);
        // Escape each variable from itself. Escaping the key into $value
        // (and vice versa) would overwrite the value with the key.
        if (defined('ENT_XML1')) {
            $key = htmlspecialchars($key, ENT_XML1, 'UTF-8');
            $value = htmlspecialchars($value, ENT_XML1, 'UTF-8');
        } else {
            // ENT_XML1 is not available on old PHP versions
            $key = htmlspecialchars($key);
            $value = htmlspecialchars($value);
        }
    }
    if ($isInfo) {
        return "InfoBegin\nInfoKey: $key\nInfoValue: $value\n";
    }
    return "{$prefix}{$key}: $value\n";
}
}
2 changes: 1 addition & 1 deletion src/Pdf.php
Original file line number Diff line number Diff line change
Expand Up @@ -321,7 +321,7 @@ public function fillForm($data, $encoding = 'UTF-8', $dropXfa = true, $format =
public function updateInfo($data, $encoding = 'UTF-8')
{
$this->constrainSingleFile();
if (is_array($data)) {
if (is_array($data) || $data instanceof InfoFields) {
$data = new InfoFile($data, null, null, $this->tempDir, $encoding);
}
$this->getCommand()
Expand Down
88 changes: 86 additions & 2 deletions tests/InfoFileTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,100 @@
class InfoFileTest extends TestCase
{
/**
 * An InfoFile built from data in the nested input format must match the
 * `files/InfoFileTest.txt` fixture located next to this test.
 */
public function testInfoFileCreation()
{
    $info = array(
        'Creator' => 'php-pdftk',
        'Subject' => 'öäüÖÄÜ',
        'Title' => 'Title x',
    );
    $bookmarks = array(
        array(
            'Title' => 'Title 1',
            'Level' => 1,
            'PageNumber' => 1,
        ),
        array(
            'Title' => 'Title 2',
            'Level' => 2,
            'PageNumber' => 10,
        ),
    );
    $pageMedia = array(
        array(
            'Number' => '1',
            'Rotation' => '0',
            'Rect' => '0 0 595 842',
            'Dimensions' => '595 842'
        ),
    );
    $pageLabels = array(
        array(
            'NewIndex' => '1',
            'Start' => '1',
            'Prefix' => 'some name 1',
            'NumStyle' => 'NoNumber',
        ),
    );
    $input = array(
        'Info' => $info,
        'PdfID0' => '8b93f76a0b28b720d0dee9a6eb2a780a',
        'PdfID1' => '8b93f76a0b28b720d0dee9a6eb2a780a',
        'NumberOfPages' => '5',
        'Bookmark' => $bookmarks,
        'PageMedia' => $pageMedia,
        'PageLabel' => $pageLabels,
    );

    // The tmp file is created in the directory of this test
    $infoFile = new InfoFile($input, null, null, __DIR__);
    $actualFile = $infoFile->getFileName();
    $expectedFile = __DIR__ . '/files/InfoFileTest.txt';

    $this->assertFileExists($actualFile);
    $this->assertFileEquals($expectedFile, $actualFile);
}

/**
 * An InfoFile built from the legacy flat format (document info keys at
 * the top level), mixed with blocks in the nested format, must match the
 * `files/InfoFileTest.txt` fixture located next to this test.
 */
public function testInfoFileCreationFromLegacyFormat()
{
    $data = array(
        'Creator' => 'php-pdftk',
        'Subject' => 'öäüÖÄÜ',
        'PdfID0' => '8b93f76a0b28b720d0dee9a6eb2a780a',
        'PdfID1' => '8b93f76a0b28b720d0dee9a6eb2a780a',
        'NumberOfPages' => '5',
        // Mix-in new format
        'Info' => array(
            'Title' => 'Title x',
        ),
        'Bookmark' => array(
            array(
                'Title' => 'Title 1',
                'Level' => 1,
                'PageNumber' => 1,
            ),
            array(
                'Title' => 'Title 2',
                'Level' => 2,
                'PageNumber' => 10,
            ),
        ),
        'PageMedia' => array(
            array(
                'Number' => '1',
                'Rotation' => '0',
                'Rect' => '0 0 595 842',
                'Dimensions' => '595 842'
            ),
        ),
        'PageLabel' => array(
            array(
                'NewIndex' => '1',
                'Start' => '1',
                'Prefix' => 'some name 1',
                'NumStyle' => 'NoNumber',
            ),
        ),
    );

    // The tmp file is created in the directory of this test
    $oInfoFile = new InfoFile($data, null, null, __DIR__);
    $sInfoFilename = $oInfoFile->getFileName();

    $this->assertFileExists($sInfoFilename);
    $this->assertFileEquals(__DIR__ . '/files/InfoFileTest.txt', $sInfoFilename);
}
}
24 changes: 24 additions & 0 deletions tests/PdfTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -606,6 +606,30 @@ public function testCanGetData()
$this->assertEquals($this->formDataArray, (array)$data);
}

/**
 * Data dumped with `getData()` can be modified and passed back to
 * `updateInfo()`; the change must be readable from the saved file.
 */
public function testCanUpdateInfoFromDumpedData()
{
    $sourceFile = $this->getDocument1();
    $targetFile = $this->getOutFile();

    // Dump the data of the source document
    $reader = new Pdf($sourceFile);
    $info = $reader->getData();
    $this->assertInstanceOf('\mikehaertl\pdftk\InfoFields', $info);
    $this->assertEquals($this->formDataArray, (array)$info);

    $info['Info']['Creator'] = 'php-pdftk';

    // Write the modified data back through a fresh Pdf instance
    $writer = new Pdf($sourceFile);
    $updateResult = $writer->updateInfo($info);
    $this->assertInstanceOf('mikehaertl\pdftk\Pdf', $updateResult);

    $saved = $writer->saveAs($targetFile);
    $this->assertTrue($saved);

    $this->assertFileExists($targetFile);

    // Read the saved file again and check the modified field
    $verifier = new Pdf($targetFile);
    $savedInfo = $verifier->getData();
    $this->assertEquals('php-pdftk', $savedInfo['Info']['Creator']);
}

public function testCanGetDataFields()
{
$form = $this->getForm();
Expand Down
21 changes: 21 additions & 0 deletions tests/files/InfoFileTest.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,24 @@ InfoValue: php-pdftk
InfoBegin
InfoKey: Subject
InfoValue: öäüÖÄÜ
InfoBegin
InfoKey: Title
InfoValue: Title x
BookmarkBegin
BookmarkTitle: Title 1
BookmarkLevel: 1
BookmarkPageNumber: 1
BookmarkBegin
BookmarkTitle: Title 2
BookmarkLevel: 2
BookmarkPageNumber: 10
PageMediaBegin
PageMediaNumber: 1
PageMediaRotation: 0
PageMediaRect: 0 0 595 842
PageMediaDimensions: 595 842
PageLabelBegin
PageLabelNewIndex: 1
PageLabelStart: 1
PageLabelPrefix: some name 1
PageLabelNumStyle: NoNumber

0 comments on commit bfe3d05

Please sign in to comment.