Skip to content

Commit

Permalink
[MAINTENANCE] Complete tests for MetsDocument (kitodo#919)
Browse files Browse the repository at this point in the history
  • Loading branch information
oliver-stoehr authored Oct 27, 2023
1 parent 8b7880e commit fe70610
Show file tree
Hide file tree
Showing 4 changed files with 259 additions and 0 deletions.
66 changes: 66 additions & 0 deletions Tests/Fixtures/MetsDocument/fulltext_0003.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
<?xml version="1.0" encoding="UTF-8"?>
<alto:alto xsi:schemaLocation="http://www.loc.gov/standards/alto/ns-v2# http://www.loc.gov/standards/alto/alto-v2.0.xsd"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:alto="http://www.loc.gov/standards/alto/ns-v2#">
<alto:Description>
<alto:MeasurementUnit>pixel</alto:MeasurementUnit>
<alto:OCRProcessing ID="IdOcr">
<alto:ocrProcessingStep>
<alto:processingDateTime>2020-05-14</alto:processingDateTime>
<alto:processingSoftware>
<alto:softwareCreator>ABBYY</alto:softwareCreator>
<alto:softwareName>ABBYY FineReader Engine</alto:softwareName>
<alto:softwareVersion>12</alto:softwareVersion>
</alto:processingSoftware>
</alto:ocrProcessingStep>
</alto:OCRProcessing>
</alto:Description>
<alto:Styles> </alto:Styles>
<alto:Layout>
<alto:Page ID="Page1" PHYSICAL_IMG_NR="1" HEIGHT="2546" WIDTH="1801">
<alto:PrintSpace HEIGHT="2546" WIDTH="1801" VPOS="0" HPOS="0">
<alto:Illustration ID="Page1_Block1" HEIGHT="2546" WIDTH="1801" VPOS="0" HPOS="0"/>
<alto:TextBlock ID="Page1_Block2" HEIGHT="241" WIDTH="1064" VPOS="2068" HPOS="470" language="de">
<alto:Shape>
<alto:Polygon POINTS="1506,2068 1533,2068 1533,2283 1534,2283 1534,2306 1509,2306 1509,2307 1104,2307 1104,2308 700,2308 700,2309 471,2309 471,2286 470,2286 470,2071 697,2071 697,2070 1101,2070 1101,2069 1506,2069 1506,2068"/>
</alto:Shape>
<alto:TextLine HEIGHT="102" WIDTH="628" VPOS="2076" HPOS="477">
<alto:String WC="0.79777777194976807" CONTENT="Bürgertum" HEIGHT="95" WIDTH="437" VPOS="2083" HPOS="477"/>
<alto:SP WIDTH="34" VPOS="2107" HPOS="915"/>
<alto:String WC="0.66333335638046265" CONTENT="und" HEIGHT="76" WIDTH="155" VPOS="2076" HPOS="950"/>
</alto:TextLine>
<alto:TextLine HEIGHT="104" WIDTH="1051" VPOS="2199" HPOS="477">
<alto:String WC="0.83142858743667603" CONTENT="Bürgerlichkeit" HEIGHT="102" WIDTH="574" VPOS="2201" HPOS="477"/>
<alto:SP WIDTH="32" VPOS="2206" HPOS="1051"/>
<alto:String WC="1." CONTENT="in" HEIGHT="68" WIDTH="74" VPOS="2205" HPOS="1084"/>
<alto:SP WIDTH="34" VPOS="2204" HPOS="1159"/>
<alto:String WC="0.8028571605682373" CONTENT="Dresden" HEIGHT="75" WIDTH="333" VPOS="2199" HPOS="1194"/>
</alto:TextLine>
</alto:TextBlock>
<alto:TextBlock ID="Page1_Block3" HEIGHT="290" WIDTH="775" VPOS="307" HPOS="466" language="de">
<alto:Shape>
<alto:Polygon POINTS="1101,307 1241,307 1241,595 1104,595 1104,596 700,596 700,597 466,597 466,309 697,309 697,308 1101,308 1101,307"/>
</alto:Shape>
<alto:TextLine HEIGHT="98" WIDTH="752" VPOS="315" HPOS="473">
<alto:String WC="0.75625002384185791" CONTENT="DRESDNER" HEIGHT="98" WIDTH="752" VPOS="315" HPOS="473"/>
</alto:TextLine>
<alto:TextLine HEIGHT="97" WIDTH="448" VPOS="492" HPOS="473">
<alto:String WC="0.70399999618530273" CONTENT="HEFTE" HEIGHT="97" WIDTH="448" VPOS="492" HPOS="473"/>
</alto:TextLine>
</alto:TextBlock>
<alto:GraphicalElement ID="Page1_Block4" HEIGHT="14" WIDTH="1674" VPOS="266" HPOS="55"/>
<alto:GraphicalElement ID="Page1_Block5" HEIGHT="15" WIDTH="1674" VPOS="442" HPOS="55"/>
<alto:GraphicalElement ID="Page1_Block6" HEIGHT="30" WIDTH="629" VPOS="680" HPOS="477"/>
<alto:GraphicalElement ID="Page1_Block7" HEIGHT="8" WIDTH="170" VPOS="1963" HPOS="635"/>
<alto:GraphicalElement ID="Page1_Block8" HEIGHT="141" WIDTH="11" VPOS="1019" HPOS="1197"/>
<alto:GraphicalElement ID="Page1_Block9" HEIGHT="168" WIDTH="12" VPOS="948" HPOS="1411"/>
<alto:Illustration ID="Page1_Block10" HEIGHT="175" WIDTH="88" VPOS="1469" HPOS="544">
<alto:Shape>
<alto:Polygon POINTS="544,1469 631,1469 631,1474 632,1474 632,1644 545,1644 545,1477 544,1477 544,1469"/>
</alto:Shape>
</alto:Illustration>
<alto:Illustration ID="Page1_Block11" HEIGHT="207" WIDTH="61" VPOS="1657" HPOS="790"/>
</alto:PrintSpace>
</alto:Page>
</alto:Layout>
</alto:alto>
73 changes: 73 additions & 0 deletions Tests/Fixtures/MetsDocument/mets_with_pages.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
<?xml version="1.0" encoding="UTF-8"?>
<mets:mets xmlns:mets="http://www.loc.gov/METS/"
xmlns:xlink="http://www.w3.org/1999/xlink"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://www.loc.gov/METS/ http://www.loc.gov/standards/mets/mets.xsd">
<mets:fileSec>
<mets:fileGrp USE="FULLTEXT">
<mets:file ID="FULLTEXT_0001" MIMETYPE="application/pdf">
<mets:FLocat LOCTYPE="URL" xlink:href="https://example.com/fulltext_0001.xml"/>
</mets:file>
<mets:file ID="FULLTEXT_0002" MIMETYPE="application/pdf">
<mets:FLocat LOCTYPE="URL" xlink:href="https://example.com/fulltext_002.xml"/>
</mets:file>
<mets:file ID="FULLTEXT_0003" MIMETYPE="application/pdf">
<mets:FLocat LOCTYPE="URL" xlink:href="http://web:8001/Tests/Fixtures/MetsDocument/fulltext_0003.xml"/>
</mets:file>
<mets:file ID="FULLTEXT_0004" MIMETYPE="application/pdf">
<mets:FLocat LOCTYPE="URL" xlink:href="https://example.com/fulltext_0003.xml"/>
</mets:file>
</mets:fileGrp>
<mets:fileGrp USE="DEFAULT">
<mets:file ID="FILE_0001" MIMETYPE="image/png">
<mets:FLocat LOCTYPE="URL" xlink:href="https://example.com/image/0001.png"/>
</mets:file>
<mets:file ID="FILE_0002" MIMETYPE="image/png">
<mets:FLocat LOCTYPE="URL" xlink:href="https://example.com/image/0002.png"/>
</mets:file>
<mets:file ID="FILE_0003" MIMETYPE="image/png">
<mets:FLocat LOCTYPE="URL" xlink:href="https://example.com/image/0003.png"/>
</mets:file>
<mets:file ID="FILE_0004" MIMETYPE="image/png">
<mets:FLocat LOCTYPE="URL" xlink:href="https://example.com/image/0004.png"/>
</mets:file>
</mets:fileGrp>
</mets:fileSec>
<mets:structMap TYPE="LOGICAL">
<mets:div ID="LOG_0000" LABEL="Example Book" TYPE="manuscript">
<mets:div ID="LOG_0001" LABEL="First Chapter" TYPE="chapter"/>
<mets:div ID="LOG_0002" LABEL="Second Chapter" TYPE="chapter"/>
<mets:div ID="LOG_0003" LABEL="Third Chapter" TYPE="chapter"/>
</mets:div>
</mets:structMap>
<mets:structMap TYPE="PHYSICAL">
<mets:div ID="PHYS_0000" TYPE="physSequence">
<mets:div ID="PHYS_0001" ORDER="1" TYPE="page">
<mets:fptr FILEID="FILE_0001"/>
<mets:fptr FILEID="FULLTEXT_0001"/>
</mets:div>
<mets:div ID="PHYS_0002" ORDER="2" TYPE="page">
<mets:fptr FILEID="FILE_0002"/>
<mets:fptr FILEID="FULLTEXT_0002"/>
</mets:div>
<mets:div ID="PHYS_0003" ORDER="3" TYPE="page">
<mets:fptr FILEID="FILE_0003"/>
<mets:fptr FILEID="FULLTEXT_0003"/>
</mets:div>
<mets:div ID="PHYS_0004" ORDER="4" TYPE="page">
<mets:fptr FILEID="FILE_0004"/>
<mets:fptr FILEID="FULLTEXT_0004"/>
</mets:div>
</mets:div>
</mets:structMap>
<mets:structLink>
<mets:smLink xlink:from="LOG_0000" xlink:to="PHYS_0001"/>
<mets:smLink xlink:from="LOG_0000" xlink:to="PHYS_0002"/>
<mets:smLink xlink:from="LOG_0000" xlink:to="PHYS_0003"/>
<mets:smLink xlink:from="LOG_0000" xlink:to="PHYS_0004"/>
<mets:smLink xlink:from="LOG_0001" xlink:to="PHYS_0001"/>
<mets:smLink xlink:from="LOG_0002" xlink:to="PHYS_0002"/>
<mets:smLink xlink:from="LOG_0003" xlink:to="PHYS_0003"/>
<mets:smLink xlink:from="LOG_0004" xlink:to="PHYS_0004"/>
</mets:structLink>
</mets:mets>
5 changes: 5 additions & 0 deletions Tests/Fixtures/MetsDocument/two_dmdsec.xml
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,11 @@
<mets:FLocat LOCTYPE="URL" xlink:href="https://digital.slub-dresden.de/data/kitodo/1703800435/thumbnail.jpg"/>
</mets:file>
</mets:fileGrp>
<mets:fileGrp USE="DOWNLOAD">
<mets:file ID="FILE_0000_DOWNLOAD" MIMETYPE="application/vnd.netfpx">
<mets:FLocat LOCTYPE="URL" xlink:href="https://example.com/download"/>
</mets:file>
</mets:fileGrp>
</mets:fileSec>
<mets:structMap TYPE="LOGICAL">
<mets:div ADMID="AMD" DMDID="DMDLOG_0000 DMDLOG_0000b" ID="LOG_0000" LABEL="With (two) DMD and AMD"></mets:div>
Expand Down
115 changes: 115 additions & 0 deletions Tests/Functional/Common/MetsDocumentTest.php
Original file line number Diff line number Diff line change
@@ -1,4 +1,13 @@
<?php
/**
* (c) Kitodo. Key to digital objects e.V. <[email protected]>
*
* This file is part of the Kitodo and TYPO3 projects.
*
* @license GNU General Public License version 3 or later.
* For the full copyright and license information, please read the
* LICENSE.txt file that was distributed with this source code.
*/

namespace Kitodo\Dlf\Tests\Functional\Common;

Expand All @@ -11,6 +20,7 @@ public function setUp(): void
{
parent::setUp();

$this->importDataSet(__DIR__ . '/../../Fixtures/Common/documents_1.xml');
$this->importDataSet(__DIR__ . '/../../Fixtures/Common/metadata.xml');
$this->importDataSet(__DIR__ . '/../../Fixtures/MetsDocument/metadata_mets.xml');
}
Expand Down Expand Up @@ -128,4 +138,109 @@ public function returnsEmptyMetadataWhenNoDmdSec()
$metadata = $doc->getMetadata('LOG_0002', 20000);
$this->assertEquals([], $metadata);
}

/**
* @test
*/
public function canGetDownloadLocation()
{
    // The fixture declares FILE_0000_DOWNLOAD in a DOWNLOAD file group with
    // MIME type "application/vnd.netfpx" and the location https://example.com/download.
    $doc = $this->doc('two_dmdsec.xml');

    // Strict comparison: a non-string result must not pass through loose equality.
    $correct = $doc->getDownloadLocation('FILE_0000_DOWNLOAD');
    $this->assertSame('https://example.com/download?&CVT=jpeg', $correct);

    /*
     * The method `getDownloadLocation` should return a string, but returns null in some cases.
     * Therefore, a TypeError must be expected here.
     */
    // The expectation has to be registered before the throwing call.
    $this->expectException(\TypeError::class);
    $doc->getDownloadLocation('ID_DOES_NOT_EXIST');
}


/**
* @test
*/
public function canGetFileLocation()
{
    // Checks the location lookup for a known file ID and for an unknown one.
    $doc = $this->doc('two_dmdsec.xml');

    $correct = $doc->getFileLocation('FILE_0000_DEFAULT');
    $this->assertSame('https://digital.slub-dresden.de/data/kitodo/1703800435/video.mov', $correct);

    // assertSame instead of assertEquals: with loose comparison null or false
    // would also be accepted as "empty string".
    // NOTE(review): assumes getFileLocation() always returns a real string - confirm.
    $incorrect = $doc->getFileLocation('ID_DOES_NOT_EXIST');
    $this->assertSame('', $incorrect);
}

/**
* @test
*/
public function canGetFileMimeType()
{
    // Checks the MIME type lookup for a known file ID and for an unknown one.
    $doc = $this->doc('two_dmdsec.xml');

    $correct = $doc->getFileMimeType('FILE_0000_DEFAULT');
    $this->assertSame('video/quicktime', $correct);

    // assertSame instead of assertEquals: with loose comparison null or false
    // would also be accepted as "empty string".
    // NOTE(review): assumes getFileMimeType() always returns a real string - confirm.
    $incorrect = $doc->getFileMimeType('ID_DOES_NOT_EXIST');
    $this->assertSame('', $incorrect);
}

// FIXME: Method getPhysicalPage does not work as expected
/**
* @test
*/
public function canGetPhysicalPage()
{
    // NOTE(review): the page divs in mets_with_pages.xml carry ORDER but no
    // ORDERLABEL attribute - confirm what the argument '1' is actually matched against.
    $document = $this->doc('mets_with_pages.xml');

    // pass orderlabel and retrieve order
    $this->assertEquals(1, $document->getPhysicalPage('1'));
}

/**
* @test
*/
public function canGetTitle()
{
    // Checks the title lookup for the record UID 1001 and for the UID 1234.
    // NOTE(review): presumably both are resolved against the data sets imported
    // in setUp() rather than against the METS fixture - confirm.
    $doc = $this->doc('mets_with_pages.xml');

    $correct = $doc->getTitle(1001);
    $this->assertSame('10 Keyboard pieces - Go. S. 658', $correct);

    // assertSame instead of assertEquals: with loose comparison null or false
    // would also be accepted as "empty string".
    // NOTE(review): assumes getTitle() always returns a real string - confirm.
    $incorrect = $doc->getTitle(1234);
    $this->assertSame('', $incorrect);
}

/**
* @test
*/
public function canGetFullText()
{
    $doc = $this->doc('mets_with_pages.xml');

    // PHYS_0003 points to FULLTEXT_0003, whose FLocat references the
    // fulltext_0003.xml fixture.
    $fulltext = $doc->getFullText('PHYS_0003');
    // The continuation lines of this literal must stay at column 0: any leading
    // whitespace would become part of the expected string.
    $expected = '<?xml version="1.0"?>
<ocr><b/><b/></ocr>
';
    $this->assertSame($expected, $fulltext);

    // assertSame instead of assertEquals: with loose comparison null or false
    // would also be accepted as "empty string".
    // NOTE(review): assumes getFullText() always returns a real string - confirm.
    $incorrect = $doc->getFullText('ID_DOES_NOT_EXIST');
    $this->assertSame('', $incorrect);
}

/**
* @test
*/
public function canGetStructureDepth()
{
    $doc = $this->doc('mets_with_pages.xml');

    // In the fixture LOG_0001 sits inside LOG_0000, inside the logical structMap,
    // inside the mets root element.
    // NOTE(review): presumably the expected 3 is the count of these ancestors - confirm.
    $correct = $doc->getStructureDepth('LOG_0001');
    $this->assertSame(3, $correct);

    // assertSame instead of assertEquals: with loose comparison null, false or ''
    // would also be accepted as 0.
    // NOTE(review): assumes getStructureDepth() always returns a real int - confirm.
    $incorrect = $doc->getStructureDepth('ID_DOES_NOT_EXIST');
    $this->assertSame(0, $incorrect);
}
}

0 comments on commit fe70610

Please sign in to comment.