Skip to content

Commit

Permalink
Add unconvertible content to the result
Browse files Browse the repository at this point in the history
The export may include legacy Confluence WikiText content that this
tool cannot convert. In that case, we simply add the content to the
result as raw text.
  • Loading branch information
rvogel committed Jun 3, 2024
1 parent a8f07a3 commit bb2169e
Showing 1 changed file with 12 additions and 2 deletions.
14 changes: 12 additions & 2 deletions src/Converter/ConfluenceConverter.php
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
use DOMElement;
use DOMNode;
use DOMXPath;
use Exception;
use HalloWelt\MediaWiki\Lib\Migration\Converter\PandocHTML;
use HalloWelt\MediaWiki\Lib\Migration\DataBuckets;
use HalloWelt\MediaWiki\Lib\Migration\IOutputAwareInterface;
Expand Down Expand Up @@ -167,7 +168,13 @@ protected function doConvert( SplFileInfo $file ): string {
$this->currentPageTitle = 'not_current_revision_' . $pageId;
}

$dom = $this->preprocessFile();
try {
$dom = $this->preprocessFile();
}
catch ( Exception $e ) {
$rawContent = file_get_contents( $this->rawFile->getPathname() );
return "<-- Unconvertable RAW start-->\n$rawContent\n<-- Unconvertable RAW start-->\n[[Category:Unconvertable]]";
}

$xpath = new DOMXPath( $dom );
$xpath->registerNamespace( 'ac', 'some' );
Expand Down Expand Up @@ -325,7 +332,10 @@ private function preprocessFile() {
$dom->formatOutput = true;
$dom->preserveWhiteSpace = true;
$dom->validateOnParse = false;
$dom->loadXML( $source, LIBXML_PARSEHUGE );
$validXML = $dom->loadXML( $source, LIBXML_PARSEHUGE );
if ( $validXML === false ) {
throw new Exception( 'Unconvertable');
}

$preprocessedPathname = str_replace( '.mraw', '.mprep', $this->rawFile->getPathname() );
$dom->saveHTMLFile( $preprocessedPathname );
Expand Down

0 comments on commit bb2169e

Please sign in to comment.