Skip to content

Commit

Permalink
TriGParserIterator class added providing a lazy iterator wrapper for …
Browse files Browse the repository at this point in the history
…TriGParser (#36)

* TriGParserIterator class added providing a lazy iterator wrapper for the TriGParser.

* mixed type hint removed (as it is incompatible with PHP < 8)
  • Loading branch information
zozlak authored Mar 10, 2021
1 parent 5280127 commit 9230173
Show file tree
Hide file tree
Showing 2 changed files with 222 additions and 0 deletions.
159 changes: 159 additions & 0 deletions src/TriGParserIterator.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
<?php

declare(strict_types=1);

namespace pietercolpaert\hardf;

/**
 * TriGParser wrapper turning it into a lazy triple/quad iterator.
 *
 * Parses the input in chunks and reads the triples in a lazy way which assures
 * both speed and low memory footprint.
 *
 * Can be reused (meaning parse() and parseStream() methods can be run
 * multiple times) and the iterator returned by them can be iterated more than
 * once as long as the underlying stream is seekable.
 *
 * Use as follows:
 *
 * ```
 * $parser = new TriGParserIterator();
 * foreach ($parser->parse($input) as $quad) {
 *     ...do something...
 * }
 * ```
 */
class TriGParserIterator implements \Iterator
{
    /**
     * Options passed to every TriGParser instance created by this object.
     *
     * @var array
     */
    private $options;

    /**
     * Prefix callback passed to every TriGParser instance created by this object.
     *
     * @var callable|null
     */
    private $prefixCallback;

    /**
     * Parser used for the current pass over the input (null until
     * parseStream() has been called).
     *
     * @var \pietercolpaert\hardf\TriGParser|null
     */
    private $parser;

    /**
     * Length argument passed to fgets() when reading the input.
     *
     * @var int
     */
    private $chunkSize = 8192;

    /**
     * Stream being parsed (null until parseStream() has been called).
     *
     * @var resource|null
     */
    private $input;

    /**
     * Quads emitted by the parser which were not yet consumed by the caller.
     *
     * @var array
     */
    private $triplesBuffer = [];

    /**
     * Zero-based position of the current quad; -1 before the first one is read.
     *
     * @var int
     */
    private $n = -1;

    /**
     * Whether end() has already been called on the current parser.
     *
     * @var bool
     */
    private $ended = false;

    /**
     * In-memory stream created by parse(); owned (and closed) by this object.
     *
     * @var resource|null
     */
    private $tmpStream;

    /**
     * Creates a parser object. For documentation of parameters, see the
     * \pietercolpaert\hardf\TriGParser constructor documentation.
     *
     * If you're using this class, you probably don't need the $tripleCallback
     * but $prefixCallback can be still useful.
     *
     * @param array         $options
     * @param callable|null $prefixCallback
     */
    public function __construct($options = [], $prefixCallback = null)
    {
        $this->options = $options;
        $this->prefixCallback = $prefixCallback;
    }

    public function __destruct()
    {
        $this->closeTmpStream();
    }

    /**
     * A thin wrapper for the parseStream() method turning a string into
     * a stream resource.
     *
     * @throws \Exception
     */
    public function parse(string $input): \Iterator
    {
        $this->closeTmpStream();
        $this->tmpStream = fopen('php://memory', 'r+');
        fwrite($this->tmpStream, $input);
        rewind($this->tmpStream);

        return $this->parseStream($this->tmpStream);
    }

    /**
     * Parses a given input stream using a given chunk size.
     *
     * Nothing is read here - reading starts once the returned iterator is
     * rewound (which foreach and iterator_to_array() do automatically).
     *
     * @param resource $input     stream to read from
     * @param int      $chunkSize length argument passed to fgets()
     *
     * @return \Iterator this object
     *
     * @throws \Exception when $input is not a resource
     */
    public function parseStream($input, int $chunkSize = 8192): \Iterator
    {
        if (!\is_resource($input)) {
            throw new \Exception('Input has to be a resource');
        }

        // A temporary stream left by an earlier parse() call is not needed
        // any more once a different stream is being parsed.
        if ($input !== $this->tmpStream) {
            $this->closeTmpStream();
        }

        $this->input = $input;
        $this->chunkSize = $chunkSize;
        $this->resetState();

        return $this;
    }

    /**
     * Returns the current quad in the form delivered by the TriGParser triple
     * callback or false when there is no current element.
     *
     * @return array|false
     */
    #[\ReturnTypeWillChange]
    public function current()
    {
        return current($this->triplesBuffer);
    }

    /**
     * Returns the zero-based position of the current quad.
     *
     * @return int
     */
    #[\ReturnTypeWillChange]
    public function key()
    {
        return $this->n;
    }

    /**
     * Moves to the next quad reading more input when the buffer is exhausted.
     *
     * Exceptions reported by the parser are rethrown from here.
     */
    public function next(): void
    {
        if (null === $this->parser) {
            // parse()/parseStream() has not been called - nothing to advance.
            return;
        }
        if (false === next($this->triplesBuffer)) {
            $this->fillBuffer();
        }
        ++$this->n;
    }

    /**
     * Starts a fresh pass over the input.
     *
     * @throws \Exception when there is no input or it can not be rewound
     */
    public function rewind(): void
    {
        if (!\is_resource($this->input)) {
            throw new \Exception('No open input stream - call parse() or parseStream() first');
        }
        if (true !== rewind($this->input)) {
            throw new \Exception("Can't seek in the input stream");
        }
        // The input is read from its beginning again, so the parser, the
        // buffer and the key counter have to start from scratch as well.
        $this->resetState();
        $this->next();
    }

    public function valid(): bool
    {
        return false !== current($this->triplesBuffer);
    }

    /**
     * Resets the iteration state and creates a fresh parser which appends
     * every parsed quad to the buffer.
     */
    private function resetState(): void
    {
        $this->n = -1;
        $this->ended = false;
        $this->triplesBuffer = [];
        $this->parser = new TriGParser($this->options, null, $this->prefixCallback);
        $this->parser->setTripleCallback(function (?\Exception $e, ?array $quad): void {
            if ($e) {
                throw $e;
            }
            if ($quad) {
                $this->triplesBuffer[] = $quad;
            }
        });
    }

    /**
     * Reads the input until at least one quad is parsed or the end of the
     * input is reached. The parser is finalized only once per pass.
     */
    private function fillBuffer(): void
    {
        $this->triplesBuffer = [];
        while (!feof($this->input) && 0 === \count($this->triplesBuffer)) {
            $chunk = fgets($this->input, $this->chunkSize);
            // fgets() returns false when there is nothing more to read.
            if (false !== $chunk) {
                $this->parser->parseChunk($chunk);
            }
        }
        if (!$this->ended && feof($this->input)) {
            $this->ended = true;
            $this->parser->end();
        }
    }

    /**
     * Closes the in-memory stream created by parse() (if there is one).
     */
    private function closeTmpStream(): void
    {
        if (\is_resource($this->tmpStream)) {
            fclose($this->tmpStream);
            $this->tmpStream = null;
        }
    }
}
63 changes: 63 additions & 0 deletions test/TriGParserIteratorTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
<?php

namespace Tests\hardf;

use PHPUnit\Framework\TestCase;
use pietercolpaert\hardf\TriGParserIterator;

/**
 * Checks that TriGParserIterator yields the expected number of quads for
 * a stream input, a string input and when one parser object is reused.
 */
class TriGParserIteratorTest extends TestCase
{
    /**
     * Two triples, one per line, without a trailing newline.
     */
    private const TWO_TRIPLES = <<<IN
<http://foo/bar> <http://bar/baz> "foo baz"@en .
<http://foo/bar> <http://bar/baz> "baz foo"@de .
IN;

    /**
     * Same as TWO_TRIPLES plus a triple with a blank node object.
     */
    private const THREE_TRIPLES = <<<IN
<http://foo/bar> <http://bar/baz> "foo baz"@en .
<http://foo/bar> <http://bar/baz> "baz foo"@de .
<http://foo/bar> <http://bar/baz> _:genid1 .
IN;

    public function testStream(): void
    {
        $stream = fopen('php://memory', 'w');
        fwrite($stream, self::TWO_TRIPLES);
        rewind($stream);

        $parser = new TriGParserIterator();
        $this->assertQuadCount(2, $parser->parseStream($stream));

        fclose($stream);
    }

    public function testString(): void
    {
        $parser = new TriGParserIterator();

        $this->assertQuadCount(2, $parser->parse(self::TWO_TRIPLES));
    }

    public function testRepeat(): void
    {
        // The very same parser object handles two different inputs in a row.
        $parser = new TriGParserIterator();

        $this->assertQuadCount(2, $parser->parse(self::TWO_TRIPLES));
        $this->assertQuadCount(3, $parser->parse(self::THREE_TRIPLES));
    }

    /**
     * Asserts that a given value is an iterator yielding exactly $expected items.
     *
     * @param int   $expected expected number of items
     * @param mixed $iterator value returned by the parser
     */
    private function assertQuadCount(int $expected, $iterator): void
    {
        $this->assertInstanceOf(\Iterator::class, $iterator);
        $this->assertCount($expected, iterator_to_array($iterator));
    }
}

0 comments on commit 9230173

Please sign in to comment.