diff --git a/README.md b/README.md
index 0bc05c3..ae8755b 100644
--- a/README.md
+++ b/README.md
@@ -4,12 +4,14 @@
Extract, Transform and Load data using PHP.
+![ETL](docs/img/etl.svg)
+
## Changelog
See the changelog [here](changelog.MD)
## Installation
In your application's folder, run:
-```
+```shell
composer require wizaplace/php-etl
```
diff --git a/docs/Extractors/Aggregator.md b/docs/Extractors/Aggregator.md
new file mode 100644
index 0000000..0640dbd
--- /dev/null
+++ b/docs/Extractors/Aggregator.md
@@ -0,0 +1,93 @@
+# Aggregator
+
+Merge rows from a list of partial data iterators with a matching index.
+
+```php
+# user data from one CSV file
+$userDataIterator = (new Etl())
+ ->extract(
+ new Csv(),
+ 'user_data.csv',
+ ['columns' => ['id','email', 'name']]
+ )
+ ->toIterator()
+;
+
+# extended info from another source
+$extendedInfoIterator = (new Etl())
+ ->extract(
+ new Table(),
+ 'extended_info',
+ ['columns' => 'courriel', 'twitter']
+ )
+ # let's rename 'courriel' to 'email'
+ ->tranform(
+ new RenameColumns(),
+ [
+ 'columns' => ['courriel' => 'email']
+ ]
+ )
+ ->toIterator()
+;
+
+# merge this two data sources
+$mergedData = (new Etl())
+ ->extract(
+ new Aggregator(),
+ [
+ $userDataIterator,
+ $extendedInfoIterator,
+ ],
+ [
+ 'index' => ['email'], # common matching index
+ 'columns' => ['id','email','name','twitter']
+ ]
+ )
+ ->load(
+ new CsvLoader(),
+ 'completeUserData.csv'
+ )
+ ->run()
+;
+```
+
+## Options
+
+### Index (required)
+
+An array of column names common in all data sources
+
+| Type | Default value |
+|-------|---------------|
+| array | `null` |
+
+```php
+$options = ['index' => ['email']];
+```
+
+### Columns (required)
+
+A `Row` is yield when all specified columns have been found for the matching index.
+
+| Type | Default value |
+|-------|---------------|
+| array | `null` |
+
+```php
+$options = ['columns' => ['id', 'name', 'email']];
+```
+
+### Strict
+
+When all Iterators input are fully consummed, if we have any remaining incomplete rows:
+
+- if *true*: Throw an `IncompleteDataException`
+- if *false*: yield the incomplete remaining `Row` flagged as `incomplete`
+
+| Type | Default value |
+|---------|---------------|
+| boolean | `true` |
+
+```php
+$options = ['strict' => false];
+```
\ No newline at end of file
diff --git a/docs/Extractors/Collection.md b/docs/Extractors/Collection.md
index 035abd4..8b5b76b 100644
--- a/docs/Extractors/Collection.md
+++ b/docs/Extractors/Collection.md
@@ -9,15 +9,15 @@ $etl->extract($collection, $iterable, $options);
> **Tip:** Using generators will decrease memory usage.
-
## Options
### Columns
+
Columns from the iterable item that will be extracted.
-| Type | Default value |
-|----- | ------------- |
-| array | `null` |
+| Type | Default value |
+|-------|---------------|
+| array | `null` |
```php
$options = ['columns' => ['id', 'name', 'email']];
diff --git a/docs/Extractors/Csv.md b/docs/Extractors/Csv.md
index e590897..ee55a5e 100644
--- a/docs/Extractors/Csv.md
+++ b/docs/Extractors/Csv.md
@@ -7,22 +7,24 @@ Extracts data from a character-separated values file.
$etl->extract($csv, 'path/to/file.csv', $options);
```
-
## Options
### Columns
+
Columns that will be extracted. If `null`, all columns will be extracted and the first line will be used as the columns names.
-| Type | Default value |
-|----- | ------------- |
-| array | `null` |
+| Type | Default value |
+|-------|---------------|
+| array | `null` |
To select which columns will be extracted, use an array with the columns list:
+
```php
$options = ['columns' => ['id', 'name', 'email']];
```
To rename the columns, use an associative array where the `key` is the name of the column in the file and the `value` is the name that will be used in the etl process:
+
```php
$options = ['columns' => [
'id' => 'id',
@@ -32,6 +34,7 @@ $options = ['columns' => [
```
If your file does not contains the columns names, you may set the name and the index of the columns to extract starting at 1:
+
```php
$options = ['columns' => [
'id' => 1,
@@ -41,35 +44,38 @@ $options = ['columns' => [
```
### Delimiter
+
Field delimiter (one character only).
-| Type | Default value |
-|----- | ------------- |
-| string | , |
+| Type | Default value |
+|--------|---------------|
+| string | , |
```php
$options = ['delimiter' => ';'];
```
### Enclosure
+
Field enclosure character (one character only).
-| Type | Default value |
-|----- | ------------- |
-| string | |
+| Type | Default value |
+|--------|---------------|
+| string | |
```php
$options = ['enclosure' => '"'];
```
### Throw error
+
If the extractor need to throw an exception if it
encounters any input issue during the data processing. Default value
is set to false to keep backward compatibility.
-| Type | Default value |
-|----- | ------------- |
-| boolean | false |
+| Type | Default value |
+|---------|---------------|
+| boolean | false |
```php
$options = ['throwError' => '"'];
diff --git a/docs/Extractors/FixedWidth.md b/docs/Extractors/FixedWidth.md
index eef7c1a..74658f1 100644
--- a/docs/Extractors/FixedWidth.md
+++ b/docs/Extractors/FixedWidth.md
@@ -7,17 +7,18 @@ Extracts data from a text file with fields delimited by a fixed number of charac
$etl->extract($fixedWidth, 'path/to/file.txt', $options);
```
-
## Options
### Columns (required)
+
Columns that will be extracted.
-| Type | Default value |
-|----- | ------------- |
-| array | `null` |
+| Type | Default value |
+|-------|---------------|
+| array | `null` |
Associative array where the `key` is the name of the column and the `value` is an array containing the start position and the length of the column;
+
```php
$options = ['columns' => [
'id' => [0, 5], // Start position is 0 and length is 5.
diff --git a/docs/Extractors/Json.md b/docs/Extractors/Json.md
index e08caa9..31b3e2a 100644
--- a/docs/Extractors/Json.md
+++ b/docs/Extractors/Json.md
@@ -7,17 +7,18 @@ Extracts data from a JavaScript Object Notation file.
$etl->extract($json, 'path/to/file.json', $options);
```
-
## Options
### Columns
+
Columns that will be extracted. If `null`, the first level key/value pairs of the object in each iteration will be used.
-| Type | Default value |
-|----- | ------------- |
-| array | `null` |
+| Type | Default value |
+|-------|---------------|
+| array | `null` |
For more control over the columns, you may use JSON path:
+
```php
$options = ['columns' => [
'id' => '$..bindings[*].id.value',
diff --git a/docs/Extractors/Query.md b/docs/Extractors/Query.md
index 899765d..25402f0 100644
--- a/docs/Extractors/Query.md
+++ b/docs/Extractors/Query.md
@@ -7,10 +7,10 @@ Extracts data from a database table using a custom SQL query.
$etl->extract($query, 'select * from users', $options);
```
-
## Options
### Connection
+
Name of the database connection to use.
| Type | Default value |
@@ -22,6 +22,7 @@ $options = ['connection' => 'app'];
```
### Bindings
+
Values to bind to the query statement.
| Type | Default value |
@@ -29,11 +30,13 @@ Values to bind to the query statement.
| array | `[]` |
Using prepared statement with named placeholders `select * from users where status = :status`:
+
```php
$options = ['bindings' => ['status' => 'active']];
```
Using prepared statement with question mark placeholders `select * from users where status = ?`:
+
```php
$options = ['bindings' => ['active']];
```
diff --git a/docs/Extractors/README.md b/docs/Extractors/README.md
index a4f176b..c97da69 100644
--- a/docs/Extractors/README.md
+++ b/docs/Extractors/README.md
@@ -7,9 +7,9 @@ Extractors are the entry point of any process. To start a process, you must set
$etl->extract($type, $source, $options);
```
-
## Available extractors types
+* [Aggregator](Aggregator.md)
* [Collection](Collection.md)
* [CSV](Csv.md)
* [Fixed Width](FixedWidth.md)
diff --git a/docs/Extractors/Table.md b/docs/Extractors/Table.md
index 23b85c6..ecbee18 100644
--- a/docs/Extractors/Table.md
+++ b/docs/Extractors/Table.md
@@ -7,38 +7,41 @@ Extracts data from a database table.
$etl->extract($table, 'table_name', $options);
```
-
## Options
### Columns
+
Columns that will be extracted. If `null`, all columns of the table will be extracted.
-| Type | Default value |
-|----- | ------------- |
-| array | `null` |
+| Type | Default value |
+|-------|---------------|
+| array | `null` |
To select which columns will be extracted, use an array with the columns list:
+
```php
$options = ['columns' => ['id', 'name', 'email']];
```
### Connection
+
Name of the database connection to use.
-| Type | Default value |
-|----- | ------------- |
-| string | default |
+| Type | Default value |
+|--------|---------------|
+| string | default |
```php
$options = ['connection' => 'app'];
```
### Where
+
Array of conditions, where `key` equals `value`. If you need more flexibility in the the query creation, you may use the [Query extractor](Query.md).
-| Type | Default value |
-|----- | ------------- |
-| array | `[]` |
+| Type | Default value |
+|-------|---------------|
+| array | `[]` |
```php
$options = ['where' => ['status' => 'active']];
diff --git a/docs/Extractors/Xml.md b/docs/Extractors/Xml.md
index f90e805..bf0756d 100644
--- a/docs/Extractors/Xml.md
+++ b/docs/Extractors/Xml.md
@@ -7,33 +7,38 @@ Extracts data from an XML file.
$etl->extract($xml, 'path/to/file.xml', $options);
```
-
## Options
### Columns
+
Columns that will be extracted. If `null`, all tags and attributes within the loop path will be extracted.
-| Type | Default value |
-|----- | ------------- |
-| array | `null` |
+| Type | Default value |
+|-------| ------------- |
+| array | `null` |
To select which columns will be extracted, use the path (without the loop path) of the value. Use `@` to select attributes:
+
```php
-$options = ['columns' => [
- 'id' => '/@id',
- 'name' => '/profile/name',
- 'email' => '/profile/email',
-]];
+$options = [
+ 'columns' => [
+ 'id' => '/@id',
+ 'name' => '/profile/name',
+ 'email' => '/profile/email',
+ ]
+];
```
### Loop
+
The path to loop through.
-| Type | Default value |
-|----- | ------------- |
-| string | / |
+| Type | Default value |
+|--------|---------------|
+| string | / |
To select which columns will be extracted, use the path (without the loop path) of the value. Use `@` to select attributes:
+
```php
$options = ['loop' => '/users/user'];
```
diff --git a/docs/README.md b/docs/README.md
index 8099941..e10aa7c 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -1,6 +1,7 @@
* [Introduction](Introduction.md)
* [Getting Started](GettingStarted.md)
* [Extractors](Extractors/README.md)
+ * [Aggregator](Extractors/Aggregator.md)
* [Collection](Extractors/Collection.md)
* [CSV](Extractors/Csv.md)
* [Fixed Width](Extractors/FixedWidth.md)
diff --git a/docs/RunningProcesses.md b/docs/RunningProcesses.md
index c62a8cc..8010096 100644
--- a/docs/RunningProcesses.md
+++ b/docs/RunningProcesses.md
@@ -9,6 +9,14 @@ $etl->extract(/* ... */)
->run();
```
+To return the resulting data as an iterator (ex.: [Chaining ETL's usecase](../tests/UseCases/ChainingTest.php)), you may use the `toIterator` method:
+
+```php
+$iterator = $etl->extract(/* ... */)
+ ->transform(/* ... */)
+ ->toIterator();
+```
+
To run the process and return the resulting data as an array, you may use the `toArray` method:
```php
@@ -16,4 +24,4 @@ $data = $etl->extract(/* ... */)
->transform(/* ... */)
->load(/* ... */)
->toArray();
-```
+```
\ No newline at end of file
diff --git a/docs/img/etl.svg b/docs/img/etl.svg
new file mode 100644
index 0000000..53524f1
--- /dev/null
+++ b/docs/img/etl.svg
@@ -0,0 +1,1203 @@
+
+
+
+
diff --git a/src/Etl.php b/src/Etl.php
index 4a9dce5..f5c1dfc 100644
--- a/src/Etl.php
+++ b/src/Etl.php
@@ -113,7 +113,7 @@ public function toArray(): array
}
/**
- * Consume the pipeline as a Generator
+ * Consume the pipeline as a Generator.
*
* @return \Generator
*/
diff --git a/src/Exception/IncompleteDataException.php b/src/Exception/IncompleteDataException.php
new file mode 100644
index 0000000..575007d
--- /dev/null
+++ b/src/Exception/IncompleteDataException.php
@@ -0,0 +1,15 @@
+
+ * @copyright Copyright (c) Wizacha
+ * @license MIT
+ */
+
+namespace Wizaplace\Etl\Exception;
+
+class IncompleteDataException extends \Exception
+{
+}
diff --git a/src/Exception/InvalidOptionException.php b/src/Exception/InvalidOptionException.php
new file mode 100644
index 0000000..17020a4
--- /dev/null
+++ b/src/Exception/InvalidOptionException.php
@@ -0,0 +1,15 @@
+
+ * @copyright Copyright (c) Wizacha
+ * @license MIT
+ */
+
+namespace Wizaplace\Etl\Exception;
+
+class InvalidOptionException extends \Exception
+{
+}
diff --git a/src/Exception/UndefinedIndexException.php b/src/Exception/UndefinedIndexException.php
new file mode 100644
index 0000000..49f0ee5
--- /dev/null
+++ b/src/Exception/UndefinedIndexException.php
@@ -0,0 +1,15 @@
+
+ * @copyright Copyright (c) Wizacha
+ * @license MIT
+ */
+
+namespace Wizaplace\Etl\Exception;
+
+class UndefinedIndexException extends \Exception
+{
+}
diff --git a/src/Extractors/Aggregator.php b/src/Extractors/Aggregator.php
new file mode 100644
index 0000000..a467c86
--- /dev/null
+++ b/src/Extractors/Aggregator.php
@@ -0,0 +1,183 @@
+
+ * @copyright Copyright (c) Wizacha
+ * @license MIT
+ */
+
+namespace Wizaplace\Etl\Extractors;
+
+use Wizaplace\Etl\DirtyRow;
+use Wizaplace\Etl\Exception\IncompleteDataException;
+use Wizaplace\Etl\Exception\InvalidOptionException;
+use Wizaplace\Etl\Exception\UndefinedIndexException;
+use Wizaplace\Etl\Row;
+
+class Aggregator extends Extractor
+{
+ /**
+ * The matching key tuplet between iterators.
+ *
+ * @var string[]
+ */
+ protected $index;
+
+ /**
+ * Columns.
+ *
+ * @var string[]
+ */
+ protected $columns;
+
+ /**
+ * If set to true,
+ * will throw a MissingDataException if there is any incomplete rows remaining
+ * when all input iterators are fully consumed and closed.
+ *
+ * @var bool
+ */
+ protected $strict = true;
+
+ /** @var array[] */
+ protected $data;
+
+ /**
+ * Properties that can be set via the options method.
+ *
+ * @var array
+ */
+ protected $availableOptions = [
+ 'index',
+ 'columns',
+ 'strict',
+ ];
+
+ /**
+ * Properties that MUST be set via the options method.
+ *
+ * @var array
+ */
+ protected $requiredOptions = [
+ 'index',
+ 'columns',
+ ];
+
+ /**
+ * @return \Generator
+ *
+ * @throws IncompleteDataException
+ */
+ public function extract(): \Generator
+ {
+ // consume input iterators
+ do {
+ foreach ($this->input as $iterator) {
+ if (
+ ($line = $iterator->current())
+ && ($row = $this->build($line))
+ ) {
+ yield new Row($row);
+ }
+ $iterator->next();
+ }
+ } while (
+ $this->hasValidInput()
+ );
+
+ $incompletes = \count($this->data);
+ if ($this->strict && $incompletes) {
+ throw new IncompleteDataException("$incompletes rows");
+ }
+
+ // then yield the incomplete remaining rows
+ foreach ($this->data as $row) {
+ yield (new Row($row))->setIncomplete();
+ }
+ }
+
+ /**
+ * Accumulate row data and return when completed.
+ *
+ * @param mixed[] $line
+ *
+ * @return mixed[]
+ */
+ protected function build(array $line): ?array
+ {
+ try {
+ $hash = $this->lineHash($line);
+ } catch (UndefinedIndexException $exception) {
+ return null;
+ }
+
+ $this->data[$hash] = \array_merge(
+ $this->data[$hash] ?? [],
+ $line
+ );
+
+ if ($this->isCompleted($hash)) {
+ $row = $this->data[$hash];
+ unset($this->data[$hash]); // free the RAM
+
+ return $row;
+ }
+
+ return null;
+ }
+
+ /**
+ * Check if row is completed.
+ */
+ protected function isCompleted(string $hash): bool
+ {
+ if (!\is_array($this->columns)) {
+ throw new InvalidOptionException('invalid columns', 2);
+ }
+
+ foreach ($this->columns as $key) {
+ if (!\array_key_exists($key, $this->data[$hash])) {
+ return false;
+ }
+ }
+
+ return true;
+ }
+
+ /**
+ * Check if there is any opened iterators left.
+ */
+ protected function hasValidInput(): bool
+ {
+ foreach ($this->input as $iterator) {
+ if ($iterator->valid()) {
+ return true;
+ }
+ }
+
+ return false;
+ }
+
+ /**
+ * calculate row hash key from specified index array.
+ */
+ protected function lineHash(array $line): string
+ {
+ if (!\is_array($this->index)) {
+ throw new InvalidOptionException('Invalid index', 1);
+ }
+
+ return \json_encode(
+ \array_map(
+ function (string $key) use ($line) {
+ if (!\array_key_exists($key, $line)) {
+ throw new UndefinedIndexException();
+ }
+
+ return $line[$key];
+ },
+ $this->index
+ )
+ );
+ }
+}
diff --git a/src/Row.php b/src/Row.php
index 17cb5be..04c832b 100644
--- a/src/Row.php
+++ b/src/Row.php
@@ -27,6 +27,13 @@ class Row implements \ArrayAccess
*/
protected $discarded = false;
+ /**
+ * Flag the row as incomplete
+ *
+ * @var bool
+ */
+ protected $incomplete = false;
+
/**
* Create a new Row instance.
*
@@ -116,6 +123,24 @@ public function discarded(): bool
return $this->discarded;
}
+ /**
+ * Set the row dirty
+ */
+ public function setIncomplete(): self
+ {
+ $this->incomplete = true;
+
+ return $this;
+ }
+
+ /**
+ * Check if the is dirty
+ */
+ public function isIncomplete(): bool
+ {
+ return $this->incomplete;
+ }
+
/**
* Dynamically retrieve attributes on the row.
*
diff --git a/tests/Extractors/AggregatorTest.php b/tests/Extractors/AggregatorTest.php
new file mode 100644
index 0000000..e89b35c
--- /dev/null
+++ b/tests/Extractors/AggregatorTest.php
@@ -0,0 +1,208 @@
+
+ * @copyright Copyright (c) Wizacha
+ * @copyright Copyright (c) Leonardo Marquine
+ * @license MIT
+ */
+
+namespace Tests\Extractors;
+
+use Tests\TestCase;
+use Wizaplace\Etl\DirtyRow;
+use Wizaplace\Etl\Exception\IncompleteDataException;
+use Wizaplace\Etl\Exception\InvalidOptionException;
+use Wizaplace\Etl\Extractors\Aggregator;
+use Wizaplace\Etl\Row;
+
+class AggregatorTest extends TestCase
+{
+ /**
+ * @test
+ *
+ * @dataProvider invalidOptionsProvider
+ **/
+ public function invalid_index_options($invalidOptions, $exceptionCode)
+ {
+ $extractor = new Aggregator();
+
+ $extractor
+ ->input(
+ $this->iteratorsProvider()[0][0] // 👀️
+ )
+ ->options(
+ array_merge(
+ $invalidOptions,
+ ['strict' => false]
+ )
+ )
+ ;
+
+ $this->expectException(InvalidOptionException::class);
+ $this->expectExceptionCode($exceptionCode);
+ iterator_to_array($extractor->extract());
+ }
+
+ /**
+ * @test
+ *
+ * @dataProvider iteratorsProvider
+ **/
+ public function strict_index_matching($iterators)
+ {
+ $extractor = new Aggregator();
+
+ $extractor
+ ->input($iterators)
+ ->options(
+ [
+ 'index' => ['email'],
+ 'columns' => ['name', 'twitter'],
+ 'strict' => true,
+ ]
+ );
+
+ $this->expectException(IncompleteDataException::class);
+ iterator_to_array($extractor->extract());
+ }
+
+ /**
+ * @test
+ *
+ * @dataProvider iteratorsProvider
+ **/
+ public function unstrict_index_matching($iterators)
+ {
+ $extractor = new Aggregator();
+
+ $extractor
+ ->input($iterators)
+ ->options(
+ [
+ 'index' => ['email'],
+ 'columns' => ['name', 'twitter'],
+ 'strict' => false,
+ ]
+ );
+
+ $actual = iterator_to_array($extractor->extract());
+ $expected = [
+ new Row([
+ 'id' => 1,
+ 'name' => 'John Doe',
+ 'email' => 'johndoe@email.com',
+ 'twitter' => '@john',
+ ]),
+ new Row([
+ 'id' => 2,
+ 'name' => 'Jane Doe',
+ 'email' => 'janedoe@email.com',
+ 'twitter' => '@jane',
+ ]),
+ (
+ new Row([
+ 'id' => 3,
+ 'name' => 'Incomplete',
+ 'email' => 'incomplete@dirtydata',
+ ])
+ )
+ ->setIncomplete(),
+ ];
+ static::assertEquals($expected, $actual);
+ }
+
+ /**
+ * @test
+ */
+ public function big_shuffled_data_set()
+ {
+ $expected = 10 ** 4;
+
+ $iterator = function (string $key, string $template) use ($expected): \Generator {
+ $ids = range(1, $expected);
+ shuffle($ids);
+ foreach ($ids as $id) {
+ yield [
+ 'id' => $id,
+ "useseless_$id" => 'nevermind',
+ $key => sprintf($template, $id),
+ ];
+ }
+ };
+
+ $extractor = new Aggregator();
+
+ $extractor
+ ->input(
+ [
+ $iterator('email', 'user_%s@email.com'),
+ $iterator('name', 'name_%s'),
+ $iterator('info', 'info_%s'),
+ $iterator('stuff', 'stuff_%s'),
+ ]
+ )
+ ->options(
+ [
+ 'index' => ['id'],
+ 'columns' => ['email', 'name', 'info', 'stuff'],
+ ]
+ );
+
+ $actual = 0;
+ foreach ($extractor->extract() as $row) {
+ ++$actual;
+ }
+ static::assertEquals($expected, $actual);
+ }
+
+ public function invalidOptionsProvider()
+ {
+ return [
+ 'invalid index' => [
+ [
+ 'columns' => ['name', 'id'],
+ ],
+ 'error_code' => 1,
+ ],
+ 'invalid columns' => [
+ [
+ 'index' => ['email'],
+ ],
+ 'error_code' => 2,
+ ],
+ ];
+ }
+
+ public function iteratorsProvider(): array
+ {
+ $simpleDataSet =
+ [
+ [
+ $this->arrayToIterator([
+ ['id' => 1, 'name' => 'John Doe', 'email' => 'johndoe@email.com'],
+ ['impossible error'], // should not happen
+ ['id' => 2, 'name' => 'Jane Doe', 'email' => 'janedoe@email.com'],
+ ['id' => 3, 'name' => 'Incomplete', 'email' => 'incomplete@dirtydata'],
+ ]),
+ $this->arrayToIterator([
+ ['email' => 'janedoe@email.com', 'twitter' => '@jane'],
+ ['email' => 'johndoe@email.com', 'twitter' => '@john'],
+ ['impossible error'], // should not happen as well
+ ]),
+ ],
+ ]
+ ;
+
+ return [$simpleDataSet];
+ }
+
+ public function arrayToIterator(array $lines): \Iterator
+ {
+ foreach ($lines as $line) {
+ yield $line;
+ }
+ }
+}
diff --git a/tests/Usecases/ChainingTest.php b/tests/Usecases/ChainingTest.php
new file mode 100644
index 0000000..65c8f23
--- /dev/null
+++ b/tests/Usecases/ChainingTest.php
@@ -0,0 +1,118 @@
+
+ * @copyright Copyright (c) Wizacha
+ * @copyright Copyright (c) Leonardo Marquine
+ * @license MIT
+ */
+
+namespace Tests\Usecases;
+
+use PHPUnit\Framework\TestCase;
+use Wizaplace\Etl\Etl;
+use Wizaplace\Etl\Extractors\Aggregator;
+use Wizaplace\Etl\Extractors\Csv;
+use Wizaplace\Etl\Transformers\ConvertCase;
+use Wizaplace\Etl\Transformers\RenameColumns;
+
+class ChainingTest extends TestCase
+{
+ /**
+ * Merging data from two different source
+ * using a common matching column data
+ *
+ * @test
+ */
+ public function merging_iterators_chaining()
+ {
+ // lazy get users
+ $usersIterator = (new Etl())
+ ->extract(
+ new Csv(),
+ __DIR__ . '/data/users.csv',
+ ['delimiter' => ';']
+ )
+ ->toIterator();
+
+ // lazy get extended user info
+ $infosIterator = (new Etl())
+ ->extract(
+ new Csv(),
+ __DIR__ . '/data/infos.csv',
+ ['delimiter' => ';']
+ )
+ ->transform(
+ new RenameColumns(),
+ [
+ 'columns' => [
+ 'courriel' => 'email'
+ ]
+ ]
+ )
+ ->transform(
+ new ConvertCase(),
+ [
+ 'columns' => ['email'],
+ 'mode' => 'lower',
+ ]
+ )
+ ->toIterator();
+
+ // and finally lazy merge these iterators data
+ $usersInfosIterator = (new Etl())
+ ->extract(
+ new Aggregator(),
+ [
+ $usersIterator,
+ $infosIterator,
+ ],
+ [
+ 'index' => ['email'],
+ 'columns' => [
+ 'id',
+ 'email',
+ 'name',
+ 'age'
+ ],
+ 'strict' => false
+ ]
+ )
+ ->toIterator();
+
+ $expected = [
+ [
+ 'id' => '1',
+ 'name' => 'John Doe',
+ 'email' => 'johndoe@email.com',
+ 'age' => '42',
+ ],
+ [
+ 'id' => '2',
+ 'name' => 'Jane Doe',
+ 'email' => 'janedoe@email.com',
+ 'age' => '39',
+ ],
+ [
+ 'id' => '3',
+ 'name' => 'Hello World',
+ 'email' => 'hello@world.com',
+ ],
+ [
+ 'age' => '1000',
+ 'email' => 'glinglin@email.com',
+ ]
+ ];
+
+ $actual = iterator_to_array(
+ $usersInfosIterator
+ );
+
+ static::assertSame(
+ $expected,
+ $actual
+ );
+ }
+}
diff --git a/tests/Usecases/data/infos.csv b/tests/Usecases/data/infos.csv
new file mode 100644
index 0000000..dfdee69
--- /dev/null
+++ b/tests/Usecases/data/infos.csv
@@ -0,0 +1,4 @@
+"courriel";"age"
+"JOHNDOE@EMAIL.COM";42
+"janedoe@email.com";39
+"glinglin@email.com";1000
\ No newline at end of file
diff --git a/tests/Usecases/data/users.csv b/tests/Usecases/data/users.csv
new file mode 100644
index 0000000..83a408f
--- /dev/null
+++ b/tests/Usecases/data/users.csv
@@ -0,0 +1,4 @@
+"id";"name";"email"
+1;"John Doe";"johndoe@email.com"
+2;"Jane Doe";"janedoe@email.com"
+3;"Hello World";"hello@world.com"
\ No newline at end of file