Skip to content

Commit

Permalink
Add codelist to enum generator (#49)
Browse files Browse the repository at this point in the history
  • Loading branch information
AurelienPillevesse authored Oct 15, 2024
1 parent 657c4fa commit 72a41f6
Show file tree
Hide file tree
Showing 8 changed files with 284 additions and 4 deletions.
9 changes: 9 additions & 0 deletions bin/generate-codelists.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#!/usr/bin/env php
<?php

require __DIR__ . '/../vendor/autoload.php'; // Ensure autoload is included

use Tiime\EN16931\Codelist\Generator\CodelistGenerator;

// Regenerate every code list enum from the EN16931 spreadsheet stored under tests/.
$codelistSpreadsheet = __DIR__ . DIRECTORY_SEPARATOR . '..' . DIRECTORY_SEPARATOR . 'tests/EN16931_code_lists_values_v14-used_from_2024-11-15.xlsx';

CodelistGenerator::generateCodelists($codelistSpreadsheet);
11 changes: 7 additions & 4 deletions composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,16 @@
],
"minimum-stability": "stable",
"require": {
"php": "^8.1",
"ext-bcmath": "*"
"php": ">=8.3",
"ext-bcmath": "*",
"ext-iconv": "*"
},
"require-dev": {
"phpunit/phpunit": "^10.0",
"squizlabs/php_codesniffer": "^3.7",
"phpstan/phpstan": "^1.10",
"staabm/annotate-pull-request-from-checkstyle": "^1.8"
"staabm/annotate-pull-request-from-checkstyle": "^1.8",
"phpoffice/phpspreadsheet": "^3.3"
},
"autoload": {
"psr-4": {
Expand All @@ -41,6 +43,7 @@
"scripts": {
"test": "vendor/bin/phpunit tests",
"code_sniffer": "vendor/bin/phpcs -q --report=checkstyle --standard=PSR12 src/",
"phpstan": "vendor/bin/phpstan analyse -l 9 src tests"
"phpstan": "vendor/bin/phpstan analyse -l 9 src tests",
"generate-codelists": "php bin/generate-codelists.php"
}
}
50 changes: 50 additions & 0 deletions src/Codelist/Generator/CodelistGenerator.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
<?php

namespace Tiime\EN16931\Codelist\Generator;

final readonly class CodelistGenerator
{
    /**
     * Reads each code list sheet of the given spreadsheet and generates one enum file per list.
     *
     * All sheets are read first; enum files are only generated once every read has succeeded.
     *
     * @param string $fileToRead Path of the xlsx file handed to XlsxReader::read().
     */
    public static function generateCodelists(
        string $fileToRead
    ): void {
        $enumWriter = new Generator();

        // Enum class name => [sheet name, value column, name column, first data row].
        $sources = [
            'CountryAlpha2Code' => ['Country', 'B', 'A', 2],
            'CurrencyCodeISO4217' => ['Currency', 'B', 'A', 2],
            'InternationalCodeDesignator' => ['ICD', 'A', 'B', 2],
            'InvoiceTypeCodeUNTDID1001' => ['1001', 'A', 'B', 2],
            'ReferenceQualifierCodeUNTDID1153' => ['1153', 'A', 'B', 2],
            'TimeReferencingCodeUNTDID2005' => ['Time', 'A', 'B', 3],
            'TimeReferencingCodeUNTDID2475' => ['Time', 'C', 'D', 3],
            'TextSubjectCodeUNTDID4451' => ['Text', 'A', 'B', 2],
            'PaymentMeansCodeUNTDID4461' => ['Payment', 'A', 'B', 2],
            'DutyTaxFeeCategoryCodeUNTDID5305' => ['5305', 'A', 'B', 2],
            'AllowanceReasonCodeUNTDID5189' => ['Allowance', 'A', 'B', 2],
            'ItemTypeCodeUNTDID7143' => ['Item', 'A', 'B', 2],
            'ChargeReasonCodeUNTDID7161' => ['Charge', 'A', 'B', 2],
            'MimeCode' => ['MIME', 'A', 'A', 2],
            'ElectronicAddressSchemeCode' => ['EAS', 'A', 'B', 2],
            'VatExemptionReasonCodeList' => ['VATEX', 'A', 'B', 2],
            'UnitOfMeasureCode' => ['Unit', 'B', 'C', 2],
        ];

        // First pass: load every list, in declaration order.
        $lists = [];
        foreach ($sources as $enumName => [$sheet, $valueColumn, $nameColumn, $firstRow]) {
            $lists[$enumName] = XlsxReader::read(
                filename: $fileToRead,
                valueColumn: $valueColumn,
                nameColumn: $nameColumn,
                sheetName: $sheet,
                startLine: $firstRow,
            );
        }

        // @todo Greece : EL for France Spec
        // Second pass: write one enum per list, in the same order.
        foreach ($lists as $enumName => $list) {
            $enumWriter->generateCodelist(className: $enumName, cases: $list);
        }
    }
}
46 changes: 46 additions & 0 deletions src/Codelist/Generator/Generator.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
<?php

namespace Tiime\EN16931\Codelist\Generator;

/**
 * Renders a list of code list items as a string-backed PHP enum and writes it to disk.
 *
 * @internal
 */
final readonly class Generator
{
    // Placeholders in angle brackets are substituted by generateCodelist().
    private const string ENUM_TEMPLATE = <<<'TEMPLATE'
<?php
declare(strict_types=1);
namespace <namespace>;
enum <className> : string
{
<cases>
}
TEMPLATE;

    /**
     * Generates the enum source for the given items and writes it to "<className>.php"
     * in the parent directory of this file.
     *
     * @param string           $className Name of the enum; also used as the file name.
     * @param XlsxReaderResult $cases     Items whose harmonized form becomes the enum cases.
     *
     * @throws \RuntimeException When the source cannot be rendered or the file cannot be written.
     */
    public function generateCodelist(
        string $className,
        XlsxReaderResult $cases
    ): void {
        // Each item renders itself as a "case NAME = 'value';" line through __toString().
        $caseLines = array_map(
            static fn (XlsxReaderResultItem $case): string => (string) $case,
            $cases->getHarmonized(),
        );

        $source = strtr(self::ENUM_TEMPLATE, [
            '<cases>' => ' ' . implode("\n ", $caseLines),
            '<namespace>' => 'Tiime\EN16931\Codelist',
            '<className>' => $className,
        ]);

        // Blank out lines made only of spaces (e.g. the case indent when there are no cases).
        $source = preg_replace('/^ +$/m', '', $source);
        if ($source === null) {
            throw new \RuntimeException(sprintf(
                'Unable to render enum "%s": %s',
                $className,
                preg_last_error_msg(),
            ));
        }

        $path = __DIR__ . DIRECTORY_SEPARATOR . '..' . DIRECTORY_SEPARATOR . $className . '.php';

        // file_put_contents() returns false on failure; surface it instead of ignoring it.
        if (file_put_contents($path, $source) === false) {
            throw new \RuntimeException(sprintf('Unable to write enum file "%s".', $path));
        }
    }
}
45 changes: 45 additions & 0 deletions src/Codelist/Generator/XlsxReader.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
<?php

namespace Tiime\EN16931\Codelist\Generator;

use PhpOffice\PhpSpreadsheet\Reader\Xlsx;
use PhpOffice\PhpSpreadsheet\Worksheet\Worksheet;

/**
 * Reads name/value pairs from two columns of one sheet of an xlsx file.
 */
final readonly class XlsxReader
{
    /**
     * Loads a single sheet and collects one item per row from $startLine to the sheet's highest row.
     *
     * @param string $filename    Path of the xlsx file to load.
     * @param string $valueColumn Column letter holding the item values.
     * @param string $nameColumn  Column letter holding the item names.
     * @param string $sheetName   Name of the only sheet to load.
     * @param int    $startLine   First row to read (1-based).
     *
     * @throws \UnexpectedValueException When a cell in the range holds neither a string nor a number.
     */
    public static function read(
        string $filename,
        string $valueColumn,
        string $nameColumn,
        string $sheetName,
        int $startLine = 2,
    ): XlsxReaderResult {
        $reader = (new Xlsx())
            ->setLoadSheetsOnly($sheetName)
            ->setReadEmptyCells(false)
            ->setIgnoreRowsWithNoCells(true)
        ;
        $spreadsheet = $reader->load($filename);

        try {
            $worksheet = $spreadsheet->getSheetByNameOrThrow($sheetName);
            $highestRow = $worksheet->getHighestRow();

            $result = new XlsxReaderResult();

            for ($row = $startLine; $row <= $highestRow; $row++) {
                $result->add(new XlsxReaderResultItem(
                    name: self::readCellAsString($worksheet, $nameColumn . $row),
                    value: self::readCellAsString($worksheet, $valueColumn . $row),
                ));
            }

            return $result;
        } finally {
            // Release the workbook: this method is called once per code list, and each call
            // loads the file again.
            $spreadsheet->disconnectWorksheets();
        }
    }

    /**
     * Returns the trimmed textual content of a cell.
     *
     * Strings are used as-is and numbers are cast to string; anything else is rejected
     * so that an empty or unexpected cell cannot silently become an enum case.
     *
     * @throws \UnexpectedValueException When the cell holds neither a string nor a number.
     */
    private static function readCellAsString(Worksheet $worksheet, string $coordinate): string
    {
        $raw = $worksheet->getCell($coordinate)->getValue();

        if (is_int($raw) || is_float($raw)) {
            $raw = (string) $raw;
        }

        if (!is_string($raw)) {
            throw new \UnexpectedValueException(sprintf(
                'Cell %s of sheet "%s" must contain text, got %s.',
                $coordinate,
                $worksheet->getTitle(),
                get_debug_type($raw),
            ));
        }

        return trim($raw);
    }
}
110 changes: 110 additions & 0 deletions src/Codelist/Generator/XlsxReaderResult.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
<?php

namespace Tiime\EN16931\Codelist\Generator;

use ArrayObject;

/**
 * Ordered collection of the items read from a sheet, with a view whose names are
 * rewritten into unique upper-case identifiers.
 */
final class XlsxReaderResult
{
    /**
     * Initialised to an empty list so that get() and getHarmonized() work before any add().
     *
     * @var XlsxReaderResultItem[]
     */
    private array $result = [];

    /**
     * Appends an item to the collection.
     */
    public function add(XlsxReaderResultItem $item): void
    {
        $this->result[] = $item;
    }

    /**
     * Returns the items exactly as they were added, in insertion order.
     *
     * @return XlsxReaderResultItem[]
     */
    public function get(): array
    {
        return $this->result;
    }

    /**
     * Returns new items with the same values and with names harmonized into identifiers.
     *
     * When several items harmonize to the same name, the first keeps it and the following
     * ones receive a "_SECOND", "_THIRD", "_4TH", ... suffix.
     *
     * @return XlsxReaderResultItem[]
     *
     * @throws \RuntimeException When a name cannot be processed.
     */
    public function getHarmonized(): array
    {
        $nameCount = [];
        $harmonizedResult = [];

        foreach ($this->result as $item) {
            $name = self::harmonizeName($item->name);

            // Number of times this harmonized name has been seen, including this one.
            $nameCount[$name] = ($nameCount[$name] ?? 0) + 1;

            if ($nameCount[$name] > 1) {
                $name .= '_' . $this->getSuffix($nameCount[$name]);
            }

            $harmonizedResult[] = new XlsxReaderResultItem(name: $name, value: $item->value);
        }

        return $harmonizedResult;
    }

    /**
     * Rewrites a raw name into an upper-case, underscore-separated ASCII string.
     *
     * The steps run in a fixed order; later steps rely on the output of earlier ones.
     *
     * @throws \RuntimeException When a regular expression or the transliteration fails.
     */
    private static function harmonizeName(string $name): string
    {
        $name = self::replacePattern('/§\s*27/', '_', $name); // Specific ICD sheet
        $name = str_replace('®', '_', $name);
        $name = str_replace('@', 'A', $name);

        // NOTE(review): strtr() ignores the empty-string key on PHP 8, so this entry is
        // currently a no-op - confirm whether a specific character was intended here.
        $name = strtr($name, [
            '' => '_',
            '>' => 'GREATER', // Replace > with GREATER
            '%' => 'PERCENT', // Replace % with PERCENT
            '+' => 'PLUS', // Replace + with PLUS
            '°' => 'DEGREE', // Replace ° with DEGREE
        ]);

        $name = str_replace(['(', ')'], '', $name); // Remove parentheses
        $name = self::replacePattern('/\s*\[.*?\]\s*/', '', $name); // Remove brackets and content inside
        $name = str_replace(['-', ',', '&', '/'], '_', $name); // Replace - , & / with underscores
        $name = self::replacePattern('/[.\'“”’:=*]/u', '', $name);

        $name = trim($name, '_'); // Ensure the result does not start or end with an underscore

        // Add underscore at the beginning if starting by a number
        $name = self::replacePattern('/^(\d)/', '_$1', $name);

        // Convert accented characters to non-accented
        $ascii = iconv('UTF-8', 'ASCII//TRANSLIT//IGNORE', $name);
        if ($ascii === false) {
            throw new \RuntimeException(sprintf('Unable to transliterate "%s" to ASCII.', $name));
        }

        $name = mb_strtoupper($ascii);
        $name = self::replacePattern('/\s+/', '_', $name); // Replace spaces with underscores

        // Replace multiple underscores with a single underscore
        return self::replacePattern('/_+/', '_', $name);
    }

    /**
     * preg_replace() wrapper that always returns a string and throws instead of returning null.
     *
     * @throws \RuntimeException When the regular expression engine reports an error.
     */
    private static function replacePattern(string $pattern, string $replacement, string $subject): string
    {
        $replaced = preg_replace($pattern, $replacement, $subject);

        if ($replaced === null) {
            throw new \RuntimeException(sprintf(
                'Pattern %s failed on "%s": %s',
                $pattern,
                $subject,
                preg_last_error_msg(),
            ));
        }

        return $replaced;
    }

    /**
     * Returns the suffix for the n-th occurrence of a name: SECOND, THIRD, then "<n>TH".
     */
    private function getSuffix(int $count): string
    {
        return match ($count) {
            2 => 'SECOND',
            3 => 'THIRD',
            default => $count . 'TH',
        };
    }
}
17 changes: 17 additions & 0 deletions src/Codelist/Generator/XlsxReaderResultItem.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
<?php

namespace Tiime\EN16931\Codelist\Generator;

/**
 * A name/value pair that renders itself as a PHP enum case declaration when cast to string.
 */
final readonly class XlsxReaderResultItem
{
    /**
     * @param string $name  Text placed after the "case" keyword.
     * @param string $value Backing string value of the case.
     */
    public function __construct(
        public string $name,
        public string $value,
    ) {
    }

    /**
     * Renders the item as "case NAME = 'value';".
     *
     * The value is rendered with var_export() so that quotes and backslashes are escaped
     * and the result is always a valid PHP string literal.
     */
    public function __toString(): string
    {
        return sprintf('case %s = %s;', $this->name, var_export($this->value, true));
    }
}
Binary file not shown.

0 comments on commit 72a41f6

Please sign in to comment.