diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..22d0d82 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +vendor diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..cb50b48 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,4 @@ +# v1.0.0 + +- Fork from https://github.com/CodeRevolutionPlugins/GPT-3-Encoder-PHP +- Restructure the code as an object-oriented API diff --git a/README.md b/README.md index a0eebaa..3c7c172 100644 --- a/README.md +++ b/README.md @@ -2,27 +2,16 @@ PHP BPE Text Encoder for GPT-2 / GPT-3 ## About -GPT-2 and GPT-3 use byte pair encoding to turn text into a series of integers to feed into the model. This is a PHP implementation of OpenAI's original python encoder which can be found [here](https://github.com/openai/gpt-2). The main source of inspiration for writing this encoder was the NodeJS version of this encoder, found [here](https://github.com/latitudegames/GPT-3-Encoder). - -You can test the results, by comparing the output generated by this script, with the [official tokenizer page from OpenAI](https://beta.openai.com/tokenizer). - -This specific encoder is used in one of my [WordPress plugins](https://coderevolution.ro), to count the number of tokens a string will use when sent to OpenAI API. - +A fork of https://github.com/CodeRevolutionPlugins/GPT-3-Encoder-PHP, adapted to fit our usage. ## Usage The mbstring PHP extension is needed for this tool to work correctly (in case non-ASCII characters are present in the tokenized text): [details here on how to install mbstring](https://www.php.net/manual/en/mbstring.installation.php) - +PHP 8.1 or later is also required. ```php - -$prompt = "Many words map to one token, but some don't: indivisible. 
Unicode characters like emojis may be split into many tokens containing the underlying bytes: 🤚🏾 Sequences of characters commonly found next to each other may be grouped together: 1234567890"; - -$token_array = gpt_encode($prompt); - +use Semji\GPT3Tokenizer\Encoder; +$prompt = "Many words map"; +$encoder = new Encoder(); +$encoder->encode($prompt); ``` - - -## TODO - -Create also a decoder for the package, currently only an encoder is implemented. diff --git a/composer.json b/composer.json index 4b07dba..1d77afc 100644 --- a/composer.json +++ b/composer.json @@ -1,5 +1,5 @@ { - "name": "coderevolutionplugins/gpt-3-encoder-php", + "name": "semji/gpt-3-encoder-php", "description": "PHP BPE Text Encoder for GPT-2 / GPT-3", "type": "library", "license": "MIT", @@ -9,6 +9,17 @@ "email": "support@coderevolution.ro" } ], + "autoload": { + "psr-4": { + "Semji\\GPT3Tokenizer\\": "src" + } + }, "minimum-stability": "stable", - "require": {} + "require": { + "php": "^8.1", + "ext-mbstring": "*" + }, + "require-dev": { + "phpunit/phpunit": "^9.5" + } } diff --git a/composer.lock b/composer.lock new file mode 100644 index 0000000..f8bd5e3 --- /dev/null +++ b/composer.lock @@ -0,0 +1,1752 @@ +{ + "_readme": [ + "This file locks the dependencies of your project to a known state", + "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies", + "This file is @generated automatically" + ], + "content-hash": "1f36f363cc2ecdd96fb8671ad17263cc", + "packages": [], + "packages-dev": [ + { + "name": "doctrine/instantiator", + "version": "2.0.0", + "source": { + "type": "git", + "url": "https://github.com/doctrine/instantiator.git", + "reference": "c6222283fa3f4ac679f8b9ced9a4e23f163e80d0" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/doctrine/instantiator/zipball/c6222283fa3f4ac679f8b9ced9a4e23f163e80d0", + "reference": "c6222283fa3f4ac679f8b9ced9a4e23f163e80d0", + "shasum": "" + }, + "require": { + "php": "^8.1" + }, + 
"require-dev": { + "doctrine/coding-standard": "^11", + "ext-pdo": "*", + "ext-phar": "*", + "phpbench/phpbench": "^1.2", + "phpstan/phpstan": "^1.9.4", + "phpstan/phpstan-phpunit": "^1.3", + "phpunit/phpunit": "^9.5.27", + "vimeo/psalm": "^5.4" + }, + "type": "library", + "autoload": { + "psr-4": { + "Doctrine\\Instantiator\\": "src/Doctrine/Instantiator/" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Marco Pivetta", + "email": "ocramius@gmail.com", + "homepage": "https://ocramius.github.io/" + } + ], + "description": "A small, lightweight utility to instantiate objects in PHP without invoking their constructors", + "homepage": "https://www.doctrine-project.org/projects/instantiator.html", + "keywords": [ + "constructor", + "instantiate" + ], + "support": { + "issues": "https://github.com/doctrine/instantiator/issues", + "source": "https://github.com/doctrine/instantiator/tree/2.0.0" + }, + "funding": [ + { + "url": "https://www.doctrine-project.org/sponsorship.html", + "type": "custom" + }, + { + "url": "https://www.patreon.com/phpdoctrine", + "type": "patreon" + }, + { + "url": "https://tidelift.com/funding/github/packagist/doctrine%2Finstantiator", + "type": "tidelift" + } + ], + "time": "2022-12-30T00:23:10+00:00" + }, + { + "name": "myclabs/deep-copy", + "version": "1.11.0", + "source": { + "type": "git", + "url": "https://github.com/myclabs/DeepCopy.git", + "reference": "14daed4296fae74d9e3201d2c4925d1acb7aa614" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/myclabs/DeepCopy/zipball/14daed4296fae74d9e3201d2c4925d1acb7aa614", + "reference": "14daed4296fae74d9e3201d2c4925d1acb7aa614", + "shasum": "" + }, + "require": { + "php": "^7.1 || ^8.0" + }, + "conflict": { + "doctrine/collections": "<1.6.8", + "doctrine/common": "<2.13.3 || >=3,<3.2.2" + }, + "require-dev": { + "doctrine/collections": "^1.6.8", + "doctrine/common": "^2.13.3 || ^3.2.2", + 
"phpunit/phpunit": "^7.5.20 || ^8.5.23 || ^9.5.13" + }, + "type": "library", + "autoload": { + "files": [ + "src/DeepCopy/deep_copy.php" + ], + "psr-4": { + "DeepCopy\\": "src/DeepCopy/" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "description": "Create deep copies (clones) of your objects", + "keywords": [ + "clone", + "copy", + "duplicate", + "object", + "object graph" + ], + "support": { + "issues": "https://github.com/myclabs/DeepCopy/issues", + "source": "https://github.com/myclabs/DeepCopy/tree/1.11.0" + }, + "funding": [ + { + "url": "https://tidelift.com/funding/github/packagist/myclabs/deep-copy", + "type": "tidelift" + } + ], + "time": "2022-03-03T13:19:32+00:00" + }, + { + "name": "nikic/php-parser", + "version": "v4.15.3", + "source": { + "type": "git", + "url": "https://github.com/nikic/PHP-Parser.git", + "reference": "570e980a201d8ed0236b0a62ddf2c9cbb2034039" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/nikic/PHP-Parser/zipball/570e980a201d8ed0236b0a62ddf2c9cbb2034039", + "reference": "570e980a201d8ed0236b0a62ddf2c9cbb2034039", + "shasum": "" + }, + "require": { + "ext-tokenizer": "*", + "php": ">=7.0" + }, + "require-dev": { + "ircmaxell/php-yacc": "^0.0.7", + "phpunit/phpunit": "^6.5 || ^7.0 || ^8.0 || ^9.0" + }, + "bin": [ + "bin/php-parse" + ], + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "4.9-dev" + } + }, + "autoload": { + "psr-4": { + "PhpParser\\": "lib/PhpParser" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "BSD-3-Clause" + ], + "authors": [ + { + "name": "Nikita Popov" + } + ], + "description": "A PHP parser written in PHP", + "keywords": [ + "parser", + "php" + ], + "support": { + "issues": "https://github.com/nikic/PHP-Parser/issues", + "source": "https://github.com/nikic/PHP-Parser/tree/v4.15.3" + }, + "time": "2023-01-16T22:05:37+00:00" + }, + { + "name": "phar-io/manifest", + "version": 
"2.0.3", + "source": { + "type": "git", + "url": "https://github.com/phar-io/manifest.git", + "reference": "97803eca37d319dfa7826cc2437fc020857acb53" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/phar-io/manifest/zipball/97803eca37d319dfa7826cc2437fc020857acb53", + "reference": "97803eca37d319dfa7826cc2437fc020857acb53", + "shasum": "" + }, + "require": { + "ext-dom": "*", + "ext-phar": "*", + "ext-xmlwriter": "*", + "phar-io/version": "^3.0.1", + "php": "^7.2 || ^8.0" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "2.0.x-dev" + } + }, + "autoload": { + "classmap": [ + "src/" + ] + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "BSD-3-Clause" + ], + "authors": [ + { + "name": "Arne Blankerts", + "email": "arne@blankerts.de", + "role": "Developer" + }, + { + "name": "Sebastian Heuer", + "email": "sebastian@phpeople.de", + "role": "Developer" + }, + { + "name": "Sebastian Bergmann", + "email": "sebastian@phpunit.de", + "role": "Developer" + } + ], + "description": "Component for reading phar.io manifest information from a PHP Archive (PHAR)", + "support": { + "issues": "https://github.com/phar-io/manifest/issues", + "source": "https://github.com/phar-io/manifest/tree/2.0.3" + }, + "time": "2021-07-20T11:28:43+00:00" + }, + { + "name": "phar-io/version", + "version": "3.2.1", + "source": { + "type": "git", + "url": "https://github.com/phar-io/version.git", + "reference": "4f7fd7836c6f332bb2933569e566a0d6c4cbed74" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/phar-io/version/zipball/4f7fd7836c6f332bb2933569e566a0d6c4cbed74", + "reference": "4f7fd7836c6f332bb2933569e566a0d6c4cbed74", + "shasum": "" + }, + "require": { + "php": "^7.2 || ^8.0" + }, + "type": "library", + "autoload": { + "classmap": [ + "src/" + ] + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "BSD-3-Clause" + ], + "authors": [ + { + "name": "Arne Blankerts", + 
"email": "arne@blankerts.de", + "role": "Developer" + }, + { + "name": "Sebastian Heuer", + "email": "sebastian@phpeople.de", + "role": "Developer" + }, + { + "name": "Sebastian Bergmann", + "email": "sebastian@phpunit.de", + "role": "Developer" + } + ], + "description": "Library for handling version information and constraints", + "support": { + "issues": "https://github.com/phar-io/version/issues", + "source": "https://github.com/phar-io/version/tree/3.2.1" + }, + "time": "2022-02-21T01:04:05+00:00" + }, + { + "name": "phpunit/php-code-coverage", + "version": "9.2.24", + "source": { + "type": "git", + "url": "https://github.com/sebastianbergmann/php-code-coverage.git", + "reference": "2cf940ebc6355a9d430462811b5aaa308b174bed" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/sebastianbergmann/php-code-coverage/zipball/2cf940ebc6355a9d430462811b5aaa308b174bed", + "reference": "2cf940ebc6355a9d430462811b5aaa308b174bed", + "shasum": "" + }, + "require": { + "ext-dom": "*", + "ext-libxml": "*", + "ext-xmlwriter": "*", + "nikic/php-parser": "^4.14", + "php": ">=7.3", + "phpunit/php-file-iterator": "^3.0.3", + "phpunit/php-text-template": "^2.0.2", + "sebastian/code-unit-reverse-lookup": "^2.0.2", + "sebastian/complexity": "^2.0", + "sebastian/environment": "^5.1.2", + "sebastian/lines-of-code": "^1.0.3", + "sebastian/version": "^3.0.1", + "theseer/tokenizer": "^1.2.0" + }, + "require-dev": { + "phpunit/phpunit": "^9.3" + }, + "suggest": { + "ext-pcov": "*", + "ext-xdebug": "*" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "9.2-dev" + } + }, + "autoload": { + "classmap": [ + "src/" + ] + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "BSD-3-Clause" + ], + "authors": [ + { + "name": "Sebastian Bergmann", + "email": "sebastian@phpunit.de", + "role": "lead" + } + ], + "description": "Library that provides collection, processing, and rendering functionality for PHP code coverage 
information.", + "homepage": "https://github.com/sebastianbergmann/php-code-coverage", + "keywords": [ + "coverage", + "testing", + "xunit" + ], + "support": { + "issues": "https://github.com/sebastianbergmann/php-code-coverage/issues", + "source": "https://github.com/sebastianbergmann/php-code-coverage/tree/9.2.24" + }, + "funding": [ + { + "url": "https://github.com/sebastianbergmann", + "type": "github" + } + ], + "time": "2023-01-26T08:26:55+00:00" + }, + { + "name": "phpunit/php-file-iterator", + "version": "3.0.6", + "source": { + "type": "git", + "url": "https://github.com/sebastianbergmann/php-file-iterator.git", + "reference": "cf1c2e7c203ac650e352f4cc675a7021e7d1b3cf" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/sebastianbergmann/php-file-iterator/zipball/cf1c2e7c203ac650e352f4cc675a7021e7d1b3cf", + "reference": "cf1c2e7c203ac650e352f4cc675a7021e7d1b3cf", + "shasum": "" + }, + "require": { + "php": ">=7.3" + }, + "require-dev": { + "phpunit/phpunit": "^9.3" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "3.0-dev" + } + }, + "autoload": { + "classmap": [ + "src/" + ] + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "BSD-3-Clause" + ], + "authors": [ + { + "name": "Sebastian Bergmann", + "email": "sebastian@phpunit.de", + "role": "lead" + } + ], + "description": "FilterIterator implementation that filters files based on a list of suffixes.", + "homepage": "https://github.com/sebastianbergmann/php-file-iterator/", + "keywords": [ + "filesystem", + "iterator" + ], + "support": { + "issues": "https://github.com/sebastianbergmann/php-file-iterator/issues", + "source": "https://github.com/sebastianbergmann/php-file-iterator/tree/3.0.6" + }, + "funding": [ + { + "url": "https://github.com/sebastianbergmann", + "type": "github" + } + ], + "time": "2021-12-02T12:48:52+00:00" + }, + { + "name": "phpunit/php-invoker", + "version": "3.1.1", + "source": { + "type": "git", + "url": 
"https://github.com/sebastianbergmann/php-invoker.git", + "reference": "5a10147d0aaf65b58940a0b72f71c9ac0423cc67" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/sebastianbergmann/php-invoker/zipball/5a10147d0aaf65b58940a0b72f71c9ac0423cc67", + "reference": "5a10147d0aaf65b58940a0b72f71c9ac0423cc67", + "shasum": "" + }, + "require": { + "php": ">=7.3" + }, + "require-dev": { + "ext-pcntl": "*", + "phpunit/phpunit": "^9.3" + }, + "suggest": { + "ext-pcntl": "*" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "3.1-dev" + } + }, + "autoload": { + "classmap": [ + "src/" + ] + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "BSD-3-Clause" + ], + "authors": [ + { + "name": "Sebastian Bergmann", + "email": "sebastian@phpunit.de", + "role": "lead" + } + ], + "description": "Invoke callables with a timeout", + "homepage": "https://github.com/sebastianbergmann/php-invoker/", + "keywords": [ + "process" + ], + "support": { + "issues": "https://github.com/sebastianbergmann/php-invoker/issues", + "source": "https://github.com/sebastianbergmann/php-invoker/tree/3.1.1" + }, + "funding": [ + { + "url": "https://github.com/sebastianbergmann", + "type": "github" + } + ], + "time": "2020-09-28T05:58:55+00:00" + }, + { + "name": "phpunit/php-text-template", + "version": "2.0.4", + "source": { + "type": "git", + "url": "https://github.com/sebastianbergmann/php-text-template.git", + "reference": "5da5f67fc95621df9ff4c4e5a84d6a8a2acf7c28" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/sebastianbergmann/php-text-template/zipball/5da5f67fc95621df9ff4c4e5a84d6a8a2acf7c28", + "reference": "5da5f67fc95621df9ff4c4e5a84d6a8a2acf7c28", + "shasum": "" + }, + "require": { + "php": ">=7.3" + }, + "require-dev": { + "phpunit/phpunit": "^9.3" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "2.0-dev" + } + }, + "autoload": { + "classmap": [ + "src/" + ] + }, + 
"notification-url": "https://packagist.org/downloads/", + "license": [ + "BSD-3-Clause" + ], + "authors": [ + { + "name": "Sebastian Bergmann", + "email": "sebastian@phpunit.de", + "role": "lead" + } + ], + "description": "Simple template engine.", + "homepage": "https://github.com/sebastianbergmann/php-text-template/", + "keywords": [ + "template" + ], + "support": { + "issues": "https://github.com/sebastianbergmann/php-text-template/issues", + "source": "https://github.com/sebastianbergmann/php-text-template/tree/2.0.4" + }, + "funding": [ + { + "url": "https://github.com/sebastianbergmann", + "type": "github" + } + ], + "time": "2020-10-26T05:33:50+00:00" + }, + { + "name": "phpunit/php-timer", + "version": "5.0.3", + "source": { + "type": "git", + "url": "https://github.com/sebastianbergmann/php-timer.git", + "reference": "5a63ce20ed1b5bf577850e2c4e87f4aa902afbd2" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/sebastianbergmann/php-timer/zipball/5a63ce20ed1b5bf577850e2c4e87f4aa902afbd2", + "reference": "5a63ce20ed1b5bf577850e2c4e87f4aa902afbd2", + "shasum": "" + }, + "require": { + "php": ">=7.3" + }, + "require-dev": { + "phpunit/phpunit": "^9.3" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "5.0-dev" + } + }, + "autoload": { + "classmap": [ + "src/" + ] + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "BSD-3-Clause" + ], + "authors": [ + { + "name": "Sebastian Bergmann", + "email": "sebastian@phpunit.de", + "role": "lead" + } + ], + "description": "Utility class for timing", + "homepage": "https://github.com/sebastianbergmann/php-timer/", + "keywords": [ + "timer" + ], + "support": { + "issues": "https://github.com/sebastianbergmann/php-timer/issues", + "source": "https://github.com/sebastianbergmann/php-timer/tree/5.0.3" + }, + "funding": [ + { + "url": "https://github.com/sebastianbergmann", + "type": "github" + } + ], + "time": "2020-10-26T13:16:10+00:00" + }, + { + 
"name": "phpunit/phpunit", + "version": "9.5.28", + "source": { + "type": "git", + "url": "https://github.com/sebastianbergmann/phpunit.git", + "reference": "954ca3113a03bf780d22f07bf055d883ee04b65e" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/sebastianbergmann/phpunit/zipball/954ca3113a03bf780d22f07bf055d883ee04b65e", + "reference": "954ca3113a03bf780d22f07bf055d883ee04b65e", + "shasum": "" + }, + "require": { + "doctrine/instantiator": "^1.3.1 || ^2", + "ext-dom": "*", + "ext-json": "*", + "ext-libxml": "*", + "ext-mbstring": "*", + "ext-xml": "*", + "ext-xmlwriter": "*", + "myclabs/deep-copy": "^1.10.1", + "phar-io/manifest": "^2.0.3", + "phar-io/version": "^3.0.2", + "php": ">=7.3", + "phpunit/php-code-coverage": "^9.2.13", + "phpunit/php-file-iterator": "^3.0.5", + "phpunit/php-invoker": "^3.1.1", + "phpunit/php-text-template": "^2.0.3", + "phpunit/php-timer": "^5.0.2", + "sebastian/cli-parser": "^1.0.1", + "sebastian/code-unit": "^1.0.6", + "sebastian/comparator": "^4.0.8", + "sebastian/diff": "^4.0.3", + "sebastian/environment": "^5.1.3", + "sebastian/exporter": "^4.0.5", + "sebastian/global-state": "^5.0.1", + "sebastian/object-enumerator": "^4.0.3", + "sebastian/resource-operations": "^3.0.3", + "sebastian/type": "^3.2", + "sebastian/version": "^3.0.2" + }, + "suggest": { + "ext-soap": "*", + "ext-xdebug": "*" + }, + "bin": [ + "phpunit" + ], + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "9.5-dev" + } + }, + "autoload": { + "files": [ + "src/Framework/Assert/Functions.php" + ], + "classmap": [ + "src/" + ] + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "BSD-3-Clause" + ], + "authors": [ + { + "name": "Sebastian Bergmann", + "email": "sebastian@phpunit.de", + "role": "lead" + } + ], + "description": "The PHP Unit Testing framework.", + "homepage": "https://phpunit.de/", + "keywords": [ + "phpunit", + "testing", + "xunit" + ], + "support": { + "issues": 
"https://github.com/sebastianbergmann/phpunit/issues", + "source": "https://github.com/sebastianbergmann/phpunit/tree/9.5.28" + }, + "funding": [ + { + "url": "https://phpunit.de/sponsors.html", + "type": "custom" + }, + { + "url": "https://github.com/sebastianbergmann", + "type": "github" + }, + { + "url": "https://tidelift.com/funding/github/packagist/phpunit/phpunit", + "type": "tidelift" + } + ], + "time": "2023-01-14T12:32:24+00:00" + }, + { + "name": "sebastian/cli-parser", + "version": "1.0.1", + "source": { + "type": "git", + "url": "https://github.com/sebastianbergmann/cli-parser.git", + "reference": "442e7c7e687e42adc03470c7b668bc4b2402c0b2" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/sebastianbergmann/cli-parser/zipball/442e7c7e687e42adc03470c7b668bc4b2402c0b2", + "reference": "442e7c7e687e42adc03470c7b668bc4b2402c0b2", + "shasum": "" + }, + "require": { + "php": ">=7.3" + }, + "require-dev": { + "phpunit/phpunit": "^9.3" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "1.0-dev" + } + }, + "autoload": { + "classmap": [ + "src/" + ] + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "BSD-3-Clause" + ], + "authors": [ + { + "name": "Sebastian Bergmann", + "email": "sebastian@phpunit.de", + "role": "lead" + } + ], + "description": "Library for parsing CLI options", + "homepage": "https://github.com/sebastianbergmann/cli-parser", + "support": { + "issues": "https://github.com/sebastianbergmann/cli-parser/issues", + "source": "https://github.com/sebastianbergmann/cli-parser/tree/1.0.1" + }, + "funding": [ + { + "url": "https://github.com/sebastianbergmann", + "type": "github" + } + ], + "time": "2020-09-28T06:08:49+00:00" + }, + { + "name": "sebastian/code-unit", + "version": "1.0.8", + "source": { + "type": "git", + "url": "https://github.com/sebastianbergmann/code-unit.git", + "reference": "1fc9f64c0927627ef78ba436c9b17d967e68e120" + }, + "dist": { + "type": "zip", + "url": 
"https://api.github.com/repos/sebastianbergmann/code-unit/zipball/1fc9f64c0927627ef78ba436c9b17d967e68e120", + "reference": "1fc9f64c0927627ef78ba436c9b17d967e68e120", + "shasum": "" + }, + "require": { + "php": ">=7.3" + }, + "require-dev": { + "phpunit/phpunit": "^9.3" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "1.0-dev" + } + }, + "autoload": { + "classmap": [ + "src/" + ] + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "BSD-3-Clause" + ], + "authors": [ + { + "name": "Sebastian Bergmann", + "email": "sebastian@phpunit.de", + "role": "lead" + } + ], + "description": "Collection of value objects that represent the PHP code units", + "homepage": "https://github.com/sebastianbergmann/code-unit", + "support": { + "issues": "https://github.com/sebastianbergmann/code-unit/issues", + "source": "https://github.com/sebastianbergmann/code-unit/tree/1.0.8" + }, + "funding": [ + { + "url": "https://github.com/sebastianbergmann", + "type": "github" + } + ], + "time": "2020-10-26T13:08:54+00:00" + }, + { + "name": "sebastian/code-unit-reverse-lookup", + "version": "2.0.3", + "source": { + "type": "git", + "url": "https://github.com/sebastianbergmann/code-unit-reverse-lookup.git", + "reference": "ac91f01ccec49fb77bdc6fd1e548bc70f7faa3e5" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/sebastianbergmann/code-unit-reverse-lookup/zipball/ac91f01ccec49fb77bdc6fd1e548bc70f7faa3e5", + "reference": "ac91f01ccec49fb77bdc6fd1e548bc70f7faa3e5", + "shasum": "" + }, + "require": { + "php": ">=7.3" + }, + "require-dev": { + "phpunit/phpunit": "^9.3" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "2.0-dev" + } + }, + "autoload": { + "classmap": [ + "src/" + ] + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "BSD-3-Clause" + ], + "authors": [ + { + "name": "Sebastian Bergmann", + "email": "sebastian@phpunit.de" + } + ], + "description": "Looks up 
which function or method a line of code belongs to", + "homepage": "https://github.com/sebastianbergmann/code-unit-reverse-lookup/", + "support": { + "issues": "https://github.com/sebastianbergmann/code-unit-reverse-lookup/issues", + "source": "https://github.com/sebastianbergmann/code-unit-reverse-lookup/tree/2.0.3" + }, + "funding": [ + { + "url": "https://github.com/sebastianbergmann", + "type": "github" + } + ], + "time": "2020-09-28T05:30:19+00:00" + }, + { + "name": "sebastian/comparator", + "version": "4.0.8", + "source": { + "type": "git", + "url": "https://github.com/sebastianbergmann/comparator.git", + "reference": "fa0f136dd2334583309d32b62544682ee972b51a" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/sebastianbergmann/comparator/zipball/fa0f136dd2334583309d32b62544682ee972b51a", + "reference": "fa0f136dd2334583309d32b62544682ee972b51a", + "shasum": "" + }, + "require": { + "php": ">=7.3", + "sebastian/diff": "^4.0", + "sebastian/exporter": "^4.0" + }, + "require-dev": { + "phpunit/phpunit": "^9.3" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "4.0-dev" + } + }, + "autoload": { + "classmap": [ + "src/" + ] + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "BSD-3-Clause" + ], + "authors": [ + { + "name": "Sebastian Bergmann", + "email": "sebastian@phpunit.de" + }, + { + "name": "Jeff Welch", + "email": "whatthejeff@gmail.com" + }, + { + "name": "Volker Dusch", + "email": "github@wallbash.com" + }, + { + "name": "Bernhard Schussek", + "email": "bschussek@2bepublished.at" + } + ], + "description": "Provides the functionality to compare PHP values for equality", + "homepage": "https://github.com/sebastianbergmann/comparator", + "keywords": [ + "comparator", + "compare", + "equality" + ], + "support": { + "issues": "https://github.com/sebastianbergmann/comparator/issues", + "source": "https://github.com/sebastianbergmann/comparator/tree/4.0.8" + }, + "funding": [ + { + 
"url": "https://github.com/sebastianbergmann", + "type": "github" + } + ], + "time": "2022-09-14T12:41:17+00:00" + }, + { + "name": "sebastian/complexity", + "version": "2.0.2", + "source": { + "type": "git", + "url": "https://github.com/sebastianbergmann/complexity.git", + "reference": "739b35e53379900cc9ac327b2147867b8b6efd88" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/sebastianbergmann/complexity/zipball/739b35e53379900cc9ac327b2147867b8b6efd88", + "reference": "739b35e53379900cc9ac327b2147867b8b6efd88", + "shasum": "" + }, + "require": { + "nikic/php-parser": "^4.7", + "php": ">=7.3" + }, + "require-dev": { + "phpunit/phpunit": "^9.3" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "2.0-dev" + } + }, + "autoload": { + "classmap": [ + "src/" + ] + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "BSD-3-Clause" + ], + "authors": [ + { + "name": "Sebastian Bergmann", + "email": "sebastian@phpunit.de", + "role": "lead" + } + ], + "description": "Library for calculating the complexity of PHP code units", + "homepage": "https://github.com/sebastianbergmann/complexity", + "support": { + "issues": "https://github.com/sebastianbergmann/complexity/issues", + "source": "https://github.com/sebastianbergmann/complexity/tree/2.0.2" + }, + "funding": [ + { + "url": "https://github.com/sebastianbergmann", + "type": "github" + } + ], + "time": "2020-10-26T15:52:27+00:00" + }, + { + "name": "sebastian/diff", + "version": "4.0.4", + "source": { + "type": "git", + "url": "https://github.com/sebastianbergmann/diff.git", + "reference": "3461e3fccc7cfdfc2720be910d3bd73c69be590d" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/sebastianbergmann/diff/zipball/3461e3fccc7cfdfc2720be910d3bd73c69be590d", + "reference": "3461e3fccc7cfdfc2720be910d3bd73c69be590d", + "shasum": "" + }, + "require": { + "php": ">=7.3" + }, + "require-dev": { + "phpunit/phpunit": "^9.3", + 
"symfony/process": "^4.2 || ^5" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "4.0-dev" + } + }, + "autoload": { + "classmap": [ + "src/" + ] + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "BSD-3-Clause" + ], + "authors": [ + { + "name": "Sebastian Bergmann", + "email": "sebastian@phpunit.de" + }, + { + "name": "Kore Nordmann", + "email": "mail@kore-nordmann.de" + } + ], + "description": "Diff implementation", + "homepage": "https://github.com/sebastianbergmann/diff", + "keywords": [ + "diff", + "udiff", + "unidiff", + "unified diff" + ], + "support": { + "issues": "https://github.com/sebastianbergmann/diff/issues", + "source": "https://github.com/sebastianbergmann/diff/tree/4.0.4" + }, + "funding": [ + { + "url": "https://github.com/sebastianbergmann", + "type": "github" + } + ], + "time": "2020-10-26T13:10:38+00:00" + }, + { + "name": "sebastian/environment", + "version": "5.1.4", + "source": { + "type": "git", + "url": "https://github.com/sebastianbergmann/environment.git", + "reference": "1b5dff7bb151a4db11d49d90e5408e4e938270f7" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/sebastianbergmann/environment/zipball/1b5dff7bb151a4db11d49d90e5408e4e938270f7", + "reference": "1b5dff7bb151a4db11d49d90e5408e4e938270f7", + "shasum": "" + }, + "require": { + "php": ">=7.3" + }, + "require-dev": { + "phpunit/phpunit": "^9.3" + }, + "suggest": { + "ext-posix": "*" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "5.1-dev" + } + }, + "autoload": { + "classmap": [ + "src/" + ] + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "BSD-3-Clause" + ], + "authors": [ + { + "name": "Sebastian Bergmann", + "email": "sebastian@phpunit.de" + } + ], + "description": "Provides functionality to handle HHVM/PHP environments", + "homepage": "http://www.github.com/sebastianbergmann/environment", + "keywords": [ + "Xdebug", + "environment", + "hhvm" 
+ ], + "support": { + "issues": "https://github.com/sebastianbergmann/environment/issues", + "source": "https://github.com/sebastianbergmann/environment/tree/5.1.4" + }, + "funding": [ + { + "url": "https://github.com/sebastianbergmann", + "type": "github" + } + ], + "time": "2022-04-03T09:37:03+00:00" + }, + { + "name": "sebastian/exporter", + "version": "4.0.5", + "source": { + "type": "git", + "url": "https://github.com/sebastianbergmann/exporter.git", + "reference": "ac230ed27f0f98f597c8a2b6eb7ac563af5e5b9d" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/sebastianbergmann/exporter/zipball/ac230ed27f0f98f597c8a2b6eb7ac563af5e5b9d", + "reference": "ac230ed27f0f98f597c8a2b6eb7ac563af5e5b9d", + "shasum": "" + }, + "require": { + "php": ">=7.3", + "sebastian/recursion-context": "^4.0" + }, + "require-dev": { + "ext-mbstring": "*", + "phpunit/phpunit": "^9.3" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "4.0-dev" + } + }, + "autoload": { + "classmap": [ + "src/" + ] + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "BSD-3-Clause" + ], + "authors": [ + { + "name": "Sebastian Bergmann", + "email": "sebastian@phpunit.de" + }, + { + "name": "Jeff Welch", + "email": "whatthejeff@gmail.com" + }, + { + "name": "Volker Dusch", + "email": "github@wallbash.com" + }, + { + "name": "Adam Harvey", + "email": "aharvey@php.net" + }, + { + "name": "Bernhard Schussek", + "email": "bschussek@gmail.com" + } + ], + "description": "Provides the functionality to export PHP variables for visualization", + "homepage": "https://www.github.com/sebastianbergmann/exporter", + "keywords": [ + "export", + "exporter" + ], + "support": { + "issues": "https://github.com/sebastianbergmann/exporter/issues", + "source": "https://github.com/sebastianbergmann/exporter/tree/4.0.5" + }, + "funding": [ + { + "url": "https://github.com/sebastianbergmann", + "type": "github" + } + ], + "time": "2022-09-14T06:03:37+00:00" + 
}, + { + "name": "sebastian/global-state", + "version": "5.0.5", + "source": { + "type": "git", + "url": "https://github.com/sebastianbergmann/global-state.git", + "reference": "0ca8db5a5fc9c8646244e629625ac486fa286bf2" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/sebastianbergmann/global-state/zipball/0ca8db5a5fc9c8646244e629625ac486fa286bf2", + "reference": "0ca8db5a5fc9c8646244e629625ac486fa286bf2", + "shasum": "" + }, + "require": { + "php": ">=7.3", + "sebastian/object-reflector": "^2.0", + "sebastian/recursion-context": "^4.0" + }, + "require-dev": { + "ext-dom": "*", + "phpunit/phpunit": "^9.3" + }, + "suggest": { + "ext-uopz": "*" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "5.0-dev" + } + }, + "autoload": { + "classmap": [ + "src/" + ] + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "BSD-3-Clause" + ], + "authors": [ + { + "name": "Sebastian Bergmann", + "email": "sebastian@phpunit.de" + } + ], + "description": "Snapshotting of global state", + "homepage": "http://www.github.com/sebastianbergmann/global-state", + "keywords": [ + "global state" + ], + "support": { + "issues": "https://github.com/sebastianbergmann/global-state/issues", + "source": "https://github.com/sebastianbergmann/global-state/tree/5.0.5" + }, + "funding": [ + { + "url": "https://github.com/sebastianbergmann", + "type": "github" + } + ], + "time": "2022-02-14T08:28:10+00:00" + }, + { + "name": "sebastian/lines-of-code", + "version": "1.0.3", + "source": { + "type": "git", + "url": "https://github.com/sebastianbergmann/lines-of-code.git", + "reference": "c1c2e997aa3146983ed888ad08b15470a2e22ecc" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/sebastianbergmann/lines-of-code/zipball/c1c2e997aa3146983ed888ad08b15470a2e22ecc", + "reference": "c1c2e997aa3146983ed888ad08b15470a2e22ecc", + "shasum": "" + }, + "require": { + "nikic/php-parser": "^4.6", + "php": ">=7.3" + }, + 
"require-dev": { + "phpunit/phpunit": "^9.3" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "1.0-dev" + } + }, + "autoload": { + "classmap": [ + "src/" + ] + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "BSD-3-Clause" + ], + "authors": [ + { + "name": "Sebastian Bergmann", + "email": "sebastian@phpunit.de", + "role": "lead" + } + ], + "description": "Library for counting the lines of code in PHP source code", + "homepage": "https://github.com/sebastianbergmann/lines-of-code", + "support": { + "issues": "https://github.com/sebastianbergmann/lines-of-code/issues", + "source": "https://github.com/sebastianbergmann/lines-of-code/tree/1.0.3" + }, + "funding": [ + { + "url": "https://github.com/sebastianbergmann", + "type": "github" + } + ], + "time": "2020-11-28T06:42:11+00:00" + }, + { + "name": "sebastian/object-enumerator", + "version": "4.0.4", + "source": { + "type": "git", + "url": "https://github.com/sebastianbergmann/object-enumerator.git", + "reference": "5c9eeac41b290a3712d88851518825ad78f45c71" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/sebastianbergmann/object-enumerator/zipball/5c9eeac41b290a3712d88851518825ad78f45c71", + "reference": "5c9eeac41b290a3712d88851518825ad78f45c71", + "shasum": "" + }, + "require": { + "php": ">=7.3", + "sebastian/object-reflector": "^2.0", + "sebastian/recursion-context": "^4.0" + }, + "require-dev": { + "phpunit/phpunit": "^9.3" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "4.0-dev" + } + }, + "autoload": { + "classmap": [ + "src/" + ] + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "BSD-3-Clause" + ], + "authors": [ + { + "name": "Sebastian Bergmann", + "email": "sebastian@phpunit.de" + } + ], + "description": "Traverses array structures and object graphs to enumerate all referenced objects", + "homepage": "https://github.com/sebastianbergmann/object-enumerator/", + 
"support": { + "issues": "https://github.com/sebastianbergmann/object-enumerator/issues", + "source": "https://github.com/sebastianbergmann/object-enumerator/tree/4.0.4" + }, + "funding": [ + { + "url": "https://github.com/sebastianbergmann", + "type": "github" + } + ], + "time": "2020-10-26T13:12:34+00:00" + }, + { + "name": "sebastian/object-reflector", + "version": "2.0.4", + "source": { + "type": "git", + "url": "https://github.com/sebastianbergmann/object-reflector.git", + "reference": "b4f479ebdbf63ac605d183ece17d8d7fe49c15c7" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/sebastianbergmann/object-reflector/zipball/b4f479ebdbf63ac605d183ece17d8d7fe49c15c7", + "reference": "b4f479ebdbf63ac605d183ece17d8d7fe49c15c7", + "shasum": "" + }, + "require": { + "php": ">=7.3" + }, + "require-dev": { + "phpunit/phpunit": "^9.3" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "2.0-dev" + } + }, + "autoload": { + "classmap": [ + "src/" + ] + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "BSD-3-Clause" + ], + "authors": [ + { + "name": "Sebastian Bergmann", + "email": "sebastian@phpunit.de" + } + ], + "description": "Allows reflection of object attributes, including inherited and non-public ones", + "homepage": "https://github.com/sebastianbergmann/object-reflector/", + "support": { + "issues": "https://github.com/sebastianbergmann/object-reflector/issues", + "source": "https://github.com/sebastianbergmann/object-reflector/tree/2.0.4" + }, + "funding": [ + { + "url": "https://github.com/sebastianbergmann", + "type": "github" + } + ], + "time": "2020-10-26T13:14:26+00:00" + }, + { + "name": "sebastian/recursion-context", + "version": "4.0.4", + "source": { + "type": "git", + "url": "https://github.com/sebastianbergmann/recursion-context.git", + "reference": "cd9d8cf3c5804de4341c283ed787f099f5506172" + }, + "dist": { + "type": "zip", + "url": 
"https://api.github.com/repos/sebastianbergmann/recursion-context/zipball/cd9d8cf3c5804de4341c283ed787f099f5506172", + "reference": "cd9d8cf3c5804de4341c283ed787f099f5506172", + "shasum": "" + }, + "require": { + "php": ">=7.3" + }, + "require-dev": { + "phpunit/phpunit": "^9.3" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "4.0-dev" + } + }, + "autoload": { + "classmap": [ + "src/" + ] + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "BSD-3-Clause" + ], + "authors": [ + { + "name": "Sebastian Bergmann", + "email": "sebastian@phpunit.de" + }, + { + "name": "Jeff Welch", + "email": "whatthejeff@gmail.com" + }, + { + "name": "Adam Harvey", + "email": "aharvey@php.net" + } + ], + "description": "Provides functionality to recursively process PHP variables", + "homepage": "http://www.github.com/sebastianbergmann/recursion-context", + "support": { + "issues": "https://github.com/sebastianbergmann/recursion-context/issues", + "source": "https://github.com/sebastianbergmann/recursion-context/tree/4.0.4" + }, + "funding": [ + { + "url": "https://github.com/sebastianbergmann", + "type": "github" + } + ], + "time": "2020-10-26T13:17:30+00:00" + }, + { + "name": "sebastian/resource-operations", + "version": "3.0.3", + "source": { + "type": "git", + "url": "https://github.com/sebastianbergmann/resource-operations.git", + "reference": "0f4443cb3a1d92ce809899753bc0d5d5a8dd19a8" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/sebastianbergmann/resource-operations/zipball/0f4443cb3a1d92ce809899753bc0d5d5a8dd19a8", + "reference": "0f4443cb3a1d92ce809899753bc0d5d5a8dd19a8", + "shasum": "" + }, + "require": { + "php": ">=7.3" + }, + "require-dev": { + "phpunit/phpunit": "^9.0" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "3.0-dev" + } + }, + "autoload": { + "classmap": [ + "src/" + ] + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + 
"BSD-3-Clause" + ], + "authors": [ + { + "name": "Sebastian Bergmann", + "email": "sebastian@phpunit.de" + } + ], + "description": "Provides a list of PHP built-in functions that operate on resources", + "homepage": "https://www.github.com/sebastianbergmann/resource-operations", + "support": { + "issues": "https://github.com/sebastianbergmann/resource-operations/issues", + "source": "https://github.com/sebastianbergmann/resource-operations/tree/3.0.3" + }, + "funding": [ + { + "url": "https://github.com/sebastianbergmann", + "type": "github" + } + ], + "time": "2020-09-28T06:45:17+00:00" + }, + { + "name": "sebastian/type", + "version": "3.2.0", + "source": { + "type": "git", + "url": "https://github.com/sebastianbergmann/type.git", + "reference": "fb3fe09c5f0bae6bc27ef3ce933a1e0ed9464b6e" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/sebastianbergmann/type/zipball/fb3fe09c5f0bae6bc27ef3ce933a1e0ed9464b6e", + "reference": "fb3fe09c5f0bae6bc27ef3ce933a1e0ed9464b6e", + "shasum": "" + }, + "require": { + "php": ">=7.3" + }, + "require-dev": { + "phpunit/phpunit": "^9.5" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "3.2-dev" + } + }, + "autoload": { + "classmap": [ + "src/" + ] + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "BSD-3-Clause" + ], + "authors": [ + { + "name": "Sebastian Bergmann", + "email": "sebastian@phpunit.de", + "role": "lead" + } + ], + "description": "Collection of value objects that represent the types of the PHP type system", + "homepage": "https://github.com/sebastianbergmann/type", + "support": { + "issues": "https://github.com/sebastianbergmann/type/issues", + "source": "https://github.com/sebastianbergmann/type/tree/3.2.0" + }, + "funding": [ + { + "url": "https://github.com/sebastianbergmann", + "type": "github" + } + ], + "time": "2022-09-12T14:47:03+00:00" + }, + { + "name": "sebastian/version", + "version": "3.0.2", + "source": { + "type": "git", + 
"url": "https://github.com/sebastianbergmann/version.git", + "reference": "c6c1022351a901512170118436c764e473f6de8c" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/sebastianbergmann/version/zipball/c6c1022351a901512170118436c764e473f6de8c", + "reference": "c6c1022351a901512170118436c764e473f6de8c", + "shasum": "" + }, + "require": { + "php": ">=7.3" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "3.0-dev" + } + }, + "autoload": { + "classmap": [ + "src/" + ] + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "BSD-3-Clause" + ], + "authors": [ + { + "name": "Sebastian Bergmann", + "email": "sebastian@phpunit.de", + "role": "lead" + } + ], + "description": "Library that helps with managing the version number of Git-hosted PHP projects", + "homepage": "https://github.com/sebastianbergmann/version", + "support": { + "issues": "https://github.com/sebastianbergmann/version/issues", + "source": "https://github.com/sebastianbergmann/version/tree/3.0.2" + }, + "funding": [ + { + "url": "https://github.com/sebastianbergmann", + "type": "github" + } + ], + "time": "2020-09-28T06:39:44+00:00" + }, + { + "name": "theseer/tokenizer", + "version": "1.2.1", + "source": { + "type": "git", + "url": "https://github.com/theseer/tokenizer.git", + "reference": "34a41e998c2183e22995f158c581e7b5e755ab9e" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/theseer/tokenizer/zipball/34a41e998c2183e22995f158c581e7b5e755ab9e", + "reference": "34a41e998c2183e22995f158c581e7b5e755ab9e", + "shasum": "" + }, + "require": { + "ext-dom": "*", + "ext-tokenizer": "*", + "ext-xmlwriter": "*", + "php": "^7.2 || ^8.0" + }, + "type": "library", + "autoload": { + "classmap": [ + "src/" + ] + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "BSD-3-Clause" + ], + "authors": [ + { + "name": "Arne Blankerts", + "email": "arne@blankerts.de", + "role": "Developer" + } + ], + 
"description": "A small library for converting tokenized PHP source code into XML and potentially other formats", + "support": { + "issues": "https://github.com/theseer/tokenizer/issues", + "source": "https://github.com/theseer/tokenizer/tree/1.2.1" + }, + "funding": [ + { + "url": "https://github.com/theseer", + "type": "github" + } + ], + "time": "2021-07-28T10:34:58+00:00" + } + ], + "aliases": [], + "minimum-stability": "stable", + "stability-flags": [], + "prefer-stable": false, + "prefer-lowest": false, + "platform": { + "php": "^8.1", + "ext-mbstring": "*" + }, + "platform-dev": [], + "plugin-api-version": "2.0.0" +} diff --git a/characters.json b/data/characters.json similarity index 100% rename from characters.json rename to data/characters.json diff --git a/encoder.json b/data/encoder.json similarity index 100% rename from encoder.json rename to data/encoder.json diff --git a/vocab.bpe b/data/vocab.bpe similarity index 100% rename from vocab.bpe rename to data/vocab.bpe diff --git a/gpt3-encoder.php b/gpt3-encoder.php index 7c6fc3e..3bf1424 100644 --- a/gpt3-encoder.php +++ b/gpt3-encoder.php @@ -1,320 +1,313 @@ - 0) - { - $bpe_merges[] = $split_bmt; - } - } - $bpe_ranks = gpt_dictZip($bpe_merges, range(0, count($bpe_merges) - 1)); - - $cache = array(); - foreach($matches[0] as $token) - { - $new_tokens = array(); - $chars = array(); - $token = utf8_encode($token); - if(function_exists('mb_strlen')) - { - $len = mb_strlen($token, 'UTF-8'); - for ($i = 0; $i < $len; $i++) - { - $chars[] = mb_substr($token, $i, 1, 'UTF-8'); - } - } - else - { - $chars = str_split($token); - } - $result_word = ''; - foreach($chars as $char) - { - if(isset($byte_encoder[gpt_unichr($char)])) - { - $result_word .= $byte_encoder[gpt_unichr($char)]; - } - } - $new_tokens_bpe = gpt_bpe($result_word, $bpe_ranks, $cache); - $new_tokens_bpe = explode(' ', $new_tokens_bpe); - foreach($new_tokens_bpe as $x) - { - if(isset($encoder[$x])) - { - $new_tokens[$x] = $encoder[$x]; - } - else 
- { - $new_tokens[$x] = $x; - } - } - foreach($new_tokens as $ninx => $nval) - { - if(isset($bpe_tokens[$ninx])) - { - $bpe_tokens[] = $nval; - } - else - { - $bpe_tokens[$ninx] = $nval; - } - } - } - return $bpe_tokens; -} - -function gpt_my_filter($var) -{ - return ($var !== NULL && $var !== FALSE && $var !== ''); -} - -function gpt_unichr($c) -{ - if (ord($c[0]) >=0 && ord($c[0]) <= 127) - { - return ord($c[0]); - } - if (ord($c[0]) >= 192 && ord($c[0]) <= 223) - { - return (ord($c[0])-192)*64 + (ord($c[1])-128); - } - if (ord($c[0]) >= 224 && ord($c[0]) <= 239) - { - return (ord($c[0])-224)*4096 + (ord($c[1])-128)*64 + (ord($c[2])-128); - } - if (ord($c[0]) >= 240 && ord($c[0]) <= 247) - { - return (ord($c[0])-240)*262144 + (ord($c[1])-128)*4096 + (ord($c[2])-128)*64 + (ord($c[3])-128); - } - if (ord($c[0]) >= 248 && ord($c[0]) <= 251) - { - return (ord($c[0])-248)*16777216 + (ord($c[1])-128)*262144 + (ord($c[2])-128)*4096 + (ord($c[3])-128)*64 + (ord($c[4])-128); - } - if (ord($c[0]) >= 252 && ord($c[0]) <= 253) - { - return (ord($c[0])-252)*1073741824 + (ord($c[1])-128)*16777216 + (ord($c[2])-128)*262144 + (ord($c[3])-128)*4096 + (ord($c[4])-128)*64 + (ord($c[5])-128); - } - if (ord($c[0]) >= 254 && ord($c[0]) <= 255) - { - return 0; - } - return 0; -} -function gpt_dictZip($x, $y) -{ - $result = array(); - $cnt = 0; - foreach($x as $i) - { - if(isset($i[1]) && isset($i[0])) - { - $result[$i[0] . ',' . 
$i[1]] = $cnt; - $cnt++; - } - } - return $result; -} -function gpt_get_pairs($word) -{ - $pairs = array(); - $prev_char = $word[0]; - for ($i = 1; $i < count($word); $i++) - { - $char = $word[$i]; - $pairs[] = array($prev_char, $char); - $prev_char = $char; - } - return $pairs; -} -function gpt_split($str, $len = 1) -{ - $arr = []; - if(function_exists('mb_strlen')) - { - $length = mb_strlen($str, 'UTF-8'); - } - else - { - $length = strlen($str); - } - - for ($i = 0; $i < $length; $i += $len) - { - if(function_exists('mb_substr')) - { - $arr[] = mb_substr($str, $i, $len, 'UTF-8'); - } - else - { - $arr[] = substr($str, $i, $len); - } - } - return $arr; - -} -function gpt_bpe($token, $bpe_ranks, &$cache) -{ - if(array_key_exists($token, $cache)) - { - return $cache[$token]; - } - $word = gpt_split($token); - $init_len = count($word); - $pairs = gpt_get_pairs($word); - if(!$pairs) - { - return $token; - } - while (true) - { - $minPairs = array(); - foreach($pairs as $pair) - { - if(array_key_exists($pair[0] . ','. $pair[1], $bpe_ranks)) - { - $rank = $bpe_ranks[$pair[0] . ','. $pair[1]]; - $minPairs[$rank] = $pair; - } - else - { - $minPairs[10e10] = $pair; - } - } - ksort($minPairs); - $min_key = array_key_first($minPairs); - foreach($minPairs as $mpi => $mp) - { - if($mpi < $min_key) - { - $min_key = $mpi; - } - } - $bigram = $minPairs[$min_key]; - if(!array_key_exists($bigram[0] . ',' . 
$bigram[1], $bpe_ranks)) - { - break; - } - $first = $bigram[0]; - $second = $bigram[1]; - $new_word = array(); - $i = 0; - while ($i < count($word)) - { - $j = gpt_indexOf($word, $first, $i); - if ($j === -1) - { - $new_word = array_merge($new_word, array_slice($word, $i, null, true)); - break; - } - if($i > $j) - { - $slicer = array(); - } - elseif($j == 0) - { - $slicer = array(); - } - else - { - $slicer = array_slice($word, $i, $j - $i, true); - } - $new_word = array_merge($new_word, $slicer); - if(count($new_word) > $init_len) - { - break; - } - $i = $j; - if ($word[$i] === $first && $i < count($word) - 1 && $word[$i + 1] === $second) - { - array_push($new_word, $first . $second); - $i = $i + 2; - } - else - { - array_push($new_word, $word[$i]); - $i = $i + 1; - } - } - if($word == $new_word) - { - break; - } - $word = $new_word; - if (count($word) === 1) - { - break; - } - else - { - $pairs = gpt_get_pairs($word); - } - } - $word = implode(' ', $word); - $cache[$token] = $word; - return $word; -} -function gpt_indexOf($arrax, $searchElement, $fromIndex) -{ - $index = 0; - foreach($arrax as $index => $value) - { - if($index < $fromIndex) - { - $index++; - continue; - } - if($value == $searchElement) - { - return $index; - } - $index++; - } - return -1; -} - -$prompt = "Many words map to one token, but some don't: indivisible. Unicode characters like emojis may be split into many tokens containing the underlying bytes: 🤚🏾 Sequences of characters commonly found next to each other may be grouped together: 1234567890"; -$token_array = gpt_encode($prompt); -error_log('Token array: ' . print_r($token_array, true)); -error_log('Count: ' . 
count($token_array)); - -?> \ No newline at end of file + 0) + { + $bpe_merges[] = $split_bmt; + } + } + $bpe_ranks = gpt_dictZip($bpe_merges, range(0, count($bpe_merges) - 1)); + + $cache = array(); + foreach($matches[0] as $token) + { + $new_tokens = array(); + $chars = array(); + $token = utf8_encode($token); + if(function_exists('mb_strlen')) + { + $len = mb_strlen($token, 'UTF-8'); + for ($i = 0; $i < $len; $i++) + { + $chars[] = mb_substr($token, $i, 1, 'UTF-8'); + } + } + else + { + $chars = str_split($token); + } + $result_word = ''; + foreach($chars as $char) + { + if(isset($byte_encoder[gpt_unichr($char)])) + { + $result_word .= $byte_encoder[gpt_unichr($char)]; + } + } + $new_tokens_bpe = gpt_bpe($result_word, $bpe_ranks, $cache); + $new_tokens_bpe = explode(' ', $new_tokens_bpe); + foreach($new_tokens_bpe as $x) + { + if(isset($encoder[$x])) + { + $new_tokens[$x] = $encoder[$x]; + } + else + { + $new_tokens[$x] = $x; + } + } + foreach($new_tokens as $ninx => $nval) + { + if(isset($bpe_tokens[$ninx])) + { + $bpe_tokens[] = $nval; + } + else + { + $bpe_tokens[$ninx] = $nval; + } + } + } + return $bpe_tokens; +} + +function gpt_my_filter($var) +{ + return ($var !== NULL && $var !== FALSE && $var !== ''); +} + +function gpt_unichr($c) +{ + if (ord($c[0]) >=0 && ord($c[0]) <= 127) + { + return ord($c[0]); + } + if (ord($c[0]) >= 192 && ord($c[0]) <= 223) + { + return (ord($c[0])-192)*64 + (ord($c[1])-128); + } + if (ord($c[0]) >= 224 && ord($c[0]) <= 239) + { + return (ord($c[0])-224)*4096 + (ord($c[1])-128)*64 + (ord($c[2])-128); + } + if (ord($c[0]) >= 240 && ord($c[0]) <= 247) + { + return (ord($c[0])-240)*262144 + (ord($c[1])-128)*4096 + (ord($c[2])-128)*64 + (ord($c[3])-128); + } + if (ord($c[0]) >= 248 && ord($c[0]) <= 251) + { + return (ord($c[0])-248)*16777216 + (ord($c[1])-128)*262144 + (ord($c[2])-128)*4096 + (ord($c[3])-128)*64 + (ord($c[4])-128); + } + if (ord($c[0]) >= 252 && ord($c[0]) <= 253) + { + return (ord($c[0])-252)*1073741824 + 
(ord($c[1])-128)*16777216 + (ord($c[2])-128)*262144 + (ord($c[3])-128)*4096 + (ord($c[4])-128)*64 + (ord($c[5])-128); + } + if (ord($c[0]) >= 254 && ord($c[0]) <= 255) + { + return 0; + } + return 0; +} +function gpt_dictZip($x, $y) +{ + $result = array(); + $cnt = 0; + foreach($x as $i) + { + if(isset($i[1]) && isset($i[0])) + { + $result[$i[0] . ',' . $i[1]] = $cnt; + $cnt++; + } + } + return $result; +} +function gpt_get_pairs($word) +{ + $pairs = array(); + $prev_char = $word[0]; + for ($i = 1; $i < count($word); $i++) + { + $char = $word[$i]; + $pairs[] = array($prev_char, $char); + $prev_char = $char; + } + return $pairs; +} +function gpt_split($str, $len = 1) +{ + $arr = []; + if(function_exists('mb_strlen')) + { + $length = mb_strlen($str, 'UTF-8'); + } + else + { + $length = strlen($str); + } + + for ($i = 0; $i < $length; $i += $len) + { + if(function_exists('mb_substr')) + { + $arr[] = mb_substr($str, $i, $len, 'UTF-8'); + } + else + { + $arr[] = substr($str, $i, $len); + } + } + return $arr; + +} +function gpt_bpe($token, $bpe_ranks, &$cache) +{ + if(array_key_exists($token, $cache)) + { + return $cache[$token]; + } + $word = gpt_split($token); + $init_len = count($word); + $pairs = gpt_get_pairs($word); + if(!$pairs) + { + return $token; + } + while (true) + { + $minPairs = array(); + foreach($pairs as $pair) + { + if(array_key_exists($pair[0] . ','. $pair[1], $bpe_ranks)) + { + $rank = $bpe_ranks[$pair[0] . ','. $pair[1]]; + $minPairs[$rank] = $pair; + } + else + { + $minPairs[10e10] = $pair; + } + } + ksort($minPairs); + $min_key = array_key_first($minPairs); + foreach($minPairs as $mpi => $mp) + { + if($mpi < $min_key) + { + $min_key = $mpi; + } + } + $bigram = $minPairs[$min_key]; + if(!array_key_exists($bigram[0] . ',' . 
$bigram[1], $bpe_ranks)) + { + break; + } + $first = $bigram[0]; + $second = $bigram[1]; + $new_word = array(); + $i = 0; + while ($i < count($word)) + { + $j = gpt_indexOf($word, $first, $i); + if ($j === -1) + { + $new_word = array_merge($new_word, array_slice($word, $i, null, true)); + break; + } + if($i > $j) + { + $slicer = array(); + } + elseif($j == 0) + { + $slicer = array(); + } + else + { + $slicer = array_slice($word, $i, $j - $i, true); + } + $new_word = array_merge($new_word, $slicer); + if(count($new_word) > $init_len) + { + break; + } + $i = $j; + if ($word[$i] === $first && $i < count($word) - 1 && $word[$i + 1] === $second) + { + array_push($new_word, $first . $second); + $i = $i + 2; + } + else + { + array_push($new_word, $word[$i]); + $i = $i + 1; + } + } + if($word == $new_word) + { + break; + } + $word = $new_word; + if (count($word) === 1) + { + break; + } + else + { + $pairs = gpt_get_pairs($word); + } + } + $word = implode(' ', $word); + $cache[$token] = $word; + return $word; +} +function gpt_indexOf($arrax, $searchElement, $fromIndex) +{ + $index = 0; + foreach($arrax as $index => $value) + { + if($index < $fromIndex) + { + $index++; + continue; + } + if($value == $searchElement) + { + return $index; + } + $index++; + } + return -1; +} diff --git a/src/Encoder.php b/src/Encoder.php new file mode 100644 index 0000000..806d1cb --- /dev/null +++ b/src/Encoder.php @@ -0,0 +1,274 @@ +my_filter(...)); + if ($split_bmt !== []) { + $bpe_merges[] = $split_bmt; + } + } + + $bpe_ranks = $this->dictZip($bpe_merges, range(0, count($bpe_merges) - 1)); + + $cache = []; + foreach ($matches[0] as $token) { + $new_tokens = []; + $chars = []; + $token = utf8_encode((string) $token); + $len = mb_strlen($token, 'UTF-8'); + for ($i = 0; $i < $len; ++$i) { + $chars[] = mb_substr($token, $i, 1, 'UTF-8'); + } + + $result_word = ''; + foreach ($chars as $char) { + if (isset($byte_encoder[$this->unichr($char)])) { + $result_word .= 
$byte_encoder[$this->unichr($char)]; + } + } + + $new_tokens_bpe = $this->bpe($result_word, $bpe_ranks, $cache); + $new_tokens_bpe = explode(' ', (string) $new_tokens_bpe); + foreach ($new_tokens_bpe as $x) { + if (isset($encoder[$x])) { + $new_tokens[$x] = $encoder[$x]; + } else { + $new_tokens[$x] = $x; + } + } + + foreach ($new_tokens as $ninx => $nval) { + if (isset($bpe_tokens[$ninx])) { + $bpe_tokens[] = $nval; + } else { + $bpe_tokens[$ninx] = $nval; + } + } + } + + return array_values($bpe_tokens); + } + + private function my_filter($var) + { + return null !== $var && false !== $var && '' !== $var; + } + + private function unichr($c) + { + if (ord($c[0]) >= 0 && ord($c[0]) <= 127) { + return ord($c[0]); + } + + if (ord($c[0]) >= 192 && ord($c[0]) <= 223) { + return (ord($c[0]) - 192) * 64 + (ord($c[1]) - 128); + } + + if (ord($c[0]) >= 224 && ord($c[0]) <= 239) { + return (ord($c[0]) - 224) * 4096 + (ord($c[1]) - 128) * 64 + (ord($c[2]) - 128); + } + + if (ord($c[0]) >= 240 && ord($c[0]) <= 247) { + return (ord($c[0]) - 240) * 262144 + (ord($c[1]) - 128) * 4096 + (ord($c[2]) - 128) * 64 + (ord($c[3]) - 128); + } + + if (ord($c[0]) >= 248 && ord($c[0]) <= 251) { + return (ord($c[0]) - 248) * 16_777_216 + (ord($c[1]) - 128) * 262144 + (ord($c[2]) - 128) * 4096 + (ord($c[3]) - 128) * 64 + (ord($c[4]) - 128); + } + + if (ord($c[0]) >= 252 && ord($c[0]) <= 253) { + return (ord($c[0]) - 252) * 1_073_741_824 + (ord($c[1]) - 128) * 16_777_216 + (ord($c[2]) - 128) * 262144 + (ord($c[3]) - 128) * 4096 + (ord($c[4]) - 128) * 64 + (ord($c[5]) - 128); + } + + if (ord($c[0]) >= 254 && ord($c[0]) <= 255) { + return 0; + } + + return 0; + } + + private function dictZip($x, $y) + { + $result = []; + $cnt = 0; + foreach ($x as $i) { + if (isset($i[1]) && isset($i[0])) { + $result[$i[0].','.$i[1]] = $cnt; + ++$cnt; + } + } + + return $result; + } + + private function get_pairs($word) + { + $pairs = []; + $prev_char = $word[0]; + for ($i = 1; $i < (is_countable($word) ? 
count($word) : 0); ++$i) { + $char = $word[$i]; + $pairs[] = [$prev_char, $char]; + $prev_char = $char; + } + + return $pairs; + } + + private function split($str, $len = 1) + { + $arr = []; + $length = mb_strlen((string) $str, 'UTF-8'); + + for ($i = 0; $i < $length; $i += $len) { + $arr[] = mb_substr((string) $str, $i, $len, 'UTF-8'); + } + + return $arr; + } + + private function bpe($token, $bpe_ranks, &$cache) + { + if (array_key_exists($token, $cache)) { + return $cache[$token]; + } + + $word = $this->split($token); + $init_len = is_countable($word) ? count($word) : 0; + $pairs = $this->get_pairs($word); + if (!$pairs) { + return $token; + } + + while (true) { + $minPairs = []; + foreach ($pairs as $pair) { + if (array_key_exists($pair[0].','.$pair[1], $bpe_ranks)) { + $rank = $bpe_ranks[$pair[0].','.$pair[1]]; + $minPairs[$rank] = $pair; + } else { + $minPairs[10e10] = $pair; + } + } + + ksort($minPairs); + $min_key = array_key_first($minPairs); + foreach ($minPairs as $mpi => $mp) { + if ($mpi < $min_key) { + $min_key = $mpi; + } + } + + $bigram = $minPairs[$min_key]; + if (!array_key_exists($bigram[0].','.$bigram[1], $bpe_ranks)) { + break; + } + + $first = $bigram[0]; + $second = $bigram[1]; + $new_word = []; + $i = 0; + while ($i < (is_countable($word) ? count($word) : 0)) { + $j = $this->indexOf($word, $first, $i); + if (-1 === $j) { + $new_word = array_merge($new_word, array_slice($word, $i, null, true)); + break; + } + + if ($i > $j) { + $slicer = []; + } elseif (0 == $j) { + $slicer = []; + } else { + $slicer = array_slice($word, $i, $j - $i, true); + } + + $new_word = array_merge($new_word, $slicer); + if (count($new_word) > $init_len) { + break; + } + + $i = $j; + if ($word[$i] === $first && $i < (is_countable($word) ? 
count($word) : 0) - 1 && $word[$i + 1] === $second) { + array_push($new_word, $first.$second); + $i = $i + 2; + } else { + array_push($new_word, $word[$i]); + $i = $i + 1; + } + } + + if ($word == $new_word) { + break; + } + + $word = $new_word; + if (1 === count($word)) { + break; + } else { + $pairs = $this->get_pairs($word); + } + } + + $word = implode(' ', $word); + $cache[$token] = $word; + + return $word; + } + + private function indexOf($array, $searchElement, $fromIndex) + { + $index = 0; + foreach ($array as $index => $value) { + if ($index < $fromIndex) { + ++$index; + continue; + } + + if ($value == $searchElement) { + return $index; + } + + ++$index; + } + + return -1; + } +} diff --git a/tests/EncoderTest.php b/tests/EncoderTest.php new file mode 100644 index 0000000..e1b6fd1 --- /dev/null +++ b/tests/EncoderTest.php @@ -0,0 +1,28 @@ +assertEquals(array(1212,318,617,2420), $encoder->encode("This is some text")); + $this->assertEquals([10134, 23858, 21746], $encoder->encode("hasOwnProperty")); + $this->assertEquals([10163, 2231, 30924, 3829], $encoder->encode("1234567890")); + $this->assertEquals([ 15496, 11854, 616, 1468, 1545 ], $encoder->encode("Hello darkness my old friend")); + $this->assertEquals([31373, 50169, 233, 995, 12520, 234, 235], $encoder->encode("hello 👋 world 🌍")); + $this->assertEquals([33, 11401, 19047, 326, 262, 749, 2219, 2456, 389, 7997, 287, 262, 25818, 355, 257, 2060, 11241, 981, 262, 4071, 2456, 389, 5445, 866, 656, 734, 393, 517, 850, 4775, 16326, 290, 428, 318, 287, 4381, 351, 644, 257, 850, 4775, 12, 3106, 11241, 1634, 11862, 857, 13], $encoder->encode($longText)); + $this->assertEquals([33, 11401, 19047, 326, 262, 749, 2219, 2456, 389, 7997, 287, 262, 25818, 355, 257, 2060, 11241, 981, 262, 4071, 2456, 389, 5445, 866, 656, 734, 393, 517, 850, 4775, 16326, 290, 428, 318, 287, 4381, 351, 644, 257, 850, 4775, 12, 3106, 11241, 1634, 11862, 857, 13], $encoder->encode($longText)); + $this->assertEquals([38374, 268, 292, 256, 446, 
274, 31215, 285, 8836, 13], $encoder->encode("Buenas tardes para mí."));; + } +}