Skip to content

Commit

Permalink
Merge pull request #78 from TysonAndre/2.1.0-dev-changes
Browse files Browse the repository at this point in the history
2.1.0dev changes: Allow parsing numbers outside of the int64/uint64/double ranges
  • Loading branch information
crazyxman authored Oct 13, 2022
2 parents c0034f2 + 03fec36 commit a860354
Show file tree
Hide file tree
Showing 42 changed files with 4,240 additions and 219 deletions.
8 changes: 4 additions & 4 deletions .appveyor.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,22 +37,22 @@ environment:
- APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2019
ARCH: x86
VC: vs16
PHP_VER: 8.2.0beta2
PHP_VER: 8.2.0RC3
TS: 1
- APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2019
ARCH: x64
VC: vs16
PHP_VER: 8.2.0beta2
PHP_VER: 8.2.0RC3
TS: 0
- APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2019
ARCH: x64
VC: vs16
PHP_VER: 8.1.9
PHP_VER: 8.1.11
TS: 0
- APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2019
ARCH: x64
VC: vs16
PHP_VER: 8.0.22
PHP_VER: 8.0.24
TS: 0
- APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2017
ARCH: x64
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/integration.yml
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ jobs:
- "7.3"
- "7.4"
- "8.0"
- "8.1"
os: [ubuntu-latest]
experimental: [false]
runs-on: ${{ matrix.os }}
Expand Down
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ modules/
*.dep
php_test_results_*.txt
tests/*
!tests/*.phpt
!tests/**/*.phpt
!tests/_files/
*~
configure.ac
Expand Down
12 changes: 6 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -109,36 +109,36 @@ var_dump($res) //int(5)
/**
* Similar to json_decode()
*
* @returns array|stdClass|string|float|int|bool|null
* @return array|stdClass|string|float|int|bool|null
* @throws RuntimeException for invalid JSON (or document over 4GB, or out of range integer/float)
*/
function simdjson_decode(string $json, bool $assoc = false, int $depth = 512) {}

/**
* Returns true if json is valid.
*
* @returns ?bool (null if depth is invalid)
* @return ?bool (null if depth is invalid)
*/
function simdjson_is_valid(string $json, int $depth = 512) : ?bool {}

/**
* Parses $json and returns the number of keys in $json matching the JSON pointer $key
*
* @returns ?bool (null if depth is invalid)
* @return ?int (null if depth is invalid)
*/
function simdjson_key_count(string $json, string $key, int $depth = 512) : ?int {}

/**
* Returns true if the JSON pointer $key could be found.
*
* @returns ?bool (null if depth is invalid, false if json is invalid or key is not found)
* @return ?bool (null if depth is invalid, false if json is invalid or key is not found)
*/
function simdjson_key_exists(string $json, string $key, int $depth = 512) : ?bool {}

/**
* Returns the value at $key
*
* @returns array|stdClass|string|float|int|bool|null the value at $key
* @return array|stdClass|string|float|int|bool|null the value at $key
* @throws RuntimeException for invalid JSON (or document over 4GB, or out of range integer/float)
*/
function simdjson_key_value(string $json, string $key, bool $assoc = false, int $depth = 512) {}
Expand All @@ -148,7 +148,7 @@ function simdjson_key_value(string $json, string $key, bool $assoc = unknown, in

There are some differences from `json_decode()` due to the implementation of the underlying simdjson library. This will throw a RuntimeException if simdjson rejects the JSON.

1) `simdjson_decode()` how out of range 64-bit integers and floats are handled.
1) **Until simdjson 2.1.0,** `simdjson_decode()` differed in how out of range 64-bit integers and floats are handled.

See https://github.com/simdjson/simdjson/blob/master/doc/basics.md#standard-compliance

Expand Down
6 changes: 4 additions & 2 deletions config.m4
Original file line number Diff line number Diff line change
Expand Up @@ -29,13 +29,15 @@ if test "$PHP_SIMDJSON" != "no"; then
[CXXFLAGS="$CXXFLAGS -fvisibility=hidden"])

AC_DEFINE(HAVE_SIMDJSON, 1, [whether simdjson is enabled])
dnl Disable simdjson's C++ exceptions: PHP is written in C and loads this C++ module, so errors are handled manually.
dnl Disable the development checks of the C++ simdjson library, including in PHP debug builds (can be manually overridden).
PHP_NEW_EXTENSION(simdjson, [
php_simdjson.cpp \
src/bindings.cpp \
src/simdjson.cpp],
$ext_shared,, "-std=c++17 -DZEND_ENABLE_STATIC_TSRMLS_CACHE=1", cxx)
$ext_shared,, "-std=c++17 -DZEND_ENABLE_STATIC_TSRMLS_CACHE=1 -DSIMDJSON_EXCEPTIONS=0 -DSIMDJSON_DEVELOPMENT_CHECKS=0", cxx)

PHP_INSTALL_HEADERS([ext/simdjson], [php_simdjson.h])
dnl The header list is whitespace-separated; do not put commas between entries.
PHP_INSTALL_HEADERS([ext/simdjson], [php_simdjson.h src/bindings.h src/bindings_defs.h])
PHP_ADD_MAKEFILE_FRAGMENT
PHP_ADD_BUILD_DIR(src, 1)
fi
2 changes: 1 addition & 1 deletion config.w32
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,6 @@ if (PHP_SIMDJSON == "yes") {
'/DZEND_ENABLE_STATIC_TSRMLS_CACHE=1 /std:c++latest');
ADD_SOURCES(configure_module_dirname + '/src', 'simdjson.cpp bindings.cpp', 'simdjson');
ADD_FLAG('CFLAGS_SIMDJSON', '/I' + configure_module_dirname);
PHP_INSTALL_HEADERS('ext/simdjson', 'php_simdjson.h');
PHP_INSTALL_HEADERS('ext/simdjson', 'php_simdjson.h src/bindings.h src/bindings_defs.h');
}
// vim:ft=javascript
57 changes: 52 additions & 5 deletions package.xml
Original file line number Diff line number Diff line change
Expand Up @@ -20,17 +20,23 @@
-->
<date>2022-10-12</date>
<version>
<release>2.0.5</release>
<api>2.0.5</api>
<release>2.1.0dev</release>
<api>2.1.0dev</api>
</version>
<stability>
<release>stable</release>
<api>stable</api>
</stability>
<license uri="https://www.apache.org/licenses/LICENSE-2.0.html">Apache 2.0</license>
<notes>
* Reuse PHP's 1-byte and 0-byte interned strings in simdjson_decode, reducing memory usage for those strings. (e.g. for the key/value in '{"x":""}')
* Return correct count in simdjson_key_count. Properly return counts larger than 0xFFFFFF instead of returning 0xFFFFFF.
* Allow out of range 64-bit values in JSON integer syntax and allow floating point values outside of the max/min finite floating point values (i.e. parsing to +/- infinity).

This allows simdjson_decode() to be used as a replacement for json_decode() in more use cases.
* Return the correct value in simdjson_key_count() for JSON pointers to arrays/objects exceeding size 0xFFFFFF.
Previously, this would be limited to returning at most 0xFFFFFF(16777215).
* Throw 'SimdJsonException extends RuntimeException' instead of RuntimeException.
* Set the error code from simdjson as SimdJsonException->getCode()
* Expose error_code constants from simdjson as `SIMDJSON_ERR_$ERRCODENAME`
</notes>
<contents>
<dir name="/">
Expand All @@ -42,9 +48,12 @@
<file name="php_simdjson.h" role="src"/>
<file name="php_simdjson.cpp" role="src"/>
<file name="README.md" role="doc"/>
<file name="simdjson.stub.php" role="src"/>
<file name="simdjson_arginfo.h" role="src"/>
<dir name="src">
<file name="bindings.cpp" role="src"/>
<file name="bindings.h" role="src"/>
<file name="bindings_defs.h" role="src"/>
<file name="simdjson.cpp" role="src"/>
<file name="simdjson.h" role="src"/>
</dir>
Expand All @@ -67,7 +76,6 @@
<file name="key_count.phpt" role="test"/>
<file name="key_count_args.phpt" role="test"/>
<file name="key_count_exception.phpt" role="test"/>
<file name="key_count_large.phpt" role="test"/>
<file name="key_exists.phpt" role="test"/>
<file name="key_exists_args.phpt" role="test"/>
<file name="key_value_args.phpt" role="test"/>
Expand All @@ -77,6 +85,30 @@
<file name="key_value_result.phpt" role="test"/>
<file name="uint64_overflow.phpt" role="test"/>
<file name="_files/result.json" role="test"/>
<dir name="compat">
<file name="001.phpt" role="test"/>
<file name="bug41067.phpt" role="test"/>
<file name="bug41504.phpt" role="test"/>
<file name="bug45791.phpt" role="test"/>
<file name="bug47644.phpt" role="test"/>
<file name="bug50224.phpt" role="test"/>
<file name="bug62010.phpt" role="test"/>
<file name="bug64874_part1.phpt" role="test"/>
<file name="bug64874_part2.phpt" role="test"/>
<file name="bug68546.phpt" role="test"/>
<file name="bug68817.phpt" role="test"/>
<file name="bug68938.phpt" role="test"/>
<file name="bug69187.phpt" role="test"/>
<file name="fail001.phpt" role="test"/>
<file name="json_decode_basic.phpt" role="test"/>
<file name="json_decode_error.phpt" role="test"/>
<file name="json_decode_invalid_utf8.phpt" role="test"/>
<file name="pass001.1_64bit.phpt" role="test"/>
<file name="pass001.1.phpt" role="test"/>
<file name="pass001.phpt" role="test"/>
<file name="pass002.phpt" role="test"/>
<file name="pass003.phpt" role="test"/>
</dir>
</dir>
</dir>
</contents>
Expand All @@ -93,6 +125,21 @@
<providesextension>simdjson</providesextension>
<extsrcrelease/>
<changelog>
<release>
<date>2022-10-01</date>
<version>
<release>2.0.5</release>
<api>2.0.5</api>
</version>
<stability>
<release>stable</release>
<api>stable</api>
</stability>
<license uri="https://www.apache.org/licenses/LICENSE-2.0.html">Apache 2.0</license>
<notes>
* Reuse PHP's 1-byte and 0-byte interned strings in simdjson_decode, reducing memory usage for those strings. (e.g. for the key/value in '{"x":""}')
</notes>
</release>
<release>
<date>2022-10-01</date>
<version>
Expand Down
80 changes: 58 additions & 22 deletions php_simdjson.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,17 @@ extern "C" {
#include "zend_exceptions.h"
#include "main/SAPI.h"
#include "ext/standard/info.h"
#include "ext/spl/spl_exceptions.h"

#include "php_simdjson.h"
#include "simdjson_arginfo.h"
}

// NOTE: On Windows, both the declaration and the definition of ZEND_API variables and functions may need to be inside an 'extern "C"' block (unconfirmed).
zend_class_entry *simdjson_exception_ce;

#include "src/bindings.h"
#include "src/simdjson.h"

ZEND_DECLARE_MODULE_GLOBALS(simdjson);

Expand Down Expand Up @@ -66,29 +72,15 @@ SIMDJSON_ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(simdjson_key_count_arginfo, 0,
ZEND_ARG_TYPE_INFO(0, depth, IS_LONG, 0)
ZEND_END_ARG_INFO()

extern simdjson::dom::parser* cplus_simdjson_create_parser(void);

extern void cplus_simdjson_free_parser(simdjson::dom::parser* parser);

extern bool cplus_simdjson_is_valid(simdjson::dom::parser& parser, const char *json, size_t len, size_t depth);

extern void cplus_simdjson_parse(simdjson::dom::parser& parser, const char *json, size_t len, zval *return_value, unsigned char assoc, size_t depth);

extern void cplus_simdjson_key_value(simdjson::dom::parser& parser, const char *json, size_t len, const char *key, zval *return_value, unsigned char assoc, size_t depth);

extern u_short cplus_simdjson_key_exists(simdjson::dom::parser& parser, const char *json, size_t len, const char *key, size_t depth);

extern void cplus_simdjson_key_count(simdjson::dom::parser& parser, const char *json, size_t len, const char *key, zval *return_value, size_t depth);

#define SIMDJSON_G(v) ZEND_MODULE_GLOBALS_ACCESSOR(simdjson, v)
static simdjson::dom::parser &simdjson_get_parser() {
simdjson::dom::parser *parser = (simdjson::dom::parser *)SIMDJSON_G(parser);
static simdjson_php_parser *simdjson_get_parser() {
simdjson_php_parser *parser = SIMDJSON_G(parser);
if (parser == NULL) {
parser = cplus_simdjson_create_parser();
SIMDJSON_G(parser) = parser;
ZEND_ASSERT(parser != NULL);
}
return *parser;
return parser;
}

// The simdjson parser accepts strings with at most 32-bit lengths, for now.
Expand Down Expand Up @@ -128,7 +120,10 @@ PHP_FUNCTION (simdjson_decode) {
if (!simdjson_validate_depth(depth)) {
RETURN_NULL();
}
cplus_simdjson_parse(simdjson_get_parser(), ZSTR_VAL(json), ZSTR_LEN(json), return_value, assoc, depth);
simdjson_php_error_code error = cplus_simdjson_parse(simdjson_get_parser(), ZSTR_VAL(json), ZSTR_LEN(json), return_value, assoc, depth);
if (error) {
cplus_simdjson_throw_jsonexception(error);
}
}

PHP_FUNCTION (simdjson_key_value) {
Expand All @@ -143,7 +138,10 @@ PHP_FUNCTION (simdjson_key_value) {
if (!simdjson_validate_depth(depth)) {
RETURN_NULL();
}
cplus_simdjson_key_value(simdjson_get_parser(), ZSTR_VAL(json), ZSTR_LEN(json), ZSTR_VAL(key), return_value, assoc, depth);
simdjson_php_error_code error = cplus_simdjson_key_value(simdjson_get_parser(), ZSTR_VAL(json), ZSTR_LEN(json), ZSTR_VAL(key), return_value, assoc, depth);
if (error) {
cplus_simdjson_throw_jsonexception(error);
}
}

PHP_FUNCTION (simdjson_key_count) {
Expand All @@ -156,7 +154,10 @@ PHP_FUNCTION (simdjson_key_count) {
if (!simdjson_validate_depth(depth)) {
RETURN_NULL();
}
cplus_simdjson_key_count(simdjson_get_parser(), ZSTR_VAL(json), ZSTR_LEN(json), ZSTR_VAL(key), return_value, depth);
simdjson_php_error_code error = cplus_simdjson_key_count(simdjson_get_parser(), ZSTR_VAL(json), ZSTR_LEN(json), ZSTR_VAL(key), return_value, depth);
if (error) {
cplus_simdjson_throw_jsonexception(error);
}
}

PHP_FUNCTION (simdjson_key_exists) {
Expand Down Expand Up @@ -202,7 +203,42 @@ ZEND_TSRMLS_CACHE_UPDATE();

/** {{{ PHP_MINIT_FUNCTION
*/
// Registers the PHP constant SIMDJSON_ERR_<errcode> as a long whose value is the simdjson::<errcode> enumerator.
#define SIMDJSON_REGISTER_ERROR_CODE_CONSTANT(errcode) REGISTER_LONG_CONSTANT("SIMDJSON_ERR_" #errcode, simdjson::errcode, CONST_PERSISTENT)
// Registers the PHP constant SIMDJSON_ERR_<errcode> as a long with an explicitly supplied value
// (used below for INVALID_PROPERTY, whose value is given directly instead of being read from a simdjson:: enumerator).
#define SIMDJSON_REGISTER_CUSTOM_ERROR_CODE_CONSTANT(errcode, val) REGISTER_LONG_CONSTANT("SIMDJSON_ERR_" #errcode, (val), CONST_PERSISTENT)
// Module startup hook: registers the SimdJsonException class (passing spl_ce_RuntimeException to its
// registration function) and then every SIMDJSON_ERR_* error code constant. Always returns SUCCESS.
PHP_MINIT_FUNCTION (simdjson) {
// Keep the class entry in the file-level simdjson_exception_ce pointer.
simdjson_exception_ce = register_class_SimdJsonException(spl_ce_RuntimeException);
SIMDJSON_REGISTER_ERROR_CODE_CONSTANT(CAPACITY); ///< This parser can't support a document that big
// NOTE(review): MEMALLOC is left unregistered; the reason is not visible in this file - confirm before enabling.
// SIMDJSON_REGISTER_ERROR_CODE_CONSTANT(MEMALLOC); ///< Error allocating memory, most likely out of memory
SIMDJSON_REGISTER_ERROR_CODE_CONSTANT(TAPE_ERROR); ///< Something went wrong, this is a generic error
SIMDJSON_REGISTER_ERROR_CODE_CONSTANT(DEPTH_ERROR); ///< Your document exceeds the user-specified depth limitation
SIMDJSON_REGISTER_ERROR_CODE_CONSTANT(STRING_ERROR); ///< Problem while parsing a string
SIMDJSON_REGISTER_ERROR_CODE_CONSTANT(T_ATOM_ERROR); ///< Problem while parsing an atom starting with the letter 't'
SIMDJSON_REGISTER_ERROR_CODE_CONSTANT(F_ATOM_ERROR); ///< Problem while parsing an atom starting with the letter 'f'
SIMDJSON_REGISTER_ERROR_CODE_CONSTANT(N_ATOM_ERROR); ///< Problem while parsing an atom starting with the letter 'n'
SIMDJSON_REGISTER_ERROR_CODE_CONSTANT(NUMBER_ERROR); ///< Problem while parsing a number
SIMDJSON_REGISTER_ERROR_CODE_CONSTANT(UTF8_ERROR); ///< the input is not valid UTF-8
SIMDJSON_REGISTER_ERROR_CODE_CONSTANT(UNINITIALIZED); ///< unknown error, or uninitialized document
SIMDJSON_REGISTER_ERROR_CODE_CONSTANT(EMPTY); ///< no structural element found
SIMDJSON_REGISTER_ERROR_CODE_CONSTANT(UNESCAPED_CHARS); ///< found unescaped characters in a string.
SIMDJSON_REGISTER_ERROR_CODE_CONSTANT(UNCLOSED_STRING); ///< missing quote at the end
SIMDJSON_REGISTER_ERROR_CODE_CONSTANT(UNSUPPORTED_ARCHITECTURE); ///< unsupported architecture
SIMDJSON_REGISTER_ERROR_CODE_CONSTANT(INCORRECT_TYPE); ///< JSON element has a different type than user expected
SIMDJSON_REGISTER_ERROR_CODE_CONSTANT(NUMBER_OUT_OF_RANGE); ///< JSON number does not fit in 64 bits
SIMDJSON_REGISTER_ERROR_CODE_CONSTANT(INDEX_OUT_OF_BOUNDS); ///< JSON array index too large
SIMDJSON_REGISTER_ERROR_CODE_CONSTANT(NO_SUCH_FIELD); ///< JSON field not found in object
SIMDJSON_REGISTER_ERROR_CODE_CONSTANT(IO_ERROR); ///< Error reading a file
SIMDJSON_REGISTER_ERROR_CODE_CONSTANT(INVALID_JSON_POINTER); ///< Invalid JSON pointer reference
SIMDJSON_REGISTER_ERROR_CODE_CONSTANT(INVALID_URI_FRAGMENT); ///< Invalid URI fragment
SIMDJSON_REGISTER_ERROR_CODE_CONSTANT(UNEXPECTED_ERROR); ///< indicative of a bug in simdjson
SIMDJSON_REGISTER_ERROR_CODE_CONSTANT(PARSER_IN_USE); ///< parser is already in use.
SIMDJSON_REGISTER_ERROR_CODE_CONSTANT(OUT_OF_ORDER_ITERATION); ///< tried to iterate an array or object out of order
SIMDJSON_REGISTER_ERROR_CODE_CONSTANT(INSUFFICIENT_PADDING); ///< The JSON doesn't have enough padding for simdjson to safely parse it.
SIMDJSON_REGISTER_ERROR_CODE_CONSTANT(INCOMPLETE_ARRAY_OR_OBJECT); ///< The document ends early.
SIMDJSON_REGISTER_ERROR_CODE_CONSTANT(SCALAR_DOCUMENT_AS_VALUE); ///< A scalar document is treated as a value.
SIMDJSON_REGISTER_ERROR_CODE_CONSTANT(OUT_OF_BOUNDS); ///< Attempted to access location outside of document.
SIMDJSON_REGISTER_ERROR_CODE_CONSTANT(TRAILING_CONTENT); ///< Unexpected trailing content in the JSON input
// Extension-specific code registered with the literal value 255 rather than a simdjson:: enumerator.
SIMDJSON_REGISTER_CUSTOM_ERROR_CODE_CONSTANT(INVALID_PROPERTY, 255); ///< Invalid property

return SUCCESS;
}
/* }}} */
Expand All @@ -225,9 +261,9 @@ PHP_RINIT_FUNCTION (simdjson) {
/** {{{ PHP_RSHUTDOWN_FUNCTION
*/
PHP_RSHUTDOWN_FUNCTION (simdjson) {
void *parser = SIMDJSON_G(parser);
simdjson_php_parser *parser = SIMDJSON_G(parser);
if (parser != NULL) {
cplus_simdjson_free_parser((simdjson::dom::parser *) parser);
cplus_simdjson_free_parser(parser);
SIMDJSON_G(parser) = NULL;
}
return SUCCESS;
Expand Down
13 changes: 8 additions & 5 deletions php_simdjson.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
extern zend_module_entry simdjson_module_entry;
#define phpext_simdjson_ptr &simdjson_module_entry

#define PHP_SIMDJSON_VERSION "2.0.5"
#define PHP_SIMDJSON_VERSION "2.1.0dev"
#define SIMDJSON_SUPPORT_URL "https://github.com/crazyxman/simdjson_php"
#define SIMDJSON_PARSE_FAIL 0
#define SIMDJSON_PARSE_SUCCESS 1
Expand All @@ -26,17 +26,20 @@ extern zend_module_entry simdjson_module_entry;

#define SIMDJSON_PARSE_DEFAULT_DEPTH 512


extern PHPAPI void php_var_dump(zval **struc, int level);
extern PHPAPI void php_debug_zval_dump(zval **struc, int level);
/*
* NOTE: Namespaces and references(&) are C++ only functionality.
* To expose this functionality to other C PECLs,
* switch to a forward class declaration of a class that only wraps simdjson::dom::parser
*/
class simdjson_php_parser;

ZEND_BEGIN_MODULE_GLOBALS(simdjson)
/*
* simdjson_php_parser pointer, constructed on first use with request-scope lifetime.
* Note that in ZTS builds, each concurrently running request is served by its own thread, and each thread deliberately has its own parser instance.
* (The simdjson library is not thread safe)
*/
void *parser;
simdjson_php_parser *parser;
ZEND_END_MODULE_GLOBALS(simdjson)

PHP_MINIT_FUNCTION(simdjson);
Expand Down
Loading

0 comments on commit a860354

Please sign in to comment.