diff --git a/reformatter/json_reformat.c b/reformatter/json_reformat.c index 2ed4f398..fd1e0eba 100644 --- a/reformatter/json_reformat.c +++ b/reformatter/json_reformat.c @@ -45,10 +45,16 @@ static int reformat_boolean(void * ctx, int boolean) GEN_AND_RETURN(yajl_gen_bool(g, boolean)); } -static int reformat_number(void * ctx, const char * s, size_t l) +static int reformat_integer(void * ctx, long long int i) { yajl_gen g = (yajl_gen) ctx; - GEN_AND_RETURN(yajl_gen_number(g, s, l)); + GEN_AND_RETURN(yajl_gen_integer(g, i)); +} + +static int reformat_double(void * ctx, double d) +{ + yajl_gen g = (yajl_gen) ctx; + GEN_AND_RETURN(yajl_gen_double(g, d)); } static int reformat_string(void * ctx, const unsigned char * stringVal, @@ -93,9 +99,9 @@ static int reformat_end_array(void * ctx) static yajl_callbacks callbacks = { reformat_null, reformat_boolean, + reformat_integer, + reformat_double, NULL, - NULL, - reformat_number, reformat_string, reformat_start_map, reformat_map_key, @@ -109,6 +115,8 @@ usage(const char * progname) { fprintf(stderr, "%s: reformat json from stdin\n" "usage: json_reformat [options]\n" + " -5 allow JSON5 input\n" + " -g generate JSON5 output\n" " -e escape any forward slashes (for embedding in HTML)\n" " -m minimize json rather than beautify (default)\n" " -s reformat a stream of multiple json entites\n" @@ -143,6 +151,12 @@ main(int argc, char ** argv) unsigned int i; for ( i=1; i < strlen(argv[a]); i++) { switch (argv[a][i]) { + case '5': + yajl_config(hand, yajl_allow_json5, 1); + break; + case 'g': + yajl_gen_config(g, yajl_gen_json5, 1); + break; case 'm': yajl_gen_config(g, yajl_gen_beautify, 0); break; diff --git a/src/api/yajl_common.h b/src/api/yajl_common.h index 9596ef98..4ee7053c 100644 --- a/src/api/yajl_common.h +++ b/src/api/yajl_common.h @@ -23,6 +23,9 @@ extern "C" { #endif +/** A limit used by the generator API, YAJL_MAX_DEPTH is the maximum + * depth to which arrays and maps may be nested. 
+ */ #define YAJL_MAX_DEPTH 128 /* msft dll export gunk. To build a DLL on windows, you @@ -38,33 +41,34 @@ extern "C" { # if defined(__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__) >= 303 # define YAJL_API __attribute__ ((visibility("default"))) # else +/** Marks a yajl routine for export from the DLL/shared library. */ # define YAJL_API # endif #endif -/** pointer to a malloc function, supporting client overriding memory - * allocation routines */ +/** Pointer to a malloc() function, supporting client overriding memory + * allocation routines. */ typedef void * (*yajl_malloc_func)(void *ctx, size_t sz); -/** pointer to a free function, supporting client overriding memory - * allocation routines */ +/** Pointer to a free() function, supporting client overriding memory + * allocation routines. */ typedef void (*yajl_free_func)(void *ctx, void * ptr); -/** pointer to a realloc function which can resize an allocation. */ +/** Pointer to a realloc() function which can resize an allocation. */ typedef void * (*yajl_realloc_func)(void *ctx, void * ptr, size_t sz); -/** A structure which can be passed to yajl_*_alloc routines to allow the +/** A structure which can be passed to yajl_*_alloc() routines to allow the * client to specify memory allocation functions to be used. */ typedef struct { - /** pointer to a function that can allocate uninitialized memory */ + /** Pointer to a function that can allocate uninitialized memory. */ yajl_malloc_func malloc; - /** pointer to a function that can resize memory allocations */ + /** Pointer to a function that can resize memory allocations. */ yajl_realloc_func realloc; - /** pointer to a function that can free memory allocated using - * reallocFunction or mallocFunction */ + /** Pointer to a function that can free memory allocated using + * reallocFunction or mallocFunction. 
*/ yajl_free_func free; - /** a context pointer that will be passed to above allocation routines */ + /** A context pointer that will be passed to above allocation routines. */ void * ctx; } yajl_alloc_funcs; diff --git a/src/api/yajl_gen.h b/src/api/yajl_gen.h index a74cff1b..fce60aeb 100644 --- a/src/api/yajl_gen.h +++ b/src/api/yajl_gen.h @@ -29,101 +29,146 @@ #ifdef __cplusplus extern "C" { #endif - /** generator status codes */ + /** Generator status codes. */ typedef enum { - /** no error */ + /** No error. */ yajl_gen_status_ok = 0, - /** at a point where a map key is generated, a function other than - * yajl_gen_string was called */ + /** At a point where a map key is generated, a function other than + * yajl_gen_string() was called. */ yajl_gen_keys_must_be_strings, /** YAJL's maximum generation depth was exceeded. see - * YAJL_MAX_DEPTH */ + * \ref YAJL_MAX_DEPTH. */ yajl_max_depth_exceeded, - /** A generator function (yajl_gen_XXX) was called while in an error - * state */ + /** A generator function (yajl_gen_XXX()) was called while in an error + * state. */ yajl_gen_in_error_state, - /** A complete JSON document has been generated */ + /** A complete JSON document has been generated. */ yajl_gen_generation_complete, - /** yajl_gen_double was passed an invalid floating point value + /** yajl_gen_double() was passed an invalid floating point value * (infinity or NaN). */ yajl_gen_invalid_number, /** A print callback was passed in, so there is no internal - * buffer to get from */ + * buffer to get from. */ yajl_gen_no_buf, - /** returned from yajl_gen_string() when the yajl_gen_validate_utf8 - * option is enabled and an invalid was passed by client code. + /** Returned from yajl_gen_string() when the \ref yajl_gen_validate_utf8 + * option is enabled and invalid UTF8 was passed by client code. 
 */ yajl_gen_invalid_string } yajl_gen_status; - /** an opaque handle to a generator */ + /** An opaque handle to a generator. */ typedef struct yajl_gen_t * yajl_gen; - /** a callback used for "printing" the results. */ + /** A callback used for "printing" the results. */ typedef void (*yajl_print_t)(void * ctx, const char * str, size_t len); - /** configuration parameters for the parser, these may be passed to - * yajl_gen_config() along with option specific argument(s). In general, - * all configuration parameters default to *off*. */ + /** Configuration parameters for the generator, these may be passed to + * yajl_gen_config() followed by option specific argument(s). In general, + * all boolean configuration parameters default to *off*. */ typedef enum { - /** generate indented (beautiful) output */ + /** + * Generate indented (beautiful) output. + * + * yajl_gen_config() argument type: int (boolean) + * + * Example: \code{.cpp} + * yajl_gen_config(g, yajl_gen_beautify, 1); // Human format please + * \endcode + */ yajl_gen_beautify = 0x01, /** - * Set an indent string which is used when yajl_gen_beautify - * is enabled. Maybe something like \\t or some number of - * spaces. The default is four spaces ' '. + * Set the indent string which is used when \ref yajl_gen_beautify + * is enabled, which may only contain whitespace characters such as + * \c \\t or some number of spaces. The default is four spaces '    '. + * + * yajl_gen_config() argument type: const char * + * + * Example: \code{.cpp} + * yajl_gen_config(g, yajl_gen_indent_string, "  "); // 2 spaces + * \endcode */ yajl_gen_indent_string = 0x02, /** * Set a function and context argument that should be used to - * output generated json. the function should conform to the - * yajl_print_t prototype while the context argument is a + * output the generated json. The function should conform to the + * \ref yajl_print_t prototype while the context argument may be any * void * of your choosing.
* - * example: - * yajl_gen_config(g, yajl_gen_print_callback, myFunc, myVoidPtr); + * yajl_gen_config() arguments: \ref yajl_print_t, void * + * + * Example: \code{.cpp} + * yajl_gen_config(g, yajl_gen_print_callback, myFunc, myVoidPtr); + * \endcode */ yajl_gen_print_callback = 0x04, /** * Normally the generator does not validate that strings you * pass to it via yajl_gen_string() are valid UTF8. Enabling * this option will cause it to do so. + * + * yajl_gen_config() argument type: int (boolean) + * + * Example: \code{.cpp} + * yajl_gen_config(g, yajl_gen_validate_utf8, 1); // Check UTF8 + * \endcode */ yajl_gen_validate_utf8 = 0x08, /** - * the forward solidus (slash or '/' in human) is not required to be + * The forward solidus (slash or '/' in human) is not required to be * escaped in json text. By default, YAJL will not escape it in the * iterest of saving bytes. Setting this flag will cause YAJL to * always escape '/' in generated JSON strings. + * + * yajl_gen_config() argument type: int (boolean) */ - yajl_gen_escape_solidus = 0x10 + yajl_gen_escape_solidus = 0x10, + /** + * Special numbers such as NaN and Infinity cannot be represented in + * the original JSON, but are permitted in JSON5. Setting this flag + * allows YAJL to output the JSON5 representation of these special + * numbers instead of returning with an error, and to emit map keys + * that are valid javascript identifiers without quotes. + * + * yajl_gen_config() argument type: int (boolean) + * + * Example: \code{.cpp} + * yajl_gen_config(g, yajl_gen_json5, 1); // Output JSON5 + * \endcode + */ + yajl_gen_json5 = 0x20, } yajl_gen_option; - /** allow the modification of generator options subsequent to handle - * allocation (via yajl_alloc) - * \returns zero in case of errors, non-zero otherwise + /** Set generator options associated with a generator handle. See the + * \ref yajl_gen_option documentation for details of the available + * options and their arguments. 
+ * \returns Zero in case of error, non-zero otherwise. */ - YAJL_API int yajl_gen_config(yajl_gen g, yajl_gen_option opt, ...); + YAJL_API int yajl_gen_config(yajl_gen hand, yajl_gen_option opt, ...); - /** allocate a generator handle - * \param allocFuncs an optional pointer to a structure which allows - * the client to overide the memory allocation - * used by yajl. May be NULL, in which case - * malloc/free/realloc will be used. + /** Allocate a generator handle + * \param allocFuncs An optional pointer to a structure which allows the + * client to provide memory allocation functions for + * use by yajl. May be \c NULL to use the C runtime + * library's malloc(), free() and realloc(). * - * \returns an allocated handle on success, NULL on failure (bad params) + * \returns An allocated handle on success, \c NULL on failure (bad params) */ YAJL_API yajl_gen yajl_gen_alloc(const yajl_alloc_funcs * allocFuncs); - /** free a generator handle */ - YAJL_API void yajl_gen_free(yajl_gen handle); + /** Free a generator handle. */ + YAJL_API void yajl_gen_free(yajl_gen hand); YAJL_API yajl_gen_status yajl_gen_integer(yajl_gen hand, long long int number); - /** generate a floating point number. number may not be infinity or - * NaN, as these have no representation in JSON. In these cases the - * generator will return 'yajl_gen_invalid_number' */ + /** Generate a floating point number. + * \param hand The generator handle. + * \param number The value to output. The values Infinity or NaN are + * only accepted if the \ref yajl_gen_json5 option is set, + * as these values have no legal representation in JSON; + * the generator will return \ref yajl_gen_invalid_number + * otherwise. 
+ */ YAJL_API yajl_gen_status yajl_gen_double(yajl_gen hand, double number); YAJL_API yajl_gen_status yajl_gen_number(yajl_gen hand, const char * num, @@ -138,26 +183,29 @@ extern "C" { YAJL_API yajl_gen_status yajl_gen_array_open(yajl_gen hand); YAJL_API yajl_gen_status yajl_gen_array_close(yajl_gen hand); - /** access the null terminated generator buffer. If incrementally + /** Access the zero-terminated generator buffer. If incrementally * outputing JSON, one should call yajl_gen_clear to clear the * buffer. This allows stream generation. */ YAJL_API yajl_gen_status yajl_gen_get_buf(yajl_gen hand, const unsigned char ** buf, size_t * len); - /** clear yajl's output buffer, but maintain all internal generation - * state. This function will not "reset" the generator state, and is + /** Clear yajl's output buffer, but maintain all internal generation + * state. This function will not reset the generator state, and is * intended to enable incremental JSON outputing. */ YAJL_API void yajl_gen_clear(yajl_gen hand); - /** Reset the generator state. Allows a client to generate multiple - * json entities in a stream. The "sep" string will be inserted to - * separate the previously generated entity from the current, - * NULL means *no separation* of entites (clients beware, generating - * multiple JSON numbers without a separator, for instance, will result in ambiguous output) + /** Reset the generator state. Allows a client to generate multiple + * JSON entities in a stream. + * \param hand The generator handle. + * \param sep This string will be inserted to separate the previously + * generated output from the following; passing \c NULL means + * *no separation* of entities (beware that generating + * multiple JSON numbers without a separator creates + * ambiguous output). * - * Note: this call will not clear yajl's output buffer.
This - * may be accomplished explicitly by calling yajl_gen_clear() */ + * Note: This call does not clear yajl's output buffer, which must be + * accomplished explicitly by calling yajl_gen_clear(). */ YAJL_API void yajl_gen_reset(yajl_gen hand, const char * sep); #ifdef __cplusplus diff --git a/src/api/yajl_parse.h b/src/api/yajl_parse.h index 1c25a60d..b0d6d147 100644 --- a/src/api/yajl_parse.h +++ b/src/api/yajl_parse.h @@ -29,47 +29,47 @@ #ifdef __cplusplus extern "C" { #endif - /** error codes returned from this interface */ + /** Error codes returned from this interface. */ typedef enum { - /** no error was encountered */ + /** No error was encountered. */ yajl_status_ok, - /** a client callback returned zero, stopping the parse */ + /** A client callback returned zero, stopping the parse. */ yajl_status_client_canceled, - /** An error occured during the parse. Call yajl_get_error for - * more information about the encountered error */ + /** An error occured during the parse. Call yajl_get_error() for + * more information about the encountered error. */ yajl_status_error } yajl_status; - /** attain a human readable, english, string for an error */ + /** Return a human readable, english string for an error code. */ YAJL_API const char * yajl_status_to_string(yajl_status code); - /** an opaque handle to a parser */ + /** An opaque handle to a parser. */ typedef struct yajl_handle_t * yajl_handle; - /** yajl is an event driven parser. this means as json elements are + /** yajl is an event driven parser. This means as json elements are * parsed, you are called back to do something with the data. The * functions in this table indicate the various events for which * you will be called back. Each callback accepts a "context" - * pointer, this is a void * that is passed into the yajl_parse + * pointer, this is a \c void \c * that is passed into the yajl_parse() * function which the client code may use to pass around context. * * All callbacks return an integer. 
If non-zero, the parse will * continue. If zero, the parse will be canceled and - * yajl_status_client_canceled will be returned from the parse. + * \c yajl_status_client_canceled will be returned from the parse. * - * \attention { + * \attention * A note about the handling of numbers: * + * \attention * yajl will only convert numbers that can be represented in a - * double or a 64 bit (long long) int. All other numbers will - * be passed to the client in string form using the yajl_number - * callback. Furthermore, if yajl_number is not NULL, it will - * always be used to return numbers, that is yajl_integer and - * yajl_double will be ignored. If yajl_number is NULL but one - * of yajl_integer or yajl_double are defined, parsing of a + * double or a 64 bit (long long) int. All other numbers will be + * passed to the client in string form using the yajl_number() + * callback. Furthermore, if yajl_number() is not NULL, it will + * always be used to return numbers, that is yajl_integer() and + * yajl_double() will be ignored. If yajl_number() is NULL but one + * of yajl_integer() or yajl_double() are defined, parsing of a * number larger than is representable in a double or 64 bit * integer will result in a parse error. - * } */ typedef struct { int (* yajl_null)(void * ctx); @@ -77,12 +77,12 @@ extern "C" { int (* yajl_integer)(void * ctx, long long integerVal); int (* yajl_double)(void * ctx, double doubleVal); /** A callback which passes the string representation of the number - * back to the client. Will be used for all numbers when present */ + * back to the client. Will be used for all numbers when present. */ int (* yajl_number)(void * ctx, const char * numberVal, size_t numberLen); - /** strings are returned as pointers into the JSON text when, - * possible, as a result, they are _not_ null padded */ + /** Strings are returned as pointers into the JSON text when + * possible. As a result they are _not_ zero-terminated. 
*/ int (* yajl_string)(void * ctx, const unsigned char * stringVal, size_t stringLen); @@ -95,115 +95,151 @@ extern "C" { int (* yajl_end_array)(void * ctx); } yajl_callbacks; - /** allocate a parser handle - * \param callbacks a yajl callbacks structure specifying the + /** Allocate a parser handle. + * \param callbacks A \c yajl_callbacks structure specifying the * functions to call when different JSON entities - * are encountered in the input text. May be NULL, + * are encountered in the input text. May be \c NULL, * which is only useful for validation. - * \param afs memory allocation functions, may be NULL for to use - * C runtime library routines (malloc and friends) - * \param ctx a context pointer that will be passed to callbacks. + * \param afs Memory allocation functions, may be \c NULL to use the + * C runtime library routines (malloc() and friends). + * \param ctx A context pointer that will be passed to callbacks. */ YAJL_API yajl_handle yajl_alloc(const yajl_callbacks * callbacks, yajl_alloc_funcs * afs, void * ctx); - /** configuration parameters for the parser, these may be passed to - * yajl_config() along with option specific argument(s). In general, - * all configuration parameters default to *off*. */ + /** Configuration parameters for the parser, these should be passed to + * yajl_config() followed by any option specific argument(s). In general, + * all boolean configuration parameters default to *off*. */ typedef enum { - /** Ignore javascript style comments present in - * JSON input. Non-standard, but rather fun - * arguments: toggled off with integer zero, on otherwise. + /** + * Ignore javascript style comments present in JSON input. These are + * not standard in JSON, although they are allowed in JSON5 input. 
+ * + * yajl_config() argument type: int (boolean) * - * example: - * yajl_config(h, yajl_allow_comments, 1); // turn comment support on + * Example: \code{.cpp} + * yajl_config(h, yajl_allow_comments, 1); // turn comment support on + * \endcode */ yajl_allow_comments = 0x01, /** * When set the parser will verify that all strings in JSON input are - * valid UTF8 and will emit a parse error if this is not so. When set, + * valid UTF8 and will emit a parse error if this is not so. When set, * this option makes parsing slightly more expensive (~7% depending - * on processor and compiler in use) + * on the processor and compiler in use). * - * example: - * yajl_config(h, yajl_dont_validate_strings, 1); // disable utf8 checking + * yajl_config() argument type: int (boolean) + * + * Example: \code{.cpp} + * yajl_config(h, yajl_dont_validate_strings, 1); // disable utf8 checking + * \endcode */ yajl_dont_validate_strings = 0x02, /** - * By default, upon calls to yajl_complete_parse(), yajl will - * ensure the entire input text was consumed and will raise an error - * otherwise. Enabling this flag will cause yajl to disable this - * check. This can be useful when parsing json out of a that contains more - * than a single JSON document. + * By default, upon calls to yajl_complete_parse(), yajl will ensure + * the entire input text was consumed and will raise an error + * otherwise. Turning this flag on causes yajl to disable the garbage + * check. This can be useful when parsing JSON out of an input stream + * that contains more than a single JSON document. + * + * yajl_config() argument type: int (boolean) + * + * Example: \code{.cpp} + * yajl_config(h, yajl_allow_trailing_garbage, 1); // non-JSON follows + * \endcode */ yajl_allow_trailing_garbage = 0x04, /** - * Allow multiple values to be parsed by a single handle. The - * entire text must be valid JSON, and values can be seperated - * by any kind of whitespace.
This flag will change the - * behavior of the parser, and cause it continue parsing after - * a value is parsed, rather than transitioning into a - * complete state. This option can be useful when parsing multiple - * values from an input stream. + * Allow multiple values to be parsed by a single handle. The entire + * text must be valid JSON, and values can be seperated by any kind of + * whitespace. This flag will change the behavior of the parser, and + * cause it to continue parsing after a value is parsed, rather than + * transitioning into a complete state. This option can be useful when + * parsing multiple values from an input stream. + * + * yajl_config() argument type: int (boolean) + * + * Example: \code{.cpp} + * yajl_config(h, yajl_allow_multiple_values, 1); // multi-doc stream + * \endcode */ yajl_allow_multiple_values = 0x08, /** - * When yajl_complete_parse() is called the parser will - * check that the top level value was completely consumed. I.E., - * if called whilst in the middle of parsing a value - * yajl will enter an error state (premature EOF). Setting this - * flag suppresses that check and the corresponding error. + * When yajl_complete_parse() is called the parser will check that the + * top level value was completely consumed. If called whilst in the + * middle of parsing a value, yajl will enter an error state (premature + * EOF). Setting this flag suppresses that check and the corresponding + * error. + * + * yajl_config() argument type: int (boolean) + * + * Example: \code{.cpp} + * yajl_config(h, yajl_allow_partial_values, 1); // might stop early + * \endcode */ - yajl_allow_partial_values = 0x10 + yajl_allow_partial_values = 0x10, + /** + * The JSON5 standard allows additional formats for numbers, strings + * and object keys which are not permitted by the JSON standard. + * Setting this flag tells yajl to accept JSON5 standard input. 
+ * This flag also enables \c yajl_allow_comments since comments are + * part of the JSON5 standard. + * + * yajl_config() argument type: int (boolean) + * + * Example: \code{.cpp} + * yajl_config(h, yajl_allow_json5, 1); // We accept JSON5! + * \endcode + */ + yajl_allow_json5 = 0x20, } yajl_option; - /** allow the modification of parser options subsequent to handle - * allocation (via yajl_alloc) - * \returns zero in case of errors, non-zero otherwise + /** Set parser options associated with a parser handle. See the + * \ref yajl_option documentation for details of the available options + * and their arguments. + * \returns Zero in case of error, non-zero otherwise. */ - YAJL_API int yajl_config(yajl_handle h, yajl_option opt, ...); + YAJL_API int yajl_config(yajl_handle hand, yajl_option opt, ...); - /** free a parser handle */ - YAJL_API void yajl_free(yajl_handle handle); + /** Free a parser handle. */ + YAJL_API void yajl_free(yajl_handle hand); /** Parse some json! - * \param hand - a handle to the json parser allocated with yajl_alloc - * \param jsonText - a pointer to the UTF8 json text to be parsed - * \param jsonTextLength - the length, in bytes, of input text + * \param hand A handle to the json parser allocated with yajl_alloc(). + * \param jsonText A pointer to the UTF8 json text to be parsed. + * \param jsonTextLength The length, in bytes, of input text. */ YAJL_API yajl_status yajl_parse(yajl_handle hand, const unsigned char * jsonText, size_t jsonTextLength); /** Parse any remaining buffered json. + * * Since yajl is a stream-based parser, without an explicit end of * input, yajl sometimes can't decide if content at the end of the - * stream is valid or not. For example, if "1" has been fed in, + * stream is valid or not. For example, if "1" has been fed in, * yajl can't know whether another digit is next or some character * that would terminate the integer token. 
* - * \param hand - a handle to the json parser allocated with yajl_alloc + * \param hand a handle to the json parser allocated with yajl_alloc(). */ YAJL_API yajl_status yajl_complete_parse(yajl_handle hand); - /** get an error string describing the state of the - * parse. + /** Get an error string describing the state of the parse. * - * If verbose is non-zero, the message will include the JSON - * text where the error occured, along with an arrow pointing to - * the specific char. + * If verbose is non-zero, the message will include the JSON text where + * the error occured, along with an arrow pointing to the specific char. * * \returns A dynamically allocated string will be returned which should - * be freed with yajl_free_error + * be freed with yajl_free_error(). */ YAJL_API unsigned char * yajl_get_error(yajl_handle hand, int verbose, const unsigned char * jsonText, size_t jsonTextLength); - /** - * get the amount of data consumed from the last chunk passed to YAJL. + /** Get the amount of data consumed from the last chunk passed to yajl. * * In the case of a successful parse this can help you understand if * the entire buffer was consumed (which will allow you to handle @@ -216,7 +252,7 @@ extern "C" { */ YAJL_API size_t yajl_get_bytes_consumed(yajl_handle hand); - /** free an error returned from yajl_get_error */ + /** Free an error returned from yajl_get_error(). */ YAJL_API void yajl_free_error(yajl_handle hand, unsigned char * str); #ifdef __cplusplus diff --git a/src/api/yajl_tree.h b/src/api/yajl_tree.h index 1c1e06a5..64565f37 100644 --- a/src/api/yajl_tree.h +++ b/src/api/yajl_tree.h @@ -37,7 +37,7 @@ extern "C" { #endif -/** possible data types that a yajl_val_s can hold */ +/** Possible data types that a yajl_val_s can hold */ typedef enum { yajl_t_string = 1, yajl_t_number = 2, @@ -99,33 +99,33 @@ struct yajl_val_s /** * Parse a string. 
* - * Parses an null-terminated string containing JSON data and returns a pointer + * Parses a zero-terminated string containing JSON5 data and returns a pointer * to the top-level value (root of the parse tree). * * \param input Pointer to a null-terminated utf8 string containing - * JSON data. + * JSON or JSON5 data. * \param error_buffer Pointer to a buffer in which an error message will - * be stored if \em yajl_tree_parse fails, or + * be stored if yajl_tree_parse() fails, or * \c NULL. The buffer will be initialized before * parsing, so its content will be destroyed even if - * \em yajl_tree_parse succeeds. + * yajl_tree_parse() succeeds. * \param error_buffer_size Size of the memory area pointed to by - * \em error_buffer_size. If \em error_buffer_size is - * \c NULL, this argument is ignored. + * \p error_buffer. If \p error_buffer + * is \c NULL, this argument is ignored. * * \returns Pointer to the top-level value or \c NULL on error. The memory - * pointed to must be freed using \em yajl_tree_free. In case of an error, a - * null terminated message describing the error in more detail is stored in - * \em error_buffer if it is not \c NULL. + * pointed to must be freed using yajl_tree_free(). In case of an error, a + * zero-terminated message describing the error in more detail is stored in + * \p error_buffer if it is not \c NULL. */ YAJL_API yajl_val yajl_tree_parse (const char *input, char *error_buffer, size_t error_buffer_size); /** - * Free a parse tree returned by "yajl_tree_parse". + * Free a parse tree returned by yajl_tree_parse(). * - * \param v Pointer to a JSON value returned by "yajl_tree_parse". Passing NULL + * \param v Pointer to a JSON value returned by yajl_tree_parse(). Passing \c NULL * is valid and results in a no-op. */ YAJL_API void yajl_tree_free (yajl_val v); @@ -134,10 +134,10 @@ YAJL_API void yajl_tree_free (yajl_val v); * Access a nested value inside a tree. * * \param parent the node under which you'd like to extract values. 
- * \param path A null terminated array of strings, each the name of an object key - * \param type the yajl_type of the object you seek, or yajl_t_any if any will do. + * \param path A null terminated array of strings, each the name of an object key. + * \param type the \ref yajl_type of the object you seek, or \ref yajl_t_any if any will do. * - * \returns a pointer to the found value, or NULL if we came up empty. + * \returns a pointer to the found value, or \c NULL if we came up empty. * * Future Ideas: it'd be nice to move path to a string and implement support for * a teeny tiny micro language here, so you can extract array elements, do things @@ -146,7 +146,11 @@ YAJL_API void yajl_tree_free (yajl_val v); */ YAJL_API yajl_val yajl_tree_get(yajl_val parent, const char ** path, yajl_type type); -/* Various convenience macros to check the type of a `yajl_val` */ +/** @name Type Check Macros + * + * Convenience macros to check the type of a \ref yajl_val. + */ +/**@{*/ #define YAJL_IS_STRING(v) (((v) != NULL) && ((v)->type == yajl_t_string)) #define YAJL_IS_NUMBER(v) (((v) != NULL) && ((v)->type == yajl_t_number)) #define YAJL_IS_INTEGER(v) (YAJL_IS_NUMBER(v) && ((v)->u.number.flags & YAJL_NUMBER_INT_VALID)) @@ -156,29 +160,38 @@ YAJL_API yajl_val yajl_tree_get(yajl_val parent, const char ** path, yajl_type t #define YAJL_IS_TRUE(v) (((v) != NULL) && ((v)->type == yajl_t_true )) #define YAJL_IS_FALSE(v) (((v) != NULL) && ((v)->type == yajl_t_false )) #define YAJL_IS_NULL(v) (((v) != NULL) && ((v)->type == yajl_t_null )) +/**@}*/ + +/** @name Value Get Macros + * + * Macros to fetch values from a \ref yajl_val. + */ +/**@{*/ -/** Given a yajl_val_string return a ptr to the bare string it contains, - * or NULL if the value is not a string. */ +/** Given a \ref yajl_t_string return a ptr to the bare string it contains, + * or \c NULL if the value is not a string. */ #define YAJL_GET_STRING(v) (YAJL_IS_STRING(v) ? 
 (v)->u.string : NULL) /** Get the string representation of a number. You should check type first, - * perhaps using YAJL_IS_NUMBER */ + * perhaps using \ref YAJL_IS_NUMBER */ #define YAJL_GET_NUMBER(v) ((v)->u.number.r) /** Get the double representation of a number. You should check type first, - * perhaps using YAJL_IS_DOUBLE */ + * perhaps using \ref YAJL_IS_DOUBLE */ #define YAJL_GET_DOUBLE(v) ((v)->u.number.d) /** Get the 64bit (long long) integer representation of a number. You should - * check type first, perhaps using YAJL_IS_INTEGER */ + * check type first, perhaps using \ref YAJL_IS_INTEGER */ #define YAJL_GET_INTEGER(v) ((v)->u.number.i) -/** Get a pointer to a yajl_val_object or NULL if the value is not an object. */ +/** Get a pointer to a \ref yajl_t_object or \c NULL if the value is not an object. */ #define YAJL_GET_OBJECT(v) (YAJL_IS_OBJECT(v) ? &(v)->u.object : NULL) -/** Get a pointer to a yajl_val_array or NULL if the value is not an object. */ +/** Get a pointer to a \ref yajl_t_array or \c NULL if the value is not an array. */ #define YAJL_GET_ARRAY(v) (YAJL_IS_ARRAY(v) ? &(v)->u.array : NULL) +/**@}*/ + #ifdef __cplusplus } #endif diff --git a/src/yajl.c b/src/yajl.c index d477893f..46534fbb 100644 --- a/src/yajl.c +++ b/src/yajl.c @@ -86,6 +86,8 @@ yajl_config(yajl_handle h, yajl_option opt, ...)
va_start(ap, opt); switch(opt) { + case yajl_allow_json5: + opt |= yajl_allow_comments; /* JSON5 allows comments */ case yajl_allow_comments: case yajl_dont_validate_strings: case yajl_allow_trailing_garbage: @@ -124,7 +126,8 @@ yajl_parse(yajl_handle hand, const unsigned char * jsonText, if (hand->lexer == NULL) { hand->lexer = yajl_lex_alloc(&(hand->alloc), hand->flags & yajl_allow_comments, - !(hand->flags & yajl_dont_validate_strings)); + !(hand->flags & yajl_dont_validate_strings), + hand->flags & yajl_allow_json5); } status = yajl_do_parse(hand, jsonText, jsonTextLen); @@ -144,7 +147,8 @@ yajl_complete_parse(yajl_handle hand) if (hand->lexer == NULL) { hand->lexer = yajl_lex_alloc(&(hand->alloc), hand->flags & yajl_allow_comments, - !(hand->flags & yajl_dont_validate_strings)); + !(hand->flags & yajl_dont_validate_strings), + hand->flags & yajl_allow_json5); } return yajl_do_finish(hand); diff --git a/src/yajl_encode.c b/src/yajl_encode.c index fd082581..c056e596 100644 --- a/src/yajl_encode.c +++ b/src/yajl_encode.c @@ -33,13 +33,22 @@ yajl_string_encode(const yajl_print_t print, void * ctx, const unsigned char * str, size_t len, - int escape_solidus) + int escape_solidus, + int output_json5) { size_t beg = 0; size_t end = 0; char hexBuf[7]; - hexBuf[0] = '\\'; hexBuf[1] = 'u'; hexBuf[2] = '0'; hexBuf[3] = '0'; - hexBuf[6] = 0; + char *hexAt; + if (output_json5) { + hexBuf[0] = '\\'; hexBuf[1] = 'x'; + hexBuf[4] = 0; + hexAt = &hexBuf[2]; + } else { + hexBuf[0] = '\\'; hexBuf[1] = 'u'; hexBuf[2] = '0'; hexBuf[3] = '0'; + hexBuf[6] = 0; + hexAt = &hexBuf[4]; + } while (end < len) { const char * escaped = NULL; @@ -57,9 +66,20 @@ yajl_string_encode(const yajl_print_t print, case '\f': escaped = "\\f"; break; case '\b': escaped = "\\b"; break; case '\t': escaped = "\\t"; break; + case '\0': + if (output_json5) { + escaped = "\\0"; break; + } + goto ashex; + case '\v': + if (output_json5) { + escaped = "\\v"; break; + } + goto ashex; default: if ((unsigned char) 
str[end] < 32) { - CharToHex(str[end], hexBuf + 4); + ashex: + CharToHex(str[end], hexAt); escaped = hexBuf; } break; @@ -75,10 +95,10 @@ yajl_string_encode(const yajl_print_t print, print(ctx, (const char *) (str + beg), end - beg); } -static void hexToDigit(unsigned int * val, const unsigned char * hex) +static void hexToDigit(unsigned int * val, unsigned int len, const unsigned char * hex) { unsigned int i; - for (i=0;i<4;i++) { + for (i=0;i= 'A') c = (c & ~0x20) - 7; c -= '0'; @@ -87,7 +107,7 @@ static void hexToDigit(unsigned int * val, const unsigned char * hex) } } -static void Utf32toUtf8(unsigned int codepoint, char * utf8Buf) +static void Utf32toUtf8(unsigned int codepoint, char * utf8Buf) { if (codepoint < 0x80) { utf8Buf[0] = (char) codepoint; @@ -117,7 +137,7 @@ void yajl_string_decode(yajl_buf buf, const unsigned char * str, size_t len) { size_t beg = 0; - size_t end = 0; + size_t end = 0; while (end < len) { if (str[end] == '\\') { @@ -128,24 +148,22 @@ void yajl_string_decode(yajl_buf buf, const unsigned char * str, case 'r': unescaped = "\r"; break; case 'n': unescaped = "\n"; break; case '\\': unescaped = "\\"; break; - case '/': unescaped = "/"; break; - case '"': unescaped = "\""; break; case 'f': unescaped = "\f"; break; case 'b': unescaped = "\b"; break; case 't': unescaped = "\t"; break; case 'u': { unsigned int codepoint = 0; - hexToDigit(&codepoint, str + ++end); + hexToDigit(&codepoint, 4, str + ++end); end+=3; /* check if this is a surrogate */ if ((codepoint & 0xFC00) == 0xD800) { end++; if (str[end] == '\\' && str[end + 1] == 'u') { unsigned int surrogate = 0; - hexToDigit(&surrogate, str + end + 2); + hexToDigit(&surrogate, 4, str + end + 2); codepoint = - (((codepoint & 0x3F) << 10) | - ((((codepoint >> 6) & 0xF) + 1) << 16) | + (((codepoint & 0x3F) << 10) | + ((((codepoint >> 6) & 0xF) + 1) << 16) | (surrogate & 0x3FF)); end += 5; } else { @@ -153,7 +171,7 @@ void yajl_string_decode(yajl_buf buf, const unsigned char * str, break; } } 
- + Utf32toUtf8(codepoint, utf8Buf); unescaped = utf8Buf; @@ -165,8 +183,33 @@ void yajl_string_decode(yajl_buf buf, const unsigned char * str, break; } + /* The following escapes are only valid when parsing JSON5. + * The lexer catches them when allowJson5 is not set. + */ + case '\n': beg = ++end; continue; + case '\r': + if (str[++end] == '\n') ++end; + beg = end; + continue; + case '0': + utf8Buf[0] = '\0'; + yajl_buf_append(buf, utf8Buf, 1); + beg = ++end; + continue; + case 'v': unescaped = "\v"; break; + case 'x': { + unsigned int codepoint = 0; + hexToDigit(&codepoint, 2, str + ++end); + end++; + utf8Buf[0] = (char) codepoint; + yajl_buf_append(buf, utf8Buf, 1); + beg = ++end; + continue; + } default: - assert("this should never happen" == NULL); + utf8Buf[0] = str[end]; + utf8Buf[1] = 0; + unescaped = utf8Buf; } yajl_buf_append(buf, unescaped, (unsigned int)strlen(unescaped)); beg = ++end; @@ -183,13 +226,13 @@ int yajl_string_validate_utf8(const unsigned char * s, size_t len) { if (!len) return 1; if (!s) return 0; - + while (len--) { /* single byte */ if (*s <= 0x7f) { /* noop */ } - /* two byte */ + /* two byte */ else if ((*s >> 5) == 0x6) { ADV_PTR; if (!((*s >> 6) == 0x2)) return 0; @@ -201,7 +244,7 @@ int yajl_string_validate_utf8(const unsigned char * s, size_t len) ADV_PTR; if (!((*s >> 6) == 0x2)) return 0; } - /* four byte */ + /* four byte */ else if ((*s >> 3) == 0x1e) { ADV_PTR; if (!((*s >> 6) == 0x2)) return 0; @@ -212,9 +255,33 @@ int yajl_string_validate_utf8(const unsigned char * s, size_t len) } else { return 0; } - + s++; } - + + return 1; +} + +int yajl_string_validate_identifier(const unsigned char * str, size_t len) +{ + const unsigned char * s = str; + int c; + + if (!len || !str) return 0; + + c = *s++; /* First character [$_A-Za-z] */ + if ((c != '$' && c < 'A') || + (c > 'Z' && c != '_' && c < 'a') || + (c > 'z')) + return 0; + + while (--len) { + c = *s++; /* Remaining characters [$_A-Za-z0-9] */ + if ((c != '$' && c < '0') || 
+ (c > '9' && c < 'A') || + (c > 'Z' && c != '_' && c < 'a') || + (c > 'z')) + return 0; + } return 1; } diff --git a/src/yajl_encode.h b/src/yajl_encode.h index 853a1a70..c1e4a725 100644 --- a/src/yajl_encode.h +++ b/src/yajl_encode.h @@ -24,11 +24,14 @@ void yajl_string_encode(const yajl_print_t printer, void * ctx, const unsigned char * str, size_t length, - int escape_solidus); + int escape_solidus, + int output_json5); void yajl_string_decode(yajl_buf buf, const unsigned char * str, size_t length); int yajl_string_validate_utf8(const unsigned char * s, size_t len); +int yajl_string_validate_identifier(const unsigned char * str, size_t len); + #endif diff --git a/src/yajl_gen.c b/src/yajl_gen.c index 0f5c68e8..0dfd030b 100644 --- a/src/yajl_gen.c +++ b/src/yajl_gen.c @@ -58,6 +58,7 @@ yajl_gen_config(yajl_gen g, yajl_gen_option opt, ...) case yajl_gen_beautify: case yajl_gen_validate_utf8: case yajl_gen_escape_solidus: + case yajl_gen_json5: if (va_arg(ap, int)) g->flags |= opt; else g->flags &= ~opt; break; @@ -141,17 +142,17 @@ yajl_gen_free(yajl_gen g) } #define INSERT_SEP \ - if (g->state[g->depth] == yajl_gen_map_key || \ - g->state[g->depth] == yajl_gen_in_array) { \ - g->print(g->ctx, ",", 1); \ - if ((g->flags & yajl_gen_beautify)) g->print(g->ctx, "\n", 1); \ - } else if (g->state[g->depth] == yajl_gen_map_val) { \ - g->print(g->ctx, ":", 1); \ - if ((g->flags & yajl_gen_beautify)) g->print(g->ctx, " ", 1); \ - } - -#define INSERT_WHITESPACE \ - if ((g->flags & yajl_gen_beautify)) { \ + if (g->state[g->depth] == yajl_gen_map_key || \ + g->state[g->depth] == yajl_gen_in_array) { \ + g->print(g->ctx, ",", 1); \ + if ((g->flags & yajl_gen_beautify)) g->print(g->ctx, "\n", 1); \ + } else if (g->state[g->depth] == yajl_gen_map_val) { \ + g->print(g->ctx, ":", 1); \ + if ((g->flags & yajl_gen_beautify)) g->print(g->ctx, " ", 1); \ + } + +#define INSERT_WHITESPACE \ + if ((g->flags & yajl_gen_beautify)) { \ if (g->state[g->depth] != yajl_gen_map_val) { \ 
unsigned int _i; \ for (_i=0;_idepth;_i++) \ @@ -170,8 +171,8 @@ yajl_gen_free(yajl_gen g) /* check that we're not complete, or in error state. in a valid state * to be generating */ #define ENSURE_VALID_STATE \ - if (g->state[g->depth] == yajl_gen_error) { \ - return yajl_gen_in_error_state;\ + if (g->state[g->depth] == yajl_gen_error) { \ + return yajl_gen_in_error_state; \ } else if (g->state[g->depth] == yajl_gen_complete) { \ return yajl_gen_generation_complete; \ } @@ -201,8 +202,9 @@ yajl_gen_free(yajl_gen g) break; \ } \ -#define FINAL_NEWLINE \ - if ((g->flags & yajl_gen_beautify) && g->state[g->depth] == yajl_gen_complete) \ +#define FINAL_NEWLINE \ + if ((g->flags & yajl_gen_beautify) && \ + g->state[g->depth] == yajl_gen_complete) \ g->print(g->ctx, "\n", 1); yajl_gen_status @@ -227,13 +229,24 @@ yajl_gen_status yajl_gen_double(yajl_gen g, double number) { char i[32]; + int special = 1; ENSURE_VALID_STATE; ENSURE_NOT_KEY; - if (isnan(number) || isinf(number)) return yajl_gen_invalid_number; - INSERT_SEP; INSERT_WHITESPACE; - sprintf(i, "%.20g", number); - if (strspn(i, "0123456789-") == strlen(i)) { - strcat(i, ".0"); + if (isnan(number)) { + strcpy(i, "NaN"); } + else if (isinf(number)) { + sprintf(i, "%cInfinity", number < 0 ? 
'-' : '+'); + } + else { + special = 0; + sprintf(i, "%.17g", number); + if (strspn(i, "0123456789-") == strlen(i)) { + strcat(i, ".0"); + } + } + if (special && !(g->flags & yajl_gen_json5)) + return yajl_gen_invalid_number; + INSERT_SEP; INSERT_WHITESPACE; g->print(g->ctx, i, (unsigned int)strlen(i)); APPENDED_ATOM; FINAL_NEWLINE; @@ -263,9 +276,19 @@ yajl_gen_string(yajl_gen g, const unsigned char * str, } } ENSURE_VALID_STATE; INSERT_SEP; INSERT_WHITESPACE; - g->print(g->ctx, "\"", 1); - yajl_string_encode(g->print, g->ctx, str, len, g->flags & yajl_gen_escape_solidus); - g->print(g->ctx, "\"", 1); + if (g->flags & yajl_gen_json5 && + (g->state[g->depth] == yajl_gen_map_key || + g->state[g->depth] == yajl_gen_map_start) && + yajl_string_validate_identifier(str, len)) { + /* No need to quote this key */ + g->print(g->ctx, (const char *) str, len); + } + else { + g->print(g->ctx, "\"", 1); + yajl_string_encode(g->print, g->ctx, str, len, g->flags & yajl_gen_escape_solidus, + g->flags & yajl_gen_json5); + g->print(g->ctx, "\"", 1); + } APPENDED_ATOM; FINAL_NEWLINE; return yajl_gen_status_ok; diff --git a/src/yajl_lex.c b/src/yajl_lex.c index 0b6f7ccf..0f9cf4aa 100644 --- a/src/yajl_lex.c +++ b/src/yajl_lex.c @@ -87,6 +87,9 @@ struct yajl_lexer_t { /* shall we allow comments? */ unsigned int allowComments; + /* are we parsing JSON5? */ + unsigned int allowJson5; + /* shall we validate utf8 inside strings? 
*/ unsigned int validateUTF8; @@ -102,13 +105,15 @@ struct yajl_lexer_t { yajl_lexer yajl_lex_alloc(yajl_alloc_funcs * alloc, - unsigned int allowComments, unsigned int validateUTF8) + unsigned int allowComments, unsigned int validateUTF8, + unsigned int allowJson5) { yajl_lexer lxr = (yajl_lexer) YA_MALLOC(alloc, sizeof(struct yajl_lexer_t)); memset((void *) lxr, 0, sizeof(struct yajl_lexer_t)); lxr->buf = yajl_buf_alloc(alloc); lxr->allowComments = allowComments; lxr->validateUTF8 = validateUTF8; + lxr->allowJson5 = allowJson5; lxr->alloc = alloc; return lxr; } @@ -121,19 +126,21 @@ yajl_lex_free(yajl_lexer lxr) return; } -/* a lookup table which lets us quickly determine three things: +/* a lookup table which lets us quickly determine various things: * VEC - valid escaped control char - * note. the solidus '/' may be escaped or not. + * Note: the solidus '/' may be escaped or not. * IJC - invalid json char * VHC - valid hex char * NFP - needs further processing (from a string scanning perspective) * NUC - needs utf8 checking when enabled (from a string scanning perspective) + * VIC - valid identifier char (after the first char) */ #define VEC 0x01 #define IJC 0x02 #define VHC 0x04 #define NFP 0x08 #define NUC 0x10 +#define VIC 0x20 static const char charLookupTable[256] = { @@ -142,20 +149,20 @@ static const char charLookupTable[256] = /*10*/ IJC , IJC , IJC , IJC , IJC , IJC , IJC , IJC , /*18*/ IJC , IJC , IJC , IJC , IJC , IJC , IJC , IJC , -/*20*/ 0 , 0 , NFP|VEC|IJC, 0 , 0 , 0 , 0 , 0 , +/*20*/ 0 , 0 , NFP|VEC, 0 , VIC , 0 , 0 , NFP|VEC, /*28*/ 0 , 0 , 0 , 0 , 0 , 0 , 0 , VEC , -/*30*/ VHC , VHC , VHC , VHC , VHC , VHC , VHC , VHC , -/*38*/ VHC , VHC , 0 , 0 , 0 , 0 , 0 , 0 , +/*30*/ VHC|VIC, VHC|VIC, VHC|VIC, VHC|VIC, VHC|VIC, VHC|VIC, VHC|VIC, VHC|VIC, +/*38*/ VHC|VIC, VHC|VIC, 0 , 0 , 0 , 0 , 0 , 0 , -/*40*/ 0 , VHC , VHC , VHC , VHC , VHC , VHC , 0 , -/*48*/ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , -/*50*/ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , -/*58*/ 0 , 0 , 0 , 0 , 
NFP|VEC|IJC, 0 , 0 , 0 , +/*40*/ 0 , VHC|VIC, VHC|VIC, VHC|VIC, VHC|VIC, VHC|VIC, VHC|VIC, VIC , +/*48*/ VIC , VIC , VIC , VIC , VIC , VIC , VIC , VIC , +/*50*/ VIC , VIC , VIC , VIC , VIC , VIC , VIC , VIC , +/*58*/ VIC , VIC , VIC , 0 , NFP|VEC|IJC, 0 , 0 , VIC , -/*60*/ 0 , VHC , VEC|VHC, VHC , VHC , VHC , VEC|VHC, 0 , -/*68*/ 0 , 0 , 0 , 0 , 0 , 0 , VEC , 0 , -/*70*/ 0 , 0 , VEC , 0 , VEC , 0 , 0 , 0 , -/*78*/ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , +/*60*/ 0 , VHC|VIC, VEC|VHC|VIC, VHC|VIC, VHC|VIC, VHC|VIC, VEC|VHC|VIC, VIC, +/*68*/ VIC , VIC , VIC , VIC , VIC , VIC , VEC|VIC, VIC , +/*70*/ VIC , VIC , VEC|VIC, VIC , VEC|VIC, VIC , VIC , VIC , +/*78*/ VIC , VIC , VIC , 0 , 0 , 0 , 0 , 0 , NUC , NUC , NUC , NUC , NUC , NUC , NUC , NUC , NUC , NUC , NUC , NUC , NUC , NUC , NUC , NUC , @@ -189,7 +196,7 @@ static const char charLookupTable[256] = * * NOTE: on error the offset will point to the first char of the * invalid utf8 */ -#define UTF8_CHECK_EOF if (*offset >= jsonTextLen) { return yajl_tok_eof; } +#define UTF8_CHECK_EOF if (*offset >= jsonTextLen) return yajl_tok_eof; static yajl_tok yajl_lex_utf8_char(yajl_lexer lexer, const unsigned char * jsonText, @@ -266,7 +273,7 @@ yajl_string_scan(const unsigned char * buf, size_t len, int utf8check) static yajl_tok yajl_lex_string(yajl_lexer lexer, const unsigned char * jsonText, - size_t jsonTextLen, size_t * offset) + size_t jsonTextLen, size_t * offset, const char quote) { yajl_tok tok = yajl_tok_error; int hasEscapes = 0; @@ -301,7 +308,7 @@ yajl_lex_string(yajl_lexer lexer, const unsigned char * jsonText, curChar = readChar(lexer, jsonText, offset); /* quote terminates */ - if (curChar == '"') { + if (curChar == quote) { tok = yajl_tok_string; break; } @@ -321,16 +328,38 @@ yajl_lex_string(yajl_lexer lexer, const unsigned char * jsonText, if (!(charLookupTable[curChar] & VHC)) { /* back up to offending char */ unreadChar(lexer, offset); - lexer->error = yajl_lex_string_invalid_hex_char; + lexer->error = 
yajl_lex_string_invalid_hex_u_char; + goto finish_string_lex; + } + } + } + else if (lexer->allowJson5 && curChar == 'x') { + unsigned int i = 0; + + for (i=0;i<2;i++) { + STR_CHECK_EOF; + curChar = readChar(lexer, jsonText, offset); + if (!(charLookupTable[curChar] & VHC)) { + /* back up to offending char */ + unreadChar(lexer, offset); + lexer->error = yajl_lex_string_invalid_hex_x_char; goto finish_string_lex; } } - } else if (!(charLookupTable[curChar] & VEC)) { + } + else if (lexer->allowJson5 ? (curChar >= '1' && curChar <= '9') + : !(charLookupTable[curChar] & VEC)) { /* back up to offending char */ unreadChar(lexer, offset); lexer->error = yajl_lex_string_invalid_escaped_char; goto finish_string_lex; } + else if (lexer->allowJson5 && curChar == '\r') { + STR_CHECK_EOF; + curChar = readChar(lexer, jsonText, offset); + if (curChar != '\n') + unreadChar(lexer, offset); + } } /* when not validating UTF8 it's a simple table lookup to determine * if the present character is invalid */ @@ -367,36 +396,83 @@ yajl_lex_string(yajl_lexer lexer, const unsigned char * jsonText, #define RETURN_IF_EOF if (*offset >= jsonTextLen) return yajl_tok_eof; +/* For both identifiers and numbers, we always have to lex one + * character too many to know when they are complete. + */ + +static yajl_tok +yajl_lex_identifier(yajl_lexer lexer, const unsigned char * jsonText, + size_t jsonTextLen, size_t * offset) +{ + unsigned char c; + + do { + RETURN_IF_EOF; + c = readChar(lexer, jsonText, offset); + } while (charLookupTable[c] & VIC); + + /* we always go "one too far" */ + unreadChar(lexer, offset); + + return yajl_tok_identifier; +} + static yajl_tok yajl_lex_number(yajl_lexer lexer, const unsigned char * jsonText, size_t jsonTextLen, size_t * offset) { - /** XXX: numbers are the only entities in json that we must lex - * _beyond_ in order to know that they are complete. There - * is an ambiguous case for integers at EOF. 
*/ - + const char hexDigits[] = "0123456789abcdefABCDEF"; unsigned char c; + int numRd = 0; yajl_tok tok = yajl_tok_integer; RETURN_IF_EOF; c = readChar(lexer, jsonText, offset); - /* optional leading minus */ - if (c == '-') { + /* optional leading plus/minus */ + if (c == '-' || (lexer->allowJson5 && c == '+')) { RETURN_IF_EOF; c = readChar(lexer, jsonText, offset); } - /* a single zero, or a series of integers */ + if (c == 'I') { + const char * want = "nfinity"; + do { + RETURN_IF_EOF; + c = readChar(lexer, jsonText, offset); + if (c != *want) { + unreadChar(lexer, offset); + lexer->error = yajl_lex_invalid_string; + return yajl_tok_error; + } + } while (*(++want)); + if (!lexer->allowJson5) { + unreadChar(lexer, offset); + lexer->error = yajl_lex_unallowed_special_number; + return yajl_tok_error; + } + return yajl_tok_double; + } + + /* a single zero, hex number, or a series of decimal digits */ if (c == '0') { + numRd++; RETURN_IF_EOF; c = readChar(lexer, jsonText, offset); + if (c == 'x' || c == 'X') { + if (lexer->allowJson5) goto got_hex; + lexer->error = yajl_lex_unallowed_hex_integer; + return yajl_tok_error; + } } else if (c >= '1' && c <= '9') { do { + numRd++; RETURN_IF_EOF; c = readChar(lexer, jsonText, offset); } while (c >= '0' && c <= '9'); + } else if (lexer->allowJson5 && c == '.') { + goto got_decimal; } else { unreadChar(lexer, offset); lexer->error = yajl_lex_missing_integer_after_minus; @@ -405,10 +481,10 @@ yajl_lex_number(yajl_lexer lexer, const unsigned char * jsonText, /* optional fraction (indicates this is floating point) */ if (c == '.') { - int numRd = 0; - + got_decimal: RETURN_IF_EOF; c = readChar(lexer, jsonText, offset); + if (!lexer->allowJson5) numRd = 0; while (c >= '0' && c <= '9') { numRd++; @@ -448,6 +524,25 @@ yajl_lex_number(yajl_lexer lexer, const unsigned char * jsonText, tok = yajl_tok_double; } + goto end_number; + + got_hex: + RETURN_IF_EOF; + c = readChar(lexer, jsonText, offset); + + if (strchr(hexDigits, c)) { + 
do { + RETURN_IF_EOF; + c = readChar(lexer, jsonText, offset); + } while (strchr(hexDigits, c)); + } + else { + unreadChar(lexer, offset); + lexer->error = yajl_lex_missing_hex_digit_after_0x; + return yajl_tok_error; + } + + end_number: /* we always go "one too far" */ unreadChar(lexer, offset); @@ -495,6 +590,23 @@ yajl_lex_comment(yajl_lexer lexer, const unsigned char * jsonText, return tok; } +/* Macro to reduce code duplication in yajl_lex_lex() */ +#define LEX_WANT(tring) \ + const char * want = tring; \ + do { \ + if (*offset >= jsonTextLen) { \ + tok = yajl_tok_eof; \ + goto lexed; \ + } \ + c = readChar(lexer, jsonText, offset); \ + if (c != *want) { \ + unreadChar(lexer, offset); \ + lexer->error = yajl_lex_invalid_string; \ + tok = yajl_tok_error; \ + goto lexed; \ + } \ + } while (*(++want)) + yajl_tok yajl_lex_lex(yajl_lexer lexer, const unsigned char * jsonText, size_t jsonTextLen, size_t * offset, @@ -519,16 +631,16 @@ yajl_lex_lex(yajl_lexer lexer, const unsigned char * jsonText, switch (c) { case '{': - tok = yajl_tok_left_bracket; + tok = yajl_tok_left_brace; goto lexed; case '}': - tok = yajl_tok_right_bracket; + tok = yajl_tok_right_brace; goto lexed; case '[': - tok = yajl_tok_left_brace; + tok = yajl_tok_left_bracket; goto lexed; case ']': - tok = yajl_tok_right_brace; + tok = yajl_tok_right_bracket; goto lexed; case ',': tok = yajl_tok_comma; @@ -540,71 +652,58 @@ yajl_lex_lex(yajl_lexer lexer, const unsigned char * jsonText, startOffset++; break; case 't': { - const char * want = "rue"; - do { - if (*offset >= jsonTextLen) { - tok = yajl_tok_eof; - goto lexed; - } - c = readChar(lexer, jsonText, offset); - if (c != *want) { - unreadChar(lexer, offset); - lexer->error = yajl_lex_invalid_string; - tok = yajl_tok_error; - goto lexed; - } - } while (*(++want)); + LEX_WANT("rue"); tok = yajl_tok_bool; goto lexed; } case 'f': { - const char * want = "alse"; - do { - if (*offset >= jsonTextLen) { - tok = yajl_tok_eof; - goto lexed; - } - c = 
readChar(lexer, jsonText, offset); - if (c != *want) { - unreadChar(lexer, offset); - lexer->error = yajl_lex_invalid_string; - tok = yajl_tok_error; - goto lexed; - } - } while (*(++want)); + LEX_WANT("alse"); tok = yajl_tok_bool; goto lexed; } case 'n': { - const char * want = "ull"; - do { - if (*offset >= jsonTextLen) { - tok = yajl_tok_eof; - goto lexed; - } - c = readChar(lexer, jsonText, offset); - if (c != *want) { - unreadChar(lexer, offset); - lexer->error = yajl_lex_invalid_string; - tok = yajl_tok_error; - goto lexed; - } - } while (*(++want)); + LEX_WANT("ull"); tok = yajl_tok_null; goto lexed; } + case 'I': { + LEX_WANT("nfinity"); + if (!lexer->allowJson5) { + unreadChar(lexer, offset); + lexer->error = yajl_lex_unallowed_special_number; + tok = yajl_tok_error; + } else { + tok = yajl_tok_double; + } + goto lexed; + } + case 'N': { + LEX_WANT("aN"); + if (!lexer->allowJson5) { + unreadChar(lexer, offset); + lexer->error = yajl_lex_unallowed_special_number; + tok = yajl_tok_error; + } else { + tok = yajl_tok_double; + } + goto lexed; + } + case '\'': + if (!lexer->allowJson5) goto invalid; + /* Fall through... 
*/ case '"': { - tok = yajl_lex_string(lexer, (const unsigned char *) jsonText, - jsonTextLen, offset); + tok = yajl_lex_string(lexer, jsonText, jsonTextLen, offset, c); goto lexed; } + case '+': case '.': + if (!lexer->allowJson5) + goto invalid; case '-': case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': { /* integer parsing wants to start from the beginning */ unreadChar(lexer, offset); - tok = yajl_lex_number(lexer, (const unsigned char *) jsonText, - jsonTextLen, offset); + tok = yajl_lex_number(lexer, jsonText, jsonTextLen, offset); goto lexed; } case '/': @@ -636,6 +735,7 @@ yajl_lex_lex(yajl_lexer lexer, const unsigned char * jsonText, /* hit error or eof, bail */ goto lexed; default: + invalid: lexer->error = yajl_lex_invalid_char; tok = yajl_tok_error; goto lexed; @@ -670,6 +770,126 @@ yajl_lex_lex(yajl_lexer lexer, const unsigned char * jsonText, *outLen -= 2; } +#ifdef YAJL_LEXER_DEBUG + if (tok == yajl_tok_error) { + printf("lexical error: %s\n", + yajl_lex_error_to_string(yajl_lex_get_error(lexer))); + } else if (tok == yajl_tok_eof) { + printf("EOF hit\n"); + } else { + printf("lexed %s: '", tokToStr(tok)); + fwrite(*outBuf, 1, *outLen, stdout); + printf("'\n"); + } +#endif + + return tok; +} + +yajl_tok yajl_lex_key(yajl_lexer lexer, const unsigned char * jsonText, + size_t jsonTextLen, size_t * offset, + const unsigned char ** outBuf, size_t * outLen) +{ + yajl_tok tok = yajl_tok_error; + unsigned char c; + size_t startOffset = *offset; + + *outBuf = NULL; + *outLen = 0; + + for (;;) { + assert(*offset <= jsonTextLen); + + if (*offset >= jsonTextLen) { + tok = yajl_tok_eof; + goto lexed; + } + + c = readChar(lexer, jsonText, offset); + + switch (c) { + case '\t': case '\n': case '\v': case '\f': case '\r': case ' ': + startOffset++; + break; + case '}': + tok = yajl_tok_right_brace; + goto lexed; + case '\'': + if (!lexer->allowJson5) goto invalid; + /* Fall through... 
*/ + case '"': { + tok = yajl_lex_string(lexer, jsonText, jsonTextLen, offset, c); + goto lexed; + } + case '/': + /* If comments are disabled this is an error. */ + if (!lexer->allowComments) { + unreadChar(lexer, offset); + lexer->error = yajl_lex_unallowed_comment; + tok = yajl_tok_error; + goto lexed; + } + /* Comments are enabled, so lex it. + * Possible outcomes are: + * - successful lex (tok_comment, which means continue), + * - malformed comment opening (slash not followed by + * '*' or '/') (tok_error) + * - eof hit. (tok_eof) */ + tok = yajl_lex_comment(lexer, jsonText, jsonTextLen, offset); + if (tok == yajl_tok_comment) { + /* "error" is silly, but that's the initial + * state of tok. guilty until proven innocent. */ + tok = yajl_tok_error; + yajl_buf_clear(lexer->buf); + lexer->bufInUse = 0; + startOffset = *offset; + break; + } + /* hit error or eof, bail */ + goto lexed; + default: + if (lexer->allowJson5 && (c == '$' || c == '_' || + (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'))) { + tok = yajl_lex_identifier(lexer, jsonText, jsonTextLen, offset); + } + else { + invalid: + lexer->error = yajl_lex_invalid_char; + tok = yajl_tok_error; + } + goto lexed; + } + } + + lexed: + /* need to append to buffer if the buffer is in use or + * if it's an EOF token */ + if (tok == yajl_tok_eof || lexer->bufInUse) { + if (!lexer->bufInUse) yajl_buf_clear(lexer->buf); + lexer->bufInUse = 1; + yajl_buf_append(lexer->buf, jsonText + startOffset, *offset - startOffset); + lexer->bufOff = 0; + + if (tok != yajl_tok_eof) { + *outBuf = yajl_buf_data(lexer->buf); + *outLen = yajl_buf_len(lexer->buf); + lexer->bufInUse = 0; + } + } else if (tok != yajl_tok_error) { + *outBuf = jsonText + startOffset; + *outLen = *offset - startOffset; + } + + /* For strings skip the quotes. 
*/ + if (tok == yajl_tok_string || + tok == yajl_tok_string_with_escapes) { + assert(*outLen >= 2); + (*outBuf)++; + *outLen -= 2; + } + else if (tok == yajl_tok_identifier) { + tok = yajl_tok_string; + } #ifdef YAJL_LEXER_DEBUG if (tok == yajl_tok_error) { @@ -700,9 +920,12 @@ yajl_lex_error_to_string(yajl_lex_error error) "which it may not."; case yajl_lex_string_invalid_json_char: return "invalid character inside string."; - case yajl_lex_string_invalid_hex_char: + case yajl_lex_string_invalid_hex_u_char: return "invalid (non-hex) character occurs after '\\u' inside " "string."; + case yajl_lex_string_invalid_hex_x_char: + return "invalid (non-hex) character occurs after '\\x' inside " + "string."; case yajl_lex_invalid_char: return "invalid char in json text."; case yajl_lex_invalid_string: @@ -714,10 +937,16 @@ yajl_lex_error_to_string(yajl_lex_error error) "decimal point."; case yajl_lex_missing_integer_after_minus: return "malformed number, a digit is required after the " - "minus sign."; + "plus/minus sign."; case yajl_lex_unallowed_comment: return "probable comment found in input text, comments are " "not enabled."; + case yajl_lex_missing_hex_digit_after_0x: + return "malformed number, a hex digit is required after the 0x/0X."; + case yajl_lex_unallowed_hex_integer: + return "probable hex number found, JSON5 is not enabled."; + case yajl_lex_unallowed_special_number: + return "special number Infinity or NaN found, JSON5 is not enabled."; } return "unknown error code"; } diff --git a/src/yajl_lex.h b/src/yajl_lex.h index fd17c001..a8c5400e 100644 --- a/src/yajl_lex.h +++ b/src/yajl_lex.h @@ -41,7 +41,12 @@ typedef enum { yajl_tok_string, yajl_tok_string_with_escapes, - /* comment tokens are not currently returned to the parser, ever */ + /* These tokens are used within the lexer and never seen by the parser: */ + + /* An unquoted map key, for JSON5 only, returned as yajl_tok_string */ + yajl_tok_identifier, + + /* A comment token, never returned */ 
yajl_tok_comment } yajl_tok; @@ -49,7 +54,8 @@ typedef struct yajl_lexer_t * yajl_lexer; yajl_lexer yajl_lex_alloc(yajl_alloc_funcs * alloc, unsigned int allowComments, - unsigned int validateUTF8); + unsigned int validateUTF8, + unsigned int allowJson5); void yajl_lex_free(yajl_lexer lexer); @@ -79,6 +85,14 @@ yajl_tok yajl_lex_lex(yajl_lexer lexer, const unsigned char * jsonText, size_t jsonTextLen, size_t * offset, const unsigned char ** outBuf, size_t * outLen); +/** + * A specialized version of yajl_lex_lex for use when the next token is + * a map key, which the parser knows. + */ +yajl_tok yajl_lex_key(yajl_lexer lexer, const unsigned char * jsonText, + size_t jsonTextLen, size_t * offset, + const unsigned char ** outBuf, size_t * outLen); + /** have a peek at the next token, but don't move the lexer forward */ yajl_tok yajl_lex_peek(yajl_lexer lexer, const unsigned char * jsonText, size_t jsonTextLen, size_t offset); @@ -89,13 +103,17 @@ typedef enum { yajl_lex_string_invalid_utf8, yajl_lex_string_invalid_escaped_char, yajl_lex_string_invalid_json_char, - yajl_lex_string_invalid_hex_char, + yajl_lex_string_invalid_hex_u_char, + yajl_lex_string_invalid_hex_x_char, yajl_lex_invalid_char, yajl_lex_invalid_string, yajl_lex_missing_integer_after_decimal, yajl_lex_missing_integer_after_exponent, yajl_lex_missing_integer_after_minus, - yajl_lex_unallowed_comment + yajl_lex_unallowed_comment, + yajl_lex_missing_hex_digit_after_0x, + yajl_lex_unallowed_hex_integer, + yajl_lex_unallowed_special_number, } yajl_lex_error; const char * yajl_lex_error_to_string(yajl_lex_error error); diff --git a/src/yajl_parser.c b/src/yajl_parser.c index 1a528a64..966f4caa 100644 --- a/src/yajl_parser.c +++ b/src/yajl_parser.c @@ -29,33 +29,52 @@ #include #include -#define MAX_VALUE_TO_MULTIPLY ((LLONG_MAX / 10) + (LLONG_MAX % 10)) - - /* same semantics as strtol */ long long yajl_parse_integer(const unsigned char *number, unsigned int length) { long long ret = 0; long sign = 1; + long 
base = 10; + long long max = LLONG_MAX / base; const unsigned char *pos = number; - if (*pos == '-') { pos++; sign = -1; } - if (*pos == '+') { pos++; } + const unsigned char *end = number + length; - while (pos < number + length) { - if ( ret > MAX_VALUE_TO_MULTIPLY ) { - errno = ERANGE; - return sign == 1 ? LLONG_MAX : LLONG_MIN; - } - ret *= 10; - if (LLONG_MAX - ret < (*pos - '0')) { + if (*pos == '-') { + pos++; + sign = -1; + } + else if (*pos == '+') { + pos++; + } + + if (*pos == '0' && + (pos[1] == 'x' || pos[1] == 'X')) { + base = 16; + max = LLONG_MAX / base; + pos += 2; + } + + while (pos < end) { + int digit; + + if (ret > max) { errno = ERANGE; return sign == 1 ? LLONG_MAX : LLONG_MIN; } - if (*pos < '0' || *pos > '9') { + + ret *= base; + digit = *pos++ - '0'; + /* Don't have to check for non-digit characters, + * the lexer has already rejected any bad digits. + */ + if (digit > 9) + digit = (digit - ('A' - '0') + 10) & 0xf; + + if (LLONG_MAX - ret < digit) { errno = ERANGE; return sign == 1 ? 
LLONG_MAX : LLONG_MIN; } - ret += (*pos++ - '0'); + ret += digit; } return sign * ret; @@ -264,13 +283,13 @@ yajl_do_parse(yajl_handle hand, const unsigned char * jsonText, _CC_CHK(hand->callbacks->yajl_null(hand->ctx)); } break; - case yajl_tok_left_bracket: + case yajl_tok_left_brace: if (hand->callbacks && hand->callbacks->yajl_start_map) { _CC_CHK(hand->callbacks->yajl_start_map(hand->ctx)); } stateToPush = yajl_state_map_start; break; - case yajl_tok_left_brace: + case yajl_tok_left_bracket: if (hand->callbacks && hand->callbacks->yajl_start_array) { _CC_CHK(hand->callbacks->yajl_start_array(hand->ctx)); } @@ -330,9 +349,11 @@ yajl_do_parse(yajl_handle hand, const unsigned char * jsonText, } } break; - case yajl_tok_right_brace: { - if (yajl_bs_current(hand->stateStack) == - yajl_state_array_start) + case yajl_tok_right_bracket: { + yajl_state s = yajl_bs_current(hand->stateStack); + if (s == yajl_state_array_start || + ((hand->flags & yajl_allow_json5) && + (s == yajl_state_array_need_val))) { if (hand->callbacks && hand->callbacks->yajl_end_array) @@ -346,7 +367,7 @@ yajl_do_parse(yajl_handle hand, const unsigned char * jsonText, } case yajl_tok_colon: case yajl_tok_comma: - case yajl_tok_right_bracket: + case yajl_tok_right_brace: yajl_bs_set(hand->stateStack, yajl_state_parse_error); hand->parseError = "unallowed token at this point in JSON text"; @@ -377,8 +398,8 @@ yajl_do_parse(yajl_handle hand, const unsigned char * jsonText, case yajl_state_map_need_key: { /* only difference between these two states is that in * start '}' is valid, whereas in need_key, we've parsed - * a comma, and a string key _must_ follow */ - tok = yajl_lex_lex(hand->lexer, jsonText, jsonTextLen, + * a comma, so unless this is JSON5 a key _must_ follow. 
*/ + tok = yajl_lex_key(hand->lexer, jsonText, jsonTextLen, offset, &buf, &bufLen); switch (tok) { case yajl_tok_eof: @@ -401,9 +422,11 @@ yajl_do_parse(yajl_handle hand, const unsigned char * jsonText, } yajl_bs_set(hand->stateStack, yajl_state_map_sep); goto around_again; - case yajl_tok_right_bracket: - if (yajl_bs_current(hand->stateStack) == - yajl_state_map_start) + case yajl_tok_right_brace: { + yajl_state s = yajl_bs_current(hand->stateStack); + if (s == yajl_state_map_start || + ((hand->flags & yajl_allow_json5) && + (s == yajl_state_map_need_key))) { if (hand->callbacks && hand->callbacks->yajl_end_map) { _CC_CHK(hand->callbacks->yajl_end_map(hand->ctx)); @@ -411,10 +434,12 @@ yajl_do_parse(yajl_handle hand, const unsigned char * jsonText, yajl_bs_pop(hand->stateStack); goto around_again; } + } default: yajl_bs_set(hand->stateStack, yajl_state_parse_error); - hand->parseError = - "invalid object key (must be a string)"; + hand->parseError = hand->flags & yajl_allow_json5 ? + "invalid object key (must be a string or identifier)" : + "invalid object key (must be a string)"; goto around_again; } } @@ -441,7 +466,7 @@ yajl_do_parse(yajl_handle hand, const unsigned char * jsonText, tok = yajl_lex_lex(hand->lexer, jsonText, jsonTextLen, offset, &buf, &bufLen); switch (tok) { - case yajl_tok_right_bracket: + case yajl_tok_right_brace: if (hand->callbacks && hand->callbacks->yajl_end_map) { _CC_CHK(hand->callbacks->yajl_end_map(hand->ctx)); } @@ -469,7 +494,7 @@ yajl_do_parse(yajl_handle hand, const unsigned char * jsonText, tok = yajl_lex_lex(hand->lexer, jsonText, jsonTextLen, offset, &buf, &bufLen); switch (tok) { - case yajl_tok_right_brace: + case yajl_tok_right_bracket: if (hand->callbacks && hand->callbacks->yajl_end_array) { _CC_CHK(hand->callbacks->yajl_end_array(hand->ctx)); } @@ -495,4 +520,3 @@ yajl_do_parse(yajl_handle hand, const unsigned char * jsonText, abort(); return yajl_status_error; } - diff --git a/src/yajl_tree.c b/src/yajl_tree.c index 
3d357a32..c7fcbc85 100644 --- a/src/yajl_tree.c +++ b/src/yajl_tree.c @@ -430,7 +430,7 @@ yajl_val yajl_tree_parse (const char *input, memset (error_buffer, 0, error_buffer_size); handle = yajl_alloc (&callbacks, NULL, &ctx); - yajl_config(handle, yajl_allow_comments, 1); + yajl_config(handle, yajl_allow_json5, 1); status = yajl_parse(handle, (unsigned char *) input, diff --git a/test/parsing/cases/a5_codepoints_from_hex.json b/test/parsing/cases/a5_codepoints_from_hex.json new file mode 100644 index 00000000..d0d88d58 --- /dev/null +++ b/test/parsing/cases/a5_codepoints_from_hex.json @@ -0,0 +1 @@ +"\x0a\x07\x21\x40\x7c" diff --git a/test/parsing/cases/a5_codepoints_from_hex.json.gold b/test/parsing/cases/a5_codepoints_from_hex.json.gold new file mode 100644 index 00000000..5e3f00aa --- /dev/null +++ b/test/parsing/cases/a5_codepoints_from_hex.json.gold @@ -0,0 +1,3 @@ +string: ' +!@|' +memory leaks: 0 diff --git a/test/parsing/cases/a5_doubles.json b/test/parsing/cases/a5_doubles.json new file mode 100644 index 00000000..342273f0 --- /dev/null +++ b/test/parsing/cases/a5_doubles.json @@ -0,0 +1 @@ +[ .1e2, 10., +3.141569, -.1e4, NaN, Infinity, +Infinity, -Infinity ] diff --git a/test/parsing/cases/a5_doubles.json.gold b/test/parsing/cases/a5_doubles.json.gold new file mode 100644 index 00000000..8ecafd25 --- /dev/null +++ b/test/parsing/cases/a5_doubles.json.gold @@ -0,0 +1,11 @@ +array open '[' +double: 10 +double: 10 +double: 3.14157 +double: -1000 +double: NaN +double: Infinity +double: Infinity +double: -Infinity +array close ']' +memory leaks: 0 diff --git a/test/parsing/cases/a5_integers.json b/test/parsing/cases/a5_integers.json new file mode 100644 index 00000000..3bc84562 --- /dev/null +++ b/test/parsing/cases/a5_integers.json @@ -0,0 +1,10 @@ +[ +1,+2,+3,+4,+5,+6,+7,+8,+9, + 0x1,0x2,0x3,0x4,0x5,0x6,0x7,0x8,0x9, + 0xa,0xb,0xc,0xd,0xe,0xf, + 0xA,0xB,0xC,0xD,0xE,0xF, + +0xfedcba98, -0x6789ABCD, + +123456789 , -123456789, + +2147483647, -2147483648, + 
0x7fffFFFFffffFFFF, -0x7FFFffffFFFFffff, + 9223372036854775807, -9223372036854775807 +] diff --git a/test/parsing/cases/a5_integers.json.gold b/test/parsing/cases/a5_integers.json.gold new file mode 100644 index 00000000..98a29973 --- /dev/null +++ b/test/parsing/cases/a5_integers.json.gold @@ -0,0 +1,43 @@ +array open '[' +integer: 1 +integer: 2 +integer: 3 +integer: 4 +integer: 5 +integer: 6 +integer: 7 +integer: 8 +integer: 9 +integer: 1 +integer: 2 +integer: 3 +integer: 4 +integer: 5 +integer: 6 +integer: 7 +integer: 8 +integer: 9 +integer: 10 +integer: 11 +integer: 12 +integer: 13 +integer: 14 +integer: 15 +integer: 10 +integer: 11 +integer: 12 +integer: 13 +integer: 14 +integer: 15 +integer: 4275878552 +integer: -1737075661 +integer: 123456789 +integer: -123456789 +integer: 2147483647 +integer: -2147483648 +integer: 9223372036854775807 +integer: -9223372036854775807 +integer: 9223372036854775807 +integer: -9223372036854775807 +array close ']' +memory leaks: 0 diff --git a/test/parsing/cases/a5_invalid_hex_char.json b/test/parsing/cases/a5_invalid_hex_char.json new file mode 100644 index 00000000..056beb5a --- /dev/null +++ b/test/parsing/cases/a5_invalid_hex_char.json @@ -0,0 +1 @@ +"yabba dabba do \x1g !!" diff --git a/test/parsing/cases/a5_invalid_hex_char.json.gold b/test/parsing/cases/a5_invalid_hex_char.json.gold new file mode 100644 index 00000000..848a31ad --- /dev/null +++ b/test/parsing/cases/a5_invalid_hex_char.json.gold @@ -0,0 +1,2 @@ +lexical error: invalid (non-hex) character occurs after '\x' inside string. 
+memory leaks: 0 diff --git a/test/parsing/cases/a5_map_identifiers.json b/test/parsing/cases/a5_map_identifiers.json new file mode 100644 index 00000000..a7b5744d --- /dev/null +++ b/test/parsing/cases/a5_map_identifiers.json @@ -0,0 +1,11 @@ +{ + $:1, + _:2, + A:3, + Z:4, + a:5, + z:6, + $1:7, + _zz:8, + ZZ9$Zalpha:9 +} diff --git a/test/parsing/cases/a5_map_identifiers.json.gold b/test/parsing/cases/a5_map_identifiers.json.gold new file mode 100644 index 00000000..c829c91c --- /dev/null +++ b/test/parsing/cases/a5_map_identifiers.json.gold @@ -0,0 +1,21 @@ +map open '{' +key: '$' +integer: 1 +key: '_' +integer: 2 +key: 'A' +integer: 3 +key: 'Z' +integer: 4 +key: 'a' +integer: 5 +key: 'z' +integer: 6 +key: '$1' +integer: 7 +key: '_zz' +integer: 8 +key: 'ZZ9$Zalpha' +integer: 9 +map close '}' +memory leaks: 0 diff --git a/test/parsing/cases/a5_simple_with_comments.json b/test/parsing/cases/a5_simple_with_comments.json new file mode 100644 index 00000000..3b79bba9 --- /dev/null +++ b/test/parsing/cases/a5_simple_with_comments.json @@ -0,0 +1,11 @@ +{ + "this": "is", // ignore this + "really": "simple", + /* ignore +this +too * / +** // +(/ +******/ + "json": "right?" +} diff --git a/test/parsing/cases/a5_simple_with_comments.json.gold b/test/parsing/cases/a5_simple_with_comments.json.gold new file mode 100644 index 00000000..80fcad2f --- /dev/null +++ b/test/parsing/cases/a5_simple_with_comments.json.gold @@ -0,0 +1,9 @@ +map open '{' +key: 'this' +string: 'is' +key: 'really' +string: 'simple' +key: 'json' +string: 'right?' +map close '}' +memory leaks: 0 diff --git a/test/parsing/cases/a5_spec_example.json b/test/parsing/cases/a5_spec_example.json new file mode 100644 index 00000000..0e72c646 --- /dev/null +++ b/test/parsing/cases/a5_spec_example.json @@ -0,0 +1,12 @@ +{ + // comments + unquoted: 'and you can quote me on that', + singleQuotes: 'I can use "double quotes" here', + lineBreaks: "Look, Mom! 
\ +No \\n's!", + hexadecimal: 0xdecaf, + leadingDecimalPoint: .8675309, andTrailing: 8675309., + positiveSign: +1, + trailingComma: 'in objects', andIn: ['arrays',], + "backwardsCompatible": "with JSON", +} diff --git a/test/parsing/cases/a5_spec_example.json.gold b/test/parsing/cases/a5_spec_example.json.gold new file mode 100644 index 00000000..bb6b713d --- /dev/null +++ b/test/parsing/cases/a5_spec_example.json.gold @@ -0,0 +1,25 @@ +map open '{' +key: 'unquoted' +string: 'and you can quote me on that' +key: 'singleQuotes' +string: 'I can use "double quotes" here' +key: 'lineBreaks' +string: 'Look, Mom! No \n's!' +key: 'hexadecimal' +integer: 912559 +key: 'leadingDecimalPoint' +double: 0.867531 +key: 'andTrailing' +double: 8.67531e+06 +key: 'positiveSign' +integer: 1 +key: 'trailingComma' +string: 'in objects' +key: 'andIn' +array open '[' +string: 'arrays' +array close ']' +key: 'backwardsCompatible' +string: 'with JSON' +map close '}' +memory leaks: 0 diff --git a/test/parsing/cases/a5_strings.json b/test/parsing/cases/a5_strings.json new file mode 100644 index 00000000..97dacc38 --- /dev/null +++ b/test/parsing/cases/a5_strings.json @@ -0,0 +1,11 @@ +[ + 'Hello\!', + "\"Evenin\',\" said the barman.", + // The following string has 3 different escaped line-endings, + // LF, CR, and CR+LF, which all disappear from the final string. + "Well \ +hi \ there \ +y'all!", + "\b\f\n\r\t\v\\", + '\A\C\/\D\C', +] diff --git a/test/parsing/cases/a5_strings.json.gold b/test/parsing/cases/a5_strings.json.gold new file mode 100644 index 00000000..7df60a86 --- /dev/null +++ b/test/parsing/cases/a5_strings.json.gold @@ -0,0 +1,9 @@ +array open '[' +string: 'Hello!' +string: '"Evenin'," said the barman.' +string: 'Well hi there y'all!' 
+string: ' + \' +string: 'AC/DC' +array close ']' +memory leaks: 0 diff --git a/test/parsing/cases/a5_trailing_commas.json b/test/parsing/cases/a5_trailing_commas.json new file mode 100644 index 00000000..f246ce43 --- /dev/null +++ b/test/parsing/cases/a5_trailing_commas.json @@ -0,0 +1 @@ +{"array":[1,2,],"map":{"a":1,},} diff --git a/test/parsing/cases/a5_trailing_commas.json.gold b/test/parsing/cases/a5_trailing_commas.json.gold new file mode 100644 index 00000000..a26d42e9 --- /dev/null +++ b/test/parsing/cases/a5_trailing_commas.json.gold @@ -0,0 +1,13 @@ +map open '{' +key: 'array' +array open '[' +integer: 1 +integer: 2 +array close ']' +key: 'map' +map open '{' +key: 'a' +integer: 1 +map close '}' +map close '}' +memory leaks: 0 diff --git a/test/parsing/cases/hex.json b/test/parsing/cases/hex.json new file mode 100644 index 00000000..dc610764 --- /dev/null +++ b/test/parsing/cases/hex.json @@ -0,0 +1 @@ +0x1 diff --git a/test/parsing/cases/hex.json.gold b/test/parsing/cases/hex.json.gold new file mode 100644 index 00000000..38667f07 --- /dev/null +++ b/test/parsing/cases/hex.json.gold @@ -0,0 +1,2 @@ +lexical error: probable hex number found, JSON5 is not enabled. +memory leaks: 0 diff --git a/test/parsing/cases/infinity.json b/test/parsing/cases/infinity.json new file mode 100644 index 00000000..3c62151d --- /dev/null +++ b/test/parsing/cases/infinity.json @@ -0,0 +1 @@ +Infinity diff --git a/test/parsing/cases/infinity.json.gold b/test/parsing/cases/infinity.json.gold new file mode 100644 index 00000000..3a65d995 --- /dev/null +++ b/test/parsing/cases/infinity.json.gold @@ -0,0 +1,2 @@ +lexical error: special number Infinity or NaN found, JSON5 is not enabled. 
+memory leaks: 0 diff --git a/test/parsing/cases/lonely_minus_sign.json.gold b/test/parsing/cases/lonely_minus_sign.json.gold index d15ede9b..f6789796 100644 --- a/test/parsing/cases/lonely_minus_sign.json.gold +++ b/test/parsing/cases/lonely_minus_sign.json.gold @@ -5,5 +5,5 @@ bool: true string: 'blue' string: 'baby where are you?' string: 'oh boo hoo!' -lexical error: malformed number, a digit is required after the minus sign. +lexical error: malformed number, a digit is required after the plus/minus sign. memory leaks: 0 diff --git a/test/parsing/cases/minus_infinity.json b/test/parsing/cases/minus_infinity.json new file mode 100644 index 00000000..879e80ee --- /dev/null +++ b/test/parsing/cases/minus_infinity.json @@ -0,0 +1 @@ +-Infinity diff --git a/test/parsing/cases/minus_infinity.json.gold b/test/parsing/cases/minus_infinity.json.gold new file mode 100644 index 00000000..3a65d995 --- /dev/null +++ b/test/parsing/cases/minus_infinity.json.gold @@ -0,0 +1,2 @@ +lexical error: special number Infinity or NaN found, JSON5 is not enabled. +memory leaks: 0 diff --git a/test/parsing/cases/nan.json b/test/parsing/cases/nan.json new file mode 100644 index 00000000..736991a1 --- /dev/null +++ b/test/parsing/cases/nan.json @@ -0,0 +1 @@ +NaN diff --git a/test/parsing/cases/nan.json.gold b/test/parsing/cases/nan.json.gold new file mode 100644 index 00000000..3a65d995 --- /dev/null +++ b/test/parsing/cases/nan.json.gold @@ -0,0 +1,2 @@ +lexical error: special number Infinity or NaN found, JSON5 is not enabled. 
+memory leaks: 0 diff --git a/test/parsing/run_tests.sh b/test/parsing/run_tests.sh index b37e4dd5..2a1e0915 100755 --- a/test/parsing/run_tests.sh +++ b/test/parsing/run_tests.sh @@ -37,13 +37,16 @@ testsSucceeded=0 testsTotal=0 for file in cases/*.json ; do + allowJson5="" allowComments="" allowGarbage="" allowMultiple="" allowPartials="" - # if the filename starts with dc_, we disallow comments for this test case $(basename $file) in + a5_*) + allowJson5="-5 " + ;; ac_*) allowComments="-c " ;; @@ -64,10 +67,10 @@ for file in cases/*.json ; do iter=1 success="SUCCESS" - # ${ECHO} -n "$testBinShort $allowPartials$allowComments$allowGarbage$allowMultiple-b $iter < $fileShort > ${fileShort}.test : " + # ${ECHO} -n "$testBinShort $allowPartials$allowJson5$allowComments$allowGarbage$allowMultiple-b $iter < $fileShort > ${fileShort}.test : " # parse with a read buffer size ranging from 1-31 to stress stream parsing while [ $iter -lt 32 ] && [ $success = "SUCCESS" ] ; do - $testBin $allowPartials $allowComments $allowGarbage $allowMultiple -b $iter < $file > ${file}.test 2>&1 + $testBin $allowPartials $allowJson5 $allowComments $allowGarbage $allowMultiple -b $iter < $file > ${file}.test 2>&1 diff ${DIFF_FLAGS} ${file}.gold ${file}.test > ${file}.out if [ $? 
-eq 0 ] ; then if [ $iter -eq 31 ] ; then testsSucceeded=$(( $testsSucceeded + 1 )) ; fi diff --git a/test/parsing/yajl_test.c b/test/parsing/yajl_test.c index c50755bc..af47d6c4 100644 --- a/test/parsing/yajl_test.c +++ b/test/parsing/yajl_test.c @@ -20,6 +20,7 @@ #include #include #include +#include #include @@ -85,7 +86,18 @@ static int test_yajl_integer(void *ctx, long long integerVal) static int test_yajl_double(void *ctx, double doubleVal) { - printf("double: %g\n", doubleVal); + if (doubleVal != doubleVal) { + printf("double: NaN\n"); + } + else if (doubleVal == HUGE_VAL) { + printf("double: Infinity\n"); + } + else if (doubleVal == -HUGE_VAL) { + printf("double: -Infinity\n"); + } + else { + printf("double: %g\n", doubleVal); + } return 1; } @@ -154,6 +166,7 @@ static void usage(const char * progname) "usage: %s [options]\n" "Parse input from stdin as JSON and ouput parsing details " "to stdout\n" + " -5 allow JSON5\n" " -b set the read buffer size\n" " -c allow comments\n" " -g allow *g*arbage after valid JSON text\n" @@ -196,7 +209,9 @@ main(int argc, char ** argv) /* check arguments. We expect exactly one! */ for (i=1;i= argc) usage(argv[0]); diff --git a/verify/json_verify.c b/verify/json_verify.c index 01849e03..478c0be3 100644 --- a/verify/json_verify.c +++ b/verify/json_verify.c @@ -25,6 +25,7 @@ usage(const char * progname) { fprintf(stderr, "%s: validate json from stdin\n" "usage: json_verify [options]\n" + " -5 allow JSON5\n" " -c allow comments\n" " -q quiet mode\n" " -s verify a stream of multiple json entities\n" @@ -52,6 +53,9 @@ main(int argc, char ** argv) unsigned int i; for ( i=1; i < strlen(argv[a]); i++) { switch (argv[a][i]) { + case '5': + yajl_config(hand, yajl_allow_json5, 1); + break; case 'q': quiet = 1; break;