From 3c50a2c3a76a7f61eef9f47de98c3032e7681bd4 Mon Sep 17 00:00:00 2001 From: Andrew Johnson Date: Mon, 13 Aug 2018 00:53:34 -0500 Subject: [PATCH 01/18] Start of JSON5 support Added yajl_allow_json5 config flag, pass it around. Added -5 option to yajl_test, json_reformat and json_verify. --- reformatter/json_reformat.c | 4 ++++ src/api/yajl_parse.h | 8 +++++++- src/yajl.c | 7 +++++-- src/yajl_lex.c | 7 ++++++- src/yajl_lex.h | 3 ++- src/yajl_parser.c | 3 +-- test/parsing/run_tests.sh | 9 ++++++--- test/parsing/yajl_test.c | 5 ++++- verify/json_verify.c | 4 ++++ 9 files changed, 39 insertions(+), 11 deletions(-) diff --git a/reformatter/json_reformat.c b/reformatter/json_reformat.c index 2ed4f398..7ec028fe 100644 --- a/reformatter/json_reformat.c +++ b/reformatter/json_reformat.c @@ -109,6 +109,7 @@ usage(const char * progname) { fprintf(stderr, "%s: reformat json from stdin\n" "usage: json_reformat [options]\n" + " -5 allow JSON5 input\n" " -e escape any forward slashes (for embedding in HTML)\n" " -m minimize json rather than beautify (default)\n" " -s reformat a stream of multiple json entites\n" @@ -143,6 +144,9 @@ main(int argc, char ** argv) unsigned int i; for ( i=1; i < strlen(argv[a]); i++) { switch (argv[a][i]) { + case '5': + yajl_config(hand, yajl_allow_json5, 1); + break; case 'm': yajl_gen_config(g, yajl_gen_beautify, 0); break; diff --git a/src/api/yajl_parse.h b/src/api/yajl_parse.h index 1c25a60d..3d6ffaf4 100644 --- a/src/api/yajl_parse.h +++ b/src/api/yajl_parse.h @@ -156,7 +156,13 @@ extern "C" { * yajl will enter an error state (premature EOF). Setting this * flag suppresses that check and the corresponding error. */ - yajl_allow_partial_values = 0x10 + yajl_allow_partial_values = 0x10, + /** + * The JSON5 standard allows additional formats for numbers, strings + * and object keys which are not permitted in the JSON standard. + * Setting this flag enables JSON5 formats in the lexer and parser. 
+ */ + yajl_allow_json5 = 0x20, } yajl_option; /** allow the modification of parser options subsequent to handle diff --git a/src/yajl.c b/src/yajl.c index d477893f..f1ae2cb9 100644 --- a/src/yajl.c +++ b/src/yajl.c @@ -91,6 +91,7 @@ yajl_config(yajl_handle h, yajl_option opt, ...) case yajl_allow_trailing_garbage: case yajl_allow_multiple_values: case yajl_allow_partial_values: + case yajl_allow_json5: if (va_arg(ap, int)) h->flags |= opt; else h->flags &= ~opt; break; @@ -124,7 +125,8 @@ yajl_parse(yajl_handle hand, const unsigned char * jsonText, if (hand->lexer == NULL) { hand->lexer = yajl_lex_alloc(&(hand->alloc), hand->flags & yajl_allow_comments, - !(hand->flags & yajl_dont_validate_strings)); + !(hand->flags & yajl_dont_validate_strings), + hand->flags & yajl_allow_json5); } status = yajl_do_parse(hand, jsonText, jsonTextLen); @@ -144,7 +146,8 @@ yajl_complete_parse(yajl_handle hand) if (hand->lexer == NULL) { hand->lexer = yajl_lex_alloc(&(hand->alloc), hand->flags & yajl_allow_comments, - !(hand->flags & yajl_dont_validate_strings)); + !(hand->flags & yajl_dont_validate_strings), + hand->flags & yajl_allow_json5); } return yajl_do_finish(hand); diff --git a/src/yajl_lex.c b/src/yajl_lex.c index 0b6f7ccf..56253c7d 100644 --- a/src/yajl_lex.c +++ b/src/yajl_lex.c @@ -87,6 +87,9 @@ struct yajl_lexer_t { /* shall we allow comments? */ unsigned int allowComments; + /* are we parsing JSON5? */ + unsigned int allowJson5; + /* shall we validate utf8 inside strings? 
*/ unsigned int validateUTF8; @@ -102,13 +105,15 @@ struct yajl_lexer_t { yajl_lexer yajl_lex_alloc(yajl_alloc_funcs * alloc, - unsigned int allowComments, unsigned int validateUTF8) + unsigned int allowComments, unsigned int validateUTF8, + unsigned int allowJson5) { yajl_lexer lxr = (yajl_lexer) YA_MALLOC(alloc, sizeof(struct yajl_lexer_t)); memset((void *) lxr, 0, sizeof(struct yajl_lexer_t)); lxr->buf = yajl_buf_alloc(alloc); lxr->allowComments = allowComments; lxr->validateUTF8 = validateUTF8; + lxr->allowJson5 = allowJson5; lxr->alloc = alloc; return lxr; } diff --git a/src/yajl_lex.h b/src/yajl_lex.h index fd17c001..5df858f1 100644 --- a/src/yajl_lex.h +++ b/src/yajl_lex.h @@ -49,7 +49,8 @@ typedef struct yajl_lexer_t * yajl_lexer; yajl_lexer yajl_lex_alloc(yajl_alloc_funcs * alloc, unsigned int allowComments, - unsigned int validateUTF8); + unsigned int validateUTF8, + unsigned int allowJson5); void yajl_lex_free(yajl_lexer lexer); diff --git a/src/yajl_parser.c b/src/yajl_parser.c index 1a528a64..20fff704 100644 --- a/src/yajl_parser.c +++ b/src/yajl_parser.c @@ -414,7 +414,7 @@ yajl_do_parse(yajl_handle hand, const unsigned char * jsonText, default: yajl_bs_set(hand->stateStack, yajl_state_parse_error); hand->parseError = - "invalid object key (must be a string)"; + "invalid object key (must be a string)"; goto around_again; } } @@ -495,4 +495,3 @@ yajl_do_parse(yajl_handle hand, const unsigned char * jsonText, abort(); return yajl_status_error; } - diff --git a/test/parsing/run_tests.sh b/test/parsing/run_tests.sh index b37e4dd5..2a1e0915 100755 --- a/test/parsing/run_tests.sh +++ b/test/parsing/run_tests.sh @@ -37,13 +37,16 @@ testsSucceeded=0 testsTotal=0 for file in cases/*.json ; do + allowJson5="" allowComments="" allowGarbage="" allowMultiple="" allowPartials="" - # if the filename starts with dc_, we disallow comments for this test case $(basename $file) in + a5_*) + allowJson5="-5 " + ;; ac_*) allowComments="-c " ;; @@ -64,10 +67,10 @@ for file 
in cases/*.json ; do iter=1 success="SUCCESS" - # ${ECHO} -n "$testBinShort $allowPartials$allowComments$allowGarbage$allowMultiple-b $iter < $fileShort > ${fileShort}.test : " + # ${ECHO} -n "$testBinShort $allowPartials$allowJson5$allowComments$allowGarbage$allowMultiple-b $iter < $fileShort > ${fileShort}.test : " # parse with a read buffer size ranging from 1-31 to stress stream parsing while [ $iter -lt 32 ] && [ $success = "SUCCESS" ] ; do - $testBin $allowPartials $allowComments $allowGarbage $allowMultiple -b $iter < $file > ${file}.test 2>&1 + $testBin $allowPartials $allowJson5 $allowComments $allowGarbage $allowMultiple -b $iter < $file > ${file}.test 2>&1 diff ${DIFF_FLAGS} ${file}.gold ${file}.test > ${file}.out if [ $? -eq 0 ] ; then if [ $iter -eq 31 ] ; then testsSucceeded=$(( $testsSucceeded + 1 )) ; fi diff --git a/test/parsing/yajl_test.c b/test/parsing/yajl_test.c index c50755bc..8affe8e5 100644 --- a/test/parsing/yajl_test.c +++ b/test/parsing/yajl_test.c @@ -154,6 +154,7 @@ static void usage(const char * progname) "usage: %s [options]\n" "Parse input from stdin as JSON and ouput parsing details " "to stdout\n" + " -5 allow JSON5\n" " -b set the read buffer size\n" " -c allow comments\n" " -g allow *g*arbage after valid JSON text\n" @@ -196,7 +197,9 @@ main(int argc, char ** argv) /* check arguments. We expect exactly one! 
*/ for (i=1;i= argc) usage(argv[0]); diff --git a/verify/json_verify.c b/verify/json_verify.c index 01849e03..478c0be3 100644 --- a/verify/json_verify.c +++ b/verify/json_verify.c @@ -25,6 +25,7 @@ usage(const char * progname) { fprintf(stderr, "%s: validate json from stdin\n" "usage: json_verify [options]\n" + " -5 allow JSON5\n" " -c allow comments\n" " -q quiet mode\n" " -s verify a stream of multiple json entities\n" @@ -52,6 +53,9 @@ main(int argc, char ** argv) unsigned int i; for ( i=1; i < strlen(argv[a]); i++) { switch (argv[a][i]) { + case '5': + yajl_config(hand, yajl_allow_json5, 1); + break; case 'q': quiet = 1; break; From 4b466c0dbbd49541e87902b9984a90c4d1561292 Mon Sep 17 00:00:00 2001 From: Andrew Johnson Date: Mon, 26 Jun 2017 23:51:17 -0500 Subject: [PATCH 02/18] Fix token names, bracket <=> brace See http://practicaltypography.com/parentheses-brackets-and-braces.html for evidence that this is the proper nomenclature. --- src/yajl_lex.c | 8 ++++---- src/yajl_parser.c | 14 +++++++------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/yajl_lex.c b/src/yajl_lex.c index 56253c7d..803e8215 100644 --- a/src/yajl_lex.c +++ b/src/yajl_lex.c @@ -524,16 +524,16 @@ yajl_lex_lex(yajl_lexer lexer, const unsigned char * jsonText, switch (c) { case '{': - tok = yajl_tok_left_bracket; + tok = yajl_tok_left_brace; goto lexed; case '}': - tok = yajl_tok_right_bracket; + tok = yajl_tok_right_brace; goto lexed; case '[': - tok = yajl_tok_left_brace; + tok = yajl_tok_left_bracket; goto lexed; case ']': - tok = yajl_tok_right_brace; + tok = yajl_tok_right_bracket; goto lexed; case ',': tok = yajl_tok_comma; diff --git a/src/yajl_parser.c b/src/yajl_parser.c index 20fff704..c1c04dea 100644 --- a/src/yajl_parser.c +++ b/src/yajl_parser.c @@ -264,13 +264,13 @@ yajl_do_parse(yajl_handle hand, const unsigned char * jsonText, _CC_CHK(hand->callbacks->yajl_null(hand->ctx)); } break; - case yajl_tok_left_bracket: + case yajl_tok_left_brace: if 
(hand->callbacks && hand->callbacks->yajl_start_map) { _CC_CHK(hand->callbacks->yajl_start_map(hand->ctx)); } stateToPush = yajl_state_map_start; break; - case yajl_tok_left_brace: + case yajl_tok_left_bracket: if (hand->callbacks && hand->callbacks->yajl_start_array) { _CC_CHK(hand->callbacks->yajl_start_array(hand->ctx)); } @@ -330,7 +330,7 @@ yajl_do_parse(yajl_handle hand, const unsigned char * jsonText, } } break; - case yajl_tok_right_brace: { + case yajl_tok_right_bracket: { if (yajl_bs_current(hand->stateStack) == yajl_state_array_start) { @@ -346,7 +346,7 @@ yajl_do_parse(yajl_handle hand, const unsigned char * jsonText, } case yajl_tok_colon: case yajl_tok_comma: - case yajl_tok_right_bracket: + case yajl_tok_right_brace: yajl_bs_set(hand->stateStack, yajl_state_parse_error); hand->parseError = "unallowed token at this point in JSON text"; @@ -401,7 +401,7 @@ yajl_do_parse(yajl_handle hand, const unsigned char * jsonText, } yajl_bs_set(hand->stateStack, yajl_state_map_sep); goto around_again; - case yajl_tok_right_bracket: + case yajl_tok_right_brace: if (yajl_bs_current(hand->stateStack) == yajl_state_map_start) { @@ -441,7 +441,7 @@ yajl_do_parse(yajl_handle hand, const unsigned char * jsonText, tok = yajl_lex_lex(hand->lexer, jsonText, jsonTextLen, offset, &buf, &bufLen); switch (tok) { - case yajl_tok_right_bracket: + case yajl_tok_right_brace: if (hand->callbacks && hand->callbacks->yajl_end_map) { _CC_CHK(hand->callbacks->yajl_end_map(hand->ctx)); } @@ -469,7 +469,7 @@ yajl_do_parse(yajl_handle hand, const unsigned char * jsonText, tok = yajl_lex_lex(hand->lexer, jsonText, jsonTextLen, offset, &buf, &bufLen); switch (tok) { - case yajl_tok_right_brace: + case yajl_tok_right_bracket: if (hand->callbacks && hand->callbacks->yajl_end_array) { _CC_CHK(hand->callbacks->yajl_end_array(hand->ctx)); } From e22e8bfe461645b159475e11cafc80d717a0e732 Mon Sep 17 00:00:00 2001 From: Andrew Johnson Date: Tue, 27 Jun 2017 00:02:37 -0500 Subject: [PATCH 03/18] Allow 
trailing commas in maps & arrays when parsing JSON5 Includes a test case. --- src/yajl_parser.c | 15 ++++++++++----- test/parsing/cases/a5_trailing_commas.json | 1 + test/parsing/cases/a5_trailing_commas.json.gold | 13 +++++++++++++ 3 files changed, 24 insertions(+), 5 deletions(-) create mode 100644 test/parsing/cases/a5_trailing_commas.json create mode 100644 test/parsing/cases/a5_trailing_commas.json.gold diff --git a/src/yajl_parser.c b/src/yajl_parser.c index c1c04dea..f22e5275 100644 --- a/src/yajl_parser.c +++ b/src/yajl_parser.c @@ -331,8 +331,10 @@ yajl_do_parse(yajl_handle hand, const unsigned char * jsonText, } break; case yajl_tok_right_bracket: { - if (yajl_bs_current(hand->stateStack) == - yajl_state_array_start) + yajl_state s = yajl_bs_current(hand->stateStack); + if (s == yajl_state_array_start || + ((hand->flags & yajl_allow_json5) && + (s == yajl_state_array_need_val))) { if (hand->callbacks && hand->callbacks->yajl_end_array) @@ -401,9 +403,11 @@ yajl_do_parse(yajl_handle hand, const unsigned char * jsonText, } yajl_bs_set(hand->stateStack, yajl_state_map_sep); goto around_again; - case yajl_tok_right_brace: - if (yajl_bs_current(hand->stateStack) == - yajl_state_map_start) + case yajl_tok_right_brace: { + yajl_state s = yajl_bs_current(hand->stateStack); + if (s == yajl_state_map_start || + ((hand->flags & yajl_allow_json5) && + (s == yajl_state_map_need_key))) { if (hand->callbacks && hand->callbacks->yajl_end_map) { _CC_CHK(hand->callbacks->yajl_end_map(hand->ctx)); @@ -411,6 +415,7 @@ yajl_do_parse(yajl_handle hand, const unsigned char * jsonText, yajl_bs_pop(hand->stateStack); goto around_again; } + } default: yajl_bs_set(hand->stateStack, yajl_state_parse_error); hand->parseError = diff --git a/test/parsing/cases/a5_trailing_commas.json b/test/parsing/cases/a5_trailing_commas.json new file mode 100644 index 00000000..f246ce43 --- /dev/null +++ b/test/parsing/cases/a5_trailing_commas.json @@ -0,0 +1 @@ +{"array":[1,2,],"map":{"a":1,},} diff 
--git a/test/parsing/cases/a5_trailing_commas.json.gold b/test/parsing/cases/a5_trailing_commas.json.gold new file mode 100644 index 00000000..a26d42e9 --- /dev/null +++ b/test/parsing/cases/a5_trailing_commas.json.gold @@ -0,0 +1,13 @@ +map open '{' +key: 'array' +array open '[' +integer: 1 +integer: 2 +array close ']' +key: 'map' +map open '{' +key: 'a' +integer: 1 +map close '}' +map close '}' +memory leaks: 0 From 28346e377b2bde9ccdaeba31645dbc179a73817b Mon Sep 17 00:00:00 2001 From: Andrew Johnson Date: Mon, 13 Aug 2018 00:58:25 -0500 Subject: [PATCH 04/18] JSON5: Modified lexer for some number support If configured for JSON5 the lexer now allows a leading or trailing decimal point on doubles, and an explicit leading + sign on integers or double numbers. Added tests to check these. --- src/yajl_lex.c | 17 +++++++++++++---- test/parsing/cases/a5_doubles.json | 1 + test/parsing/cases/a5_doubles.json.gold | 7 +++++++ test/parsing/cases/a5_integers.json | 3 +++ test/parsing/cases/a5_integers.json.gold | 14 ++++++++++++++ 5 files changed, 38 insertions(+), 4 deletions(-) create mode 100644 test/parsing/cases/a5_doubles.json create mode 100644 test/parsing/cases/a5_doubles.json.gold create mode 100644 test/parsing/cases/a5_integers.json create mode 100644 test/parsing/cases/a5_integers.json.gold diff --git a/src/yajl_lex.c b/src/yajl_lex.c index 803e8215..d751e140 100644 --- a/src/yajl_lex.c +++ b/src/yajl_lex.c @@ -381,27 +381,32 @@ yajl_lex_number(yajl_lexer lexer, const unsigned char * jsonText, * is an ambiguous case for integers at EOF. 
*/ unsigned char c; + int numRd = 0; yajl_tok tok = yajl_tok_integer; RETURN_IF_EOF; c = readChar(lexer, jsonText, offset); - /* optional leading minus */ - if (c == '-') { + /* optional leading plus/minus */ + if (c == '-' || (lexer->allowJson5 && c == '+')) { RETURN_IF_EOF; c = readChar(lexer, jsonText, offset); } /* a single zero, or a series of integers */ if (c == '0') { + numRd++; RETURN_IF_EOF; c = readChar(lexer, jsonText, offset); } else if (c >= '1' && c <= '9') { do { + numRd++; RETURN_IF_EOF; c = readChar(lexer, jsonText, offset); } while (c >= '0' && c <= '9'); + } else if (lexer->allowJson5 && c == '.') { + goto got_decimal; } else { unreadChar(lexer, offset); lexer->error = yajl_lex_missing_integer_after_minus; @@ -410,10 +415,10 @@ yajl_lex_number(yajl_lexer lexer, const unsigned char * jsonText, /* optional fraction (indicates this is floating point) */ if (c == '.') { - int numRd = 0; - + got_decimal: RETURN_IF_EOF; c = readChar(lexer, jsonText, offset); + if (!lexer->allowJson5) numRd = 0; while (c >= '0' && c <= '9') { numRd++; @@ -603,6 +608,9 @@ yajl_lex_lex(yajl_lexer lexer, const unsigned char * jsonText, jsonTextLen, offset); goto lexed; } + case '+': case '.': + if (!lexer->allowJson5) + goto invalid; case '-': case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': { @@ -641,6 +649,7 @@ yajl_lex_lex(yajl_lexer lexer, const unsigned char * jsonText, /* hit error or eof, bail */ goto lexed; default: + invalid: lexer->error = yajl_lex_invalid_char; tok = yajl_tok_error; goto lexed; diff --git a/test/parsing/cases/a5_doubles.json b/test/parsing/cases/a5_doubles.json new file mode 100644 index 00000000..5a3bf8da --- /dev/null +++ b/test/parsing/cases/a5_doubles.json @@ -0,0 +1 @@ +[ .1e2, 10., +3.141569, -.1e4] diff --git a/test/parsing/cases/a5_doubles.json.gold b/test/parsing/cases/a5_doubles.json.gold new file mode 100644 index 00000000..76d927cb --- /dev/null +++ 
b/test/parsing/cases/a5_doubles.json.gold @@ -0,0 +1,7 @@ +array open '[' +double: 10 +double: 10 +double: 3.14157 +double: -1000 +array close ']' +memory leaks: 0 diff --git a/test/parsing/cases/a5_integers.json b/test/parsing/cases/a5_integers.json new file mode 100644 index 00000000..96b96ffa --- /dev/null +++ b/test/parsing/cases/a5_integers.json @@ -0,0 +1,3 @@ +[ +1,+2,+3,+4,+5,+6,+7, + +123456789 , -123456789, + +2147483647, -2147483647 ] diff --git a/test/parsing/cases/a5_integers.json.gold b/test/parsing/cases/a5_integers.json.gold new file mode 100644 index 00000000..7ac4bee1 --- /dev/null +++ b/test/parsing/cases/a5_integers.json.gold @@ -0,0 +1,14 @@ +array open '[' +integer: 1 +integer: 2 +integer: 3 +integer: 4 +integer: 5 +integer: 6 +integer: 7 +integer: 123456789 +integer: -123456789 +integer: 2147483647 +integer: -2147483647 +array close ']' +memory leaks: 0 From 10416068e11912176808b9fd815dfae52767aeb3 Mon Sep 17 00:00:00 2001 From: Andrew Johnson Date: Mon, 13 Aug 2018 01:13:55 -0500 Subject: [PATCH 05/18] Turning on JSON5 also allows comments Includes the simple test case. --- src/yajl.c | 3 ++- test/parsing/cases/a5_simple_with_comments.json | 11 +++++++++++ test/parsing/cases/a5_simple_with_comments.json.gold | 9 +++++++++ 3 files changed, 22 insertions(+), 1 deletion(-) create mode 100644 test/parsing/cases/a5_simple_with_comments.json create mode 100644 test/parsing/cases/a5_simple_with_comments.json.gold diff --git a/src/yajl.c b/src/yajl.c index f1ae2cb9..46534fbb 100644 --- a/src/yajl.c +++ b/src/yajl.c @@ -86,12 +86,13 @@ yajl_config(yajl_handle h, yajl_option opt, ...) 
va_start(ap, opt); switch(opt) { + case yajl_allow_json5: + opt |= yajl_allow_comments; /* JSON5 allows comments */ case yajl_allow_comments: case yajl_dont_validate_strings: case yajl_allow_trailing_garbage: case yajl_allow_multiple_values: case yajl_allow_partial_values: - case yajl_allow_json5: if (va_arg(ap, int)) h->flags |= opt; else h->flags &= ~opt; break; diff --git a/test/parsing/cases/a5_simple_with_comments.json b/test/parsing/cases/a5_simple_with_comments.json new file mode 100644 index 00000000..3b79bba9 --- /dev/null +++ b/test/parsing/cases/a5_simple_with_comments.json @@ -0,0 +1,11 @@ +{ + "this": "is", // ignore this + "really": "simple", + /* ignore +this +too * / +** // +(/ +******/ + "json": "right?" +} diff --git a/test/parsing/cases/a5_simple_with_comments.json.gold b/test/parsing/cases/a5_simple_with_comments.json.gold new file mode 100644 index 00000000..80fcad2f --- /dev/null +++ b/test/parsing/cases/a5_simple_with_comments.json.gold @@ -0,0 +1,9 @@ +map open '{' +key: 'this' +string: 'is' +key: 'really' +string: 'simple' +key: 'json' +string: 'right?' +map close '}' +memory leaks: 0 From 014b14e00744c94bb44564d612b64b3c1c9fc0ff Mon Sep 17 00:00:00 2001 From: Andrew Johnson Date: Fri, 17 Aug 2018 23:36:24 -0500 Subject: [PATCH 06/18] Add JSON5 support for hexadecimal integers, with test cases yajl_parse_integer still doesn't handle LLONG_MIN in base 10 or 16. 
--- src/yajl_lex.c | 33 ++++++++++++++++- src/yajl_lex.h | 4 +- src/yajl_parser.c | 47 +++++++++++++++++------- test/parsing/cases/a5_integers.json | 11 +++++- test/parsing/cases/a5_integers.json.gold | 31 +++++++++++++++- test/parsing/cases/hex.json | 1 + test/parsing/cases/hex.json.gold | 2 + 7 files changed, 109 insertions(+), 20 deletions(-) create mode 100644 test/parsing/cases/hex.json create mode 100644 test/parsing/cases/hex.json.gold diff --git a/src/yajl_lex.c b/src/yajl_lex.c index d751e140..c77fd374 100644 --- a/src/yajl_lex.c +++ b/src/yajl_lex.c @@ -380,6 +380,7 @@ yajl_lex_number(yajl_lexer lexer, const unsigned char * jsonText, * _beyond_ in order to know that they are complete. There * is an ambiguous case for integers at EOF. */ + const char hexDigits[] = "0123456789abcdefABCDEF"; unsigned char c; int numRd = 0; @@ -394,11 +395,16 @@ yajl_lex_number(yajl_lexer lexer, const unsigned char * jsonText, c = readChar(lexer, jsonText, offset); } - /* a single zero, or a series of integers */ + /* a single zero, hex number, or a series of decimal digits */ if (c == '0') { numRd++; RETURN_IF_EOF; c = readChar(lexer, jsonText, offset); + if (c == 'x' || c == 'X') { + if (lexer->allowJson5) goto got_hex; + lexer->error = yajl_lex_unallowed_hex_integer; + return yajl_tok_error; + } } else if (c >= '1' && c <= '9') { do { numRd++; @@ -415,7 +421,7 @@ yajl_lex_number(yajl_lexer lexer, const unsigned char * jsonText, /* optional fraction (indicates this is floating point) */ if (c == '.') { - got_decimal: + got_decimal: RETURN_IF_EOF; c = readChar(lexer, jsonText, offset); if (!lexer->allowJson5) numRd = 0; @@ -458,6 +464,25 @@ yajl_lex_number(yajl_lexer lexer, const unsigned char * jsonText, tok = yajl_tok_double; } + goto end_number; + + got_hex: + RETURN_IF_EOF; + c = readChar(lexer, jsonText, offset); + + if (strchr(hexDigits, c)) { + do { + RETURN_IF_EOF; + c = readChar(lexer, jsonText, offset); + } while (strchr(hexDigits, c)); + } + else { + 
unreadChar(lexer, offset); + lexer->error = yajl_lex_missing_hex_digit_after_0x; + return yajl_tok_error; + } + + end_number: /* we always go "one too far" */ unreadChar(lexer, offset); @@ -732,6 +757,10 @@ yajl_lex_error_to_string(yajl_lex_error error) case yajl_lex_unallowed_comment: return "probable comment found in input text, comments are " "not enabled."; + case yajl_lex_missing_hex_digit_after_0x: + return "malformed number, a hex digit is required after the 0x/0X."; + case yajl_lex_unallowed_hex_integer: + return "probable hex number found, JSON5 is not enabled."; } return "unknown error code"; } diff --git a/src/yajl_lex.h b/src/yajl_lex.h index 5df858f1..761d1b81 100644 --- a/src/yajl_lex.h +++ b/src/yajl_lex.h @@ -96,7 +96,9 @@ typedef enum { yajl_lex_missing_integer_after_decimal, yajl_lex_missing_integer_after_exponent, yajl_lex_missing_integer_after_minus, - yajl_lex_unallowed_comment + yajl_lex_unallowed_comment, + yajl_lex_missing_hex_digit_after_0x, + yajl_lex_unallowed_hex_integer, } yajl_lex_error; const char * yajl_lex_error_to_string(yajl_lex_error error); diff --git a/src/yajl_parser.c b/src/yajl_parser.c index f22e5275..eea5afd6 100644 --- a/src/yajl_parser.c +++ b/src/yajl_parser.c @@ -29,33 +29,52 @@ #include #include -#define MAX_VALUE_TO_MULTIPLY ((LLONG_MAX / 10) + (LLONG_MAX % 10)) - - /* same semantics as strtol */ long long yajl_parse_integer(const unsigned char *number, unsigned int length) { long long ret = 0; long sign = 1; + long base = 10; + long long max = LLONG_MAX / base; const unsigned char *pos = number; - if (*pos == '-') { pos++; sign = -1; } - if (*pos == '+') { pos++; } + const unsigned char *end = number + length; - while (pos < number + length) { - if ( ret > MAX_VALUE_TO_MULTIPLY ) { - errno = ERANGE; - return sign == 1 ? 
LLONG_MAX : LLONG_MIN; - } - ret *= 10; - if (LLONG_MAX - ret < (*pos - '0')) { + if (*pos == '-') { + pos++; + sign = -1; + } + else if (*pos == '+') { + pos++; + } + + if (*pos == '0' && + (pos[1] == 'x' || pos[1] == 'X')) { + base = 16; + max = LLONG_MAX / base; + pos += 2; + } + + while (pos < end) { + int digit; + + if (ret > max) { errno = ERANGE; return sign == 1 ? LLONG_MAX : LLONG_MIN; } - if (*pos < '0' || *pos > '9') { + + ret *= base; + digit = *pos++ - '0'; + /* Don't have to check for non-digit characters, + * the lexer has already rejected any bad digits. + */ + if (digit > 9) + digit = (digit - ('A' - '0') + 10) & 0xf; + + if (LLONG_MAX - ret < digit) { errno = ERANGE; return sign == 1 ? LLONG_MAX : LLONG_MIN; } - ret += (*pos++ - '0'); + ret += digit; } return sign * ret; diff --git a/test/parsing/cases/a5_integers.json b/test/parsing/cases/a5_integers.json index 96b96ffa..3bc84562 100644 --- a/test/parsing/cases/a5_integers.json +++ b/test/parsing/cases/a5_integers.json @@ -1,3 +1,10 @@ -[ +1,+2,+3,+4,+5,+6,+7, +[ +1,+2,+3,+4,+5,+6,+7,+8,+9, + 0x1,0x2,0x3,0x4,0x5,0x6,0x7,0x8,0x9, + 0xa,0xb,0xc,0xd,0xe,0xf, + 0xA,0xB,0xC,0xD,0xE,0xF, + +0xfedcba98, -0x6789ABCD, +123456789 , -123456789, - +2147483647, -2147483647 ] + +2147483647, -2147483648, + 0x7fffFFFFffffFFFF, -0x7FFFffffFFFFffff, + 9223372036854775807, -9223372036854775807 +] diff --git a/test/parsing/cases/a5_integers.json.gold b/test/parsing/cases/a5_integers.json.gold index 7ac4bee1..98a29973 100644 --- a/test/parsing/cases/a5_integers.json.gold +++ b/test/parsing/cases/a5_integers.json.gold @@ -6,9 +6,38 @@ integer: 4 integer: 5 integer: 6 integer: 7 +integer: 8 +integer: 9 +integer: 1 +integer: 2 +integer: 3 +integer: 4 +integer: 5 +integer: 6 +integer: 7 +integer: 8 +integer: 9 +integer: 10 +integer: 11 +integer: 12 +integer: 13 +integer: 14 +integer: 15 +integer: 10 +integer: 11 +integer: 12 +integer: 13 +integer: 14 +integer: 15 +integer: 4275878552 +integer: -1737075661 integer: 
123456789 integer: -123456789 integer: 2147483647 -integer: -2147483647 +integer: -2147483648 +integer: 9223372036854775807 +integer: -9223372036854775807 +integer: 9223372036854775807 +integer: -9223372036854775807 array close ']' memory leaks: 0 diff --git a/test/parsing/cases/hex.json b/test/parsing/cases/hex.json new file mode 100644 index 00000000..dc610764 --- /dev/null +++ b/test/parsing/cases/hex.json @@ -0,0 +1 @@ +0x1 diff --git a/test/parsing/cases/hex.json.gold b/test/parsing/cases/hex.json.gold new file mode 100644 index 00000000..38667f07 --- /dev/null +++ b/test/parsing/cases/hex.json.gold @@ -0,0 +1,2 @@ +lexical error: probable hex number found, JSON5 is not enabled. +memory leaks: 0 From 1426c455dcf96963c52b7c54e27e237f8b36d610 Mon Sep 17 00:00:00 2001 From: Andrew Johnson Date: Sat, 18 Aug 2018 01:02:38 -0500 Subject: [PATCH 07/18] Added JSON5 support for parsing special numbers, with test cases NaN and both Infinities. Special handling was added to yajl_test since different OSs don't always generate the same output for special numbers (nan/NaN/...). 
--- src/yajl_lex.c | 107 +++++++++++------- src/yajl_lex.h | 1 + test/parsing/cases/a5_doubles.json | 2 +- test/parsing/cases/a5_doubles.json.gold | 4 + test/parsing/cases/infinity.json | 1 + test/parsing/cases/infinity.json.gold | 2 + .../parsing/cases/lonely_minus_sign.json.gold | 2 +- test/parsing/cases/minus_infinity.json | 1 + test/parsing/cases/minus_infinity.json.gold | 2 + test/parsing/cases/nan.json | 1 + test/parsing/cases/nan.json.gold | 2 + test/parsing/yajl_test.c | 14 ++- 12 files changed, 93 insertions(+), 46 deletions(-) create mode 100644 test/parsing/cases/infinity.json create mode 100644 test/parsing/cases/infinity.json.gold create mode 100644 test/parsing/cases/minus_infinity.json create mode 100644 test/parsing/cases/minus_infinity.json.gold create mode 100644 test/parsing/cases/nan.json create mode 100644 test/parsing/cases/nan.json.gold diff --git a/src/yajl_lex.c b/src/yajl_lex.c index c77fd374..2c02f21e 100644 --- a/src/yajl_lex.c +++ b/src/yajl_lex.c @@ -395,6 +395,25 @@ yajl_lex_number(yajl_lexer lexer, const unsigned char * jsonText, c = readChar(lexer, jsonText, offset); } + if (c == 'I') { + const char * want = "nfinity"; + do { + RETURN_IF_EOF; + c = readChar(lexer, jsonText, offset); + if (c != *want) { + unreadChar(lexer, offset); + lexer->error = yajl_lex_invalid_string; + return yajl_tok_error; + } + } while (*(++want)); + if (!lexer->allowJson5) { + unreadChar(lexer, offset); + lexer->error = yajl_lex_unallowed_special_number; + return yajl_tok_error; + } + return yajl_tok_double; + } + /* a single zero, hex number, or a series of decimal digits */ if (c == '0') { numRd++; @@ -530,6 +549,23 @@ yajl_lex_comment(yajl_lexer lexer, const unsigned char * jsonText, return tok; } +/* Macro to reduce code duplication in yajl_lex_lex() */ +#define LEX_WANT(tring) \ + const char * want = tring; \ + do { \ + if (*offset >= jsonTextLen) { \ + tok = yajl_tok_eof; \ + goto lexed; \ + } \ + c = readChar(lexer, jsonText, offset); \ + if (c != 
*want) { \ + unreadChar(lexer, offset); \ + lexer->error = yajl_lex_invalid_string; \ + tok = yajl_tok_error; \ + goto lexed; \ + } \ + } while (*(++want)) + yajl_tok yajl_lex_lex(yajl_lexer lexer, const unsigned char * jsonText, size_t jsonTextLen, size_t * offset, @@ -575,59 +611,42 @@ yajl_lex_lex(yajl_lexer lexer, const unsigned char * jsonText, startOffset++; break; case 't': { - const char * want = "rue"; - do { - if (*offset >= jsonTextLen) { - tok = yajl_tok_eof; - goto lexed; - } - c = readChar(lexer, jsonText, offset); - if (c != *want) { - unreadChar(lexer, offset); - lexer->error = yajl_lex_invalid_string; - tok = yajl_tok_error; - goto lexed; - } - } while (*(++want)); + LEX_WANT("rue"); tok = yajl_tok_bool; goto lexed; } case 'f': { - const char * want = "alse"; - do { - if (*offset >= jsonTextLen) { - tok = yajl_tok_eof; - goto lexed; - } - c = readChar(lexer, jsonText, offset); - if (c != *want) { - unreadChar(lexer, offset); - lexer->error = yajl_lex_invalid_string; - tok = yajl_tok_error; - goto lexed; - } - } while (*(++want)); + LEX_WANT("alse"); tok = yajl_tok_bool; goto lexed; } case 'n': { - const char * want = "ull"; - do { - if (*offset >= jsonTextLen) { - tok = yajl_tok_eof; - goto lexed; - } - c = readChar(lexer, jsonText, offset); - if (c != *want) { - unreadChar(lexer, offset); - lexer->error = yajl_lex_invalid_string; - tok = yajl_tok_error; - goto lexed; - } - } while (*(++want)); + LEX_WANT("ull"); tok = yajl_tok_null; goto lexed; } + case 'I': { + LEX_WANT("nfinity"); + if (!lexer->allowJson5) { + unreadChar(lexer, offset); + lexer->error = yajl_lex_unallowed_special_number; + tok = yajl_tok_error; + } else { + tok = yajl_tok_double; + } + goto lexed; + } + case 'N': { + LEX_WANT("aN"); + if (!lexer->allowJson5) { + unreadChar(lexer, offset); + lexer->error = yajl_lex_unallowed_special_number; + tok = yajl_tok_error; + } else { + tok = yajl_tok_double; + } + goto lexed; + } case '"': { tok = yajl_lex_string(lexer, (const unsigned 
char *) jsonText, jsonTextLen, offset); @@ -753,7 +772,7 @@ yajl_lex_error_to_string(yajl_lex_error error) "decimal point."; case yajl_lex_missing_integer_after_minus: return "malformed number, a digit is required after the " - "minus sign."; + "plus/minus sign."; case yajl_lex_unallowed_comment: return "probable comment found in input text, comments are " "not enabled."; @@ -761,6 +780,8 @@ yajl_lex_error_to_string(yajl_lex_error error) return "malformed number, a hex digit is required after the 0x/0X."; case yajl_lex_unallowed_hex_integer: return "probable hex number found, JSON5 is not enabled."; + case yajl_lex_unallowed_special_number: + return "special number Infinity or NaN found, JSON5 is not enabled."; } return "unknown error code"; } diff --git a/src/yajl_lex.h b/src/yajl_lex.h index 761d1b81..17fd3038 100644 --- a/src/yajl_lex.h +++ b/src/yajl_lex.h @@ -99,6 +99,7 @@ typedef enum { yajl_lex_unallowed_comment, yajl_lex_missing_hex_digit_after_0x, yajl_lex_unallowed_hex_integer, + yajl_lex_unallowed_special_number, } yajl_lex_error; const char * yajl_lex_error_to_string(yajl_lex_error error); diff --git a/test/parsing/cases/a5_doubles.json b/test/parsing/cases/a5_doubles.json index 5a3bf8da..342273f0 100644 --- a/test/parsing/cases/a5_doubles.json +++ b/test/parsing/cases/a5_doubles.json @@ -1 +1 @@ -[ .1e2, 10., +3.141569, -.1e4] +[ .1e2, 10., +3.141569, -.1e4, NaN, Infinity, +Infinity, -Infinity ] diff --git a/test/parsing/cases/a5_doubles.json.gold b/test/parsing/cases/a5_doubles.json.gold index 76d927cb..8ecafd25 100644 --- a/test/parsing/cases/a5_doubles.json.gold +++ b/test/parsing/cases/a5_doubles.json.gold @@ -3,5 +3,9 @@ double: 10 double: 10 double: 3.14157 double: -1000 +double: NaN +double: Infinity +double: Infinity +double: -Infinity array close ']' memory leaks: 0 diff --git a/test/parsing/cases/infinity.json b/test/parsing/cases/infinity.json new file mode 100644 index 00000000..3c62151d --- /dev/null +++ b/test/parsing/cases/infinity.json 
@@ -0,0 +1 @@ +Infinity diff --git a/test/parsing/cases/infinity.json.gold b/test/parsing/cases/infinity.json.gold new file mode 100644 index 00000000..3a65d995 --- /dev/null +++ b/test/parsing/cases/infinity.json.gold @@ -0,0 +1,2 @@ +lexical error: special number Infinity or NaN found, JSON5 is not enabled. +memory leaks: 0 diff --git a/test/parsing/cases/lonely_minus_sign.json.gold b/test/parsing/cases/lonely_minus_sign.json.gold index d15ede9b..f6789796 100644 --- a/test/parsing/cases/lonely_minus_sign.json.gold +++ b/test/parsing/cases/lonely_minus_sign.json.gold @@ -5,5 +5,5 @@ bool: true string: 'blue' string: 'baby where are you?' string: 'oh boo hoo!' -lexical error: malformed number, a digit is required after the minus sign. +lexical error: malformed number, a digit is required after the plus/minus sign. memory leaks: 0 diff --git a/test/parsing/cases/minus_infinity.json b/test/parsing/cases/minus_infinity.json new file mode 100644 index 00000000..879e80ee --- /dev/null +++ b/test/parsing/cases/minus_infinity.json @@ -0,0 +1 @@ +-Infinity diff --git a/test/parsing/cases/minus_infinity.json.gold b/test/parsing/cases/minus_infinity.json.gold new file mode 100644 index 00000000..3a65d995 --- /dev/null +++ b/test/parsing/cases/minus_infinity.json.gold @@ -0,0 +1,2 @@ +lexical error: special number Infinity or NaN found, JSON5 is not enabled. +memory leaks: 0 diff --git a/test/parsing/cases/nan.json b/test/parsing/cases/nan.json new file mode 100644 index 00000000..736991a1 --- /dev/null +++ b/test/parsing/cases/nan.json @@ -0,0 +1 @@ +NaN diff --git a/test/parsing/cases/nan.json.gold b/test/parsing/cases/nan.json.gold new file mode 100644 index 00000000..3a65d995 --- /dev/null +++ b/test/parsing/cases/nan.json.gold @@ -0,0 +1,2 @@ +lexical error: special number Infinity or NaN found, JSON5 is not enabled. 
+memory leaks: 0 diff --git a/test/parsing/yajl_test.c b/test/parsing/yajl_test.c index 8affe8e5..af47d6c4 100644 --- a/test/parsing/yajl_test.c +++ b/test/parsing/yajl_test.c @@ -20,6 +20,7 @@ #include <stdio.h> #include <stdlib.h> #include <string.h> +#include <math.h> #include <assert.h> @@ -85,7 +86,18 @@ static int test_yajl_integer(void *ctx, long long integerVal) static int test_yajl_double(void *ctx, double doubleVal) { - printf("double: %g\n", doubleVal); + if (doubleVal != doubleVal) { + printf("double: NaN\n"); + } + else if (doubleVal == HUGE_VAL) { + printf("double: Infinity\n"); + } + else if (doubleVal == -HUGE_VAL) { + printf("double: -Infinity\n"); + } + else { + printf("double: %g\n", doubleVal); + } return 1; } From 1df4f86fb1b8a8d22f4c2c3401c44be5946c2ab3 Mon Sep 17 00:00:00 2001 From: Andrew Johnson Date: Sun, 19 Aug 2018 13:42:19 -0500 Subject: [PATCH 08/18] Add yajl_gen_json5 option and generator support for special numbers When this flag is set, the yajl_gen_double() routine can output the values NaN, -Infinity and +Infinity. --- src/api/yajl_gen.h | 18 ++++++++++++++---- src/yajl_gen.c | 22 +++++++++++++++++----- 2 files changed, 31 insertions(+), 9 deletions(-) diff --git a/src/api/yajl_gen.h b/src/api/yajl_gen.h index a74cff1b..78ed3fb7 100644 --- a/src/api/yajl_gen.h +++ b/src/api/yajl_gen.h @@ -98,7 +98,14 @@ extern "C" { * iterest of saving bytes. Setting this flag will cause YAJL to * always escape '/' in generated JSON strings. */ - yajl_gen_escape_solidus = 0x10 + yajl_gen_escape_solidus = 0x10, + /** + * Special numbers such as NaN and Infinity cannot be represented in + * the original JSON, but are permitted in JSON5. Setting this flag + * allows yajl_gen_double to output the JSON5 representation of these + * special numbers instead of returning with an error.
+ */ + yajl_gen_json5 = 0x20, } yajl_gen_option; /** allow the modification of generator options subsequent to handle @@ -121,9 +128,12 @@ extern "C" { YAJL_API void yajl_gen_free(yajl_gen handle); YAJL_API yajl_gen_status yajl_gen_integer(yajl_gen hand, long long int number); - /** generate a floating point number. number may not be infinity or - * NaN, as these have no representation in JSON. In these cases the - * generator will return 'yajl_gen_invalid_number' */ + /** generate a floating point number + * \param number the value to output, which may only be Infinity or NaN + * if the yajl_gen_json5 flag is set, as these values have + * no legal representation in JSON. In these cases the + * generator will return 'yajl_gen_invalid_number' + */ YAJL_API yajl_gen_status yajl_gen_double(yajl_gen hand, double number); YAJL_API yajl_gen_status yajl_gen_number(yajl_gen hand, const char * num, diff --git a/src/yajl_gen.c b/src/yajl_gen.c index 0f5c68e8..3e43e9cd 100644 --- a/src/yajl_gen.c +++ b/src/yajl_gen.c @@ -58,6 +58,7 @@ yajl_gen_config(yajl_gen g, yajl_gen_option opt, ...) case yajl_gen_beautify: case yajl_gen_validate_utf8: case yajl_gen_escape_solidus: + case yajl_gen_json5: if (va_arg(ap, int)) g->flags |= opt; else g->flags &= ~opt; break; @@ -227,13 +228,24 @@ yajl_gen_status yajl_gen_double(yajl_gen g, double number) { char i[32]; + int special = 1; ENSURE_VALID_STATE; ENSURE_NOT_KEY; - if (isnan(number) || isinf(number)) return yajl_gen_invalid_number; - INSERT_SEP; INSERT_WHITESPACE; - sprintf(i, "%.20g", number); - if (strspn(i, "0123456789-") == strlen(i)) { - strcat(i, ".0"); + if (isnan(number)) { + strcpy(i, "NaN"); + } + else if (isinf(number)) { + sprintf(i, "%cInfinity", number < 0 ? 
'-' : '+'); } + else { + special = 0; + sprintf(i, "%.20g", number); + if (strspn(i, "0123456789-") == strlen(i)) { + strcat(i, ".0"); + } + } + if (special && !(g->flags & yajl_gen_json5)) + return yajl_gen_invalid_number; + INSERT_SEP; INSERT_WHITESPACE; g->print(g->ctx, i, (unsigned int)strlen(i)); APPENDED_ATOM; FINAL_NEWLINE; From 3280eb642b972668193d76b4fe0d2036f187253d Mon Sep 17 00:00:00 2001 From: Andrew Johnson Date: Sun, 19 Aug 2018 13:49:54 -0500 Subject: [PATCH 09/18] Modify json_reformat for JSON5 Set yajl_gen_json5. Replace reformat_number with reformat_integer and reformat_double. --- reformatter/json_reformat.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/reformatter/json_reformat.c b/reformatter/json_reformat.c index 7ec028fe..0e6958c1 100644 --- a/reformatter/json_reformat.c +++ b/reformatter/json_reformat.c @@ -45,10 +45,16 @@ static int reformat_boolean(void * ctx, int boolean) GEN_AND_RETURN(yajl_gen_bool(g, boolean)); } -static int reformat_number(void * ctx, const char * s, size_t l) +static int reformat_integer(void * ctx, long long int i) { yajl_gen g = (yajl_gen) ctx; - GEN_AND_RETURN(yajl_gen_number(g, s, l)); + GEN_AND_RETURN(yajl_gen_integer(g, i)); +} + +static int reformat_double(void * ctx, double d) +{ + yajl_gen g = (yajl_gen) ctx; + GEN_AND_RETURN(yajl_gen_double(g, d)); } static int reformat_string(void * ctx, const unsigned char * stringVal, @@ -93,9 +99,9 @@ static int reformat_end_array(void * ctx) static yajl_callbacks callbacks = { reformat_null, reformat_boolean, + reformat_integer, + reformat_double, NULL, - NULL, - reformat_number, reformat_string, reformat_start_map, reformat_map_key, @@ -146,6 +152,7 @@ main(int argc, char ** argv) switch (argv[a][i]) { case '5': yajl_config(hand, yajl_allow_json5, 1); + yajl_gen_config(g, yajl_gen_json5, 1); break; case 'm': yajl_gen_config(g, yajl_gen_beautify, 0); From 3a2a88f03da53f5f414e661a5d571ebbb4487ec4 Mon Sep 17 00:00:00 2001 From: 
Andrew Johnson Date: Mon, 20 Aug 2018 23:54:09 -0500 Subject: [PATCH 10/18] JSON5 support for generating unquoted map keys Added a new routine to yajl_encode.c that validates bare identifiers. Use this in yajl_gen_string() to avoid quoting keys we don't have to. Added a separate -g option to json_reformat to distinguish JSON5 output from the -5 option that flags input as being JSON5. --- reformatter/json_reformat.c | 3 +++ src/yajl_encode.c | 44 ++++++++++++++++++++++++++++--------- src/yajl_encode.h | 2 ++ src/yajl_gen.c | 15 ++++++++++--- 4 files changed, 51 insertions(+), 13 deletions(-) diff --git a/reformatter/json_reformat.c b/reformatter/json_reformat.c index 0e6958c1..fd1e0eba 100644 --- a/reformatter/json_reformat.c +++ b/reformatter/json_reformat.c @@ -116,6 +116,7 @@ usage(const char * progname) fprintf(stderr, "%s: reformat json from stdin\n" "usage: json_reformat [options]\n" " -5 allow JSON5 input\n" + " -g generate JSON5 output\n" " -e escape any forward slashes (for embedding in HTML)\n" " -m minimize json rather than beautify (default)\n" " -s reformat a stream of multiple json entites\n" @@ -152,6 +153,8 @@ main(int argc, char ** argv) switch (argv[a][i]) { case '5': yajl_config(hand, yajl_allow_json5, 1); + break; + case 'g': yajl_gen_config(g, yajl_gen_json5, 1); break; case 'm': diff --git a/src/yajl_encode.c b/src/yajl_encode.c index fd082581..5d7141a0 100644 --- a/src/yajl_encode.c +++ b/src/yajl_encode.c @@ -87,7 +87,7 @@ static void hexToDigit(unsigned int * val, const unsigned char * hex) } } -static void Utf32toUtf8(unsigned int codepoint, char * utf8Buf) +static void Utf32toUtf8(unsigned int codepoint, char * utf8Buf) { if (codepoint < 0x80) { utf8Buf[0] = (char) codepoint; @@ -117,7 +117,7 @@ void yajl_string_decode(yajl_buf buf, const unsigned char * str, size_t len) { size_t beg = 0; - size_t end = 0; + size_t end = 0; while (end < len) { if (str[end] == '\\') { @@ -144,8 +144,8 @@ void yajl_string_decode(yajl_buf buf, const unsigned 
char * str, unsigned int surrogate = 0; hexToDigit(&surrogate, str + end + 2); codepoint = - (((codepoint & 0x3F) << 10) | - ((((codepoint >> 6) & 0xF) + 1) << 16) | + (((codepoint & 0x3F) << 10) | + ((((codepoint >> 6) & 0xF) + 1) << 16) | (surrogate & 0x3FF)); end += 5; } else { @@ -153,7 +153,7 @@ void yajl_string_decode(yajl_buf buf, const unsigned char * str, break; } } - + Utf32toUtf8(codepoint, utf8Buf); unescaped = utf8Buf; @@ -183,13 +183,13 @@ int yajl_string_validate_utf8(const unsigned char * s, size_t len) { if (!len) return 1; if (!s) return 0; - + while (len--) { /* single byte */ if (*s <= 0x7f) { /* noop */ } - /* two byte */ + /* two byte */ else if ((*s >> 5) == 0x6) { ADV_PTR; if (!((*s >> 6) == 0x2)) return 0; @@ -201,7 +201,7 @@ int yajl_string_validate_utf8(const unsigned char * s, size_t len) ADV_PTR; if (!((*s >> 6) == 0x2)) return 0; } - /* four byte */ + /* four byte */ else if ((*s >> 3) == 0x1e) { ADV_PTR; if (!((*s >> 6) == 0x2)) return 0; @@ -212,9 +212,33 @@ int yajl_string_validate_utf8(const unsigned char * s, size_t len) } else { return 0; } - + s++; } - + + return 1; +} + +int yajl_string_validate_identifier(const unsigned char * str, size_t len) +{ + const unsigned char * s = str; + int c; + + if (!len || !str) return 0; + + c = *s++; /* First character [$_A-Za-z] */ + if ((c != '$' && c < 'A') || + (c > 'Z' && c != '_' && c < 'a') || + (c > 'z')) + return 0; + + while (--len) { + c = *s++; /* Remaining characters [$_A-Za-z0-9] */ + if ((c != '$' && c < '0') || + (c > '9' && c < 'A') || + (c > 'Z' && c != '_' && c < 'a') || + (c > 'z')) + return 0; + } return 1; } diff --git a/src/yajl_encode.h b/src/yajl_encode.h index 853a1a70..7f3cdc3e 100644 --- a/src/yajl_encode.h +++ b/src/yajl_encode.h @@ -31,4 +31,6 @@ void yajl_string_decode(yajl_buf buf, const unsigned char * str, int yajl_string_validate_utf8(const unsigned char * s, size_t len); +int yajl_string_validate_identifier(const unsigned char * str, size_t len); + #endif 
diff --git a/src/yajl_gen.c b/src/yajl_gen.c index 3e43e9cd..d62ea144 100644 --- a/src/yajl_gen.c +++ b/src/yajl_gen.c @@ -275,9 +275,18 @@ yajl_gen_string(yajl_gen g, const unsigned char * str, } } ENSURE_VALID_STATE; INSERT_SEP; INSERT_WHITESPACE; - g->print(g->ctx, "\"", 1); - yajl_string_encode(g->print, g->ctx, str, len, g->flags & yajl_gen_escape_solidus); - g->print(g->ctx, "\"", 1); + if (g->flags & yajl_gen_json5 && + (g->state[g->depth] == yajl_gen_map_key || + g->state[g->depth] == yajl_gen_map_start) && + yajl_string_validate_identifier(str, len)) { + /* No need to quote this key */ + g->print(g->ctx, (const char *) str, len); + } + else { + g->print(g->ctx, "\"", 1); + yajl_string_encode(g->print, g->ctx, str, len, g->flags & yajl_gen_escape_solidus); + g->print(g->ctx, "\"", 1); + } APPENDED_ATOM; FINAL_NEWLINE; return yajl_gen_status_ok; From 9071ce3f2589b344dcec479ad49119d9a32bdd16 Mon Sep 17 00:00:00 2001 From: Andrew Johnson Date: Tue, 21 Aug 2018 21:24:57 -0500 Subject: [PATCH 11/18] Accept unquoted identifiers as map keys, with test case Adds another lexer entry point for lexing map keys only, adjust parser to use this instead of the general lexer. Also defines another lexer token for internal use only. --- src/yajl_lex.c | 169 ++++++++++++++++-- src/yajl_lex.h | 15 +- src/yajl_parser.c | 7 +- test/parsing/cases/a5_map_identifiers.json | 11 ++ .../cases/a5_map_identifiers.json.gold | 21 +++ 5 files changed, 202 insertions(+), 21 deletions(-) create mode 100644 test/parsing/cases/a5_map_identifiers.json create mode 100644 test/parsing/cases/a5_map_identifiers.json.gold diff --git a/src/yajl_lex.c b/src/yajl_lex.c index 2c02f21e..7cf6275e 100644 --- a/src/yajl_lex.c +++ b/src/yajl_lex.c @@ -126,19 +126,21 @@ yajl_lex_free(yajl_lexer lxr) return; } -/* a lookup table which lets us quickly determine three things: +/* a lookup table which lets us quickly determine various things: * VEC - valid escaped control char - * note. 
the solidus '/' may be escaped or not. + * Note: the solidus '/' may be escaped or not. * IJC - invalid json char * VHC - valid hex char * NFP - needs further processing (from a string scanning perspective) * NUC - needs utf8 checking when enabled (from a string scanning perspective) + * VIC - valid identifier char (after the first char) */ #define VEC 0x01 #define IJC 0x02 #define VHC 0x04 #define NFP 0x08 #define NUC 0x10 +#define VIC 0x20 static const char charLookupTable[256] = { @@ -147,20 +149,20 @@ static const char charLookupTable[256] = /*10*/ IJC , IJC , IJC , IJC , IJC , IJC , IJC , IJC , /*18*/ IJC , IJC , IJC , IJC , IJC , IJC , IJC , IJC , -/*20*/ 0 , 0 , NFP|VEC|IJC, 0 , 0 , 0 , 0 , 0 , +/*20*/ 0 , 0 , NFP|VEC|IJC, 0 , VIC , 0 , 0 , 0 , /*28*/ 0 , 0 , 0 , 0 , 0 , 0 , 0 , VEC , -/*30*/ VHC , VHC , VHC , VHC , VHC , VHC , VHC , VHC , -/*38*/ VHC , VHC , 0 , 0 , 0 , 0 , 0 , 0 , +/*30*/ VHC|VIC, VHC|VIC, VHC|VIC, VHC|VIC, VHC|VIC, VHC|VIC, VHC|VIC, VHC|VIC, +/*38*/ VHC|VIC, VHC|VIC, 0 , 0 , 0 , 0 , 0 , 0 , -/*40*/ 0 , VHC , VHC , VHC , VHC , VHC , VHC , 0 , -/*48*/ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , -/*50*/ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , -/*58*/ 0 , 0 , 0 , 0 , NFP|VEC|IJC, 0 , 0 , 0 , +/*40*/ 0 , VHC|VIC, VHC|VIC, VHC|VIC, VHC|VIC, VHC|VIC, VHC|VIC, VIC , +/*48*/ VIC , VIC , VIC , VIC , VIC , VIC , VIC , VIC , +/*50*/ VIC , VIC , VIC , VIC , VIC , VIC , VIC , VIC , +/*58*/ VIC , VIC , VIC , 0 , NFP|VEC|IJC, 0 , 0 , VIC , -/*60*/ 0 , VHC , VEC|VHC, VHC , VHC , VHC , VEC|VHC, 0 , -/*68*/ 0 , 0 , 0 , 0 , 0 , 0 , VEC , 0 , -/*70*/ 0 , 0 , VEC , 0 , VEC , 0 , 0 , 0 , -/*78*/ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , +/*60*/ 0 , VHC|VIC, VEC|VHC|VIC, VHC|VIC, VHC|VIC, VHC|VIC, VEC|VHC|VIC, VIC, +/*68*/ VIC , VIC , VIC , VIC , VIC , VIC , VEC|VIC, VIC , +/*70*/ VIC , VIC , VEC , VIC , VEC|VIC, VIC , VIC , VIC , +/*78*/ VIC , VIC , VIC , 0 , 0 , 0 , 0 , 0 , NUC , NUC , NUC , NUC , NUC , NUC , NUC , NUC , NUC , NUC , NUC , NUC , NUC , NUC , NUC , NUC , @@ -372,14 +374,31 @@ 
yajl_lex_string(yajl_lexer lexer, const unsigned char * jsonText, #define RETURN_IF_EOF if (*offset >= jsonTextLen) return yajl_tok_eof; +/* For both identifiers and numbers, we always have to lex one + * character too many to know when they are complete. + */ + static yajl_tok -yajl_lex_number(yajl_lexer lexer, const unsigned char * jsonText, +yajl_lex_identifier(yajl_lexer lexer, const unsigned char * jsonText, size_t jsonTextLen, size_t * offset) { - /** XXX: numbers are the only entities in json that we must lex - * _beyond_ in order to know that they are complete. There - * is an ambiguous case for integers at EOF. */ + unsigned char c; + do { + RETURN_IF_EOF; + c = readChar(lexer, jsonText, offset); + } while (charLookupTable[c] & VIC); + + /* we always go "one too far" */ + unreadChar(lexer, offset); + + return yajl_tok_identifier; +} + +static yajl_tok +yajl_lex_number(yajl_lexer lexer, const unsigned char * jsonText, + size_t jsonTextLen, size_t * offset) +{ const char hexDigits[] = "0123456789abcdefABCDEF"; unsigned char c; int numRd = 0; @@ -728,6 +747,122 @@ yajl_lex_lex(yajl_lexer lexer, const unsigned char * jsonText, *outLen -= 2; } +#ifdef YAJL_LEXER_DEBUG + if (tok == yajl_tok_error) { + printf("lexical error: %s\n", + yajl_lex_error_to_string(yajl_lex_get_error(lexer))); + } else if (tok == yajl_tok_eof) { + printf("EOF hit\n"); + } else { + printf("lexed %s: '", tokToStr(tok)); + fwrite(*outBuf, 1, *outLen, stdout); + printf("'\n"); + } +#endif + + return tok; +} + +yajl_tok yajl_lex_key(yajl_lexer lexer, const unsigned char * jsonText, + size_t jsonTextLen, size_t * offset, + const unsigned char ** outBuf, size_t * outLen) +{ + yajl_tok tok = yajl_tok_error; + unsigned char c; + size_t startOffset = *offset; + + *outBuf = NULL; + *outLen = 0; + + for (;;) { + assert(*offset <= jsonTextLen); + + if (*offset >= jsonTextLen) { + tok = yajl_tok_eof; + goto lexed; + } + + c = readChar(lexer, jsonText, offset); + + switch (c) { + case '\t': case '\n': 
case '\v': case '\f': case '\r': case ' ': + startOffset++; + break; + case '}': + tok = yajl_tok_right_brace; + goto lexed; + case '"': { + tok = yajl_lex_string(lexer, jsonText, jsonTextLen, offset); + goto lexed; + } + case '/': + /* If comments are disabled this is an error. */ + if (!lexer->allowComments) { + unreadChar(lexer, offset); + lexer->error = yajl_lex_unallowed_comment; + tok = yajl_tok_error; + goto lexed; + } + /* Comments are enabled, so lex it. + * Possible outcomes are: + * - successful lex (tok_comment, which means continue), + * - malformed comment opening (slash not followed by + * '*' or '/') (tok_error) + * - eof hit. (tok_eof) */ + tok = yajl_lex_comment(lexer, jsonText, jsonTextLen, offset); + if (tok == yajl_tok_comment) { + /* "error" is silly, but that's the initial + * state of tok. guilty until proven innocent. */ + tok = yajl_tok_error; + yajl_buf_clear(lexer->buf); + lexer->bufInUse = 0; + startOffset = *offset; + break; + } + /* hit error or eof, bail */ + goto lexed; + default: + if (lexer->allowJson5 && (c == '$' || c == '_' || + (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'))) { + tok = yajl_lex_identifier(lexer, jsonText, jsonTextLen, offset); + } + else { + lexer->error = yajl_lex_invalid_char; + tok = yajl_tok_error; + } + goto lexed; + } + } + + lexed: + /* need to append to buffer if the buffer is in use or + * if it's an EOF token */ + if (tok == yajl_tok_eof || lexer->bufInUse) { + if (!lexer->bufInUse) yajl_buf_clear(lexer->buf); + lexer->bufInUse = 1; + yajl_buf_append(lexer->buf, jsonText + startOffset, *offset - startOffset); + lexer->bufOff = 0; + + if (tok != yajl_tok_eof) { + *outBuf = yajl_buf_data(lexer->buf); + *outLen = yajl_buf_len(lexer->buf); + lexer->bufInUse = 0; + } + } else if (tok != yajl_tok_error) { + *outBuf = jsonText + startOffset; + *outLen = *offset - startOffset; + } + + /* For strings skip the quotes. 
*/ + if (tok == yajl_tok_string || + tok == yajl_tok_string_with_escapes) { + assert(*outLen >= 2); + (*outBuf)++; + *outLen -= 2; + } + else if (tok == yajl_tok_identifier) { + tok = yajl_tok_string; + } #ifdef YAJL_LEXER_DEBUG if (tok == yajl_tok_error) { diff --git a/src/yajl_lex.h b/src/yajl_lex.h index 17fd3038..b23cb073 100644 --- a/src/yajl_lex.h +++ b/src/yajl_lex.h @@ -41,7 +41,12 @@ typedef enum { yajl_tok_string, yajl_tok_string_with_escapes, - /* comment tokens are not currently returned to the parser, ever */ + /* These tokens are used within the lexer and never seen by the parser: */ + + /* An unquoted map key, for JSON5 only, returned as yajl_tok_string */ + yajl_tok_identifier, + + /* A comment token, never returned */ yajl_tok_comment } yajl_tok; @@ -80,6 +85,14 @@ yajl_tok yajl_lex_lex(yajl_lexer lexer, const unsigned char * jsonText, size_t jsonTextLen, size_t * offset, const unsigned char ** outBuf, size_t * outLen); +/** + * A specialized version of yajl_lex_lex for use when the next token is + * a map key, which the parser knows. + */ +yajl_tok yajl_lex_key(yajl_lexer lexer, const unsigned char * jsonText, + size_t jsonTextLen, size_t * offset, + const unsigned char ** outBuf, size_t * outLen); + /** have a peek at the next token, but don't move the lexer forward */ yajl_tok yajl_lex_peek(yajl_lexer lexer, const unsigned char * jsonText, size_t jsonTextLen, size_t offset); diff --git a/src/yajl_parser.c b/src/yajl_parser.c index eea5afd6..966f4caa 100644 --- a/src/yajl_parser.c +++ b/src/yajl_parser.c @@ -398,8 +398,8 @@ yajl_do_parse(yajl_handle hand, const unsigned char * jsonText, case yajl_state_map_need_key: { /* only difference between these two states is that in * start '}' is valid, whereas in need_key, we've parsed - * a comma, and a string key _must_ follow */ - tok = yajl_lex_lex(hand->lexer, jsonText, jsonTextLen, + * a comma, so unless this is JSON5 a key _must_ follow. 
*/ + tok = yajl_lex_key(hand->lexer, jsonText, jsonTextLen, offset, &buf, &bufLen); switch (tok) { case yajl_tok_eof: @@ -437,7 +437,8 @@ yajl_do_parse(yajl_handle hand, const unsigned char * jsonText, } default: yajl_bs_set(hand->stateStack, yajl_state_parse_error); - hand->parseError = + hand->parseError = hand->flags & yajl_allow_json5 ? + "invalid object key (must be a string or identifier)" : "invalid object key (must be a string)"; goto around_again; } diff --git a/test/parsing/cases/a5_map_identifiers.json b/test/parsing/cases/a5_map_identifiers.json new file mode 100644 index 00000000..a7b5744d --- /dev/null +++ b/test/parsing/cases/a5_map_identifiers.json @@ -0,0 +1,11 @@ +{ + $:1, + _:2, + A:3, + Z:4, + a:5, + z:6, + $1:7, + _zz:8, + ZZ9$Zalpha:9 +} diff --git a/test/parsing/cases/a5_map_identifiers.json.gold b/test/parsing/cases/a5_map_identifiers.json.gold new file mode 100644 index 00000000..c829c91c --- /dev/null +++ b/test/parsing/cases/a5_map_identifiers.json.gold @@ -0,0 +1,21 @@ +map open '{' +key: '$' +integer: 1 +key: '_' +integer: 2 +key: 'A' +integer: 3 +key: 'Z' +integer: 4 +key: 'a' +integer: 5 +key: 'z' +integer: 6 +key: '$1' +integer: 7 +key: '_zz' +integer: 8 +key: 'ZZ9$Zalpha' +integer: 9 +map close '}' +memory leaks: 0 From 7cdec071acf04bfbca3f446fda8e6f08d4cf299e Mon Sep 17 00:00:00 2001 From: Andrew Johnson Date: Wed, 22 Aug 2018 00:38:00 -0500 Subject: [PATCH 12/18] Support for JSON5 character escapes in strings, with tests Any character other than the digits 1-9 may be preceded by a reverse solidus '\', and unless the combination has an explicitly defined expansion the character is included without the solidus. JSON5 adds \', \0 and \v to the set of defined escapes, and an escaped newline is omitted from a string. 
--- src/yajl_encode.c | 20 +++++++++++++++++--- src/yajl_lex.c | 10 +++++++++- test/parsing/cases/a5_strings.json | 11 +++++++++++ test/parsing/cases/a5_strings.json.gold | 9 +++++++++ 4 files changed, 46 insertions(+), 4 deletions(-) create mode 100644 test/parsing/cases/a5_strings.json create mode 100644 test/parsing/cases/a5_strings.json.gold diff --git a/src/yajl_encode.c b/src/yajl_encode.c index 5d7141a0..404c9f99 100644 --- a/src/yajl_encode.c +++ b/src/yajl_encode.c @@ -128,8 +128,6 @@ void yajl_string_decode(yajl_buf buf, const unsigned char * str, case 'r': unescaped = "\r"; break; case 'n': unescaped = "\n"; break; case '\\': unescaped = "\\"; break; - case '/': unescaped = "/"; break; - case '"': unescaped = "\""; break; case 'f': unescaped = "\f"; break; case 'b': unescaped = "\b"; break; case 't': unescaped = "\t"; break; @@ -165,8 +163,24 @@ void yajl_string_decode(yajl_buf buf, const unsigned char * str, break; } + /* The following escapes are only valid when parsing JSON5. + * The lexer catches them when allowJson5 is not set. + */ + case '\n': beg = ++end; continue; + case '\r': + if (str[++end] == '\n') ++end; + beg = end; + continue; + case '0': + utf8Buf[0] = '\0'; + yajl_buf_append(buf, utf8Buf, 1); + beg = ++end; + continue; + case 'v': unescaped = "\v"; break; default: - assert("this should never happen" == NULL); + utf8Buf[0] = str[end]; + utf8Buf[1] = 0; + unescaped = utf8Buf; } yajl_buf_append(buf, unescaped, (unsigned int)strlen(unescaped)); beg = ++end; diff --git a/src/yajl_lex.c b/src/yajl_lex.c index 7cf6275e..e8db0294 100644 --- a/src/yajl_lex.c +++ b/src/yajl_lex.c @@ -332,12 +332,20 @@ yajl_lex_string(yajl_lexer lexer, const unsigned char * jsonText, goto finish_string_lex; } } - } else if (!(charLookupTable[curChar] & VEC)) { + } + else if (lexer->allowJson5 ? 
(curChar >= '1' && curChar <= '9') + : !(charLookupTable[curChar] & VEC)) { /* back up to offending char */ unreadChar(lexer, offset); lexer->error = yajl_lex_string_invalid_escaped_char; goto finish_string_lex; } + else if (lexer->allowJson5 && curChar == '\r') { + STR_CHECK_EOF; + curChar = readChar(lexer, jsonText, offset); + if (curChar != '\n') + unreadChar(lexer, offset); + } } /* when not validating UTF8 it's a simple table lookup to determine * if the present character is invalid */ diff --git a/test/parsing/cases/a5_strings.json b/test/parsing/cases/a5_strings.json new file mode 100644 index 00000000..fdb49a00 --- /dev/null +++ b/test/parsing/cases/a5_strings.json @@ -0,0 +1,11 @@ +[ + "Hello\!", + "\"Evenin\',\" said the barman.", + // The following string has 3 different escaped line-endings, + // LF, CR, and CR+LF, which all disappear from the final string. + "Well \ +hi \ there \ +y'all!", + "\b\f\n\r\t\v\\", + "\A\C\/\D\C", +] diff --git a/test/parsing/cases/a5_strings.json.gold b/test/parsing/cases/a5_strings.json.gold new file mode 100644 index 00000000..7df60a86 --- /dev/null +++ b/test/parsing/cases/a5_strings.json.gold @@ -0,0 +1,9 @@ +array open '[' +string: 'Hello!' +string: '"Evenin'," said the barman.' +string: 'Well hi there y'all!' +string: ' + \' +string: 'AC/DC' +array close ']' +memory leaks: 0 From 5213dc07672ef5df00d97a6f8373e762ed8874a0 Mon Sep 17 00:00:00 2001 From: Andrew Johnson Date: Sun, 2 Aug 2020 01:27:11 -0500 Subject: [PATCH 13/18] Support for JSON5 \xXX hex character escapes in strings, with tests Teach the lexer/parser to recognize and decode them in JSON5 mode. Teach the encoder to use them in JSON5 mode. Add another error message for bad hex digits. Test cases to show they work, and that the bad-digit check fires. 
--- src/yajl_encode.c | 45 +++++++++++++++---- src/yajl_encode.h | 3 +- src/yajl_gen.c | 3 +- src/yajl_lex.c | 21 ++++++++- src/yajl_lex.h | 3 +- .../parsing/cases/a5_codepoints_from_hex.json | 1 + .../cases/a5_codepoints_from_hex.json.gold | 3 ++ test/parsing/cases/a5_invalid_hex_char.json | 1 + .../cases/a5_invalid_hex_char.json.gold | 2 + 9 files changed, 69 insertions(+), 13 deletions(-) create mode 100644 test/parsing/cases/a5_codepoints_from_hex.json create mode 100644 test/parsing/cases/a5_codepoints_from_hex.json.gold create mode 100644 test/parsing/cases/a5_invalid_hex_char.json create mode 100644 test/parsing/cases/a5_invalid_hex_char.json.gold diff --git a/src/yajl_encode.c b/src/yajl_encode.c index 404c9f99..c056e596 100644 --- a/src/yajl_encode.c +++ b/src/yajl_encode.c @@ -33,13 +33,22 @@ yajl_string_encode(const yajl_print_t print, void * ctx, const unsigned char * str, size_t len, - int escape_solidus) + int escape_solidus, + int output_json5) { size_t beg = 0; size_t end = 0; char hexBuf[7]; - hexBuf[0] = '\\'; hexBuf[1] = 'u'; hexBuf[2] = '0'; hexBuf[3] = '0'; - hexBuf[6] = 0; + char *hexAt; + if (output_json5) { + hexBuf[0] = '\\'; hexBuf[1] = 'x'; + hexBuf[4] = 0; + hexAt = &hexBuf[2]; + } else { + hexBuf[0] = '\\'; hexBuf[1] = 'u'; hexBuf[2] = '0'; hexBuf[3] = '0'; + hexBuf[6] = 0; + hexAt = &hexBuf[4]; + } while (end < len) { const char * escaped = NULL; @@ -57,9 +66,20 @@ yajl_string_encode(const yajl_print_t print, case '\f': escaped = "\\f"; break; case '\b': escaped = "\\b"; break; case '\t': escaped = "\\t"; break; + case '\0': + if (output_json5) { + escaped = "\\0"; break; + } + goto ashex; + case '\v': + if (output_json5) { + escaped = "\\v"; break; + } + goto ashex; default: if ((unsigned char) str[end] < 32) { - CharToHex(str[end], hexBuf + 4); + ashex: + CharToHex(str[end], hexAt); escaped = hexBuf; } break; @@ -75,10 +95,10 @@ yajl_string_encode(const yajl_print_t print, print(ctx, (const char *) (str + beg), end - beg); } -static 
void hexToDigit(unsigned int * val, const unsigned char * hex) +static void hexToDigit(unsigned int * val, unsigned int len, const unsigned char * hex) { unsigned int i; - for (i=0;i<4;i++) { + for (i=0;i<len;i++) { unsigned char c = hex[i]; if (c >= 'A') c = (c & ~0x20) - 7; c -= '0'; @@ -133,14 +153,14 @@ void yajl_string_decode(yajl_buf buf, const unsigned char * str, case 't': unescaped = "\t"; break; case 'u': { unsigned int codepoint = 0; - hexToDigit(&codepoint, str + ++end); + hexToDigit(&codepoint, 4, str + ++end); end+=3; /* check if this is a surrogate */ if ((codepoint & 0xFC00) == 0xD800) { end++; if (str[end] == '\\' && str[end + 1] == 'u') { unsigned int surrogate = 0; - hexToDigit(&surrogate, str + end + 2); + hexToDigit(&surrogate, 4, str + end + 2); codepoint = (((codepoint & 0x3F) << 10) | ((((codepoint >> 6) & 0xF) + 1) << 16) | @@ -177,6 +197,15 @@ void yajl_string_decode(yajl_buf buf, const unsigned char * str, beg = ++end; continue; case 'v': unescaped = "\v"; break; + case 'x': { + unsigned int codepoint = 0; + hexToDigit(&codepoint, 2, str + ++end); + end++; + utf8Buf[0] = (char) codepoint; + yajl_buf_append(buf, utf8Buf, 1); + beg = ++end; + continue; + } default: utf8Buf[0] = str[end]; utf8Buf[1] = 0; diff --git a/src/yajl_encode.h b/src/yajl_encode.h index 7f3cdc3e..c1e4a725 100644 --- a/src/yajl_encode.h +++ b/src/yajl_encode.h @@ -24,7 +24,8 @@ void yajl_string_encode(const yajl_print_t printer, void * ctx, const unsigned char * str, size_t length, - int escape_solidus); + int escape_solidus, + int output_json5); void yajl_string_decode(yajl_buf buf, const unsigned char * str, size_t length); diff --git a/src/yajl_gen.c b/src/yajl_gen.c index d62ea144..a6d7bfbe 100644 --- a/src/yajl_gen.c +++ b/src/yajl_gen.c @@ -284,7 +284,8 @@ yajl_gen_string(yajl_gen g, const unsigned char * str, } else { g->print(g->ctx, "\"", 1); - yajl_string_encode(g->print, g->ctx, str, len, g->flags & yajl_gen_escape_solidus); + yajl_string_encode(g->print, g->ctx, str, len, g->flags & 
yajl_gen_escape_solidus, + g->flags & yajl_gen_json5); g->print(g->ctx, "\"", 1); } APPENDED_ATOM; diff --git a/src/yajl_lex.c b/src/yajl_lex.c index e8db0294..5aebccdd 100644 --- a/src/yajl_lex.c +++ b/src/yajl_lex.c @@ -328,7 +328,21 @@ yajl_lex_string(yajl_lexer lexer, const unsigned char * jsonText, if (!(charLookupTable[curChar] & VHC)) { /* back up to offending char */ unreadChar(lexer, offset); - lexer->error = yajl_lex_string_invalid_hex_char; + lexer->error = yajl_lex_string_invalid_hex_u_char; + goto finish_string_lex; + } + } + } + else if (lexer->allowJson5 && curChar == 'x') { + unsigned int i = 0; + + for (i=0;i<2;i++) { + STR_CHECK_EOF; + curChar = readChar(lexer, jsonText, offset); + if (!(charLookupTable[curChar] & VHC)) { + /* back up to offending char */ + unreadChar(lexer, offset); + lexer->error = yajl_lex_string_invalid_hex_x_char; goto finish_string_lex; } } @@ -901,9 +915,12 @@ yajl_lex_error_to_string(yajl_lex_error error) "which it may not."; case yajl_lex_string_invalid_json_char: return "invalid character inside string."; - case yajl_lex_string_invalid_hex_char: + case yajl_lex_string_invalid_hex_u_char: return "invalid (non-hex) character occurs after '\\u' inside " "string."; + case yajl_lex_string_invalid_hex_x_char: + return "invalid (non-hex) character occurs after '\\x' inside " + "string."; case yajl_lex_invalid_char: return "invalid char in json text."; case yajl_lex_invalid_string: diff --git a/src/yajl_lex.h b/src/yajl_lex.h index b23cb073..a8c5400e 100644 --- a/src/yajl_lex.h +++ b/src/yajl_lex.h @@ -103,7 +103,8 @@ typedef enum { yajl_lex_string_invalid_utf8, yajl_lex_string_invalid_escaped_char, yajl_lex_string_invalid_json_char, - yajl_lex_string_invalid_hex_char, + yajl_lex_string_invalid_hex_u_char, + yajl_lex_string_invalid_hex_x_char, yajl_lex_invalid_char, yajl_lex_invalid_string, yajl_lex_missing_integer_after_decimal, diff --git a/test/parsing/cases/a5_codepoints_from_hex.json 
b/test/parsing/cases/a5_codepoints_from_hex.json new file mode 100644 index 00000000..d0d88d58 --- /dev/null +++ b/test/parsing/cases/a5_codepoints_from_hex.json @@ -0,0 +1 @@ +"\x0a\x07\x21\x40\x7c" diff --git a/test/parsing/cases/a5_codepoints_from_hex.json.gold b/test/parsing/cases/a5_codepoints_from_hex.json.gold new file mode 100644 index 00000000..5e3f00aa --- /dev/null +++ b/test/parsing/cases/a5_codepoints_from_hex.json.gold @@ -0,0 +1,3 @@ +string: ' +!@|' +memory leaks: 0 diff --git a/test/parsing/cases/a5_invalid_hex_char.json b/test/parsing/cases/a5_invalid_hex_char.json new file mode 100644 index 00000000..056beb5a --- /dev/null +++ b/test/parsing/cases/a5_invalid_hex_char.json @@ -0,0 +1 @@ +"yabba dabba do \x1g !!" diff --git a/test/parsing/cases/a5_invalid_hex_char.json.gold b/test/parsing/cases/a5_invalid_hex_char.json.gold new file mode 100644 index 00000000..848a31ad --- /dev/null +++ b/test/parsing/cases/a5_invalid_hex_char.json.gold @@ -0,0 +1,2 @@ +lexical error: invalid (non-hex) character occurs after '\x' inside string. +memory leaks: 0 From b626c6ebb44e42010a9b54e465680b79bd71f337 Mon Sep 17 00:00:00 2001 From: Andrew Johnson Date: Sat, 25 Aug 2018 01:01:12 -0500 Subject: [PATCH 14/18] Support for 'single-quoted strings', with tests Also adds missing character flag VIC for 'r'. The a5_spec_example test was copied from the JSON5 spec. 
--- src/yajl_lex.c | 25 ++++++++++++-------- test/parsing/cases/a5_spec_example.json | 12 ++++++++++ test/parsing/cases/a5_spec_example.json.gold | 25 ++++++++++++++++++++ test/parsing/cases/a5_strings.json | 4 ++-- 4 files changed, 54 insertions(+), 12 deletions(-) create mode 100644 test/parsing/cases/a5_spec_example.json create mode 100644 test/parsing/cases/a5_spec_example.json.gold diff --git a/src/yajl_lex.c b/src/yajl_lex.c index 5aebccdd..701155f0 100644 --- a/src/yajl_lex.c +++ b/src/yajl_lex.c @@ -149,7 +149,7 @@ static const char charLookupTable[256] = /*10*/ IJC , IJC , IJC , IJC , IJC , IJC , IJC , IJC , /*18*/ IJC , IJC , IJC , IJC , IJC , IJC , IJC , IJC , -/*20*/ 0 , 0 , NFP|VEC|IJC, 0 , VIC , 0 , 0 , 0 , +/*20*/ 0 , 0 , NFP|VEC, 0 , VIC , 0 , 0 , NFP|VEC, /*28*/ 0 , 0 , 0 , 0 , 0 , 0 , 0 , VEC , /*30*/ VHC|VIC, VHC|VIC, VHC|VIC, VHC|VIC, VHC|VIC, VHC|VIC, VHC|VIC, VHC|VIC, /*38*/ VHC|VIC, VHC|VIC, 0 , 0 , 0 , 0 , 0 , 0 , @@ -161,7 +161,7 @@ static const char charLookupTable[256] = /*60*/ 0 , VHC|VIC, VEC|VHC|VIC, VHC|VIC, VHC|VIC, VHC|VIC, VEC|VHC|VIC, VIC, /*68*/ VIC , VIC , VIC , VIC , VIC , VIC , VEC|VIC, VIC , -/*70*/ VIC , VIC , VEC , VIC , VEC|VIC, VIC , VIC , VIC , +/*70*/ VIC , VIC , VEC|VIC, VIC , VEC|VIC, VIC , VIC , VIC , /*78*/ VIC , VIC , VIC , 0 , 0 , 0 , 0 , 0 , NUC , NUC , NUC , NUC , NUC , NUC , NUC , NUC , @@ -273,7 +273,7 @@ yajl_string_scan(const unsigned char * buf, size_t len, int utf8check) static yajl_tok yajl_lex_string(yajl_lexer lexer, const unsigned char * jsonText, - size_t jsonTextLen, size_t * offset) + size_t jsonTextLen, size_t * offset, const char quote) { yajl_tok tok = yajl_tok_error; int hasEscapes = 0; @@ -308,7 +308,7 @@ yajl_lex_string(yajl_lexer lexer, const unsigned char * jsonText, curChar = readChar(lexer, jsonText, offset); /* quote terminates */ - if (curChar == '"') { + if (curChar == quote) { tok = yajl_tok_string; break; } @@ -402,7 +402,7 @@ yajl_lex_string(yajl_lexer lexer, const unsigned char * 
jsonText, static yajl_tok yajl_lex_identifier(yajl_lexer lexer, const unsigned char * jsonText, - size_t jsonTextLen, size_t * offset) + size_t jsonTextLen, size_t * offset) { unsigned char c; @@ -688,9 +688,11 @@ yajl_lex_lex(yajl_lexer lexer, const unsigned char * jsonText, } goto lexed; } + case '\'': + if (!lexer->allowJson5) goto invalid; + /* Fall through... */ case '"': { - tok = yajl_lex_string(lexer, (const unsigned char *) jsonText, - jsonTextLen, offset); + tok = yajl_lex_string(lexer, jsonText, jsonTextLen, offset, c); goto lexed; } case '+': case '.': @@ -701,8 +703,7 @@ yajl_lex_lex(yajl_lexer lexer, const unsigned char * jsonText, case '5': case '6': case '7': case '8': case '9': { /* integer parsing wants to start from the beginning */ unreadChar(lexer, offset); - tok = yajl_lex_number(lexer, (const unsigned char *) jsonText, - jsonTextLen, offset); + tok = yajl_lex_number(lexer, jsonText, jsonTextLen, offset); goto lexed; } case '/': @@ -813,8 +814,11 @@ yajl_tok yajl_lex_key(yajl_lexer lexer, const unsigned char * jsonText, case '}': tok = yajl_tok_right_brace; goto lexed; + case '\'': + if (!lexer->allowJson5) goto invalid; + /* Fall through... */ case '"': { - tok = yajl_lex_string(lexer, jsonText, jsonTextLen, offset); + tok = yajl_lex_string(lexer, jsonText, jsonTextLen, offset, c); goto lexed; } case '/': @@ -849,6 +853,7 @@ yajl_tok yajl_lex_key(yajl_lexer lexer, const unsigned char * jsonText, tok = yajl_lex_identifier(lexer, jsonText, jsonTextLen, offset); } else { + invalid: lexer->error = yajl_lex_invalid_char; tok = yajl_tok_error; } diff --git a/test/parsing/cases/a5_spec_example.json b/test/parsing/cases/a5_spec_example.json new file mode 100644 index 00000000..0e72c646 --- /dev/null +++ b/test/parsing/cases/a5_spec_example.json @@ -0,0 +1,12 @@ +{ + // comments + unquoted: 'and you can quote me on that', + singleQuotes: 'I can use "double quotes" here', + lineBreaks: "Look, Mom! 
\ +No \\n's!", + hexadecimal: 0xdecaf, + leadingDecimalPoint: .8675309, andTrailing: 8675309., + positiveSign: +1, + trailingComma: 'in objects', andIn: ['arrays',], + "backwardsCompatible": "with JSON", +} diff --git a/test/parsing/cases/a5_spec_example.json.gold b/test/parsing/cases/a5_spec_example.json.gold new file mode 100644 index 00000000..bb6b713d --- /dev/null +++ b/test/parsing/cases/a5_spec_example.json.gold @@ -0,0 +1,25 @@ +map open '{' +key: 'unquoted' +string: 'and you can quote me on that' +key: 'singleQuotes' +string: 'I can use "double quotes" here' +key: 'lineBreaks' +string: 'Look, Mom! No \n's!' +key: 'hexadecimal' +integer: 912559 +key: 'leadingDecimalPoint' +double: 0.867531 +key: 'andTrailing' +double: 8.67531e+06 +key: 'positiveSign' +integer: 1 +key: 'trailingComma' +string: 'in objects' +key: 'andIn' +array open '[' +string: 'arrays' +array close ']' +key: 'backwardsCompatible' +string: 'with JSON' +map close '}' +memory leaks: 0 diff --git a/test/parsing/cases/a5_strings.json b/test/parsing/cases/a5_strings.json index fdb49a00..97dacc38 100644 --- a/test/parsing/cases/a5_strings.json +++ b/test/parsing/cases/a5_strings.json @@ -1,5 +1,5 @@ [ - "Hello\!", + 'Hello\!', "\"Evenin\',\" said the barman.", // The following string has 3 different escaped line-endings, // LF, CR, and CR+LF, which all disappear from the final string. 
@@ -7,5 +7,5 @@ hi \ there \ y'all!", "\b\f\n\r\t\v\\", - "\A\C\/\D\C", + '\A\C\/\D\C', ] From 48bd6ada50e634ccbfcb37266f117805f18dbd1f Mon Sep 17 00:00:00 2001 From: Andrew Johnson Date: Sat, 25 Aug 2018 01:29:27 -0500 Subject: [PATCH 15/18] Clean up the formatting of some macros --- src/yajl_gen.c | 33 +++++++++++++++++---------------- src/yajl_lex.c | 2 +- 2 files changed, 18 insertions(+), 17 deletions(-) diff --git a/src/yajl_gen.c b/src/yajl_gen.c index a6d7bfbe..0dfd030b 100644 --- a/src/yajl_gen.c +++ b/src/yajl_gen.c @@ -142,17 +142,17 @@ yajl_gen_free(yajl_gen g) } #define INSERT_SEP \ - if (g->state[g->depth] == yajl_gen_map_key || \ - g->state[g->depth] == yajl_gen_in_array) { \ - g->print(g->ctx, ",", 1); \ - if ((g->flags & yajl_gen_beautify)) g->print(g->ctx, "\n", 1); \ - } else if (g->state[g->depth] == yajl_gen_map_val) { \ - g->print(g->ctx, ":", 1); \ - if ((g->flags & yajl_gen_beautify)) g->print(g->ctx, " ", 1); \ - } - -#define INSERT_WHITESPACE \ - if ((g->flags & yajl_gen_beautify)) { \ + if (g->state[g->depth] == yajl_gen_map_key || \ + g->state[g->depth] == yajl_gen_in_array) { \ + g->print(g->ctx, ",", 1); \ + if ((g->flags & yajl_gen_beautify)) g->print(g->ctx, "\n", 1); \ + } else if (g->state[g->depth] == yajl_gen_map_val) { \ + g->print(g->ctx, ":", 1); \ + if ((g->flags & yajl_gen_beautify)) g->print(g->ctx, " ", 1); \ + } + +#define INSERT_WHITESPACE \ + if ((g->flags & yajl_gen_beautify)) { \ if (g->state[g->depth] != yajl_gen_map_val) { \ unsigned int _i; \ for (_i=0;_i<g->depth;_i++) \ @@ -171,8 +171,8 @@ yajl_gen_free(yajl_gen g) /* check that we're not complete, or in error state. 
in a valid state * to be generating */ #define ENSURE_VALID_STATE \ - if (g->state[g->depth] == yajl_gen_error) { \ - return yajl_gen_in_error_state;\ + if (g->state[g->depth] == yajl_gen_error) { \ + return yajl_gen_in_error_state; \ } else if (g->state[g->depth] == yajl_gen_complete) { \ return yajl_gen_generation_complete; \ } @@ -202,8 +202,9 @@ yajl_gen_free(yajl_gen g) break; \ } \ -#define FINAL_NEWLINE \ - if ((g->flags & yajl_gen_beautify) && g->state[g->depth] == yajl_gen_complete) \ +#define FINAL_NEWLINE \ + if ((g->flags & yajl_gen_beautify) && \ + g->state[g->depth] == yajl_gen_complete) \ g->print(g->ctx, "\n", 1); yajl_gen_status @@ -238,7 +239,7 @@ yajl_gen_double(yajl_gen g, double number) } else { special = 0; - sprintf(i, "%.20g", number); + sprintf(i, "%.17g", number); if (strspn(i, "0123456789-") == strlen(i)) { strcat(i, ".0"); } diff --git a/src/yajl_lex.c b/src/yajl_lex.c index 701155f0..0f9cf4aa 100644 --- a/src/yajl_lex.c +++ b/src/yajl_lex.c @@ -196,7 +196,7 @@ static const char charLookupTable[256] = * * NOTE: on error the offset will point to the first char of the * invalid utf8 */ -#define UTF8_CHECK_EOF if (*offset >= jsonTextLen) { return yajl_tok_eof; } +#define UTF8_CHECK_EOF if (*offset >= jsonTextLen) return yajl_tok_eof; static yajl_tok yajl_lex_utf8_char(yajl_lexer lexer, const unsigned char * jsonText, From f95b8e9befdbf8df166d516a943ce0e5b1a4521a Mon Sep 17 00:00:00 2001 From: Andrew Johnson Date: Fri, 31 Aug 2018 00:33:22 -0500 Subject: [PATCH 16/18] Switch yajl_tree to accept JSON5 --- src/yajl_tree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/yajl_tree.c b/src/yajl_tree.c index 3d357a32..c7fcbc85 100644 --- a/src/yajl_tree.c +++ b/src/yajl_tree.c @@ -430,7 +430,7 @@ yajl_val yajl_tree_parse (const char *input, memset (error_buffer, 0, error_buffer_size); handle = yajl_alloc (&callbacks, NULL, &ctx); - yajl_config(handle, yajl_allow_comments, 1); + yajl_config(handle, yajl_allow_json5, 1); status 
= yajl_parse(handle, (unsigned char *) input, From aaf8992c5692d80d6c58ea388f710b620e2a5411 Mon Sep 17 00:00:00 2001 From: Andrew Johnson Date: Thu, 30 Aug 2018 00:15:40 -0500 Subject: [PATCH 17/18] Doxygen text and markup fixes and updates Includes some major text reformatting, so this could trigger merge conflicts against other pull requests. --- src/api/yajl_common.h | 26 +++--- src/api/yajl_gen.h | 148 ++++++++++++++++++++------------- src/api/yajl_parse.h | 186 ++++++++++++++++++++++++------------------ 3 files changed, 216 insertions(+), 144 deletions(-) diff --git a/src/api/yajl_common.h b/src/api/yajl_common.h index 9596ef98..4ee7053c 100644 --- a/src/api/yajl_common.h +++ b/src/api/yajl_common.h @@ -23,6 +23,9 @@ extern "C" { #endif +/** A limit used by the generator API, YAJL_MAX_DEPTH is the maximum + * depth to which arrays and maps may be nested. + */ #define YAJL_MAX_DEPTH 128 /* msft dll export gunk. To build a DLL on windows, you @@ -38,33 +41,34 @@ extern "C" { # if defined(__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__) >= 303 # define YAJL_API __attribute__ ((visibility("default"))) # else +/** Marks a yajl routine for export from the DLL/shared library. */ # define YAJL_API # endif #endif -/** pointer to a malloc function, supporting client overriding memory - * allocation routines */ +/** Pointer to a malloc() function, supporting client overriding memory + * allocation routines. */ typedef void * (*yajl_malloc_func)(void *ctx, size_t sz); -/** pointer to a free function, supporting client overriding memory - * allocation routines */ +/** Pointer to a free() function, supporting client overriding memory + * allocation routines. */ typedef void (*yajl_free_func)(void *ctx, void * ptr); -/** pointer to a realloc function which can resize an allocation. */ +/** Pointer to a realloc() function which can resize an allocation. 
*/ typedef void * (*yajl_realloc_func)(void *ctx, void * ptr, size_t sz); -/** A structure which can be passed to yajl_*_alloc routines to allow the +/** A structure which can be passed to yajl_*_alloc() routines to allow the * client to specify memory allocation functions to be used. */ typedef struct { - /** pointer to a function that can allocate uninitialized memory */ + /** Pointer to a function that can allocate uninitialized memory. */ yajl_malloc_func malloc; - /** pointer to a function that can resize memory allocations */ + /** Pointer to a function that can resize memory allocations. */ yajl_realloc_func realloc; - /** pointer to a function that can free memory allocated using - * reallocFunction or mallocFunction */ + /** Pointer to a function that can free memory allocated using + * reallocFunction or mallocFunction. */ yajl_free_func free; - /** a context pointer that will be passed to above allocation routines */ + /** A context pointer that will be passed to above allocation routines. */ void * ctx; } yajl_alloc_funcs; diff --git a/src/api/yajl_gen.h b/src/api/yajl_gen.h index 78ed3fb7..fce60aeb 100644 --- a/src/api/yajl_gen.h +++ b/src/api/yajl_gen.h @@ -29,110 +29,145 @@ #ifdef __cplusplus extern "C" { #endif - /** generator status codes */ + /** Generator status codes. */ typedef enum { - /** no error */ + /** No error. */ yajl_gen_status_ok = 0, - /** at a point where a map key is generated, a function other than - * yajl_gen_string was called */ + /** At a point where a map key is generated, a function other than + * yajl_gen_string() was called. */ yajl_gen_keys_must_be_strings, /** YAJL's maximum generation depth was exceeded. see - * YAJL_MAX_DEPTH */ + * \ref YAJL_MAX_DEPTH. */ yajl_max_depth_exceeded, - /** A generator function (yajl_gen_XXX) was called while in an error - * state */ + /** A generator function (yajl_gen_XXX()) was called while in an error + * state. 
*/ yajl_gen_in_error_state, - /** A complete JSON document has been generated */ + /** A complete JSON document has been generated. */ yajl_gen_generation_complete, - /** yajl_gen_double was passed an invalid floating point value + /** yajl_gen_double() was passed an invalid floating point value * (infinity or NaN). */ yajl_gen_invalid_number, /** A print callback was passed in, so there is no internal - * buffer to get from */ + * buffer to get from. */ yajl_gen_no_buf, - /** returned from yajl_gen_string() when the yajl_gen_validate_utf8 - * option is enabled and an invalid was passed by client code. + /** Returned from yajl_gen_string() when the \ref yajl_gen_validate_utf8 + * option is enabled and invalid UTF8 was passed by client code. */ yajl_gen_invalid_string } yajl_gen_status; - /** an opaque handle to a generator */ + /** An opaque handle to a generator */ typedef struct yajl_gen_t * yajl_gen; - /** a callback used for "printing" the results. */ + /** A callback used for "printing" the results. */ typedef void (*yajl_print_t)(void * ctx, const char * str, size_t len); - /** configuration parameters for the parser, these may be passed to - * yajl_gen_config() along with option specific argument(s). In general, - * all configuration parameters default to *off*. */ + /** Configuration parameters for the parser, these may be passed to + * yajl_gen_config() followed by option specific argument(s). In general, + * all boolean configuration parameters default to *off*. */ typedef enum { - /** generate indented (beautiful) output */ + /** + * Generate indented (beautiful) output. + * + * yajl_gen_config() argument type: int (boolean) + * + * Example: \code{.cpp} + * yajl_gen_config(g, yajl_gen_beautify, 1); // Human format please + * \endcode + */ yajl_gen_beautify = 0x01, /** - * Set an indent string which is used when yajl_gen_beautify - * is enabled. Maybe something like \\t or some number of - * spaces. The default is four spaces ' '. 
+ * Set the indent string which is used when \ref yajl_gen_beautify + * is enabled, which may only contain whitespace characters such as + * \c \\t or some number of spaces. The default is four spaces ' '. + * + * yajl_gen_config() argument type: const char * + * + * Example: \code{.cpp} + * yajl_gen_config(g, yajl_gen_indent_string, " "); // 2 spaces + * \endcode */ yajl_gen_indent_string = 0x02, /** * Set a function and context argument that should be used to - * output generated json. the function should conform to the - * yajl_print_t prototype while the context argument is a + * output the generated json. The function should conform to the + * \ref yajl_print_t prototype while the context argument may be any * void * of your choosing. * - * example: - * yajl_gen_config(g, yajl_gen_print_callback, myFunc, myVoidPtr); + * yajl_gen_config() arguments: \ref yajl_print_t, void * + * + * Example: \code{.cpp} + * yajl_gen_config(g, yajl_gen_print_callback, myFunc, myVoidPtr); + * \endcode */ yajl_gen_print_callback = 0x04, /** * Normally the generator does not validate that strings you * pass to it via yajl_gen_string() are valid UTF8. Enabling * this option will cause it to do so. + * + * yajl_gen_config() argument type: int (boolean) + * + * Example: \code{.cpp} + * yajl_gen_config(g, yajl_gen_validate_utf8, 1); // Check UTF8 + * \endcode */ yajl_gen_validate_utf8 = 0x08, /** - * the forward solidus (slash or '/' in human) is not required to be + * The forward solidus (slash or '/' in human) is not required to be * escaped in json text. By default, YAJL will not escape it in the * iterest of saving bytes. Setting this flag will cause YAJL to * always escape '/' in generated JSON strings. + * + * yajl_gen_config() argument type: int (boolean) */ yajl_gen_escape_solidus = 0x10, /** * Special numbers such as NaN and Infinity cannot be represented in * the original JSON, but are permitted in JSON5. 
Setting this flag - * allows yajl_gen_double to output the JSON5 representation of these - * special numbers instead of returning with an error. + * allows YAJL to output the JSON5 representation of these special + * numbers instead of returning with an error, and to emit map keys + * that are valid javascript identifiers without quotes. + * + * yajl_gen_config() argument type: int (boolean) + * + * Example: \code{.cpp} + * yajl_gen_config(g, yajl_gen_json5, 1); // Output JSON5 + * \endcode */ yajl_gen_json5 = 0x20, } yajl_gen_option; - /** allow the modification of generator options subsequent to handle - * allocation (via yajl_alloc) - * \returns zero in case of errors, non-zero otherwise + /** Set generator options associated with a generator handle. See the + * \ref yajl_gen_option documentation for details of the available + * options and their arguments. + * \returns Zero in case of error, non-zero otherwise. */ - YAJL_API int yajl_gen_config(yajl_gen g, yajl_gen_option opt, ...); + YAJL_API int yajl_gen_config(yajl_gen hand, yajl_gen_option opt, ...); - /** allocate a generator handle - * \param allocFuncs an optional pointer to a structure which allows - * the client to overide the memory allocation - * used by yajl. May be NULL, in which case - * malloc/free/realloc will be used. + /** Allocate a generator handle + * \param allocFuncs An optional pointer to a structure which allows the + * client to provide memory allocation functions for + * use by yajl. May be \c NULL to use the C runtime + * library's malloc(), free() and realloc(). * - * \returns an allocated handle on success, NULL on failure (bad params) + * \returns An allocated handle on success, \c NULL on failure (bad params) */ YAJL_API yajl_gen yajl_gen_alloc(const yajl_alloc_funcs * allocFuncs); - /** free a generator handle */ - YAJL_API void yajl_gen_free(yajl_gen handle); + /** Free a generator handle. 
*/ + YAJL_API void yajl_gen_free(yajl_gen hand); YAJL_API yajl_gen_status yajl_gen_integer(yajl_gen hand, long long int number); - /** generate a floating point number - * \param number the value to output, which may only be Infinity or NaN - * if the yajl_gen_json5 flag is set, as these values have - * no legal representation in JSON. In these cases the - * generator will return 'yajl_gen_invalid_number' + /** Generate a floating point number. + * \param hand The generator handle. + * \param number The value to output. The values Infinity or NaN are + * only accepted if the \ref yajl_gen_json5 option is set, + * as these values have no legal representation in JSON; + * the generator will return \ref yajl_gen_invalid_number + * otherwise. */ YAJL_API yajl_gen_status yajl_gen_double(yajl_gen hand, double number); YAJL_API yajl_gen_status yajl_gen_number(yajl_gen hand, @@ -148,26 +183,29 @@ extern "C" { YAJL_API yajl_gen_status yajl_gen_array_open(yajl_gen hand); YAJL_API yajl_gen_status yajl_gen_array_close(yajl_gen hand); - /** access the null terminated generator buffer. If incrementally + /** Access the zero-terminated generator buffer. If incrementally * outputing JSON, one should call yajl_gen_clear to clear the * buffer. This allows stream generation. */ YAJL_API yajl_gen_status yajl_gen_get_buf(yajl_gen hand, const unsigned char ** buf, size_t * len); - /** clear yajl's output buffer, but maintain all internal generation - * state. This function will not "reset" the generator state, and is + /** Clear yajl's output buffer, but maintain all internal generation + * state. This function will not reset the generator state, and is * intended to enable incremental JSON outputing. */ YAJL_API void yajl_gen_clear(yajl_gen hand); - /** Reset the generator state. Allows a client to generate multiple - * json entities in a stream. 
The "sep" string will be inserted to - * separate the previously generated entity from the current, - * NULL means *no separation* of entites (clients beware, generating - * multiple JSON numbers without a separator, for instance, will result in ambiguous output) + /** Reset the generator state. Allows a client to generate multiple + * JSON entities in a stream. + * \param hand The generator handle. + * \param sep This string will be inserted to separate the previously + * generated output from the following; passing \c NULL means + * *no separation* of entites (beware that generating + * multiple JSON numbers without a separator creates + * ambiguous output). * - * Note: this call will not clear yajl's output buffer. This - * may be accomplished explicitly by calling yajl_gen_clear() */ + * Note: This call does not clear yajl's output buffer, which must be + * accomplished explicitly by calling yajl_gen_clear(). */ YAJL_API void yajl_gen_reset(yajl_gen hand, const char * sep); #ifdef __cplusplus diff --git a/src/api/yajl_parse.h b/src/api/yajl_parse.h index 3d6ffaf4..b0d6d147 100644 --- a/src/api/yajl_parse.h +++ b/src/api/yajl_parse.h @@ -29,47 +29,47 @@ #ifdef __cplusplus extern "C" { #endif - /** error codes returned from this interface */ + /** Error codes returned from this interface. */ typedef enum { - /** no error was encountered */ + /** No error was encountered. */ yajl_status_ok, - /** a client callback returned zero, stopping the parse */ + /** A client callback returned zero, stopping the parse. */ yajl_status_client_canceled, - /** An error occured during the parse. Call yajl_get_error for - * more information about the encountered error */ + /** An error occured during the parse. Call yajl_get_error() for + * more information about the encountered error. */ yajl_status_error } yajl_status; - /** attain a human readable, english, string for an error */ + /** Return a human readable, english string for an error code. 
*/ YAJL_API const char * yajl_status_to_string(yajl_status code); - /** an opaque handle to a parser */ + /** An opaque handle to a parser. */ typedef struct yajl_handle_t * yajl_handle; - /** yajl is an event driven parser. this means as json elements are + /** yajl is an event driven parser. This means as json elements are * parsed, you are called back to do something with the data. The * functions in this table indicate the various events for which * you will be called back. Each callback accepts a "context" - * pointer, this is a void * that is passed into the yajl_parse + * pointer, this is a \c void \c * that is passed into the yajl_parse() * function which the client code may use to pass around context. * * All callbacks return an integer. If non-zero, the parse will * continue. If zero, the parse will be canceled and - * yajl_status_client_canceled will be returned from the parse. + * \c yajl_status_client_canceled will be returned from the parse. * - * \attention { + * \attention * A note about the handling of numbers: * + * \attention * yajl will only convert numbers that can be represented in a - * double or a 64 bit (long long) int. All other numbers will - * be passed to the client in string form using the yajl_number - * callback. Furthermore, if yajl_number is not NULL, it will - * always be used to return numbers, that is yajl_integer and - * yajl_double will be ignored. If yajl_number is NULL but one - * of yajl_integer or yajl_double are defined, parsing of a + * double or a 64 bit (long long) int. All other numbers will be + * passed to the client in string form using the yajl_number() + * callback. Furthermore, if yajl_number() is not NULL, it will + * always be used to return numbers, that is yajl_integer() and + * yajl_double() will be ignored. If yajl_number() is NULL but one + * of yajl_integer() or yajl_double() are defined, parsing of a * number larger than is representable in a double or 64 bit * integer will result in a parse error. 
- * } */ typedef struct { int (* yajl_null)(void * ctx); @@ -77,12 +77,12 @@ extern "C" { int (* yajl_integer)(void * ctx, long long integerVal); int (* yajl_double)(void * ctx, double doubleVal); /** A callback which passes the string representation of the number - * back to the client. Will be used for all numbers when present */ + * back to the client. Will be used for all numbers when present. */ int (* yajl_number)(void * ctx, const char * numberVal, size_t numberLen); - /** strings are returned as pointers into the JSON text when, - * possible, as a result, they are _not_ null padded */ + /** Strings are returned as pointers into the JSON text when + * possible. As a result they are _not_ zero-terminated. */ int (* yajl_string)(void * ctx, const unsigned char * stringVal, size_t stringLen); @@ -95,121 +95,151 @@ extern "C" { int (* yajl_end_array)(void * ctx); } yajl_callbacks; - /** allocate a parser handle - * \param callbacks a yajl callbacks structure specifying the + /** Allocate a parser handle. + * \param callbacks A \c yajl_callbacks structure specifying the * functions to call when different JSON entities - * are encountered in the input text. May be NULL, + * are encountered in the input text. May be \c NULL, * which is only useful for validation. - * \param afs memory allocation functions, may be NULL for to use - * C runtime library routines (malloc and friends) - * \param ctx a context pointer that will be passed to callbacks. + * \param afs Memory allocation functions, may be \c NULL to use the + * C runtime library routines (malloc() and friends). + * \param ctx A context pointer that will be passed to callbacks. */ YAJL_API yajl_handle yajl_alloc(const yajl_callbacks * callbacks, yajl_alloc_funcs * afs, void * ctx); - /** configuration parameters for the parser, these may be passed to - * yajl_config() along with option specific argument(s). In general, - * all configuration parameters default to *off*. 
*/ + /** Configuration parameters for the parser, these should be passed to + * yajl_config() followed by any option specific argument(s). In general, + * all boolean configuration parameters default to *off*. */ typedef enum { - /** Ignore javascript style comments present in - * JSON input. Non-standard, but rather fun - * arguments: toggled off with integer zero, on otherwise. + /** + * Ignore javascript style comments present in JSON input. These are + * not standard in JSON, although they are allowed in JSON5 input. + * + * yajl_config() argument type: int (boolean) * - * example: - * yajl_config(h, yajl_allow_comments, 1); // turn comment support on + * Example: \code{.cpp} + * yajl_config(h, yajl_allow_comments, 1); // turn comment support on + * \endcode */ yajl_allow_comments = 0x01, /** * When set the parser will verify that all strings in JSON input are - * valid UTF8 and will emit a parse error if this is not so. When set, + * valid UTF8 and will emit a parse error if this is not so. When set, * this option makes parsing slightly more expensive (~7% depending - * on processor and compiler in use) + * on the processor and compiler in use). + * + * yajl_config() argument type: int (boolean) * - * example: - * yajl_config(h, yajl_dont_validate_strings, 1); // disable utf8 checking + * Example: \code{.cpp} + * yajl_config(h, yajl_dont_validate_strings, 1); // disable utf8 checking + * \endcode */ yajl_dont_validate_strings = 0x02, /** - * By default, upon calls to yajl_complete_parse(), yajl will - * ensure the entire input text was consumed and will raise an error - * otherwise. Enabling this flag will cause yajl to disable this - * check. This can be useful when parsing json out of a that contains more - * than a single JSON document. + * By default, upon calls to yajl_complete_parse(), yajl will ensure + * the entire input text was consumed and will raise an error + * otherwise. Turning this flag on causes yajl to disable the garbage + * check. 
This can be useful when parsing JSON out of an input stream + * that contains more than a single JSON document. + * + * yajl_config() argument type: int (boolean) + * + * Example: \code{.cpp} + * yajl_config(h, yajl_allow_trailing_garbage, 1); // non-JSON follows + * \endcode */ yajl_allow_trailing_garbage = 0x04, /** - * Allow multiple values to be parsed by a single handle. The - * entire text must be valid JSON, and values can be seperated - * by any kind of whitespace. This flag will change the - * behavior of the parser, and cause it continue parsing after - * a value is parsed, rather than transitioning into a - * complete state. This option can be useful when parsing multiple - * values from an input stream. + * Allow multiple values to be parsed by a single handle. The entire + * text must be valid JSON, and values can be separated by any kind of + * whitespace. This flag will change the behavior of the parser, and + * cause it to continue parsing after a value is parsed, rather than + * transitioning into a complete state. This option can be useful when + * parsing multiple values from an input stream. + * + * yajl_config() argument type: int (boolean) + * + * Example: \code{.cpp} + * yajl_config(h, yajl_allow_multiple_values, 1); // multi-doc stream + * \endcode */ yajl_allow_multiple_values = 0x08, /** - * When yajl_complete_parse() is called the parser will - * check that the top level value was completely consumed. I.E., - * if called whilst in the middle of parsing a value - * yajl will enter an error state (premature EOF). Setting this - * flag suppresses that check and the corresponding error. + * When yajl_complete_parse() is called the parser will check that the + * top level value was completely consumed. If called whilst in the + * middle of parsing a value, yajl will enter an error state (premature + * EOF). Setting this flag suppresses that check and the corresponding + * error.
+ * + * yajl_config() argument type: int (boolean) + * + * Example: \code{.cpp} + * yajl_config(h, yajl_allow_partial_values, 1); // might stop early + * \endcode */ yajl_allow_partial_values = 0x10, /** * The JSON5 standard allows additional formats for numbers, strings - * and object keys which are not permitted in the JSON standard. - * Setting this flag enables JSON5 formats in the lexer and parser. + * and object keys which are not permitted by the JSON standard. + * Setting this flag tells yajl to accept JSON5 standard input. + * This flag also enables \c yajl_allow_comments since comments are + * part of the JSON5 standard. + * + * yajl_config() argument type: int (boolean) + * + * Example: \code{.cpp} + * yajl_config(h, yajl_allow_json5, 1); // We accept JSON5! + * \endcode */ yajl_allow_json5 = 0x20, } yajl_option; - /** allow the modification of parser options subsequent to handle - * allocation (via yajl_alloc) - * \returns zero in case of errors, non-zero otherwise + /** Set parser options associated with a parser handle. See the + * \ref yajl_option documentation for details of the available options + * and their arguments. + * \returns Zero in case of error, non-zero otherwise. */ - YAJL_API int yajl_config(yajl_handle h, yajl_option opt, ...); + YAJL_API int yajl_config(yajl_handle hand, yajl_option opt, ...); - /** free a parser handle */ - YAJL_API void yajl_free(yajl_handle handle); + /** Free a parser handle. */ + YAJL_API void yajl_free(yajl_handle hand); /** Parse some json! - * \param hand - a handle to the json parser allocated with yajl_alloc - * \param jsonText - a pointer to the UTF8 json text to be parsed - * \param jsonTextLength - the length, in bytes, of input text + * \param hand A handle to the json parser allocated with yajl_alloc(). + * \param jsonText A pointer to the UTF8 json text to be parsed. + * \param jsonTextLength The length, in bytes, of input text. 
*/ YAJL_API yajl_status yajl_parse(yajl_handle hand, const unsigned char * jsonText, size_t jsonTextLength); /** Parse any remaining buffered json. + * * Since yajl is a stream-based parser, without an explicit end of * input, yajl sometimes can't decide if content at the end of the - * stream is valid or not. For example, if "1" has been fed in, + * stream is valid or not. For example, if "1" has been fed in, * yajl can't know whether another digit is next or some character * that would terminate the integer token. * - * \param hand - a handle to the json parser allocated with yajl_alloc + * \param hand A handle to the json parser allocated with yajl_alloc(). */ YAJL_API yajl_status yajl_complete_parse(yajl_handle hand); - /** get an error string describing the state of the - * parse. + /** Get an error string describing the state of the parse. * - * If verbose is non-zero, the message will include the JSON - * text where the error occured, along with an arrow pointing to - * the specific char. + * If verbose is non-zero, the message will include the JSON text where + * the error occurred, along with an arrow pointing to the specific char. * * \returns A dynamically allocated string will be returned which should - * be freed with yajl_free_error + * be freed with yajl_free_error(). */ YAJL_API unsigned char * yajl_get_error(yajl_handle hand, int verbose, const unsigned char * jsonText, size_t jsonTextLength); - /** - * get the amount of data consumed from the last chunk passed to YAJL. + /** Get the amount of data consumed from the last chunk passed to yajl. * * In the case of a successful parse this can help you understand if * the entire buffer was consumed (which will allow you to handle @@ -222,7 +252,7 @@ extern "C" { */ YAJL_API size_t yajl_get_bytes_consumed(yajl_handle hand); - /** free an error returned from yajl_get_error */ + /** Free an error returned from yajl_get_error().
*/ YAJL_API void yajl_free_error(yajl_handle hand, unsigned char * str); #ifdef __cplusplus From e9672c3b2eedafca7e53dbcd7c70b325e970c2a4 Mon Sep 17 00:00:00 2001 From: Andrew Johnson Date: Fri, 31 Aug 2018 00:34:47 -0500 Subject: [PATCH 18/18] Documentation enhancements for yajl_tree --- src/api/yajl_tree.h | 59 +++++++++++++++++++++++++++------------------ 1 file changed, 36 insertions(+), 23 deletions(-) diff --git a/src/api/yajl_tree.h b/src/api/yajl_tree.h index 1c1e06a5..64565f37 100644 --- a/src/api/yajl_tree.h +++ b/src/api/yajl_tree.h @@ -37,7 +37,7 @@ extern "C" { #endif -/** possible data types that a yajl_val_s can hold */ +/** Possible data types that a yajl_val_s can hold */ typedef enum { yajl_t_string = 1, yajl_t_number = 2, @@ -99,33 +99,33 @@ struct yajl_val_s /** * Parse a string. * - * Parses an null-terminated string containing JSON data and returns a pointer + * Parses a zero-terminated string containing JSON5 data and returns a pointer * to the top-level value (root of the parse tree). * * \param input Pointer to a null-terminated utf8 string containing - * JSON data. + * JSON or JSON5 data. * \param error_buffer Pointer to a buffer in which an error message will - * be stored if \em yajl_tree_parse fails, or + * be stored if yajl_tree_parse() fails, or * \c NULL. The buffer will be initialized before * parsing, so its content will be destroyed even if - * \em yajl_tree_parse succeeds. + * yajl_tree_parse() succeeds. * \param error_buffer_size Size of the memory area pointed to by - * \em error_buffer_size. If \em error_buffer_size is - * \c NULL, this argument is ignored. + * \p error_buffer. If \p error_buffer + * is \c NULL, this argument is ignored. * * \returns Pointer to the top-level value or \c NULL on error. The memory - * pointed to must be freed using \em yajl_tree_free. In case of an error, a - * null terminated message describing the error in more detail is stored in - * \em error_buffer if it is not \c NULL. 
+ * pointed to must be freed using yajl_tree_free(). In case of an error, a + * zero-terminated message describing the error in more detail is stored in + * \p error_buffer if it is not \c NULL. */ YAJL_API yajl_val yajl_tree_parse (const char *input, char *error_buffer, size_t error_buffer_size); /** - * Free a parse tree returned by "yajl_tree_parse". + * Free a parse tree returned by yajl_tree_parse(). * - * \param v Pointer to a JSON value returned by "yajl_tree_parse". Passing NULL + * \param v Pointer to a JSON value returned by yajl_tree_parse(). Passing \c NULL * is valid and results in a no-op. */ YAJL_API void yajl_tree_free (yajl_val v); @@ -134,10 +134,10 @@ YAJL_API void yajl_tree_free (yajl_val v); * Access a nested value inside a tree. * * \param parent the node under which you'd like to extract values. - * \param path A null terminated array of strings, each the name of an object key - * \param type the yajl_type of the object you seek, or yajl_t_any if any will do. + * \param path A null terminated array of strings, each the name of an object key. + * \param type the \ref yajl_type of the object you seek, or \ref yajl_t_any if any will do. * - * \returns a pointer to the found value, or NULL if we came up empty. + * \returns a pointer to the found value, or \c NULL if we came up empty. * * Future Ideas: it'd be nice to move path to a string and implement support for * a teeny tiny micro language here, so you can extract array elements, do things @@ -146,7 +146,11 @@ YAJL_API void yajl_tree_free (yajl_val v); */ YAJL_API yajl_val yajl_tree_get(yajl_val parent, const char ** path, yajl_type type); -/* Various convenience macros to check the type of a `yajl_val` */ +/** @name Type Check Macros + * + * Convenience macros to check the type of a \ref yajl_val. 
+ */ +/**@{*/ #define YAJL_IS_STRING(v) (((v) != NULL) && ((v)->type == yajl_t_string)) #define YAJL_IS_NUMBER(v) (((v) != NULL) && ((v)->type == yajl_t_number)) #define YAJL_IS_INTEGER(v) (YAJL_IS_NUMBER(v) && ((v)->u.number.flags & YAJL_NUMBER_INT_VALID)) @@ -156,29 +160,38 @@ YAJL_API yajl_val yajl_tree_get(yajl_val parent, const char ** path, yajl_type t #define YAJL_IS_TRUE(v) (((v) != NULL) && ((v)->type == yajl_t_true )) #define YAJL_IS_FALSE(v) (((v) != NULL) && ((v)->type == yajl_t_false )) #define YAJL_IS_NULL(v) (((v) != NULL) && ((v)->type == yajl_t_null )) +/**@}*/ + +/** @name Value Get Macros + * + * Macros to fetch values from a \ref yajl_val. + */ +/**@{*/ -/** Given a yajl_val_string return a ptr to the bare string it contains, - * or NULL if the value is not a string. */ +/** Given a \ref yajl_t_string return a ptr to the bare string it contains, + * or \c NULL if the value is not a string. */ #define YAJL_GET_STRING(v) (YAJL_IS_STRING(v) ? (v)->u.string : NULL) /** Get the string representation of a number. You should check type first, - * perhaps using YAJL_IS_NUMBER */ + * perhaps using \ref YAJL_IS_NUMBER */ #define YAJL_GET_NUMBER(v) ((v)->u.number.r) /** Get the double representation of a number. You should check type first, - * perhaps using YAJL_IS_DOUBLE */ + * perhaps using \ref YAJL_IS_DOUBLE */ #define YAJL_GET_DOUBLE(v) ((v)->u.number.d) /** Get the 64bit (long long) integer representation of a number. You should - * check type first, perhaps using YAJL_IS_INTEGER */ + * check type first, perhaps using \ref YAJL_IS_INTEGER */ #define YAJL_GET_INTEGER(v) ((v)->u.number.i) -/** Get a pointer to a yajl_val_object or NULL if the value is not an object. */ +/** Get a pointer to a \ref yajl_t_object or \c NULL if the value is not an object. */ #define YAJL_GET_OBJECT(v) (YAJL_IS_OBJECT(v) ? &(v)->u.object : NULL) -/** Get a pointer to a yajl_val_array or NULL if the value is not an object. 
*/ +/** Get a pointer to a \ref yajl_t_array or \c NULL if the value is not an array. */ #define YAJL_GET_ARRAY(v) (YAJL_IS_ARRAY(v) ? &(v)->u.array : NULL) +/**@}*/ + #ifdef __cplusplus } #endif