From ef0c147a6ec2124f96186b703b923b31ec79391c Mon Sep 17 00:00:00 2001 From: Juan Cruz Viotti Date: Tue, 20 Aug 2024 11:43:34 -0400 Subject: [PATCH] [WIP] Gracefully handle validating empty JSONL datasets Signed-off-by: Juan Cruz Viotti --- DEPENDENCIES | 2 +- src/command_metaschema.cc | 6 +- src/command_test.cc | 2 +- src/command_validate.cc | 11 +- src/utils.cc | 12 +- src/utils.h | 1 + test/CMakeLists.txt | 2 + test/test/fail_multi_resolve.sh | 2 +- test/test/fail_multi_resolve_verbose.sh | 2 +- test/test/fail_true_resolve_fragment.sh | 4 +- test/test/fail_true_single_resolve.sh | 2 +- test/test/fail_true_single_resolve_verbose.sh | 2 +- test/validate/fail_2019_09.sh | 4 +- test/validate/fail_2020_12.sh | 4 +- test/validate/fail_draft4.sh | 4 +- test/validate/fail_draft6.sh | 4 +- test/validate/fail_draft7.sh | 4 +- test/validate/fail_jsonl_all.sh | 4 +- test/validate/fail_jsonl_all_verbose.sh | 4 +- test/validate/fail_jsonl_one.sh | 4 +- test/validate/fail_jsonl_one_verbose.sh | 6 +- test/validate/fail_many.sh | 4 +- test/validate/fail_many_verbose.sh | 4 +- test/validate/pass_jsonl_empty.sh | 28 + test/validate/pass_jsonl_empty_verbose.sh | 30 + test/validate/pass_jsonl_verbose.sh | 4 +- vendor/jsontoolkit/src/jsonl/iterator.cc | 21 +- .../src/jsonschema/compile_describe.cc | 542 ++++++++++++++++-- .../src/jsonschema/compile_evaluate.cc | 14 +- .../jsontoolkit/jsonschema_compile.h | 13 +- vendor/jsontoolkit/src/uri/uri.cc | 11 + 31 files changed, 662 insertions(+), 95 deletions(-) create mode 100755 test/validate/pass_jsonl_empty.sh create mode 100755 test/validate/pass_jsonl_empty_verbose.sh diff --git a/DEPENDENCIES b/DEPENDENCIES index 0777ef0f..f2ee8c85 100644 --- a/DEPENDENCIES +++ b/DEPENDENCIES @@ -1,4 +1,4 @@ vendorpull https://github.com/sourcemeta/vendorpull dea311b5bfb53b6926a4140267959ae334d3ecf4 noa https://github.com/sourcemeta/noa 7e26abce7a4e31e86a16ef2851702a56773ca527 -jsontoolkit https://github.com/sourcemeta/jsontoolkit 
00251a4161434463c24bc18418e3ffd37f998f29 +jsontoolkit https://github.com/sourcemeta/jsontoolkit 4d1dfef7be91ecadd810370b3d8a1d2e591bf574 hydra https://github.com/sourcemeta/hydra 3c53d3fdef79e9ba603d48470a508cc45472a0dc diff --git a/src/command_metaschema.cc b/src/command_metaschema.cc index 15fd8480..ee80e187 100644 --- a/src/command_metaschema.cc +++ b/src/command_metaschema.cc @@ -30,9 +30,9 @@ auto intelligence::jsonschema::cli::metaschema( const auto dialect{sourcemeta::jsontoolkit::dialect(entry.second)}; assert(dialect.has_value()); + const auto metaschema{ + sourcemeta::jsontoolkit::metaschema(entry.second, custom_resolver)}; if (!cache.contains(dialect.value())) { - const auto metaschema{ - sourcemeta::jsontoolkit::metaschema(entry.second, custom_resolver)}; const auto metaschema_template{sourcemeta::jsontoolkit::compile( metaschema, sourcemeta::jsontoolkit::default_schema_walker, custom_resolver, sourcemeta::jsontoolkit::default_schema_compiler)}; @@ -43,7 +43,7 @@ auto intelligence::jsonschema::cli::metaschema( if (sourcemeta::jsontoolkit::evaluate( cache.at(dialect.value()), entry.second, sourcemeta::jsontoolkit::SchemaCompilerEvaluationMode::Fast, - pretty_evaluate_callback(error, + pretty_evaluate_callback(error, metaschema, sourcemeta::jsontoolkit::empty_pointer))) { log_verbose(options) << entry.first.string() diff --git a/src/command_test.cc b/src/command_test.cc index aa009b7d..8d3dd204 100644 --- a/src/command_test.cc +++ b/src/command_test.cc @@ -242,7 +242,7 @@ auto intelligence::jsonschema::cli::test( schema_template, get_data(test_case, entry.first.parent_path(), verbose), sourcemeta::jsontoolkit::SchemaCompilerEvaluationMode::Fast, - pretty_evaluate_callback(error, {"$ref"}))}; + pretty_evaluate_callback(error, schema.value(), {"$ref"}))}; std::ostringstream test_case_description; if (test_case.defines("description")) { diff --git a/src/command_validate.cc b/src/command_validate.cc index d3e79eb4..3405c153 100644 --- 
a/src/command_validate.cc +++ b/src/command_validate.cc @@ -67,6 +67,7 @@ auto intelligence::jsonschema::cli::validate( auto stream{sourcemeta::jsontoolkit::read_file(instance_path)}; try { for (const auto &instance : sourcemeta::jsontoolkit::JSONL{stream}) { + index += 1; std::ostringstream error; bool subresult = true; if (benchmark) { @@ -88,7 +89,7 @@ auto intelligence::jsonschema::cli::validate( schema_template, instance, sourcemeta::jsontoolkit::SchemaCompilerEvaluationMode::Fast, pretty_evaluate_callback( - error, sourcemeta::jsontoolkit::empty_pointer)); + error, instance, sourcemeta::jsontoolkit::empty_pointer)); } if (subresult) { @@ -110,13 +111,15 @@ auto intelligence::jsonschema::cli::validate( result = false; break; } - - index += 1; } } catch (const sourcemeta::jsontoolkit::ParseError &error) { // For producing better error messages throw sourcemeta::jsontoolkit::FileParseError(instance_path, error); } + + if (index == 0) { + log_verbose(options) << "warning: The JSONL file is empty\n"; + } } else { const auto instance{sourcemeta::jsontoolkit::from_file(instance_path)}; std::ostringstream error; @@ -139,7 +142,7 @@ auto intelligence::jsonschema::cli::validate( subresult = sourcemeta::jsontoolkit::evaluate( schema_template, instance, sourcemeta::jsontoolkit::SchemaCompilerEvaluationMode::Fast, - pretty_evaluate_callback(error, + pretty_evaluate_callback(error, instance, sourcemeta::jsontoolkit::empty_pointer)); } diff --git a/src/utils.cc b/src/utils.cc index 18f35f04..60a25058 100644 --- a/src/utils.cc +++ b/src/utils.cc @@ -176,23 +176,27 @@ auto parse_options(const std::span &arguments, } auto pretty_evaluate_callback(std::ostringstream &output, + const sourcemeta::jsontoolkit::JSON &instance, const sourcemeta::jsontoolkit::Pointer &base) -> sourcemeta::jsontoolkit::SchemaCompilerEvaluationCallback { output << "error: Schema validation failure\n"; - return [&output, &base]( + return [&output, &instance, &base]( const 
sourcemeta::jsontoolkit::SchemaCompilerEvaluationType, const bool result, const sourcemeta::jsontoolkit::SchemaCompilerTemplate::value_type &step, const sourcemeta::jsontoolkit::Pointer &evaluate_path, const sourcemeta::jsontoolkit::Pointer &instance_location, - const sourcemeta::jsontoolkit::JSON &, - const sourcemeta::jsontoolkit::JSON &) -> void { + const sourcemeta::jsontoolkit::JSON &annotation) -> void { if (result) { return; } - output << " " << sourcemeta::jsontoolkit::describe(step) << "\n"; + output << " " + << sourcemeta::jsontoolkit::describe(result, step, evaluate_path, + instance_location, instance, + annotation) + << "\n"; output << " at instance location \""; sourcemeta::jsontoolkit::stringify(instance_location, output); output << "\"\n"; diff --git a/src/utils.h b/src/utils.h index dcc80938..f7d172e6 100644 --- a/src/utils.h +++ b/src/utils.h @@ -28,6 +28,7 @@ auto for_each_json(const std::vector &arguments, std::pair>; auto pretty_evaluate_callback(std::ostringstream &, + const sourcemeta::jsontoolkit::JSON &, const sourcemeta::jsontoolkit::Pointer &) -> sourcemeta::jsontoolkit::SchemaCompilerEvaluationCallback; diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 829295ea..4da6ec16 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -66,6 +66,8 @@ add_jsonschema_test_unix(validate/fail_draft7) add_jsonschema_test_unix(validate/fail_2019_09) add_jsonschema_test_unix(validate/fail_2020_12) add_jsonschema_test_unix(validate/pass_jsonl) +add_jsonschema_test_unix(validate/pass_jsonl_empty) +add_jsonschema_test_unix(validate/pass_jsonl_empty_verbose) add_jsonschema_test_unix(validate/pass_jsonl_verbose) add_jsonschema_test_unix(validate/fail_jsonl_invalid_entry) add_jsonschema_test_unix(validate/fail_jsonl_one) diff --git a/test/test/fail_multi_resolve.sh b/test/test/fail_multi_resolve.sh index e1f82e77..0fc8a95d 100755 --- a/test/test/fail_multi_resolve.sh +++ b/test/test/fail_multi_resolve.sh @@ -52,7 +52,7 @@ $(realpath 
"$TMP")/test.json: 2/4 FAIL First failure error: Schema validation failure - The target document is expected to be of the given type + The value was expected to be of type string but it was of type object at instance location "" at evaluate path "/type" diff --git a/test/test/fail_multi_resolve_verbose.sh b/test/test/fail_multi_resolve_verbose.sh index 07b12b4e..5cd7ff39 100755 --- a/test/test/fail_multi_resolve_verbose.sh +++ b/test/test/fail_multi_resolve_verbose.sh @@ -54,7 +54,7 @@ $(realpath "$TMP")/test.json: 2/4 FAIL First failure error: Schema validation failure - The target document is expected to be of the given type + The value was expected to be of type string but it was of type object at instance location "" at evaluate path "/type" diff --git a/test/test/fail_true_resolve_fragment.sh b/test/test/fail_true_resolve_fragment.sh index 9cb3dc1f..2c6f2673 100755 --- a/test/test/fail_true_resolve_fragment.sh +++ b/test/test/fail_true_resolve_fragment.sh @@ -40,10 +40,10 @@ $(realpath "$TMP")/test.json: 1/1 FAIL Fail error: Schema validation failure - The target document is expected to be of the given type + The value was expected to be of type string but it was of type object at instance location "" at evaluate path "/type" - Mark the current position of the evaluation process for future jumps + The object value was expected to validate against the statically referenced schema at instance location "" at evaluate path "" EOF diff --git a/test/test/fail_true_single_resolve.sh b/test/test/fail_true_single_resolve.sh index 3fb8d05d..0e6f63b4 100755 --- a/test/test/fail_true_single_resolve.sh +++ b/test/test/fail_true_single_resolve.sh @@ -47,7 +47,7 @@ $(realpath "$TMP")/test.json: 2/3 FAIL Unexpected error: Schema validation failure - The target document is expected to be of the given type + The value was expected to be of type string but it was of type object at instance location "" at evaluate path "/type" EOF diff --git 
a/test/test/fail_true_single_resolve_verbose.sh b/test/test/fail_true_single_resolve_verbose.sh index bb8a9176..f6f613bb 100755 --- a/test/test/fail_true_single_resolve_verbose.sh +++ b/test/test/fail_true_single_resolve_verbose.sh @@ -49,7 +49,7 @@ $(realpath "$TMP")/test.json: 2/3 FAIL Unexpected error: Schema validation failure - The target document is expected to be of the given type + The value was expected to be of type string but it was of type object at instance location "" at evaluate path "/type" diff --git a/test/validate/fail_2019_09.sh b/test/validate/fail_2019_09.sh index 3a7465b5..182b4ce5 100755 --- a/test/validate/fail_2019_09.sh +++ b/test/validate/fail_2019_09.sh @@ -30,10 +30,10 @@ test "$CODE" = "1" || exit 1 cat << EOF > "$TMP/expected.txt" fail: $(realpath "$TMP")/instance.json error: Schema validation failure - The target document is expected to be of the given type + The value was expected to be of type string but it was of type integer at instance location "/foo" at evaluate path "/properties/foo/type" - The target is expected to match all of the given assertions + The object value was expected to validate against the single defined property subschema at instance location "" at evaluate path "/properties" EOF diff --git a/test/validate/fail_2020_12.sh b/test/validate/fail_2020_12.sh index 67b2298a..61a21605 100755 --- a/test/validate/fail_2020_12.sh +++ b/test/validate/fail_2020_12.sh @@ -30,10 +30,10 @@ test "$CODE" = "1" || exit 1 cat << EOF > "$TMP/expected.txt" fail: $(realpath "$TMP")/instance.json error: Schema validation failure - The target document is expected to be of the given type + The value was expected to be of type string but it was of type integer at instance location "/foo" at evaluate path "/properties/foo/type" - The target is expected to match all of the given assertions + The object value was expected to validate against the single defined property subschema at instance location "" at evaluate path "/properties" EOF 
diff --git a/test/validate/fail_draft4.sh b/test/validate/fail_draft4.sh index cea69457..ae43eeff 100755 --- a/test/validate/fail_draft4.sh +++ b/test/validate/fail_draft4.sh @@ -30,10 +30,10 @@ test "$CODE" = "1" || exit 1 cat << EOF > "$TMP/expected.txt" fail: $(realpath "$TMP")/instance.json error: Schema validation failure - The target document is expected to be of the given type + The value was expected to be of type string but it was of type integer at instance location "/foo" at evaluate path "/properties/foo/type" - The target is expected to match all of the given assertions + The object value was expected to validate against the single defined property subschema at instance location "" at evaluate path "/properties" EOF diff --git a/test/validate/fail_draft6.sh b/test/validate/fail_draft6.sh index e9442d74..98bc14b8 100755 --- a/test/validate/fail_draft6.sh +++ b/test/validate/fail_draft6.sh @@ -30,10 +30,10 @@ test "$CODE" = "1" || exit 1 cat << EOF > "$TMP/expected.txt" fail: $(realpath "$TMP")/instance.json error: Schema validation failure - The target document is expected to be of the given type + The value was expected to be of type string but it was of type integer at instance location "/foo" at evaluate path "/properties/foo/type" - The target is expected to match all of the given assertions + The object value was expected to validate against the single defined property subschema at instance location "" at evaluate path "/properties" EOF diff --git a/test/validate/fail_draft7.sh b/test/validate/fail_draft7.sh index 3bdd02fe..0f34fe89 100755 --- a/test/validate/fail_draft7.sh +++ b/test/validate/fail_draft7.sh @@ -30,10 +30,10 @@ test "$CODE" = "1" || exit 1 cat << EOF > "$TMP/expected.txt" fail: $(realpath "$TMP")/instance.json error: Schema validation failure - The target document is expected to be of the given type + The value was expected to be of type string but it was of type integer at instance location "/foo" at evaluate path 
"/properties/foo/type" - The target is expected to match all of the given assertions + The object value was expected to validate against the single defined property subschema at instance location "" at evaluate path "/properties" EOF diff --git a/test/validate/fail_jsonl_all.sh b/test/validate/fail_jsonl_all.sh index d11a8942..2567f660 100755 --- a/test/validate/fail_jsonl_all.sh +++ b/test/validate/fail_jsonl_all.sh @@ -25,14 +25,14 @@ EOF test "$CODE" = "1" || exit 1 cat << EOF > "$TMP/expected.txt" -fail: $(realpath "$TMP")/instance.jsonl (entry #0) +fail: $(realpath "$TMP")/instance.jsonl (entry #1) { "foo": 1 } error: Schema validation failure - The target document is expected to be of the given type + The value was expected to be of type array but it was of type object at instance location "" at evaluate path "/type" EOF diff --git a/test/validate/fail_jsonl_all_verbose.sh b/test/validate/fail_jsonl_all_verbose.sh index ac8d59e1..5a7b570a 100755 --- a/test/validate/fail_jsonl_all_verbose.sh +++ b/test/validate/fail_jsonl_all_verbose.sh @@ -26,14 +26,14 @@ test "$CODE" = "1" || exit 1 cat << EOF > "$TMP/expected.txt" Interpreting input as JSONL: $(realpath "$TMP")/instance.jsonl -fail: $(realpath "$TMP")/instance.jsonl (entry #0) +fail: $(realpath "$TMP")/instance.jsonl (entry #1) { "foo": 1 } error: Schema validation failure - The target document is expected to be of the given type + The value was expected to be of type array but it was of type object at instance location "" at evaluate path "/type" EOF diff --git a/test/validate/fail_jsonl_one.sh b/test/validate/fail_jsonl_one.sh index 42b1e961..bc219c5e 100755 --- a/test/validate/fail_jsonl_one.sh +++ b/test/validate/fail_jsonl_one.sh @@ -25,7 +25,7 @@ EOF test "$CODE" = "1" || exit 1 cat << EOF > "$TMP/expected.txt" -fail: $(realpath "$TMP")/instance.jsonl (entry #1) +fail: $(realpath "$TMP")/instance.jsonl (entry #2) [ { @@ -34,7 +34,7 @@ fail: $(realpath "$TMP")/instance.jsonl (entry #1) ] error: Schema 
validation failure - The target document is expected to be of the given type + The value was expected to be of type object but it was of type array at instance location "" at evaluate path "/type" EOF diff --git a/test/validate/fail_jsonl_one_verbose.sh b/test/validate/fail_jsonl_one_verbose.sh index 786c9a95..7c3fdb31 100755 --- a/test/validate/fail_jsonl_one_verbose.sh +++ b/test/validate/fail_jsonl_one_verbose.sh @@ -26,9 +26,9 @@ test "$CODE" = "1" || exit 1 cat << EOF > "$TMP/expected.txt" Interpreting input as JSONL: $(realpath "$TMP")/instance.jsonl -ok: $(realpath "$TMP")/instance.jsonl (entry #0) +ok: $(realpath "$TMP")/instance.jsonl (entry #1) matches $(realpath "$TMP")/schema.json -fail: $(realpath "$TMP")/instance.jsonl (entry #1) +fail: $(realpath "$TMP")/instance.jsonl (entry #2) [ { @@ -37,7 +37,7 @@ fail: $(realpath "$TMP")/instance.jsonl (entry #1) ] error: Schema validation failure - The target document is expected to be of the given type + The value was expected to be of type object but it was of type array at instance location "" at evaluate path "/type" EOF diff --git a/test/validate/fail_many.sh b/test/validate/fail_many.sh index d29c2c27..4134715b 100755 --- a/test/validate/fail_many.sh +++ b/test/validate/fail_many.sh @@ -42,10 +42,10 @@ test "$CODE" = "1" || exit 1 cat << EOF > "$TMP/expected.txt" fail: $(realpath "$TMP")/instance_2.json error: Schema validation failure - The target document is expected to be of the given type + The value was expected to be of type string but it was of type integer at instance location "/foo" at evaluate path "/properties/foo/type" - The target is expected to match all of the given assertions + The object value was expected to validate against the single defined property subschema at instance location "" at evaluate path "/properties" EOF diff --git a/test/validate/fail_many_verbose.sh b/test/validate/fail_many_verbose.sh index 3e010b9d..90e41af7 100755 --- a/test/validate/fail_many_verbose.sh +++ 
b/test/validate/fail_many_verbose.sh @@ -44,10 +44,10 @@ ok: $(realpath "$TMP")/instance_1.json matches $(realpath "$TMP")/schema.json fail: $(realpath "$TMP")/instance_2.json error: Schema validation failure - The target document is expected to be of the given type + The value was expected to be of type string but it was of type integer at instance location "/foo" at evaluate path "/properties/foo/type" - The target is expected to match all of the given assertions + The object value was expected to validate against the single defined property subschema at instance location "" at evaluate path "/properties" ok: $(realpath "$TMP")/instance_3.json diff --git a/test/validate/pass_jsonl_empty.sh b/test/validate/pass_jsonl_empty.sh new file mode 100755 index 00000000..15526a96 --- /dev/null +++ b/test/validate/pass_jsonl_empty.sh @@ -0,0 +1,28 @@ +#!/bin/sh + +set -o errexit +set -o nounset + +TMP="$(mktemp -d)" +clean() { rm -rf "$TMP"; } +trap clean EXIT + +cat << 'EOF' > "$TMP/schema.json" +{ + "$schema": "http://json-schema.org/draft-04/schema#", + "properties": { + "foo": { + "type": "string" + } + } +} +EOF + +touch "$TMP/instance.jsonl" + +"$1" validate "$TMP/schema.json" "$TMP/instance.jsonl" 2> "$TMP/output.txt" 1>&2 + +cat << EOF > "$TMP/expected.txt" +EOF + +diff "$TMP/output.txt" "$TMP/expected.txt" diff --git a/test/validate/pass_jsonl_empty_verbose.sh b/test/validate/pass_jsonl_empty_verbose.sh new file mode 100755 index 00000000..2fd4ecd9 --- /dev/null +++ b/test/validate/pass_jsonl_empty_verbose.sh @@ -0,0 +1,30 @@ +#!/bin/sh + +set -o errexit +set -o nounset + +TMP="$(mktemp -d)" +clean() { rm -rf "$TMP"; } +trap clean EXIT + +cat << 'EOF' > "$TMP/schema.json" +{ + "$schema": "http://json-schema.org/draft-04/schema#", + "properties": { + "foo": { + "type": "string" + } + } +} +EOF + +touch "$TMP/instance.jsonl" + +"$1" validate "$TMP/schema.json" "$TMP/instance.jsonl" --verbose 2> "$TMP/output.txt" 1>&2 + +cat << EOF > "$TMP/expected.txt" +Interpreting 
input as JSONL: $(realpath "$TMP")/instance.jsonl +warning: The JSONL file is empty +EOF + +diff "$TMP/output.txt" "$TMP/expected.txt" diff --git a/test/validate/pass_jsonl_verbose.sh b/test/validate/pass_jsonl_verbose.sh index 0b20dcb9..e94e0735 100755 --- a/test/validate/pass_jsonl_verbose.sh +++ b/test/validate/pass_jsonl_verbose.sh @@ -28,12 +28,12 @@ EOF cat << EOF > "$TMP/expected.txt" Interpreting input as JSONL: $(realpath "$TMP")/instance.jsonl -ok: $(realpath "$TMP")/instance.jsonl (entry #0) - matches $(realpath "$TMP")/schema.json ok: $(realpath "$TMP")/instance.jsonl (entry #1) matches $(realpath "$TMP")/schema.json ok: $(realpath "$TMP")/instance.jsonl (entry #2) matches $(realpath "$TMP")/schema.json +ok: $(realpath "$TMP")/instance.jsonl (entry #3) + matches $(realpath "$TMP")/schema.json EOF diff "$TMP/output.txt" "$TMP/expected.txt" diff --git a/vendor/jsontoolkit/src/jsonl/iterator.cc b/vendor/jsontoolkit/src/jsonl/iterator.cc index 1efe4cec..10dbdb1e 100644 --- a/vendor/jsontoolkit/src/jsonl/iterator.cc +++ b/vendor/jsontoolkit/src/jsonl/iterator.cc @@ -15,7 +15,26 @@ struct ConstJSONLIterator::Internal { */ auto ConstJSONLIterator::parse_next() -> JSON { - if (this->data && !this->data->eof()) { + while (this->data && !this->data->eof()) { + switch (this->data->peek()) { + // Whitespace + case internal::token_jsonl_whitespace_space: + case internal::token_jsonl_whitespace_tabulation: + case internal::token_jsonl_whitespace_carriage_return: + this->column += 1; + this->data->ignore(1); + break; + case JSON::CharTraits::eof(): + this->data = nullptr; + break; + default: + goto parse_start; + } + } + +parse_start: + if (this->data) { + assert(!this->data->eof()); return parse(*this->data, this->line, this->column); } else { // Just as a cheap placeholder diff --git a/vendor/jsontoolkit/src/jsonschema/compile_describe.cc b/vendor/jsontoolkit/src/jsonschema/compile_describe.cc index fef2fb5a..ec37149f 100644 --- 
a/vendor/jsontoolkit/src/jsonschema/compile_describe.cc +++ b/vendor/jsontoolkit/src/jsonschema/compile_describe.cc @@ -1,23 +1,188 @@ #include <sourcemeta/jsontoolkit/jsonschema_compile.h> -#include <variant> // std::visit +#include <algorithm> // std::any_of +#include <cassert> // assert +#include <sstream> // std::ostringstream +#include <variant> // std::visit namespace { using namespace sourcemeta::jsontoolkit; +template <typename T> +auto step_value(const SchemaCompilerStepValue<T> &value) -> const T & { + assert(std::holds_alternative<T>(value)); + return std::get<T>(value); +} + +template <typename T> auto step_value(const T &step) -> decltype(auto) { + return step_value(step.value); +} + +auto to_string(const JSON::Type type) -> std::string { + // Otherwise the type "real" might not make a lot + // of sense to JSON Schema users + if (type == JSON::Type::Real) { + return "number"; + } else { + std::ostringstream result; + result << type; + return result.str(); + } +} + +auto escape_string(const std::string &input) -> std::string { + std::ostringstream result; + result << '"'; + + for (const auto character : input) { + if (character == '"') { + result << "\\\""; + } else { + result << character; + } + } + + result << '"'; + return result.str(); +} + +auto describe_type_check(const bool valid, const JSON::Type current, + const JSON::Type expected, + std::ostringstream &message) -> void { + message << "The value was expected to be of type "; + message << to_string(expected); + if (!valid) { + message << " but it was of type "; + message << to_string(current); + } +} + +auto describe_types_check(const bool valid, const JSON::Type current, + const std::set<JSON::Type> &expected, + std::ostringstream &message) -> void { + assert(expected.size() > 1); + auto copy = expected; + if (copy.contains(JSON::Type::Real) && copy.contains(JSON::Type::Integer)) { + copy.erase(JSON::Type::Integer); + } + + if (copy.size() == 1) { + describe_type_check(valid, current, *(copy.cbegin()), message); + return; + } + + message << "The value was expected to be of type "; + for (auto iterator = copy.cbegin(); iterator != 
copy.cend(); ++iterator) { + if (std::next(iterator) == copy.cend()) { + message << "or " << to_string(*iterator); + } else { + message << to_string(*iterator) << ", "; + } + } + + if (valid) { + message << " and it was of type "; + } else { + message << " but it was of type "; + } + + if (valid && current == JSON::Type::Integer && + copy.contains(JSON::Type::Real)) { + message << "number"; + } else { + message << to_string(current); + } +} + +auto describe_reference(const JSON &target) -> std::string { + std::ostringstream message; + message << "The " << to_string(target.type()) + << " value was expected to validate against the statically " + "referenced schema"; + return message.str(); +} + +auto is_within_keyword(const Pointer &evaluate_path, + const std::string &keyword) -> bool { + return std::any_of(evaluate_path.cbegin(), evaluate_path.cend(), + [&keyword](const auto &token) { + return token.is_property() && + token.to_property() == keyword; + }); +} + struct DescribeVisitor { + const bool valid; + const Pointer &evaluate_path; + const std::string &keyword; + const Pointer &instance_location; + const JSON &target; + const JSON &annotation; + auto operator()(const SchemaCompilerLogicalOr &) const -> std::string { return "The target is expected to match at least one of the given " "assertions"; } - auto operator()(const SchemaCompilerLogicalAnd &) const -> std::string { + + auto operator()(const SchemaCompilerLogicalAnd &step) const -> std::string { + if (this->keyword == "allOf") { + assert(!step.children.empty()); + std::ostringstream message; + message << "The " << to_string(this->target.type()) + << " value was expected to validate against the "; + if (step.children.size() > 1) { + message << step.children.size() << " given subschemas"; + } else { + message << "given subschema"; + } + + return message.str(); + } + + if (this->keyword == "then" || this->keyword == "else") { + assert(!step.children.empty()); + std::ostringstream message; + message << "Because of 
the conditional outcome, the " + << to_string(this->target.type()) + << " value was expected to validate against the "; + if (step.children.size() > 1) { + message << step.children.size() << " given subschemas"; + } else { + message << "given subschema"; + } + + return message.str(); + } + + if (this->keyword == "properties") { + assert(!step.children.empty()); + assert(this->target.is_object()); + std::ostringstream message; + message << "The object value was expected to validate against the "; + if (step.children.size() == 1) { + message << "single defined property subschema"; + } else { + message << step.children.size() << " defined properties subschemas"; + } + + return message.str(); + } + return "The target is expected to match all of the given assertions"; } + auto operator()(const SchemaCompilerLogicalXor &) const -> std::string { return "The target is expected to match one and only one of the given " "assertions"; } auto operator()(const SchemaCompilerLogicalTry &) const -> std::string { + if (this->keyword == "if") { + std::ostringstream message; + message << "The " << to_string(this->target.type()) + << " value was tested against the conditional subschema"; + return message.str(); + } + return "The target might match all of the given assertions"; } auto operator()(const SchemaCompilerLogicalNot &) const -> std::string { @@ -45,30 +210,74 @@ struct DescribeVisitor { return "The target object is expected to define all of the given " "properties"; } + auto operator()(const SchemaCompilerControlLabel &) const -> std::string { - return "Mark the current position of the evaluation process for future " - "jumps"; + return describe_reference(this->target); } + auto operator()(const SchemaCompilerControlMark &) const -> std::string { - return "Mark the current position of the evaluation process for future " - "jumps"; + return describe_reference(this->target); } + auto operator()(const SchemaCompilerControlJump &) const -> std::string { - return "Jump to another 
point of the evaluation process"; + return describe_reference(this->target); } + auto operator()(const SchemaCompilerControlDynamicAnchorJump &) const -> std::string { return "Jump to a dynamic anchor"; } + auto operator()(const SchemaCompilerAnnotationPublic &) const -> std::string { + if (this->keyword == "if") { + assert(this->annotation == JSON{true}); + std::ostringstream message; + message + << "The " << to_string(this->target.type()) + << " value successfully validated against the conditional subschema"; + return message.str(); + } + return "Emit an annotation"; } + auto operator()(const SchemaCompilerLoopProperties &) const -> std::string { return "Loop over the properties of the target object"; } + auto operator()(const SchemaCompilerLoopKeys &) const -> std::string { + if (this->keyword == "propertyNames") { + assert(this->target.is_object()); + std::ostringstream message; + + if (this->target.size() == 0) { + assert(this->valid); + message << "The object is empty and no properties are expected to " + "validate against the given subschema"; + } else if (this->target.size() == 1) { + message << "The object property "; + message << escape_string(this->target.as_object().cbegin()->first); + message << " is expected to validate against the given subschema"; + } else { + message << "The object properties "; + for (auto iterator = this->target.as_object().cbegin(); + iterator != this->target.as_object().cend(); ++iterator) { + if (std::next(iterator) == this->target.as_object().cend()) { + message << "and " << escape_string(iterator->first); + } else { + message << escape_string(iterator->first) << ", "; + } + } + + message << " are expected to validate against the given subschema"; + } + + return message.str(); + } + return "Loop over the property keys of the target object"; } + auto operator()(const SchemaCompilerLoopItems &) const -> std::string { return "Loop over the items of the target array"; } @@ -77,54 +286,266 @@ struct DescribeVisitor { return "Loop 
over the items of the target array potentially bound by an " "annotation result"; } - auto operator()(const SchemaCompilerLoopContains &) const -> std::string { - return "A certain number of array items must satisfy the given constraints"; + + auto operator()(const SchemaCompilerLoopContains &step) const -> std::string { + assert(this->target.is_array()); + std::ostringstream message; + const auto &value{step_value(step)}; + const auto minimum{std::get<0>(value)}; + const auto maximum{std::get<1>(value)}; + bool plural{true}; + + message << "The array value was expected to contain "; + if (maximum.has_value()) { + if (minimum == maximum.value() && minimum == 0) { + message << "any number of"; + } else if (minimum == maximum.value()) { + message << "exactly " << minimum; + if (minimum == 1) { + plural = false; + } + } else if (minimum == 0) { + message << "up to " << maximum.value(); + if (maximum.value() == 1) { + plural = false; + } + } else { + message << minimum << " to " << maximum.value(); + if (maximum.value() == 1) { + plural = false; + } + } + } else { + message << "at least " << minimum; + if (minimum == 1) { + plural = false; + } + } + + if (plural) { + message << " items that validate against the given subschema"; + } else { + message << " item that validates against the given subschema"; + } + + return message.str(); } + auto operator()(const SchemaCompilerAssertionFail &) const -> std::string { return "Abort evaluation on failure"; } - auto operator()(const SchemaCompilerAssertionDefines &) const -> std::string { - return "The target object is expected to define the given property"; - } + auto - operator()(const SchemaCompilerAssertionDefinesAll &) const -> std::string { - return "The target object is expected to define all of the given " - "properties"; - } - auto operator()(const SchemaCompilerAssertionType &) const -> std::string { - return "The target document is expected to be of the given type"; + operator()(const SchemaCompilerAssertionDefines 
&step) const -> std::string { + std::ostringstream message; + message << "The object value was expected to define the property " + << escape_string(step_value(step)); + return message.str(); } - auto operator()(const SchemaCompilerAssertionTypeAny &) const -> std::string { - return "The target document is expected to be of one of the given types"; + + auto operator()(const SchemaCompilerAssertionDefinesAll &step) const + -> std::string { + const auto &value{step_value(step)}; + assert(value.size() > 1); + std::ostringstream message; + message << "The object value was expected to define properties "; + for (auto iterator = value.cbegin(); iterator != value.cend(); ++iterator) { + if (std::next(iterator) == value.cend()) { + message << "and " << escape_string(*iterator); + } else { + message << escape_string(*iterator) << ", "; + } + } + + if (this->valid) { + return message.str(); + } + + assert(this->target.is_object()); + std::set<std::string> missing; + for (const auto &property : value) { + if (!this->target.defines(property)) { + missing.insert(property); + } + } + + assert(!missing.empty()); + if (missing.size() == 1) { + message << " but did not define the property " + << escape_string(*(missing.cbegin())); + } else { + message << " but did not define properties "; + for (auto iterator = missing.cbegin(); iterator != missing.cend(); + ++iterator) { + if (std::next(iterator) == missing.cend()) { + message << "and " << escape_string(*iterator); + } else { + message << escape_string(*iterator) << ", "; + } + } + } + + return message.str(); } + auto - operator()(const SchemaCompilerAssertionTypeStrict &) const -> std::string { - return "The target document is expected to be of the given type"; + operator()(const SchemaCompilerAssertionType &step) const -> std::string { + std::ostringstream message; + describe_type_check(this->valid, this->target.type(), step_value(step), + message); + return message.str(); } - auto operator()(const SchemaCompilerAssertionTypeStrictAny &) const 
+ + auto operator()(const SchemaCompilerAssertionTypeStrict &step) const -> std::string { - return "The target document is expected to be of one of the given types"; + std::ostringstream message; + const auto &value{step_value(step)}; + if (!this->valid && value == JSON::Type::Real && + this->target.type() == JSON::Type::Integer) { + message + << "The value was expected to be a real number but it was an integer"; + } else if (!this->valid && value == JSON::Type::Integer && + this->target.type() == JSON::Type::Real) { + message + << "The value was expected to be an integer but it was a real number"; + } else { + describe_type_check(this->valid, this->target.type(), value, message); + } + + return message.str(); } - auto operator()(const SchemaCompilerAssertionRegex &) const -> std::string { - return "The target string is expected to match the given regular " - "expression"; + + auto + operator()(const SchemaCompilerAssertionTypeAny &step) const -> std::string { + std::ostringstream message; + describe_types_check(this->valid, this->target.type(), step_value(step), + message); + return message.str(); + } + + auto operator()(const SchemaCompilerAssertionTypeStrictAny &step) const + -> std::string { + std::ostringstream message; + describe_types_check(this->valid, this->target.type(), step_value(step), + message); + return message.str(); } + auto - operator()(const SchemaCompilerAssertionSizeGreater &) const -> std::string { + operator()(const SchemaCompilerAssertionRegex &step) const -> std::string { + assert(this->target.is_string()); + std::ostringstream message; + message << "The string value " << escape_string(this->target.to_string()) + << " was expected to match the regular expression " + << escape_string(step_value(step).second); + return message.str(); + } + + auto operator()(const SchemaCompilerAssertionSizeGreater &step) const + -> std::string { + if (this->keyword == "minLength") { + std::ostringstream message; + const auto minimum{step_value(step) + 1}; + 
+ if (is_within_keyword(this->evaluate_path, "propertyNames")) { + assert(this->instance_location.back().is_property()); + message << "The object property name " + << escape_string(this->instance_location.back().to_property()); + } else { + message << "The string value "; + stringify(this->target, message); + } + + message << " was expected to consist of at least " << minimum + << (minimum == 1 ? " character" : " characters"); + + if (this->valid) { + message << " and"; + } else { + message << " but"; + } + + message << " it consisted of "; + + if (is_within_keyword(this->evaluate_path, "propertyNames")) { + message << this->instance_location.back().to_property().size(); + message << (this->instance_location.back().to_property().size() == 1 + ? " character" + : " characters"); + } else { + message << this->target.size(); + message << (this->target.size() == 1 ? " character" : " characters"); + } + + return message.str(); + } + return "The target size is expected to be greater than the given number"; } + auto - operator()(const SchemaCompilerAssertionSizeLess &) const -> std::string { + operator()(const SchemaCompilerAssertionSizeLess &step) const -> std::string { + if (this->keyword == "maxLength") { + std::ostringstream message; + const auto maximum{step_value(step) - 1}; + + if (is_within_keyword(this->evaluate_path, "propertyNames")) { + assert(this->instance_location.back().is_property()); + message << "The object property name " + << escape_string(this->instance_location.back().to_property()); + } else { + message << "The string value "; + stringify(this->target, message); + } + + message << " was expected to consist of at most " << maximum + << (maximum == 1 ? 
" character" : " characters"); + + if (this->valid) { + message << " and"; + } else { + message << " but"; + } + + message << " it consisted of "; + + if (is_within_keyword(this->evaluate_path, "propertyNames")) { + message << this->instance_location.back().to_property().size(); + message << (this->instance_location.back().to_property().size() == 1 + ? " character" + : " characters"); + } else { + message << this->target.size(); + message << (this->target.size() == 1 ? " character" : " characters"); + } + + return message.str(); + } + return "The target size is expected to be less than the given number"; } + auto operator()(const SchemaCompilerAssertionSizeEqual &) const -> std::string { return "The target size is expected to be equal to the given number"; } - auto operator()(const SchemaCompilerAssertionEqual &) const -> std::string { + auto + operator()(const SchemaCompilerAssertionEqual &step) const -> std::string { + if (this->keyword == "const") { + std::ostringstream message; + const auto &value{step_value(step)}; + message << "The " << to_string(this->target.type()) << " value "; + stringify(this->target, message); + message << " was expected to equal the " << to_string(value.type()) + << " constant "; + stringify(value, message); + return message.str(); + } + return "The target is expected to be equal to the given value"; } + auto operator()(const SchemaCompilerAssertionGreaterEqual &) const { return "The target number is expected to be greater than or equal to the " "given number"; @@ -134,19 +555,55 @@ struct DescribeVisitor { return "The target number is expected to be less than or equal to the " "given number"; } - auto operator()(const SchemaCompilerAssertionGreater &) const -> std::string { - return "The target number is expected to be greater than the given number"; + + auto + operator()(const SchemaCompilerAssertionGreater &step) const -> std::string { + std::ostringstream message; + const auto &value{step_value(step)}; + message << "The " << 
to_string(this->target.type()) << " value "; + stringify(this->target, message); + message << " was expected to be greater than the " + << to_string(value.type()) << " "; + stringify(value, message); + if (!this->valid && value == this->target) { + message << ", but they were equal"; + } + + return message.str(); } - auto operator()(const SchemaCompilerAssertionLess &) const -> std::string { - return "The target number is expected to be less than the given number"; + + auto + operator()(const SchemaCompilerAssertionLess &step) const -> std::string { + std::ostringstream message; + const auto &value{step_value(step)}; + message << "The " << to_string(this->target.type()) << " value "; + stringify(this->target, message); + message << " was expected to be less than the " << to_string(value.type()) + << " "; + stringify(value, message); + if (!this->valid && value == this->target) { + message << ", but they were equal"; + } + + return message.str(); } + auto operator()(const SchemaCompilerAssertionUnique &) const -> std::string { return "The target array is expected to not contain duplicates"; } - auto - operator()(const SchemaCompilerAssertionDivisible &) const -> std::string { - return "The target number is expected to be divisible by the given number"; + + auto operator()(const SchemaCompilerAssertionDivisible &step) const + -> std::string { + std::ostringstream message; + const auto &value{step_value(step)}; + message << "The " << to_string(this->target.type()) << " value "; + stringify(this->target, message); + message << " was expected to be divisible by the " + << to_string(value.type()) << " "; + stringify(value, message); + return message.str(); } + auto operator()(const SchemaCompilerAssertionStringType &) const -> std::string { return "The target string is expected to match the given logical type"; @@ -161,8 +618,15 @@ struct DescribeVisitor { namespace sourcemeta::jsontoolkit { -auto describe(const SchemaCompilerTemplate::value_type &step) -> std::string { 
- return std::visit(DescribeVisitor{}, step); +auto describe(const bool valid, const SchemaCompilerTemplate::value_type &step, + const Pointer &evaluate_path, const Pointer &instance_location, + const JSON &instance, const JSON &annotation) -> std::string { + assert(evaluate_path.back().is_property()); + return std::visit( + DescribeVisitor{valid, evaluate_path, evaluate_path.back().to_property(), + instance_location, get(instance, instance_location), + annotation}, + step); } } // namespace sourcemeta::jsontoolkit diff --git a/vendor/jsontoolkit/src/jsonschema/compile_evaluate.cc b/vendor/jsontoolkit/src/jsonschema/compile_evaluate.cc index 19fa27d4..6263b721 100644 --- a/vendor/jsontoolkit/src/jsonschema/compile_evaluate.cc +++ b/vendor/jsontoolkit/src/jsonschema/compile_evaluate.cc @@ -269,7 +269,6 @@ auto callback_noop( const sourcemeta::jsontoolkit::SchemaCompilerTemplate::value_type &, const sourcemeta::jsontoolkit::Pointer &, const sourcemeta::jsontoolkit::Pointer &, - const sourcemeta::jsontoolkit::JSON &, const sourcemeta::jsontoolkit::JSON &) noexcept -> void {} auto evaluate_step( @@ -283,12 +282,12 @@ auto evaluate_step( #define CALLBACK_PRE(current_instance_location) \ callback(SchemaCompilerEvaluationType::Pre, true, step, \ - context.evaluate_path(), current_instance_location, instance, \ + context.evaluate_path(), current_instance_location, \ context.value(nullptr)); #define CALLBACK_POST(current_step) \ callback(SchemaCompilerEvaluationType::Post, result, step, \ - context.evaluate_path(), context.instance_location(), instance, \ + context.evaluate_path(), context.instance_location(), \ context.value(nullptr)); \ context.pop(current_step); \ return result; @@ -325,6 +324,8 @@ auto evaluate_step( EVALUATE_CONDITION_GUARD(assertion, instance); CALLBACK_PRE(context.instance_location()); const auto &value{context.resolve_value(assertion.value, instance)}; + // Otherwise why are we even emitting this instruction?
+ assert(value.size() > 1); const auto &target{ context.resolve_target(assertion.target, instance)}; assert(target.is_object()); @@ -357,6 +358,8 @@ auto evaluate_step( EVALUATE_CONDITION_GUARD(assertion, instance); CALLBACK_PRE(context.instance_location()); const auto &value{context.resolve_value(assertion.value, instance)}; + // Otherwise why are we even emitting this instruction? + assert(value.size() > 1); const auto &target{ context.resolve_target(assertion.target, instance)}; // In non-strict mode, we consider a real number that represents an @@ -382,6 +385,8 @@ auto evaluate_step( EVALUATE_CONDITION_GUARD(assertion, instance); CALLBACK_PRE(context.instance_location()); const auto &value{context.resolve_value(assertion.value, instance)}; + // Otherwise why are we even emitting this instruction? + assert(value.size() > 1); const auto &target{ context.resolve_target(assertion.target, instance)}; result = value.contains(target.type()); @@ -817,8 +822,7 @@ auto evaluate_step( if (value.second) { CALLBACK_PRE(current_instance_location); callback(SchemaCompilerEvaluationType::Post, result, step, - context.evaluate_path(), current_instance_location, instance, - value.first); + context.evaluate_path(), current_instance_location, value.first); } context.pop(annotation); diff --git a/vendor/jsontoolkit/src/jsonschema/include/sourcemeta/jsontoolkit/jsonschema_compile.h b/vendor/jsontoolkit/src/jsonschema/include/sourcemeta/jsontoolkit/jsonschema_compile.h index 56dd49f9..978996cb 100644 --- a/vendor/jsontoolkit/src/jsonschema/include/sourcemeta/jsontoolkit/jsonschema_compile.h +++ b/vendor/jsontoolkit/src/jsonschema/include/sourcemeta/jsontoolkit/jsonschema_compile.h @@ -517,21 +517,22 @@ enum class SchemaCompilerEvaluationType { Pre, Post }; /// - The step that was just evaluated /// - The evaluation path /// - The instance location -/// - The instance document /// - The annotation result, if any (otherwise null) /// /// You can use this callback mechanism to implement
arbitrary output formats. -using SchemaCompilerEvaluationCallback = std::function; +using SchemaCompilerEvaluationCallback = + std::function; /// @ingroup jsonschema /// /// This function translates a step execution into a human-readable string. /// Useful as the building block for producing user-friendly evaluation results. auto SOURCEMETA_JSONTOOLKIT_JSONSCHEMA_EXPORT -describe(const SchemaCompilerTemplate::value_type &step) -> std::string; +describe(const bool valid, const SchemaCompilerTemplate::value_type &step, + const Pointer &evaluate_path, const Pointer &instance_location, + const JSON &instance, const JSON &annotation) -> std::string; // TODO: Support standard output formats. Maybe through pre-made evaluation // callbacks? diff --git a/vendor/jsontoolkit/src/uri/uri.cc b/vendor/jsontoolkit/src/uri/uri.cc index cf94de7f..1ae7c08b 100644 --- a/vendor/jsontoolkit/src/uri/uri.cc +++ b/vendor/jsontoolkit/src/uri/uri.cc @@ -236,7 +236,13 @@ auto URI::host() const -> std::optional { auto URI::port() const -> std::optional { return this->port_; } auto URI::path() const -> std::optional { + // NOTE: This is a workaround for the fact that `uriparser` does not + // parse /.. as a segment, then we store nothing in the path_ field. + // By that we can't add the initial slash to the URI. if (!this->path_.has_value()) { + if (this->data == "/..") { + return "/"; + } return std::nullopt; } @@ -316,6 +322,11 @@ auto URI::recompose_without_fragment() const -> std::optional { } } + const auto user_info{this->userinfo()}; + if (user_info.has_value()) { + result << user_info.value() << "@"; + } + // Host const auto result_host{this->host()}; if (result_host.has_value()) {