Skip to content

Commit

Permalink
[WIP] Gracefully handle validating empty JSONL datasets
Browse files: browse the repository at this point in the history
Signed-off-by: Juan Cruz Viotti <[email protected]>
  • Loading branch information
jviotti committed Aug 20, 2024
1 parent 6ffc926 commit ef0c147
Show file tree
Hide file tree
Showing 31 changed files with 662 additions and 95 deletions.
2 changes: 1 addition & 1 deletion DEPENDENCIES
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
vendorpull https://github.com/sourcemeta/vendorpull dea311b5bfb53b6926a4140267959ae334d3ecf4
noa https://github.com/sourcemeta/noa 7e26abce7a4e31e86a16ef2851702a56773ca527
jsontoolkit https://github.com/sourcemeta/jsontoolkit 00251a4161434463c24bc18418e3ffd37f998f29
jsontoolkit https://github.com/sourcemeta/jsontoolkit 4d1dfef7be91ecadd810370b3d8a1d2e591bf574
hydra https://github.com/sourcemeta/hydra 3c53d3fdef79e9ba603d48470a508cc45472a0dc
6 changes: 3 additions & 3 deletions src/command_metaschema.cc
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,9 @@ auto intelligence::jsonschema::cli::metaschema(
const auto dialect{sourcemeta::jsontoolkit::dialect(entry.second)};
assert(dialect.has_value());

const auto metaschema{
sourcemeta::jsontoolkit::metaschema(entry.second, custom_resolver)};
if (!cache.contains(dialect.value())) {
const auto metaschema{
sourcemeta::jsontoolkit::metaschema(entry.second, custom_resolver)};
const auto metaschema_template{sourcemeta::jsontoolkit::compile(
metaschema, sourcemeta::jsontoolkit::default_schema_walker,
custom_resolver, sourcemeta::jsontoolkit::default_schema_compiler)};
Expand All @@ -43,7 +43,7 @@ auto intelligence::jsonschema::cli::metaschema(
if (sourcemeta::jsontoolkit::evaluate(
cache.at(dialect.value()), entry.second,
sourcemeta::jsontoolkit::SchemaCompilerEvaluationMode::Fast,
pretty_evaluate_callback(error,
pretty_evaluate_callback(error, metaschema,
sourcemeta::jsontoolkit::empty_pointer))) {
log_verbose(options)
<< entry.first.string()
Expand Down
2 changes: 1 addition & 1 deletion src/command_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,7 @@ auto intelligence::jsonschema::cli::test(
schema_template,
get_data(test_case, entry.first.parent_path(), verbose),
sourcemeta::jsontoolkit::SchemaCompilerEvaluationMode::Fast,
pretty_evaluate_callback(error, {"$ref"}))};
pretty_evaluate_callback(error, schema.value(), {"$ref"}))};

std::ostringstream test_case_description;
if (test_case.defines("description")) {
Expand Down
11 changes: 7 additions & 4 deletions src/command_validate.cc
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ auto intelligence::jsonschema::cli::validate(
auto stream{sourcemeta::jsontoolkit::read_file(instance_path)};
try {
for (const auto &instance : sourcemeta::jsontoolkit::JSONL{stream}) {
index += 1;
std::ostringstream error;
bool subresult = true;
if (benchmark) {
Expand All @@ -88,7 +89,7 @@ auto intelligence::jsonschema::cli::validate(
schema_template, instance,
sourcemeta::jsontoolkit::SchemaCompilerEvaluationMode::Fast,
pretty_evaluate_callback(
error, sourcemeta::jsontoolkit::empty_pointer));
error, instance, sourcemeta::jsontoolkit::empty_pointer));
}

if (subresult) {
Expand All @@ -110,13 +111,15 @@ auto intelligence::jsonschema::cli::validate(
result = false;
break;
}

index += 1;
}
} catch (const sourcemeta::jsontoolkit::ParseError &error) {
// For producing better error messages
throw sourcemeta::jsontoolkit::FileParseError(instance_path, error);
}

if (index == 0) {
log_verbose(options) << "warning: The JSONL file is empty\n";
}
} else {
const auto instance{sourcemeta::jsontoolkit::from_file(instance_path)};
std::ostringstream error;
Expand All @@ -139,7 +142,7 @@ auto intelligence::jsonschema::cli::validate(
subresult = sourcemeta::jsontoolkit::evaluate(
schema_template, instance,
sourcemeta::jsontoolkit::SchemaCompilerEvaluationMode::Fast,
pretty_evaluate_callback(error,
pretty_evaluate_callback(error, instance,
sourcemeta::jsontoolkit::empty_pointer));
}

Expand Down
12 changes: 8 additions & 4 deletions src/utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -176,23 +176,27 @@ auto parse_options(const std::span<const std::string> &arguments,
}

auto pretty_evaluate_callback(std::ostringstream &output,
const sourcemeta::jsontoolkit::JSON &instance,
const sourcemeta::jsontoolkit::Pointer &base)
-> sourcemeta::jsontoolkit::SchemaCompilerEvaluationCallback {
output << "error: Schema validation failure\n";
return [&output, &base](
return [&output, &instance, &base](
const sourcemeta::jsontoolkit::SchemaCompilerEvaluationType,
const bool result,
const sourcemeta::jsontoolkit::SchemaCompilerTemplate::value_type
&step,
const sourcemeta::jsontoolkit::Pointer &evaluate_path,
const sourcemeta::jsontoolkit::Pointer &instance_location,
const sourcemeta::jsontoolkit::JSON &,
const sourcemeta::jsontoolkit::JSON &) -> void {
const sourcemeta::jsontoolkit::JSON &annotation) -> void {
if (result) {
return;
}

output << " " << sourcemeta::jsontoolkit::describe(step) << "\n";
output << " "
<< sourcemeta::jsontoolkit::describe(result, step, evaluate_path,
instance_location, instance,
annotation)
<< "\n";
output << " at instance location \"";
sourcemeta::jsontoolkit::stringify(instance_location, output);
output << "\"\n";
Expand Down
1 change: 1 addition & 0 deletions src/utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ auto for_each_json(const std::vector<std::string> &arguments,
std::pair<std::filesystem::path, sourcemeta::jsontoolkit::JSON>>;

auto pretty_evaluate_callback(std::ostringstream &,
const sourcemeta::jsontoolkit::JSON &,
const sourcemeta::jsontoolkit::Pointer &)
-> sourcemeta::jsontoolkit::SchemaCompilerEvaluationCallback;

Expand Down
2 changes: 2 additions & 0 deletions test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,8 @@ add_jsonschema_test_unix(validate/fail_draft7)
add_jsonschema_test_unix(validate/fail_2019_09)
add_jsonschema_test_unix(validate/fail_2020_12)
add_jsonschema_test_unix(validate/pass_jsonl)
add_jsonschema_test_unix(validate/pass_jsonl_empty)
add_jsonschema_test_unix(validate/pass_jsonl_empty_verbose)
add_jsonschema_test_unix(validate/pass_jsonl_verbose)
add_jsonschema_test_unix(validate/fail_jsonl_invalid_entry)
add_jsonschema_test_unix(validate/fail_jsonl_one)
Expand Down
2 changes: 1 addition & 1 deletion test/test/fail_multi_resolve.sh
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ $(realpath "$TMP")/test.json:
2/4 FAIL First failure
error: Schema validation failure
The target document is expected to be of the given type
The value was expected to be of type string but it was of type object
at instance location ""
at evaluate path "/type"
Expand Down
2 changes: 1 addition & 1 deletion test/test/fail_multi_resolve_verbose.sh
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ $(realpath "$TMP")/test.json:
2/4 FAIL First failure
error: Schema validation failure
The target document is expected to be of the given type
The value was expected to be of type string but it was of type object
at instance location ""
at evaluate path "/type"
Expand Down
4 changes: 2 additions & 2 deletions test/test/fail_true_resolve_fragment.sh
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,10 @@ $(realpath "$TMP")/test.json:
1/1 FAIL Fail
error: Schema validation failure
The target document is expected to be of the given type
The value was expected to be of type string but it was of type object
at instance location ""
at evaluate path "/type"
Mark the current position of the evaluation process for future jumps
The object value was expected to validate against the statically referenced schema
at instance location ""
at evaluate path ""
EOF
Expand Down
2 changes: 1 addition & 1 deletion test/test/fail_true_single_resolve.sh
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ $(realpath "$TMP")/test.json:
2/3 FAIL Unexpected
error: Schema validation failure
The target document is expected to be of the given type
The value was expected to be of type string but it was of type object
at instance location ""
at evaluate path "/type"
EOF
Expand Down
2 changes: 1 addition & 1 deletion test/test/fail_true_single_resolve_verbose.sh
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ $(realpath "$TMP")/test.json:
2/3 FAIL Unexpected
error: Schema validation failure
The target document is expected to be of the given type
The value was expected to be of type string but it was of type object
at instance location ""
at evaluate path "/type"
Expand Down
4 changes: 2 additions & 2 deletions test/validate/fail_2019_09.sh
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,10 @@ test "$CODE" = "1" || exit 1
cat << EOF > "$TMP/expected.txt"
fail: $(realpath "$TMP")/instance.json
error: Schema validation failure
The target document is expected to be of the given type
The value was expected to be of type string but it was of type integer
at instance location "/foo"
at evaluate path "/properties/foo/type"
The target is expected to match all of the given assertions
The object value was expected to validate against the single defined property subschema
at instance location ""
at evaluate path "/properties"
EOF
Expand Down
4 changes: 2 additions & 2 deletions test/validate/fail_2020_12.sh
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,10 @@ test "$CODE" = "1" || exit 1
cat << EOF > "$TMP/expected.txt"
fail: $(realpath "$TMP")/instance.json
error: Schema validation failure
The target document is expected to be of the given type
The value was expected to be of type string but it was of type integer
at instance location "/foo"
at evaluate path "/properties/foo/type"
The target is expected to match all of the given assertions
The object value was expected to validate against the single defined property subschema
at instance location ""
at evaluate path "/properties"
EOF
Expand Down
4 changes: 2 additions & 2 deletions test/validate/fail_draft4.sh
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,10 @@ test "$CODE" = "1" || exit 1
cat << EOF > "$TMP/expected.txt"
fail: $(realpath "$TMP")/instance.json
error: Schema validation failure
The target document is expected to be of the given type
The value was expected to be of type string but it was of type integer
at instance location "/foo"
at evaluate path "/properties/foo/type"
The target is expected to match all of the given assertions
The object value was expected to validate against the single defined property subschema
at instance location ""
at evaluate path "/properties"
EOF
Expand Down
4 changes: 2 additions & 2 deletions test/validate/fail_draft6.sh
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,10 @@ test "$CODE" = "1" || exit 1
cat << EOF > "$TMP/expected.txt"
fail: $(realpath "$TMP")/instance.json
error: Schema validation failure
The target document is expected to be of the given type
The value was expected to be of type string but it was of type integer
at instance location "/foo"
at evaluate path "/properties/foo/type"
The target is expected to match all of the given assertions
The object value was expected to validate against the single defined property subschema
at instance location ""
at evaluate path "/properties"
EOF
Expand Down
4 changes: 2 additions & 2 deletions test/validate/fail_draft7.sh
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,10 @@ test "$CODE" = "1" || exit 1
cat << EOF > "$TMP/expected.txt"
fail: $(realpath "$TMP")/instance.json
error: Schema validation failure
The target document is expected to be of the given type
The value was expected to be of type string but it was of type integer
at instance location "/foo"
at evaluate path "/properties/foo/type"
The target is expected to match all of the given assertions
The object value was expected to validate against the single defined property subschema
at instance location ""
at evaluate path "/properties"
EOF
Expand Down
4 changes: 2 additions & 2 deletions test/validate/fail_jsonl_all.sh
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,14 @@ EOF
test "$CODE" = "1" || exit 1

cat << EOF > "$TMP/expected.txt"
fail: $(realpath "$TMP")/instance.jsonl (entry #0)
fail: $(realpath "$TMP")/instance.jsonl (entry #1)
{
"foo": 1
}
error: Schema validation failure
The target document is expected to be of the given type
The value was expected to be of type array but it was of type object
at instance location ""
at evaluate path "/type"
EOF
Expand Down
4 changes: 2 additions & 2 deletions test/validate/fail_jsonl_all_verbose.sh
Original file line number Diff line number Diff line change
Expand Up @@ -26,14 +26,14 @@ test "$CODE" = "1" || exit 1

cat << EOF > "$TMP/expected.txt"
Interpreting input as JSONL: $(realpath "$TMP")/instance.jsonl
fail: $(realpath "$TMP")/instance.jsonl (entry #0)
fail: $(realpath "$TMP")/instance.jsonl (entry #1)
{
"foo": 1
}
error: Schema validation failure
The target document is expected to be of the given type
The value was expected to be of type array but it was of type object
at instance location ""
at evaluate path "/type"
EOF
Expand Down
4 changes: 2 additions & 2 deletions test/validate/fail_jsonl_one.sh
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ EOF
test "$CODE" = "1" || exit 1

cat << EOF > "$TMP/expected.txt"
fail: $(realpath "$TMP")/instance.jsonl (entry #1)
fail: $(realpath "$TMP")/instance.jsonl (entry #2)
[
{
Expand All @@ -34,7 +34,7 @@ fail: $(realpath "$TMP")/instance.jsonl (entry #1)
]
error: Schema validation failure
The target document is expected to be of the given type
The value was expected to be of type object but it was of type array
at instance location ""
at evaluate path "/type"
EOF
Expand Down
6 changes: 3 additions & 3 deletions test/validate/fail_jsonl_one_verbose.sh
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,9 @@ test "$CODE" = "1" || exit 1

cat << EOF > "$TMP/expected.txt"
Interpreting input as JSONL: $(realpath "$TMP")/instance.jsonl
ok: $(realpath "$TMP")/instance.jsonl (entry #0)
ok: $(realpath "$TMP")/instance.jsonl (entry #1)
matches $(realpath "$TMP")/schema.json
fail: $(realpath "$TMP")/instance.jsonl (entry #1)
fail: $(realpath "$TMP")/instance.jsonl (entry #2)
[
{
Expand All @@ -37,7 +37,7 @@ fail: $(realpath "$TMP")/instance.jsonl (entry #1)
]
error: Schema validation failure
The target document is expected to be of the given type
The value was expected to be of type object but it was of type array
at instance location ""
at evaluate path "/type"
EOF
Expand Down
4 changes: 2 additions & 2 deletions test/validate/fail_many.sh
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,10 @@ test "$CODE" = "1" || exit 1
cat << EOF > "$TMP/expected.txt"
fail: $(realpath "$TMP")/instance_2.json
error: Schema validation failure
The target document is expected to be of the given type
The value was expected to be of type string but it was of type integer
at instance location "/foo"
at evaluate path "/properties/foo/type"
The target is expected to match all of the given assertions
The object value was expected to validate against the single defined property subschema
at instance location ""
at evaluate path "/properties"
EOF
Expand Down
4 changes: 2 additions & 2 deletions test/validate/fail_many_verbose.sh
Original file line number Diff line number Diff line change
Expand Up @@ -44,10 +44,10 @@ ok: $(realpath "$TMP")/instance_1.json
matches $(realpath "$TMP")/schema.json
fail: $(realpath "$TMP")/instance_2.json
error: Schema validation failure
The target document is expected to be of the given type
The value was expected to be of type string but it was of type integer
at instance location "/foo"
at evaluate path "/properties/foo/type"
The target is expected to match all of the given assertions
The object value was expected to validate against the single defined property subschema
at instance location ""
at evaluate path "/properties"
ok: $(realpath "$TMP")/instance_3.json
Expand Down
28 changes: 28 additions & 0 deletions test/validate/pass_jsonl_empty.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#!/bin/sh

# Runs the executable given as the first argument with the `validate`
# subcommand against an empty JSONL file and checks that it exits
# successfully without printing anything to stdout or stderr.

# Abort on the first failing command and on any unset variable
set -eu

# Scratch directory, removed again whenever the script exits
TMP="$(mktemp -d)"
cleanup() {
  rm -rf "$TMP"
}
trap cleanup EXIT

# The schema to validate against
cat > "$TMP/schema.json" << 'EOF'
{
"$schema": "http://json-schema.org/draft-04/schema#",
"properties": {
"foo": {
"type": "string"
}
}
}
EOF

# A JSONL dataset without a single entry
: > "$TMP/instance.jsonl"

# Collect both stdout and stderr into the same file
"$1" validate "$TMP/schema.json" "$TMP/instance.jsonl" > "$TMP/output.txt" 2>&1

# No output at all is expected
: > "$TMP/expected.txt"

diff "$TMP/output.txt" "$TMP/expected.txt"
Loading

0 comments on commit ef0c147

Please sign in to comment.