From 8601ffd0567b3b85dd2df85d90bcf58258ced1d9 Mon Sep 17 00:00:00 2001 From: Juan Cruz Viotti Date: Tue, 20 Aug 2024 11:43:34 -0400 Subject: [PATCH] [WIP] Gracefully handle validating empty JSONL datasets Signed-off-by: Juan Cruz Viotti --- src/command_validate.cc | 7 ++++-- test/CMakeLists.txt | 2 ++ test/validate/fail_jsonl_all.sh | 2 +- test/validate/fail_jsonl_all_verbose.sh | 2 +- test/validate/fail_jsonl_one.sh | 2 +- test/validate/fail_jsonl_one_verbose.sh | 4 +-- test/validate/pass_jsonl_empty.sh | 28 +++++++++++++++++++++ test/validate/pass_jsonl_empty_verbose.sh | 30 +++++++++++++++++++++++ test/validate/pass_jsonl_verbose.sh | 4 +-- 9 files changed, 72 insertions(+), 9 deletions(-) create mode 100755 test/validate/pass_jsonl_empty.sh create mode 100755 test/validate/pass_jsonl_empty_verbose.sh diff --git a/src/command_validate.cc b/src/command_validate.cc index 0a4bb203..3405c153 100644 --- a/src/command_validate.cc +++ b/src/command_validate.cc @@ -67,6 +67,7 @@ auto intelligence::jsonschema::cli::validate( auto stream{sourcemeta::jsontoolkit::read_file(instance_path)}; try { for (const auto &instance : sourcemeta::jsontoolkit::JSONL{stream}) { + index += 1; std::ostringstream error; bool subresult = true; if (benchmark) { @@ -110,13 +111,15 @@ auto intelligence::jsonschema::cli::validate( result = false; break; } - - index += 1; } } catch (const sourcemeta::jsontoolkit::ParseError &error) { // For producing better error messages throw sourcemeta::jsontoolkit::FileParseError(instance_path, error); } + + if (index == 0) { + log_verbose(options) << "warning: The JSONL file is empty\n"; + } } else { const auto instance{sourcemeta::jsontoolkit::from_file(instance_path)}; std::ostringstream error; diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 829295ea..4da6ec16 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -66,6 +66,8 @@ add_jsonschema_test_unix(validate/fail_draft7) add_jsonschema_test_unix(validate/fail_2019_09) add_jsonschema_test_unix(validate/fail_2020_12) add_jsonschema_test_unix(validate/pass_jsonl) +add_jsonschema_test_unix(validate/pass_jsonl_empty) +add_jsonschema_test_unix(validate/pass_jsonl_empty_verbose) add_jsonschema_test_unix(validate/pass_jsonl_verbose) add_jsonschema_test_unix(validate/fail_jsonl_invalid_entry) add_jsonschema_test_unix(validate/fail_jsonl_one) diff --git a/test/validate/fail_jsonl_all.sh b/test/validate/fail_jsonl_all.sh index aa5771b1..2567f660 100755 --- a/test/validate/fail_jsonl_all.sh +++ b/test/validate/fail_jsonl_all.sh @@ -25,7 +25,7 @@ EOF test "$CODE" = "1" || exit 1 cat << EOF > "$TMP/expected.txt" -fail: $(realpath "$TMP")/instance.jsonl (entry #0) +fail: $(realpath "$TMP")/instance.jsonl (entry #1) { "foo": 1 diff --git a/test/validate/fail_jsonl_all_verbose.sh b/test/validate/fail_jsonl_all_verbose.sh index 2a96bc0a..5a7b570a 100755 --- a/test/validate/fail_jsonl_all_verbose.sh +++ b/test/validate/fail_jsonl_all_verbose.sh @@ -26,7 +26,7 @@ test "$CODE" = "1" || exit 1 cat << EOF > "$TMP/expected.txt" Interpreting input as JSONL: $(realpath "$TMP")/instance.jsonl -fail: $(realpath "$TMP")/instance.jsonl (entry #0) +fail: $(realpath "$TMP")/instance.jsonl (entry #1) { "foo": 1 diff --git a/test/validate/fail_jsonl_one.sh b/test/validate/fail_jsonl_one.sh index 9c2f407b..bc219c5e 100755 --- a/test/validate/fail_jsonl_one.sh +++ b/test/validate/fail_jsonl_one.sh @@ -25,7 +25,7 @@ EOF test "$CODE" = "1" || exit 1 cat << EOF > "$TMP/expected.txt" -fail: $(realpath "$TMP")/instance.jsonl (entry #1) +fail: $(realpath "$TMP")/instance.jsonl (entry #2) [ { diff --git a/test/validate/fail_jsonl_one_verbose.sh b/test/validate/fail_jsonl_one_verbose.sh index 132114e7..7c3fdb31 100755 --- a/test/validate/fail_jsonl_one_verbose.sh +++ b/test/validate/fail_jsonl_one_verbose.sh @@ -26,9 +26,9 @@ test "$CODE" = "1" || exit 1 cat << EOF > "$TMP/expected.txt" Interpreting input as JSONL: $(realpath "$TMP")/instance.jsonl -ok: $(realpath "$TMP")/instance.jsonl (entry #0) +ok: $(realpath "$TMP")/instance.jsonl (entry #1) matches $(realpath "$TMP")/schema.json -fail: $(realpath "$TMP")/instance.jsonl (entry #1) +fail: $(realpath "$TMP")/instance.jsonl (entry #2) [ { diff --git a/test/validate/pass_jsonl_empty.sh b/test/validate/pass_jsonl_empty.sh new file mode 100755 index 00000000..15526a96 --- /dev/null +++ b/test/validate/pass_jsonl_empty.sh @@ -0,0 +1,28 @@ +#!/bin/sh + +set -o errexit +set -o nounset + +TMP="$(mktemp -d)" +clean() { rm -rf "$TMP"; } +trap clean EXIT + +cat << 'EOF' > "$TMP/schema.json" +{ + "$schema": "http://json-schema.org/draft-04/schema#", + "properties": { + "foo": { + "type": "string" + } + } +} +EOF + +touch "$TMP/instance.jsonl" + +"$1" validate "$TMP/schema.json" "$TMP/instance.jsonl" 2> "$TMP/output.txt" 1>&2 + +cat << EOF > "$TMP/expected.txt" +EOF + +diff "$TMP/output.txt" "$TMP/expected.txt" diff --git a/test/validate/pass_jsonl_empty_verbose.sh b/test/validate/pass_jsonl_empty_verbose.sh new file mode 100755 index 00000000..2fd4ecd9 --- /dev/null +++ b/test/validate/pass_jsonl_empty_verbose.sh @@ -0,0 +1,30 @@ +#!/bin/sh + +set -o errexit +set -o nounset + +TMP="$(mktemp -d)" +clean() { rm -rf "$TMP"; } +trap clean EXIT + +cat << 'EOF' > "$TMP/schema.json" +{ + "$schema": "http://json-schema.org/draft-04/schema#", + "properties": { + "foo": { + "type": "string" + } + } +} +EOF + +touch "$TMP/instance.jsonl" + +"$1" validate "$TMP/schema.json" "$TMP/instance.jsonl" --verbose 2> "$TMP/output.txt" 1>&2 + +cat << EOF > "$TMP/expected.txt" +Interpreting input as JSONL: $(realpath "$TMP")/instance.jsonl +warning: The JSONL file is empty +EOF + +diff "$TMP/output.txt" "$TMP/expected.txt" diff --git a/test/validate/pass_jsonl_verbose.sh b/test/validate/pass_jsonl_verbose.sh index 0b20dcb9..e94e0735 100755 --- a/test/validate/pass_jsonl_verbose.sh +++ b/test/validate/pass_jsonl_verbose.sh @@ -28,12 +28,12 @@ EOF cat << EOF > "$TMP/expected.txt" Interpreting input as JSONL: $(realpath "$TMP")/instance.jsonl -ok: $(realpath "$TMP")/instance.jsonl (entry #0) - matches $(realpath "$TMP")/schema.json ok: $(realpath "$TMP")/instance.jsonl (entry #1) matches $(realpath "$TMP")/schema.json ok: $(realpath "$TMP")/instance.jsonl (entry #2) matches $(realpath "$TMP")/schema.json +ok: $(realpath "$TMP")/instance.jsonl (entry #3) + matches $(realpath "$TMP")/schema.json EOF diff "$TMP/output.txt" "$TMP/expected.txt"