Skip to content

Commit

Permalink
[WIP] Gracefully handle validating empty JSONL datasets
Browse files Browse the repository at this point in the history
Signed-off-by: Juan Cruz Viotti <[email protected]>
  • Loading branch information
jviotti committed Aug 20, 2024
1 parent 8e86fdd commit 8601ffd
Show file tree
Hide file tree
Showing 9 changed files with 72 additions and 9 deletions.
7 changes: 5 additions & 2 deletions src/command_validate.cc
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ auto intelligence::jsonschema::cli::validate(
auto stream{sourcemeta::jsontoolkit::read_file(instance_path)};
try {
for (const auto &instance : sourcemeta::jsontoolkit::JSONL{stream}) {
index += 1;
std::ostringstream error;
bool subresult = true;
if (benchmark) {
Expand Down Expand Up @@ -110,13 +111,15 @@ auto intelligence::jsonschema::cli::validate(
result = false;
break;
}

index += 1;
}
} catch (const sourcemeta::jsontoolkit::ParseError &error) {
// For producing better error messages
throw sourcemeta::jsontoolkit::FileParseError(instance_path, error);
}

if (index == 0) {
log_verbose(options) << "warning: The JSONL file is empty\n";
}
} else {
const auto instance{sourcemeta::jsontoolkit::from_file(instance_path)};
std::ostringstream error;
Expand Down
2 changes: 2 additions & 0 deletions test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,8 @@ add_jsonschema_test_unix(validate/fail_draft7)
add_jsonschema_test_unix(validate/fail_2019_09)
add_jsonschema_test_unix(validate/fail_2020_12)
add_jsonschema_test_unix(validate/pass_jsonl)
add_jsonschema_test_unix(validate/pass_jsonl_empty)
add_jsonschema_test_unix(validate/pass_jsonl_empty_verbose)
add_jsonschema_test_unix(validate/pass_jsonl_verbose)
add_jsonschema_test_unix(validate/fail_jsonl_invalid_entry)
add_jsonschema_test_unix(validate/fail_jsonl_one)
Expand Down
2 changes: 1 addition & 1 deletion test/validate/fail_jsonl_all.sh
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ EOF
test "$CODE" = "1" || exit 1

cat << EOF > "$TMP/expected.txt"
fail: $(realpath "$TMP")/instance.jsonl (entry #0)
fail: $(realpath "$TMP")/instance.jsonl (entry #1)
{
"foo": 1
Expand Down
2 changes: 1 addition & 1 deletion test/validate/fail_jsonl_all_verbose.sh
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ test "$CODE" = "1" || exit 1

cat << EOF > "$TMP/expected.txt"
Interpreting input as JSONL: $(realpath "$TMP")/instance.jsonl
fail: $(realpath "$TMP")/instance.jsonl (entry #0)
fail: $(realpath "$TMP")/instance.jsonl (entry #1)
{
"foo": 1
Expand Down
2 changes: 1 addition & 1 deletion test/validate/fail_jsonl_one.sh
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ EOF
test "$CODE" = "1" || exit 1

cat << EOF > "$TMP/expected.txt"
fail: $(realpath "$TMP")/instance.jsonl (entry #1)
fail: $(realpath "$TMP")/instance.jsonl (entry #2)
[
{
Expand Down
4 changes: 2 additions & 2 deletions test/validate/fail_jsonl_one_verbose.sh
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,9 @@ test "$CODE" = "1" || exit 1

cat << EOF > "$TMP/expected.txt"
Interpreting input as JSONL: $(realpath "$TMP")/instance.jsonl
ok: $(realpath "$TMP")/instance.jsonl (entry #0)
ok: $(realpath "$TMP")/instance.jsonl (entry #1)
matches $(realpath "$TMP")/schema.json
fail: $(realpath "$TMP")/instance.jsonl (entry #1)
fail: $(realpath "$TMP")/instance.jsonl (entry #2)
[
{
Expand Down
28 changes: 28 additions & 0 deletions test/validate/pass_jsonl_empty.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#!/bin/sh

# Test: validating an empty JSONL dataset without --verbose.
# The CLI binary under test is passed as the first argument. The script fails
# if the CLI exits non-zero (errexit) or if it prints anything to stdout or
# stderr (the captured output is diffed against an empty file).

set -eu

TMP="$(mktemp -d)"
cleanup() {
  rm -rf "$TMP"
}
trap cleanup EXIT

SCHEMA="$TMP/schema.json"
INSTANCE="$TMP/instance.jsonl"
ACTUAL="$TMP/output.txt"
EXPECTED="$TMP/expected.txt"

# Quoted delimiter: "$schema" must reach the file literally, unexpanded.
cat > "$SCHEMA" << 'EOF'
{
"$schema": "http://json-schema.org/draft-04/schema#",
"properties": {
"foo": {
"type": "string"
}
}
}
EOF

# The dataset under test is a zero-byte JSONL file.
: > "$INSTANCE"

# Capture stdout and stderr together in a single file.
"$1" validate "$SCHEMA" "$INSTANCE" > "$ACTUAL" 2>&1

# No output at all is expected in non-verbose mode.
: > "$EXPECTED"

diff "$ACTUAL" "$EXPECTED"
30 changes: 30 additions & 0 deletions test/validate/pass_jsonl_empty_verbose.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#!/bin/sh

# Test: validating an empty JSONL dataset with --verbose.
# The CLI binary under test is passed as the first argument. The script fails
# if the CLI exits non-zero (errexit) or if its combined stdout/stderr differs
# from the two expected lines: the JSONL notice and the empty-file warning.

set -eu

TMP="$(mktemp -d)"
cleanup() {
  rm -rf "$TMP"
}
trap cleanup EXIT

SCHEMA="$TMP/schema.json"
INSTANCE="$TMP/instance.jsonl"
ACTUAL="$TMP/output.txt"
EXPECTED="$TMP/expected.txt"

# Quoted delimiter: "$schema" must reach the file literally, unexpanded.
cat > "$SCHEMA" << 'EOF'
{
"$schema": "http://json-schema.org/draft-04/schema#",
"properties": {
"foo": {
"type": "string"
}
}
}
EOF

# The dataset under test is a zero-byte JSONL file.
: > "$INSTANCE"

# Capture stdout and stderr together in a single file.
"$1" validate "$SCHEMA" "$INSTANCE" --verbose > "$ACTUAL" 2>&1

# The expected path is resolved with realpath, matching how the expected
# output has always been spelled in this test.
printf '%s\n' \
  "Interpreting input as JSONL: $(realpath "$TMP")/instance.jsonl" \
  "warning: The JSONL file is empty" > "$EXPECTED"

diff "$ACTUAL" "$EXPECTED"
4 changes: 2 additions & 2 deletions test/validate/pass_jsonl_verbose.sh
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,12 @@ EOF

cat << EOF > "$TMP/expected.txt"
Interpreting input as JSONL: $(realpath "$TMP")/instance.jsonl
ok: $(realpath "$TMP")/instance.jsonl (entry #0)
matches $(realpath "$TMP")/schema.json
ok: $(realpath "$TMP")/instance.jsonl (entry #1)
matches $(realpath "$TMP")/schema.json
ok: $(realpath "$TMP")/instance.jsonl (entry #2)
matches $(realpath "$TMP")/schema.json
ok: $(realpath "$TMP")/instance.jsonl (entry #3)
matches $(realpath "$TMP")/schema.json
EOF

diff "$TMP/output.txt" "$TMP/expected.txt"

0 comments on commit 8601ffd

Please sign in to comment.