Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Extend the validate command to validate JSONL datasets #131

Merged
merged 1 commit into from
Jul 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion cmake/FindJSONToolkit.cmake
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
if(NOT JSONToolkit_FOUND)
set(JSONTOOLKIT_INSTALL OFF CACHE BOOL "disable installation")
set(JSONTOOLKIT_JSONL OFF CACHE BOOL "disable JSONL support")
add_subdirectory("${PROJECT_SOURCE_DIR}/vendor/jsontoolkit")
set(JSONToolkit_FOUND ON)
endif()
14 changes: 7 additions & 7 deletions docs/validate.markdown
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,14 @@ Validating
> Draft 2020-12 soon.

```sh
jsonschema validate <schema.json> <instance.json> [--http/-h] [--verbose/-v]
[--resolve/-r <schemas-or-directories> ...]
jsonschema validate <schema.json> <instance.json|.jsonl> [--http/-h]
[--verbose/-v] [--resolve/-r <schemas-or-directories> ...]
```

The most popular use case of JSON Schema is to validate JSON documents. The
JSON Schema CLI offers a `validate` command to evaluate a JSON instance against
a JSON Schema, presenting human-friendly information on unsuccessful
validation.
JSON Schema CLI offers a `validate` command to evaluate either a JSON instance
or a JSONL dataset against a JSON Schema, presenting human-friendly information
on unsuccessful validation.

**If you want to validate that a schema adheres to its metaschema, use the
[`metaschema`](./metaschema.markdown) command instead.**
Expand Down Expand Up @@ -55,10 +55,10 @@ error: The target document is expected to be of the given type
jsonschema validate path/to/my/schema.json path/to/my/instance.json
```

### Validate a JSON Schema against its meta-schema
### Validate a JSONL dataset against a schema

```sh
jsonschema validate path/to/my/schema.json
jsonschema validate path/to/my/schema.json path/to/my/dataset.jsonl
```

### Validate a JSON instance enabling HTTP resolution
Expand Down
1 change: 1 addition & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ noa_add_default_options(PRIVATE jsonschema_cli)
set_target_properties(jsonschema_cli PROPERTIES OUTPUT_NAME jsonschema)
target_link_libraries(jsonschema_cli PRIVATE sourcemeta::jsontoolkit::uri)
target_link_libraries(jsonschema_cli PRIVATE sourcemeta::jsontoolkit::json)
target_link_libraries(jsonschema_cli PRIVATE sourcemeta::jsontoolkit::jsonl)
target_link_libraries(jsonschema_cli PRIVATE sourcemeta::jsontoolkit::jsonschema)
target_link_libraries(jsonschema_cli PRIVATE sourcemeta::hydra::httpclient)

Expand Down
69 changes: 56 additions & 13 deletions src/command_validate.cc
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#include <sourcemeta/jsontoolkit/json.h>
#include <sourcemeta/jsontoolkit/jsonl.h>
#include <sourcemeta/jsontoolkit/jsonschema.h>

#include <cstdlib> // EXIT_SUCCESS, EXIT_FAILURE
Expand Down Expand Up @@ -46,26 +47,68 @@ auto intelligence::jsonschema::cli::validate(
}

bool result{true};
const auto &instance_path{options.at("").at(1)};
const std::filesystem::path instance_path{options.at("").at(1)};
const auto schema_template{sourcemeta::jsontoolkit::compile(
schema, sourcemeta::jsontoolkit::default_schema_walker, custom_resolver,
sourcemeta::jsontoolkit::default_schema_compiler)};

const auto instance{sourcemeta::jsontoolkit::from_file(instance_path)};
if (instance_path.extension() == ".jsonl") {
log_verbose(options) << "Interpreting input as JSONL\n";
std::size_t index{0};

std::ostringstream error;
result = sourcemeta::jsontoolkit::evaluate(
schema_template, instance,
sourcemeta::jsontoolkit::SchemaCompilerEvaluationMode::Fast,
pretty_evaluate_callback(error, sourcemeta::jsontoolkit::empty_pointer));
auto stream{sourcemeta::jsontoolkit::read_file(instance_path)};
try {
for (const auto &instance : sourcemeta::jsontoolkit::JSONL{stream}) {
std::ostringstream error;
const auto subresult = sourcemeta::jsontoolkit::evaluate(
schema_template, instance,
sourcemeta::jsontoolkit::SchemaCompilerEvaluationMode::Fast,
pretty_evaluate_callback(error,
sourcemeta::jsontoolkit::empty_pointer));

if (result) {
log_verbose(options)
<< "ok: " << std::filesystem::weakly_canonical(instance_path).string()
<< "\n matches "
<< std::filesystem::weakly_canonical(schema_path).string() << "\n";
if (subresult) {
log_verbose(options)
<< "ok: "
<< std::filesystem::weakly_canonical(instance_path).string()
<< " (entry #" << index << ")"
<< "\n matches "
<< std::filesystem::weakly_canonical(schema_path).string()
<< "\n";
} else {
std::cerr << "fail: "
<< std::filesystem::weakly_canonical(instance_path).string()
<< " (entry #" << index << ")\n\n";
sourcemeta::jsontoolkit::prettify(instance, std::cerr);
std::cerr << "\n\n";
std::cerr << error.str();
result = false;
break;
}

index += 1;
}
} catch (const sourcemeta::jsontoolkit::ParseError &error) {
// For producing better error messages
throw sourcemeta::jsontoolkit::FileParseError(instance_path, error);
}
} else {
std::cerr << error.str();
const auto instance{sourcemeta::jsontoolkit::from_file(instance_path)};

std::ostringstream error;
result = sourcemeta::jsontoolkit::evaluate(
schema_template, instance,
sourcemeta::jsontoolkit::SchemaCompilerEvaluationMode::Fast,
pretty_evaluate_callback(error,
sourcemeta::jsontoolkit::empty_pointer));

if (result) {
log_verbose(options)
<< "ok: " << std::filesystem::weakly_canonical(instance_path).string()
<< "\n matches "
<< std::filesystem::weakly_canonical(schema_path).string() << "\n";
} else {
std::cerr << error.str();
}
}

return result ? EXIT_SUCCESS : EXIT_FAILURE;
Expand Down
2 changes: 1 addition & 1 deletion src/main.cc
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ Global Options:

Commands:

validate <schema.json> <instance.json> [--http/-h]
validate <schema.json> <instance.json|.jsonl> [--http/-h]

Validate an instance against the given schema.

Expand Down
7 changes: 7 additions & 0 deletions test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,13 @@ add_jsonschema_test_unix(validate/pass_draft7)
add_jsonschema_test_unix(validate/fail_draft4)
add_jsonschema_test_unix(validate/fail_draft6)
add_jsonschema_test_unix(validate/fail_draft7)
add_jsonschema_test_unix(validate/pass_jsonl)
add_jsonschema_test_unix(validate/pass_jsonl_verbose)
add_jsonschema_test_unix(validate/fail_jsonl_invalid_entry)
add_jsonschema_test_unix(validate/fail_jsonl_one)
add_jsonschema_test_unix(validate/fail_jsonl_one_verbose)
add_jsonschema_test_unix(validate/fail_jsonl_all)
add_jsonschema_test_unix(validate/fail_jsonl_all_verbose)

# Test
add_jsonschema_test_unix(test/fail_true_single_resolve)
Expand Down
40 changes: 40 additions & 0 deletions test/validate/fail_jsonl_all.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#!/bin/sh

# Test: every entry of a JSONL dataset violates the schema.
# Expects `validate` to exit with code 1 and to write a failure report for
# entry #0 (and nothing else) to stderr.
# $1 is the program under test; it is invoked below as `"$1" validate ...`.

# Abort on any unhandled command failure and on use of unset variables
set -o errexit
set -o nounset

# Scratch directory for fixtures and captured output, removed on exit
TMP="$(mktemp -d)"
clean() { rm -rf "$TMP"; }
trap clean EXIT

# Schema fixture: instances must be arrays
cat << 'EOF' > "$TMP/schema.json"
{
"$schema": "http://json-schema.org/draft-04/schema#",
"type": "array"
}
EOF

# Dataset fixture: three objects, so none of them matches the schema
cat << 'EOF' > "$TMP/instance.jsonl"
{ "foo": 1 }
{ "foo": 2 }
{ "foo": 3 }
EOF

# Record the exit code on both the success and the failure branch so that
# errexit does not terminate the script when the command fails
"$1" validate "$TMP/schema.json" "$TMP/instance.jsonl" 2>"$TMP/stderr.txt" \
&& CODE="$?" || CODE="$?"
test "$CODE" = "1" || exit 1

# The delimiter is unquoted here so that $(realpath "$TMP") is expanded
# into the expected absolute path
cat << EOF > "$TMP/expected.txt"
fail: $(realpath "$TMP")/instance.jsonl (entry #0)

{
"foo": 1
}

error: Schema validation failure
The target document is expected to be of the given type
at instance location ""
at evaluate path "/type"
EOF

# Captured stderr must match the expectation exactly
diff "$TMP/stderr.txt" "$TMP/expected.txt"
41 changes: 41 additions & 0 deletions test/validate/fail_jsonl_all_verbose.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
#!/bin/sh

# Test: every entry of a JSONL dataset violates the schema, with --verbose.
# Expects `validate` to exit with code 1 and stderr to contain the
# "Interpreting input as JSONL" notice followed by a failure report for
# entry #0 only.
# $1 is the program under test; it is invoked below as `"$1" validate ...`.

# Abort on any unhandled command failure and on use of unset variables
set -o errexit
set -o nounset

# Scratch directory for fixtures and captured output, removed on exit
TMP="$(mktemp -d)"
clean() { rm -rf "$TMP"; }
trap clean EXIT

# Schema fixture: instances must be arrays
cat << 'EOF' > "$TMP/schema.json"
{
"$schema": "http://json-schema.org/draft-04/schema#",
"type": "array"
}
EOF

# Dataset fixture: three objects, so none of them matches the schema
cat << 'EOF' > "$TMP/instance.jsonl"
{ "foo": 1 }
{ "foo": 2 }
{ "foo": 3 }
EOF

# Record the exit code on both the success and the failure branch so that
# errexit does not terminate the script when the command fails
"$1" validate "$TMP/schema.json" "$TMP/instance.jsonl" --verbose 2>"$TMP/stderr.txt" \
&& CODE="$?" || CODE="$?"
test "$CODE" = "1" || exit 1

# The delimiter is unquoted here so that $(realpath "$TMP") is expanded
# into the expected absolute path
cat << EOF > "$TMP/expected.txt"
Interpreting input as JSONL
fail: $(realpath "$TMP")/instance.jsonl (entry #0)

{
"foo": 1
}

error: Schema validation failure
The target document is expected to be of the given type
at instance location ""
at evaluate path "/type"
EOF

# Captured stderr must match the expectation exactly
diff "$TMP/stderr.txt" "$TMP/expected.txt"
32 changes: 32 additions & 0 deletions test/validate/fail_jsonl_invalid_entry.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
#!/bin/sh

# Test: a JSONL dataset whose second line is not valid JSON.
# Expects `validate` to exit with code 1 and to report a parse error with
# the line/column position and the dataset path on stderr.
# $1 is the program under test; it is invoked below as `"$1" validate ...`.

# Abort on any unhandled command failure and on use of unset variables
set -o errexit
set -o nounset

# Scratch directory for fixtures and captured output, removed on exit
TMP="$(mktemp -d)"
clean() { rm -rf "$TMP"; }
trap clean EXIT

# Schema fixture: instances must be objects
cat << 'EOF' > "$TMP/schema.json"
{
"$schema": "http://json-schema.org/draft-04/schema#",
"type": "object"
}
EOF

# Dataset fixture: the second line lacks the ':' between key and value
cat << 'EOF' > "$TMP/instance.jsonl"
{ "foo": "first" }
{ "foo" "second" }
{ "foo": "third" }
EOF

# Record the exit code on both the success and the failure branch so that
# errexit does not terminate the script when the command fails
"$1" validate "$TMP/schema.json" "$TMP/instance.jsonl" 2>"$TMP/stderr.txt" \
&& CODE="$?" || CODE="$?"
test "$CODE" = "1" || exit 1

# The delimiter is unquoted here so that $(realpath "$TMP") is expanded
# into the expected absolute path
cat << EOF > "$TMP/expected.txt"
error: Failed to parse the JSON document at line 2 and column 10
$(realpath "$TMP")/instance.jsonl
EOF

# Captured stderr must match the expectation exactly
diff "$TMP/stderr.txt" "$TMP/expected.txt"
42 changes: 42 additions & 0 deletions test/validate/fail_jsonl_one.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#!/bin/sh

# Test: exactly one entry (the second) of a JSONL dataset violates the schema.
# Expects `validate` to exit with code 1 and to write a failure report for
# entry #1 (and nothing else) to stderr.
# $1 is the program under test; it is invoked below as `"$1" validate ...`.

# Abort on any unhandled command failure and on use of unset variables
set -o errexit
set -o nounset

# Scratch directory for fixtures and captured output, removed on exit
TMP="$(mktemp -d)"
clean() { rm -rf "$TMP"; }
trap clean EXIT

# Schema fixture: instances must be objects
cat << 'EOF' > "$TMP/schema.json"
{
"$schema": "http://json-schema.org/draft-04/schema#",
"type": "object"
}
EOF

# Dataset fixture: the middle entry is an array, the other two are objects
cat << 'EOF' > "$TMP/instance.jsonl"
{ "foo": 1 }
[ { "foo": 2 } ]
{ "foo": 3 }
EOF

# Record the exit code on both the success and the failure branch so that
# errexit does not terminate the script when the command fails
"$1" validate "$TMP/schema.json" "$TMP/instance.jsonl" 2>"$TMP/stderr.txt" \
&& CODE="$?" || CODE="$?"
test "$CODE" = "1" || exit 1

# The delimiter is unquoted here so that $(realpath "$TMP") is expanded
# into the expected absolute path
cat << EOF > "$TMP/expected.txt"
fail: $(realpath "$TMP")/instance.jsonl (entry #1)

[
{
"foo": 2
}
]

error: Schema validation failure
The target document is expected to be of the given type
at instance location ""
at evaluate path "/type"
EOF

# Captured stderr must match the expectation exactly
diff "$TMP/stderr.txt" "$TMP/expected.txt"
45 changes: 45 additions & 0 deletions test/validate/fail_jsonl_one_verbose.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
#!/bin/sh

# Test: exactly one entry (the second) of a JSONL dataset violates the
# schema, with --verbose.
# Expects `validate` to exit with code 1 and stderr to contain the
# "Interpreting input as JSONL" notice, an "ok" line for entry #0, and a
# failure report for entry #1; entry #2 does not appear in the expectation.
# $1 is the program under test; it is invoked below as `"$1" validate ...`.

# Abort on any unhandled command failure and on use of unset variables
set -o errexit
set -o nounset

# Scratch directory for fixtures and captured output, removed on exit
TMP="$(mktemp -d)"
clean() { rm -rf "$TMP"; }
trap clean EXIT

# Schema fixture: instances must be objects
cat << 'EOF' > "$TMP/schema.json"
{
"$schema": "http://json-schema.org/draft-04/schema#",
"type": "object"
}
EOF

# Dataset fixture: the middle entry is an array, the other two are objects
cat << 'EOF' > "$TMP/instance.jsonl"
{ "foo": 1 }
[ { "foo": 2 } ]
{ "foo": 3 }
EOF

# Record the exit code on both the success and the failure branch so that
# errexit does not terminate the script when the command fails
"$1" validate "$TMP/schema.json" "$TMP/instance.jsonl" --verbose 2>"$TMP/stderr.txt" \
&& CODE="$?" || CODE="$?"
test "$CODE" = "1" || exit 1

# The delimiter is unquoted here so that $(realpath "$TMP") is expanded
# into the expected absolute paths
cat << EOF > "$TMP/expected.txt"
Interpreting input as JSONL
ok: $(realpath "$TMP")/instance.jsonl (entry #0)
matches $(realpath "$TMP")/schema.json
fail: $(realpath "$TMP")/instance.jsonl (entry #1)

[
{
"foo": 2
}
]

error: Schema validation failure
The target document is expected to be of the given type
at instance location ""
at evaluate path "/type"
EOF

# Captured stderr must match the expectation exactly
diff "$TMP/stderr.txt" "$TMP/expected.txt"
32 changes: 32 additions & 0 deletions test/validate/pass_jsonl.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
#!/bin/sh

# Test: every entry of a JSONL dataset matches the schema.
# Expects `validate` to succeed and to print nothing on stdout or stderr.
# $1 is the program under test; it is invoked below as `"$1" validate ...`.

# Abort on any unhandled command failure and on use of unset variables.
# Because of errexit, a non-zero exit from the validate command below
# fails this script.
set -o errexit
set -o nounset

# Scratch directory for fixtures and captured output, removed on exit
TMP="$(mktemp -d)"
clean() { rm -rf "$TMP"; }
trap clean EXIT

# Schema fixture: the "foo" property, when present, must be a string
cat << 'EOF' > "$TMP/schema.json"
{
"$schema": "http://json-schema.org/draft-04/schema#",
"properties": {
"foo": {
"type": "string"
}
}
}
EOF

# Dataset fixture: three objects whose "foo" values are all strings
cat << 'EOF' > "$TMP/instance.jsonl"
{ "foo": "first" }
{ "foo": "second" }
{ "foo": "third" }
EOF

# stderr goes to the file first, then stdout is pointed at stderr, so both
# streams are captured in output.txt
"$1" validate "$TMP/schema.json" "$TMP/instance.jsonl" 2> "$TMP/output.txt" 1>&2

# Empty expectation: the command must be silent when not in verbose mode
cat << EOF > "$TMP/expected.txt"
EOF

# Captured output must match the (empty) expectation exactly
diff "$TMP/output.txt" "$TMP/expected.txt"
Loading