From 6dc5bbfc775f97602b8401d766606581a3836029 Mon Sep 17 00:00:00 2001
From: Juan Cruz Viotti <jv@jviotti.com>
Date: Mon, 15 Jul 2024 14:28:00 -0400
Subject: [PATCH] Support passing a schema URI with a fragment as a test target

See: https://github.com/Intelligence-AI/jsonschema/discussions/110
Signed-off-by: Juan Cruz Viotti <jv@jviotti.com>
---
 docs/test.markdown                            |  5 ++
 src/command_metaschema.cc                     |  3 +-
 src/command_test.cc                           | 46 ++++++++++++++---
 src/command_validate.cc                       |  2 +-
 src/utils.cc                                  |  8 +--
 src/utils.h                                   |  3 +-
 test/CMakeLists.txt                           |  5 ++
 test/test/fail_true_resolve_fragment.sh       | 51 +++++++++++++++++++
 test/test/fail_unresolvable_anchor.sh         | 48 +++++++++++++++++
 test/test/fail_unresolvable_fragment.sh       | 48 +++++++++++++++++
 test/test/pass_single_resolve_fragment.sh     | 45 ++++++++++++++++
 .../pass_single_resolve_fragment_verbose.sh   | 48 +++++++++++++++++
 12 files changed, 300 insertions(+), 12 deletions(-)
 create mode 100755 test/test/fail_true_resolve_fragment.sh
 create mode 100755 test/test/fail_unresolvable_anchor.sh
 create mode 100755 test/test/fail_unresolvable_fragment.sh
 create mode 100755 test/test/pass_single_resolve_fragment.sh
 create mode 100755 test/test/pass_single_resolve_fragment_verbose.sh
diff --git a/docs/test.markdown b/docs/test.markdown
index a6333c3c..b77bed87 100644
--- a/docs/test.markdown
+++ b/docs/test.markdown
@@ -48,6 +48,11 @@ To create a test definition, you must write JSON documents that look like this:
 }
 ```
 
+> [!TIP]
+> You can test different portions of a large schema by setting the `target`
+> property to a schema URI whose fragment is a JSON Pointer. For example:
+> `https://example.com/my-big-schema#/definitions/foo`.
+
 Assuming this file is saved as `test/draft4.json`, you can run it as follows:
 
 ```sh
diff --git a/src/command_metaschema.cc b/src/command_metaschema.cc
index 0b8f1ab6..15fd8480 100644
--- a/src/command_metaschema.cc
+++ b/src/command_metaschema.cc
@@ -43,7 +43,8 @@ auto intelligence::jsonschema::cli::metaschema(
     if (sourcemeta::jsontoolkit::evaluate(
             cache.at(dialect.value()), entry.second,
             sourcemeta::jsontoolkit::SchemaCompilerEvaluationMode::Fast,
-            pretty_evaluate_callback(error))) {
+            pretty_evaluate_callback(error,
+                                     sourcemeta::jsontoolkit::empty_pointer))) {
       log_verbose(options)
           << entry.first.string()
           << ": The schema is valid with respect to its metaschema\n";
diff --git a/src/command_test.cc b/src/command_test.cc
index dd3c39f3..ec28a8c4 100644
--- a/src/command_test.cc
+++ b/src/command_test.cc
@@ -1,5 +1,6 @@
 #include <sourcemeta/jsontoolkit/json.h>
 #include <sourcemeta/jsontoolkit/jsonschema.h>
+#include <sourcemeta/jsontoolkit/uri.h>
 
 #include <cstdlib>  // EXIT_SUCCESS, EXIT_FAILURE
 #include <iostream> // std::cerr, std::cout
@@ -7,6 +8,31 @@
 #include "command.h"
 #include "utils.h"
 
+static auto
+get_schema_object(const sourcemeta::jsontoolkit::URI &identifier,
+                  const sourcemeta::jsontoolkit::SchemaResolver &resolver)
+    -> std::optional<sourcemeta::jsontoolkit::JSON> {
+  const auto schema{resolver(identifier.recompose()).get()};
+  if (schema.has_value()) {
+    return schema;
+  }
+
+  // The resolver did not recognise the identifier as is. If it carries a
+  // fragment (e.g. a JSON Pointer), avoid re-implementing JSON Schema
+  // referencing here: return an artificial wrapper schema that points at the
+  // identifier through `$ref` and let schema compilation resolve it.
+  if (identifier.fragment().has_value()) {
+    auto result{sourcemeta::jsontoolkit::JSON::make_object()};
+    result.assign("$schema", sourcemeta::jsontoolkit::JSON{
+                                 "http://json-schema.org/draft-07/schema#"});
+    result.assign("$ref",
+                  sourcemeta::jsontoolkit::JSON{identifier.recompose()});
+    return result;
+  }
+
+  return std::nullopt;
+}
+
 auto intelligence::jsonschema::cli::test(
     const std::span<const std::string> &arguments) -> int {
   const auto options{parse_options(arguments, {"h", "http"})};
@@ -65,12 +91,11 @@ auto intelligence::jsonschema::cli::test(
       return EXIT_FAILURE;
     }
 
-    const auto schema{test_resolver(test.at("target").to_string()).get()};
+    sourcemeta::jsontoolkit::URI schema_uri{test.at("target").to_string()};
+    schema_uri.canonicalize();
+    const auto schema{get_schema_object(schema_uri, test_resolver)};
     if (!schema.has_value()) {
-      if (verbose) {
-        std::cout << "\n";
-      }
-
+      std::cout << "\n";
       throw sourcemeta::jsontoolkit::SchemaResolutionError(
           test.at("target").to_string(), "Could not resolve schema under test");
     }
@@ -90,6 +115,15 @@ auto intelligence::jsonschema::cli::test(
       schema_template = sourcemeta::jsontoolkit::compile(
           schema.value(), sourcemeta::jsontoolkit::default_schema_walker,
           test_resolver, sourcemeta::jsontoolkit::default_schema_compiler);
+    } catch (const sourcemeta::jsontoolkit::SchemaReferenceError &error) {
+      // End the current output line first, like the catch-all handler below
+      std::cout << "\n";
+      if (error.location().empty() && error.id() == schema_uri.recompose()) {
+        throw sourcemeta::jsontoolkit::SchemaResolutionError(
+            test.at("target").to_string(),
+            "Could not resolve schema under test");
+      }
+      throw;
     } catch (...) {
       std::cout << "\n";
       throw;
@@ -157,7 +191,7 @@ auto intelligence::jsonschema::cli::test(
       const auto case_result{sourcemeta::jsontoolkit::evaluate(
           schema_template, test_case.at("data"),
           sourcemeta::jsontoolkit::SchemaCompilerEvaluationMode::Fast,
-          pretty_evaluate_callback(error))};
+          pretty_evaluate_callback(error, {"$ref"}))};
 
       std::ostringstream test_case_description;
       if (test_case.defines("description")) {
diff --git a/src/command_validate.cc b/src/command_validate.cc
index ec941b13..2a6d0a14 100644
--- a/src/command_validate.cc
+++ b/src/command_validate.cc
@@ -56,7 +56,7 @@ auto intelligence::jsonschema::cli::validate(
   result = sourcemeta::jsontoolkit::evaluate(
       schema_template, instance,
       sourcemeta::jsontoolkit::SchemaCompilerEvaluationMode::Fast,
-      pretty_evaluate_callback(error));
+      pretty_evaluate_callback(error, sourcemeta::jsontoolkit::empty_pointer));
 
   if (result) {
     log_verbose(options)
diff --git a/src/utils.cc b/src/utils.cc
index a5b66d20..18f35f04 100644
--- a/src/utils.cc
+++ b/src/utils.cc
@@ -175,10 +175,11 @@ auto parse_options(const std::span<const std::string> &arguments,
   return options;
 }
 
-auto pretty_evaluate_callback(std::ostringstream &output)
+auto pretty_evaluate_callback(std::ostringstream &output,
+                              const sourcemeta::jsontoolkit::Pointer &base)
     -> sourcemeta::jsontoolkit::SchemaCompilerEvaluationCallback {
   output << "error: Schema validation failure\n";
-  return [&output](
+  return [&output, base](
              const sourcemeta::jsontoolkit::SchemaCompilerEvaluationType,
              const bool result,
              const sourcemeta::jsontoolkit::SchemaCompilerTemplate::value_type
@@ -197,7 +198,8 @@ auto pretty_evaluate_callback(std::ostringstream &output)
     output << "\"\n";
 
     output << "    at evaluate path \"";
-    sourcemeta::jsontoolkit::stringify(evaluate_path, output);
+    sourcemeta::jsontoolkit::stringify(evaluate_path.resolve_from(base),
+                                       output);
     output << "\"\n";
   };
 }
diff --git a/src/utils.h b/src/utils.h
index 352b93cd..2fac8441 100644
--- a/src/utils.h
+++ b/src/utils.h
@@ -33,7 +33,8 @@ auto for_each_json(const std::vector<std::string> &arguments,
     -> std::vector<
         std::pair<std::filesystem::path, sourcemeta::jsontoolkit::JSON>>;
 
-auto pretty_evaluate_callback(std::ostringstream &)
+auto pretty_evaluate_callback(std::ostringstream &,
+                              const sourcemeta::jsontoolkit::Pointer &)
     -> sourcemeta::jsontoolkit::SchemaCompilerEvaluationCallback;
 
 auto resolver(const std::map<std::string, std::vector<std::string>> &options,
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 4bc35a44..bc3ec1fc 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -71,6 +71,8 @@ add_jsonschema_test_unix(test/fail_false_single_resolve_verbose)
 add_jsonschema_test_unix(test/fail_multi_resolve)
 add_jsonschema_test_unix(test/fail_multi_resolve_verbose)
 add_jsonschema_test_unix(test/fail_unresolvable)
+add_jsonschema_test_unix(test/fail_unresolvable_fragment)
+add_jsonschema_test_unix(test/fail_unresolvable_anchor)
 add_jsonschema_test_unix(test/fail_unsupported)
 add_jsonschema_test_unix(test/fail_unsupported_verbose)
 add_jsonschema_test_unix(test/fail_not_object)
@@ -83,10 +85,13 @@ add_jsonschema_test_unix(test/fail_test_case_no_data)
 add_jsonschema_test_unix(test/fail_test_case_non_string_description)
 add_jsonschema_test_unix(test/fail_test_case_no_valid)
 add_jsonschema_test_unix(test/fail_test_case_non_boolean_valid)
+add_jsonschema_test_unix(test/fail_true_resolve_fragment)
 add_jsonschema_test_unix(test/pass_empty)
 add_jsonschema_test_unix(test/pass_empty_verbose)
 add_jsonschema_test_unix(test/pass_single_resolve)
 add_jsonschema_test_unix(test/pass_single_resolve_verbose)
+add_jsonschema_test_unix(test/pass_single_resolve_fragment)
+add_jsonschema_test_unix(test/pass_single_resolve_fragment_verbose)
 add_jsonschema_test_unix(test/pass_single_comment_verbose)
 add_jsonschema_test_unix(test/pass_single_no_description_verbose)
 add_jsonschema_test_unix(test/pass_single_no_test_description_verbose)
diff --git a/test/test/fail_true_resolve_fragment.sh b/test/test/fail_true_resolve_fragment.sh
new file mode 100755
index 00000000..9cb3dc1f
--- /dev/null
+++ b/test/test/fail_true_resolve_fragment.sh
@@ -0,0 +1,51 @@
+#!/bin/sh
+
+set -o errexit
+set -o nounset
+
+TMP="$(mktemp -d)"
+clean() { rm -rf "$TMP"; }
+trap clean EXIT
+
+cat << 'EOF' > "$TMP/schema.json"
+{
+  "id": "https://example.com",
+  "$schema": "http://json-schema.org/draft-04/schema#",
+  "definitions": {
+    "foo": { "type": "string" },
+    "bar": { "type": "integer" }
+  }
+}
+EOF
+
+cat << 'EOF' > "$TMP/test.json"
+{
+  "target": "https://example.com#/definitions/foo",
+  "tests": [
+    {
+      "description": "Fail",
+      "valid": true,
+      "data": 5
+    }
+  ]
+}
+EOF
+
+"$1" test "$TMP/test.json" --resolve "$TMP/schema.json" 1> "$TMP/output.txt" 2>&1 \
+  && CODE="$?" || CODE="$?"
+test "$CODE" = "1" || exit 1
+
+cat << EOF > "$TMP/expected.txt"
+$(realpath "$TMP")/test.json:
+  1/1 FAIL Fail
+
+error: Schema validation failure
+  The target document is expected to be of the given type
+    at instance location ""
+    at evaluate path "/type"
+  Mark the current position of the evaluation process for future jumps
+    at instance location ""
+    at evaluate path ""
+EOF
+
+diff "$TMP/output.txt" "$TMP/expected.txt"
diff --git a/test/test/fail_unresolvable_anchor.sh b/test/test/fail_unresolvable_anchor.sh
new file mode 100755
index 00000000..afad9c31
--- /dev/null
+++ b/test/test/fail_unresolvable_anchor.sh
@@ -0,0 +1,48 @@
+#!/bin/sh
+
+set -o errexit
+set -o nounset
+
+TMP="$(mktemp -d)"
+clean() { rm -rf "$TMP"; }
+trap clean EXIT
+
+cat << 'EOF' > "$TMP/schema.json"
+{
+  "$id": "https://example.com",
+  "$schema": "http://json-schema.org/draft-07/schema#",
+  "definitions": {
+    "foo": { "type": "string" },
+    "bar": { "type": "integer" }
+  }
+}
+EOF
+
+cat << 'EOF' > "$TMP/test.json"
+{
+  "target": "https://example.com#foo",
+  "tests": [
+    {
+      "valid": true,
+      "data": {}
+    },
+    {
+      "valid": true,
+      "data": { "type": 1 }
+    }
+  ]
+}
+EOF
+
+"$1" test "$TMP/test.json" --resolve "$TMP/schema.json" --verbose 1> "$TMP/output.txt" 2>&1 \
+  && CODE="$?" || CODE="$?"
+test "$CODE" = "1" || exit 1
+
+cat << EOF > "$TMP/expected.txt"
+Importing schema into the resolution context: $(realpath "$TMP")/schema.json
+$(realpath "$TMP")/test.json:
+error: Could not resolve schema under test
+  at https://example.com#foo
+EOF
+
+diff "$TMP/output.txt" "$TMP/expected.txt"
diff --git a/test/test/fail_unresolvable_fragment.sh b/test/test/fail_unresolvable_fragment.sh
new file mode 100755
index 00000000..76606439
--- /dev/null
+++ b/test/test/fail_unresolvable_fragment.sh
@@ -0,0 +1,48 @@
+#!/bin/sh
+
+set -o errexit
+set -o nounset
+
+TMP="$(mktemp -d)"
+clean() { rm -rf "$TMP"; }
+trap clean EXIT
+
+cat << 'EOF' > "$TMP/schema.json"
+{
+  "id": "https://example.com",
+  "$schema": "http://json-schema.org/draft-04/schema#",
+  "definitions": {
+    "foo": { "type": "string" },
+    "bar": { "type": "integer" }
+  }
+}
+EOF
+
+cat << 'EOF' > "$TMP/test.json"
+{
+  "target": "https://example.com#/foo",
+  "tests": [
+    {
+      "valid": true,
+      "data": {}
+    },
+    {
+      "valid": true,
+      "data": { "type": 1 }
+    }
+  ]
+}
+EOF
+
+"$1" test "$TMP/test.json" --resolve "$TMP/schema.json" --verbose 1> "$TMP/output.txt" 2>&1 \
+  && CODE="$?" || CODE="$?"
+test "$CODE" = "1" || exit 1
+
+cat << EOF > "$TMP/expected.txt"
+Importing schema into the resolution context: $(realpath "$TMP")/schema.json
+$(realpath "$TMP")/test.json:
+error: Could not resolve schema under test
+  at https://example.com#/foo
+EOF
+
+diff "$TMP/output.txt" "$TMP/expected.txt"
diff --git a/test/test/pass_single_resolve_fragment.sh b/test/test/pass_single_resolve_fragment.sh
new file mode 100755
index 00000000..a3b32be4
--- /dev/null
+++ b/test/test/pass_single_resolve_fragment.sh
@@ -0,0 +1,45 @@
+#!/bin/sh
+
+set -o errexit
+set -o nounset
+
+TMP="$(mktemp -d)"
+clean() { rm -rf "$TMP"; }
+trap clean EXIT
+
+cat << 'EOF' > "$TMP/schema.json"
+{
+  "id": "https://example.com",
+  "$schema": "http://json-schema.org/draft-04/schema#",
+  "definitions": {
+    "foo": { "type": "string" },
+    "bar": { "type": "integer" }
+  }
+}
+EOF
+
+cat << 'EOF' > "$TMP/test.json"
+{
+  "target": "https://example.com#/definitions/foo",
+  "tests": [
+    {
+      "description": "First test",
+      "valid": true,
+      "data": "foo"
+    },
+    {
+      "description": "Invalid type",
+      "valid": false,
+      "data": 1
+    }
+  ]
+}
+EOF
+
+"$1" test "$TMP/test.json" --resolve "$TMP/schema.json" 1> "$TMP/output.txt" 2>&1
+
+cat << EOF > "$TMP/expected.txt"
+$(realpath "$TMP")/test.json: PASS 2/2
+EOF
+
+diff "$TMP/output.txt" "$TMP/expected.txt"
diff --git a/test/test/pass_single_resolve_fragment_verbose.sh b/test/test/pass_single_resolve_fragment_verbose.sh
new file mode 100755
index 00000000..07ac9f63
--- /dev/null
+++ b/test/test/pass_single_resolve_fragment_verbose.sh
@@ -0,0 +1,48 @@
+#!/bin/sh
+
+set -o errexit
+set -o nounset
+
+TMP="$(mktemp -d)"
+clean() { rm -rf "$TMP"; }
+trap clean EXIT
+
+cat << 'EOF' > "$TMP/schema.json"
+{
+  "id": "https://example.com",
+  "$schema": "http://json-schema.org/draft-04/schema#",
+  "definitions": {
+    "foo": { "type": "string" },
+    "bar": { "type": "integer" }
+  }
+}
+EOF
+
+cat << 'EOF' > "$TMP/test.json"
+{
+  "target": "https://example.com#/definitions/foo",
+  "tests": [
+    {
+      "description": "First test",
+      "valid": true,
+      "data": "foo"
+    },
+    {
+      "description": "Invalid type",
+      "valid": false,
+      "data": 1
+    }
+  ]
+}
+EOF
+
+"$1" test "$TMP/test.json" --resolve "$TMP/schema.json" --verbose 1> "$TMP/output.txt" 2>&1
+
+cat << EOF > "$TMP/expected.txt"
+Importing schema into the resolution context: $(realpath "$TMP")/schema.json
+$(realpath "$TMP")/test.json:
+  1/2 PASS First test
+  2/2 PASS Invalid type
+EOF
+
+diff "$TMP/output.txt" "$TMP/expected.txt"