diff --git a/DEPENDENCIES b/DEPENDENCIES index 353b3e8d..529ffd55 100644 --- a/DEPENDENCIES +++ b/DEPENDENCIES @@ -1,4 +1,4 @@ vendorpull https://github.com/sourcemeta/vendorpull dea311b5bfb53b6926a4140267959ae334d3ecf4 noa https://github.com/sourcemeta/noa 7e26abce7a4e31e86a16ef2851702a56773ca527 -jsontoolkit https://github.com/sourcemeta/jsontoolkit 8e4d59fb0d75351175337bdcff7fe6caf4fe7096 +jsontoolkit https://github.com/sourcemeta/jsontoolkit a3765c8038ba4271e55318a677f6366bdaa7b805 hydra https://github.com/sourcemeta/hydra 3c53d3fdef79e9ba603d48470a508cc45472a0dc diff --git a/test/compile/pass.sh b/test/compile/pass.sh index d471ad9a..bce11b0d 100755 --- a/test/compile/pass.sh +++ b/test/compile/pass.sh @@ -32,26 +32,7 @@ cat << 'EOF' > "$TMP/expected.json" "type": "instance", "location": "" }, - "condition": [ - { - "category": "assertion", - "type": "type-strict", - "value": { - "category": "value", - "type": "type", - "value": "object" - }, - "absoluteKeywordLocation": "#/properties", - "relativeSchemaLocation": "", - "relativeInstanceLocation": "", - "target": { - "category": "target", - "type": "instance", - "location": "" - }, - "condition": [] - } - ], + "condition": [], "children": [ { "category": "internal", diff --git a/vendor/jsontoolkit/src/jsonschema/compile.cc b/vendor/jsontoolkit/src/jsonschema/compile.cc index 7003f019..d2c11ee1 100644 --- a/vendor/jsontoolkit/src/jsonschema/compile.cc +++ b/vendor/jsontoolkit/src/jsonschema/compile.cc @@ -80,7 +80,10 @@ auto compile(const JSON &schema, const SchemaWalker &walker, .wait(); const std::string base{ - URI{sourcemeta::jsontoolkit::id(schema, resolver, default_dialect) + URI{sourcemeta::jsontoolkit::id( + schema, resolver, + sourcemeta::jsontoolkit::IdentificationStrategy::Strict, + default_dialect) .get() .value_or("")} .canonicalize() diff --git a/vendor/jsontoolkit/src/jsonschema/compile_evaluate.cc b/vendor/jsontoolkit/src/jsonschema/compile_evaluate.cc index cb45d53b..942f3cf0 100644 --- a/vendor/jsontoolkit/src/jsonschema/compile_evaluate.cc +++ b/vendor/jsontoolkit/src/jsonschema/compile_evaluate.cc @@ -146,7 +146,7 @@ class EvaluationContext { assert(!this->instance_location(target).empty()); assert(this->instance_location(target).back().is_property()); return this->value( - JSON{this->instance_location(target).back().to_property()}); + this->instance_location(target).back().to_property()); } assert(this->target_type() == TargetType::Value); diff --git a/vendor/jsontoolkit/src/jsonschema/default_compiler_draft4.h b/vendor/jsontoolkit/src/jsonschema/default_compiler_draft4.h index dc9476c6..b7c43751 100644 --- a/vendor/jsontoolkit/src/jsonschema/default_compiler_draft4.h +++ b/vendor/jsontoolkit/src/jsonschema/default_compiler_draft4.h @@ -297,7 +297,7 @@ auto compiler_draft4_applicator_properties( return {make( schema_context, dynamic_context, SchemaCompilerValueNone{}, - std::move(children), type_condition(schema_context, JSON::Type::Object))}; + std::move(children), SchemaCompilerTemplate{})}; } auto compiler_draft4_applicator_patternproperties( diff --git a/vendor/jsontoolkit/src/jsonschema/include/sourcemeta/jsontoolkit/jsonschema.h b/vendor/jsontoolkit/src/jsonschema/include/sourcemeta/jsontoolkit/jsonschema.h index 218b0435..f0ccace9 100644 --- a/vendor/jsontoolkit/src/jsonschema/include/sourcemeta/jsontoolkit/jsonschema.h +++ b/vendor/jsontoolkit/src/jsonschema/include/sourcemeta/jsontoolkit/jsonschema.h @@ -69,6 +69,16 @@ namespace sourcemeta::jsontoolkit { SOURCEMETA_JSONTOOLKIT_JSONSCHEMA_EXPORT auto is_schema(const JSON &schema) -> bool; +/// @ingroup jsonschema +/// The strategy to follow when attempting to identify a schema +enum class IdentificationStrategy { + /// Only proceed if we can guarantee the identifier is valid + Strict, + + /// Attempt to guess even if we don't know the base dialect + Loose +}; + /// @ingroup jsonschema /// /// This function returns the URI identifier of the given schema, if any. For @@ -90,8 +100,13 @@ auto is_schema(const JSON &schema) -> bool; /// assert(id.has_value()); /// assert(id.value() == "https://sourcemeta.com/example-schema"); /// ``` +/// +/// You can opt-in to a loose identification strategy to attempt to play a +/// guessing game. Often useful if you have a schema without a dialect and you +/// want to at least try to get something. SOURCEMETA_JSONTOOLKIT_JSONSCHEMA_EXPORT auto id(const JSON &schema, const SchemaResolver &resolver, + const IdentificationStrategy strategy = IdentificationStrategy::Strict, const std::optional &default_dialect = std::nullopt, const std::optional &default_id = std::nullopt) -> std::future>; diff --git a/vendor/jsontoolkit/src/jsonschema/jsonschema.cc b/vendor/jsontoolkit/src/jsonschema/jsonschema.cc index a191ef8b..9b5bb8d2 100644 --- a/vendor/jsontoolkit/src/jsonschema/jsonschema.cc +++ b/vendor/jsontoolkit/src/jsonschema/jsonschema.cc @@ -15,15 +15,55 @@ auto sourcemeta::jsontoolkit::is_schema( return schema.is_object() || schema.is_boolean(); } +static auto guess_identifier(const sourcemeta::jsontoolkit::JSON &schema) + -> std::optional { + if (schema.defines("$id") && schema.at("$id").is_string()) { + if (!schema.defines("id") || + (schema.defines("id") && (!schema.at("id").is_string() || + schema.at("$id") == schema.at("id")))) { + return schema.at("$id").to_string(); + } + } else if (schema.defines("id") && schema.at("id").is_string()) { + return schema.at("id").to_string(); + } + + return std::nullopt; +} + auto sourcemeta::jsontoolkit::id( const sourcemeta::jsontoolkit::JSON &schema, const SchemaResolver &resolver, + const IdentificationStrategy strategy, const std::optional &default_dialect, const std::optional &default_id) -> std::future> { - const std::optional maybe_base_dialect{ - sourcemeta::jsontoolkit::base_dialect(schema, resolver, default_dialect) - .get()}; + std::optional maybe_base_dialect; + + // TODO: Can we avoid a C++ exception as the potential normal way of + // operation? + try { + maybe_base_dialect = + sourcemeta::jsontoolkit::base_dialect(schema, resolver, default_dialect) + .get(); + } catch (const SchemaResolutionError &) { + // Attempt to play a heuristic guessing game before giving up + if (strategy == IdentificationStrategy::Loose && schema.is_object()) { + std::promise> promise; + promise.set_value(guess_identifier(schema)); + return promise.get_future(); + } + + throw; + } + if (!maybe_base_dialect.has_value()) { + + // Attempt to play a heuristic guessing game before giving up + if (strategy == IdentificationStrategy::Loose && schema.is_object()) { + std::promise> promise; + promise.set_value(guess_identifier(schema)); + return promise.get_future(); + } + std::promise> promise; promise.set_value(default_id); return promise.get_future(); diff --git a/vendor/jsontoolkit/src/uri/include/sourcemeta/jsontoolkit/uri.h b/vendor/jsontoolkit/src/uri/include/sourcemeta/jsontoolkit/uri.h index ca31b15d..19dd1f2b 100644 --- a/vendor/jsontoolkit/src/uri/include/sourcemeta/jsontoolkit/uri.h +++ b/vendor/jsontoolkit/src/uri/include/sourcemeta/jsontoolkit/uri.h @@ -14,8 +14,10 @@ #include // std::unique_ptr #include // std::optional #include // std::ostream +#include // std::span #include // std::string #include // std::string_view +#include // std::vector /// @defgroup uri URI /// @brief A RFC 3986 URI implementation based on `uriparser`. @@ -308,6 +310,9 @@ class SOURCEMETA_JSONTOOLKIT_URI_EXPORT URI { [[nodiscard]] auto userinfo() const -> std::optional; private: + bool parsed = false; + auto parse() -> void; + // Exporting symbols that depends on the standard C++ library is considered // safe. // https://learn.microsoft.com/en-us/cpp/error-messages/compiler-warnings/compiler-warning-level-2-c4275?view=msvc-170&redirectedfrom=MSDN @@ -318,6 +323,15 @@ class SOURCEMETA_JSONTOOLKIT_URI_EXPORT URI { // points to fragments of it. // We keep this as const as this class is immutable std::string data; + + std::optional path_; + std::optional userinfo_; + std::optional host_; + std::optional port_; + std::optional scheme_; + std::optional fragment_; + std::optional query_; + // Use PIMPL idiom to hide `urlparser` struct Internal; std::unique_ptr internal; diff --git a/vendor/jsontoolkit/src/uri/uri.cc b/vendor/jsontoolkit/src/uri/uri.cc index 0bbb3ed5..7b19900d 100644 --- a/vendor/jsontoolkit/src/uri/uri.cc +++ b/vendor/jsontoolkit/src/uri/uri.cc @@ -4,10 +4,12 @@ #include // assert #include // std::uint32_t #include // std::istream +#include // std::optional #include // std::ostringstream #include // std::length_error, std::runtime_error #include // std::stoul, std::string, std::tolower #include // std::move +#include // std::vector static auto uri_normalize(UriUriA *uri) -> void { if (uriNormalizeSyntaxA(uri) != URI_SUCCESS) { @@ -63,6 +65,45 @@ static auto uri_parse(const std::string &data, UriUriA *uri) -> void { uri_normalize(uri); } +static auto +canonicalize_path(const std::string &path) -> std::optional { + std::vector segments; + std::string segment; + + if (path.empty()) { + return std::nullopt; + } + + bool has_leading_with_word = path.front() != '/' && path.front() != '.'; + for (unsigned int i = has_leading_with_word ? 0 : 1; i <= path.size(); i++) { + char c = path[i]; + if (c == '/' || i == path.size()) { + if (segment == "..") { + if (!segments.empty()) { + segments.pop_back(); + } + } else if (segment != "." && !segment.empty()) { + segments.push_back(segment); + } + segment.clear(); + } else { + segment += c; + } + } + + // Reconstruct the canonical path + std::string canonical_path; + std::string separator = ""; + for (const auto &seg : segments) { + canonical_path += separator + seg; + separator = "/"; + } + + if (canonical_path.empty()) + return std::nullopt; + return canonical_path; +} + namespace sourcemeta::jsontoolkit { struct URI::Internal { @@ -70,25 +111,95 @@ struct URI::Internal { }; URI::URI(std::string input) : data{std::move(input)}, internal{new Internal} { - uri_parse(this->data, &this->internal->uri); + this->parse(); } URI::URI(std::istream &input) : internal{new Internal} { std::ostringstream output; output << input.rdbuf(); this->data = output.str(); - uri_parse(this->data, &this->internal->uri); + this->parse(); } URI::~URI() { uriFreeUriMembersA(&this->internal->uri); } +// TODO: Test the copy constructor URI::URI(const URI &other) : URI{other.recompose()} {} URI::URI(URI &&other) : data{std::move(other.data)}, internal{std::move(other.internal)} { + this->parsed = other.parsed; + this->path_ = std::move(other.path_); + this->scheme_ = std::move(other.scheme_); + this->userinfo_ = std::move(other.userinfo_); + this->host_ = std::move(other.host_); + this->port_ = std::move(other.port_); + this->fragment_ = std::move(other.fragment_); + this->query_ = std::move(other.query_); + other.internal = nullptr; } +auto URI::parse() -> void { + if (this->parsed) { + // clean + this->path_ = std::nullopt; + this->scheme_ = std::nullopt; + this->userinfo_ = std::nullopt; + this->host_ = std::nullopt; + this->port_ = std::nullopt; + this->fragment_ = std::nullopt; + this->query_ = std::nullopt; + this->parsed = false; + uriFreeUriMembersA(&this->internal->uri); + } + + uri_parse(this->data, &this->internal->uri); + + this->scheme_ = uri_text_range(&this->internal->uri.scheme); + this->scheme_ = uri_text_range(&this->internal->uri.scheme); + this->userinfo_ = uri_text_range(&this->internal->uri.userInfo); + this->host_ = uri_text_range(&this->internal->uri.hostText); + this->fragment_ = uri_text_range(&this->internal->uri.fragment); + this->query_ = uri_text_range(&this->internal->uri.query); + const auto port_text{uri_text_range(&this->internal->uri.portText)}; + if (!port_text.has_value()) { + this->port_ = std::nullopt; + } else { + this->port_ = std::stoul(std::string{port_text.value()}); + } + + const UriPathSegmentA *segment{this->internal->uri.pathHead}; + if (segment != nullptr) { + std::ostringstream path; + // URNs and tags have a single path segment by definition + if (this->is_urn() || this->is_tag()) { + const auto part{uri_text_range(&segment->text)}; + assert(part.has_value()); + path << part.value(); + } else { + bool first{true}; + while (segment) { + const auto part{uri_text_range(&segment->text)}; + assert(part.has_value()); + const auto value{part.value()}; + + if (first) { + path << value; + } else { + path << "/" << value; + } + + segment = segment->next; + first = false; + } + } + this->path_ = path.str(); + } + + this->parsed = true; +} + auto URI::is_absolute() const noexcept -> bool { // An absolute URI always contains a scheme component, return this->internal->uri.scheme.first != nullptr; @@ -111,57 +222,56 @@ auto URI::is_fragment_only() const -> bool { } auto URI::scheme() const -> std::optional { - return uri_text_range(&this->internal->uri.scheme); + return this->scheme_; } auto URI::host() const -> std::optional { - return uri_text_range(&this->internal->uri.hostText); + return this->host_; } -auto URI::port() const -> std::optional { - const auto port_text{uri_text_range(&this->internal->uri.portText)}; - if (!port_text.has_value()) { - return std::nullopt; - } - - return std::stoul(std::string{port_text.value()}); -} +auto URI::port() const -> std::optional { return this->port_; } auto URI::path() const -> std::optional { - const UriPathSegmentA *segment{this->internal->uri.pathHead}; - if (!segment) { + if (!this->path_.has_value()) { return std::nullopt; } - // URNs and tags have a single path segment by definition - if (this->is_urn() || this->is_tag()) { - const auto part{uri_text_range(&segment->text)}; - assert(part.has_value()); - return std::string{part.value()}; + if (!this->is_urn() && !this->is_tag() && this->scheme().has_value()) { + return "/" + this->path_.value(); } - std::ostringstream result; - while (segment) { - const auto part{uri_text_range(&segment->text)}; - assert(part.has_value()); - result << '/'; - result << part.value(); - segment = segment->next; + size_t path_pos = this->data.find(this->path_.value()); + if (path_pos != std::string::npos && path_pos > 0 && + this->data[path_pos - 1] == '/') { + return "/" + this->path_.value(); } - return result.str(); + return path_; } auto URI::fragment() const -> std::optional { - return uri_text_range(&this->internal->uri.fragment); + return this->fragment_; } auto URI::query() const -> std::optional { - return uri_text_range(&this->internal->uri.query); + return this->query_; } auto URI::recompose() const -> std::string { - return uri_to_string(&this->internal->uri); + std::ostringstream result; + + const auto uri = this->recompose_without_fragment(); + if (uri.has_value()) { + result << uri.value(); + } + + // Fragment + const auto result_fragment{this->fragment()}; + if (result_fragment.has_value()) { + result << '#' << result_fragment.value(); + } + + return result.str(); } auto URI::recompose_without_fragment() const -> std::optional { @@ -210,28 +320,42 @@ auto URI::recompose_without_fragment() const -> std::optional { } auto URI::canonicalize() -> URI & { - std::ostringstream result; - // Scheme const auto result_scheme{this->scheme()}; if (result_scheme.has_value()) { + std::ostringstream lowercased_scheme; for (const auto character : result_scheme.value()) { - result << static_cast(std::tolower(character)); - } - - if (this->is_urn() || this->is_tag()) { - result << ":"; - } else { - result << "://"; + lowercased_scheme << static_cast(std::tolower(character)); } + this->scheme_ = lowercased_scheme.str(); } // Host const auto result_host{this->host()}; if (result_host.has_value()) { + std::ostringstream lowercased_host; for (const auto character : result_host.value()) { - result << static_cast(std::tolower(character)); + lowercased_host << static_cast(std::tolower(character)); } + this->host_ = lowercased_host.str(); + } + + // Clean Path form ".." and "." + const auto result_path{this->path()}; + if (result_path.has_value()) { + const auto canonical_path{canonicalize_path(result_path.value())}; + if (canonical_path.has_value()) { + this->path_ = canonical_path.value(); + } + } + + // Fragment + // The empty fragment is optional + const auto result_fragment{this->fragment()}; + if (result_fragment.has_value() && !result_fragment.value().empty()) { + this->fragment_ = result_fragment.value(); + } else { + this->fragment_ = std::nullopt; } // Port @@ -245,31 +369,12 @@ auto URI::canonicalize() -> URI & { result_port.value() == 443}; if (!is_default_http_port && !is_default_https_port) { - result << ':' << result_port.value(); + this->port_ = result_port.value(); + } else { + this->port_ = std::nullopt; } } - // Path - const auto result_path{this->path()}; - if (result_path.has_value()) { - result << result_path.value(); - } - - // Query - const auto result_query{this->query()}; - if (result_query.has_value()) { - result << '?' << result_query.value(); - } - - // Fragment - const auto result_fragment{this->fragment()}; - if (result_fragment.has_value() && !result_fragment.value().empty()) { - result << '#' << result_fragment.value(); - } - - this->data = result.str(); - uriFreeUriMembersA(&this->internal->uri); - uri_parse(this->data, &this->internal->uri); return *this; } @@ -295,8 +400,7 @@ auto URI::resolve_from(const URI &base) -> URI & { uri_normalize(&absoluteDest); this->data = uri_to_string(&absoluteDest); uriFreeUriMembersA(&absoluteDest); - uriFreeUriMembersA(&this->internal->uri); - uri_parse(this->data, &this->internal->uri); + this->parse(); return *this; } catch (...) { uriFreeUriMembersA(&absoluteDest); @@ -320,7 +424,7 @@ auto URI::resolve_from_if_absolute(const URI &base) -> URI & { } auto URI::userinfo() const -> std::optional { - return uri_text_range(&this->internal->uri.userInfo); + return this->userinfo_; } } // namespace sourcemeta::jsontoolkit