From 6f2c8e8bd43c8b3ca5e00667460193949454d609 Mon Sep 17 00:00:00 2001 From: Olivier Chafik Date: Wed, 26 Jun 2024 01:46:35 +0100 Subject: [PATCH] `json`: better support for "type" unions (e.g. nullable arrays w/ typed items) (#7863) * json: better suport for "type" arrays (e.g. `{"type": ["array", "null"], "items": {"type": "string"}}`) * json: add test for type: [array, null] fix * update tests --- common/json-schema-to-grammar.cpp | 4 ++- examples/json_schema_to_grammar.py | 2 +- .../server/public/json-schema-to-grammar.mjs | 2 +- tests/test-grammar-integration.cpp | 25 +++++++++++++++ tests/test-json-schema-to-grammar.cpp | 32 +++++++++++++++++++ 5 files changed, 62 insertions(+), 3 deletions(-) diff --git a/common/json-schema-to-grammar.cpp b/common/json-schema-to-grammar.cpp index b40821dadc05e7..2f233e2e7477f7 100644 --- a/common/json-schema-to-grammar.cpp +++ b/common/json-schema-to-grammar.cpp @@ -893,7 +893,9 @@ class SchemaConverter { } else if (schema_type.is_array()) { std::vector schema_types; for (const auto & t : schema_type) { - schema_types.push_back({{"type", t}}); + json schema_copy(schema); + schema_copy["type"] = t; + schema_types.push_back(schema_copy); } return _add_rule(rule_name, _generate_union_rule(name, schema_types)); } else if (schema.contains("const")) { diff --git a/examples/json_schema_to_grammar.py b/examples/json_schema_to_grammar.py index 3f3132f88a824d..92f6e3d47bae7e 100755 --- a/examples/json_schema_to_grammar.py +++ b/examples/json_schema_to_grammar.py @@ -565,7 +565,7 @@ def visit(self, schema, name): return self._add_rule(rule_name, self._generate_union_rule(name, schema.get('oneOf') or schema['anyOf'])) elif isinstance(schema_type, list): - return self._add_rule(rule_name, self._generate_union_rule(name, [{'type': t} for t in schema_type])) + return self._add_rule(rule_name, self._generate_union_rule(name, [{**schema, 'type': t} for t in schema_type])) elif 'const' in schema: return self._add_rule(rule_name, self._generate_constant_rule(schema['const']) + ' space') diff --git a/examples/server/public/json-schema-to-grammar.mjs b/examples/server/public/json-schema-to-grammar.mjs index 02015bbd49015d..06d76edde500ab 100644 --- a/examples/server/public/json-schema-to-grammar.mjs +++ b/examples/server/public/json-schema-to-grammar.mjs @@ -616,7 +616,7 @@ export class SchemaConverter { } else if (schema.oneOf || schema.anyOf) { return this._addRule(ruleName, this._generateUnionRule(name, schema.oneOf || schema.anyOf)); } else if (Array.isArray(schemaType)) { - return this._addRule(ruleName, this._generateUnionRule(name, schemaType.map(t => ({ type: t })))); + return this._addRule(ruleName, this._generateUnionRule(name, schemaType.map(t => ({...schema, type: t})))); } else if ('const' in schema) { return this._addRule(ruleName, this._generateConstantRule(schema.const) + ' space'); } else if ('enum' in schema) { diff --git a/tests/test-grammar-integration.cpp b/tests/test-grammar-integration.cpp index 23ef8324c1327b..0e21dc79599437 100644 --- a/tests/test-grammar-integration.cpp +++ b/tests/test-grammar-integration.cpp @@ -993,6 +993,31 @@ static void test_json_schema() { } ); + test_schema( + "", + // Schema + R"""( + { + "type": ["array", "null"], + "items": { "type": "string" } + } + )""", + // Passing strings + { + "null", + "[]", + "[\"123\"]", + "[\"foo\", \"bar\"]", + }, + // Failing strings + { + "", + "[123]", + "\"foo\"", + "[\"foo\", 42]", + } + ); + test_schema( "min+max items", // Schema diff --git a/tests/test-json-schema-to-grammar.cpp b/tests/test-json-schema-to-grammar.cpp index 1e69cb6ef36bee..3aaa11833de574 100755 --- a/tests/test-json-schema-to-grammar.cpp +++ b/tests/test-json-schema-to-grammar.cpp @@ -502,6 +502,38 @@ static void test_all(const std::string & lang, std::function