Skip to content

Commit

Permalink
Correct schema comparison when posting to subject
Browse files Browse the repository at this point in the history
After refactoring schema storage to use non-parsed versions, an
error was introduced in the schema comparison when posting to a subject.
The original Avro schema string can differ from the parsed schema, e.g.
a name that the parser drops is not relevant for the comparison. Example below:

Stored schema string: {"type":"int","name":"example_name"}
-> parsed schema: "int"

New schema posted to subject: {"type":"int"}
-> parsed new schema: "int"
  • Loading branch information
jjaakola-aiven committed Apr 20, 2022
1 parent 27f10b9 commit 864875b
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 4 deletions.
11 changes: 7 additions & 4 deletions karapace/schema_registry_apis.py
Original file line number Diff line number Diff line change
Expand Up @@ -741,19 +741,22 @@ async def subjects_schema_post(self, content_type, *, subject, request):
status=HTTPStatus.INTERNAL_SERVER_ERROR,
)
for schema in subject_data["schemas"].values():
typed_schema = schema["schema"]
if typed_schema == new_schema:
validated_typed_schema = ValidatedTypedSchema.parse(schema["schema"].schema_type, schema["schema"].schema_str)
if (
validated_typed_schema.schema_type == new_schema.schema_type
and validated_typed_schema.schema == new_schema.schema
):
ret = {
"subject": subject,
"version": schema["version"],
"id": schema["id"],
"schema": typed_schema.schema_str,
"schema": validated_typed_schema.schema_str,
}
if schema_type is not SchemaType.AVRO:
ret["schemaType"] = schema_type
self.r(ret, content_type)
else:
self.log.debug("Schema %r did not match %r", schema, typed_schema)
self.log.debug("Schema %r did not match %r", schema, validated_typed_schema)
self.r(
body={
"error_code": SchemaErrorCodes.SCHEMA_NOT_FOUND.value,
Expand Down
35 changes: 35 additions & 0 deletions tests/integration/test_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -1526,6 +1526,41 @@ async def test_schema_same_subject(registry_async_client: Client, trail: str) ->
assert json == {"id": schema_id, "subject": subject, "schema": ujson.loads(schema_str), "version": 1}


async def test_schema_same_subject_unnamed(registry_async_client: Client) -> None:
    """Look up a stored, named schema by posting its unnamed equivalent.

    Registers ``{"type": "int", "name": <generated>}`` under a fresh subject,
    then posts ``{"type": "int"}`` to ``subjects/<subject>``. The reply is
    expected to describe the originally registered version: same id,
    version 1, and the schema string that was stored.
    """
    test_name = "test_schema_same_subject_unnamed"
    make_subject = create_subject_name_factory(test_name)
    schema_name = create_schema_name_factory(test_name)()

    named_schema_str = ujson.dumps(
        {
            "type": "int",
            "name": schema_name,
        }
    )
    subject = make_subject()

    # Register the named variant as the first version of the subject.
    register_res = await registry_async_client.post(
        f"subjects/{subject}/versions",
        json={"schema": named_schema_str},
    )
    assert register_res.status_code == 200
    schema_id = register_res.json()["id"]

    # Check the subject using the same schema without the name attribute.
    lookup_res = await registry_async_client.post(
        f"subjects/{subject}",
        json={"schema": ujson.dumps({"type": "int"})},
    )
    assert lookup_res.status_code == 200

    # The schema comes back as a string; decode it so dicts are compared.
    found = lookup_res.json()
    found["schema"] = ujson.loads(found["schema"])
    expected = {
        "id": schema_id,
        "subject": subject,
        "schema": ujson.loads(named_schema_str),
        "version": 1,
    }
    assert found == expected


@pytest.mark.parametrize("trail", ["", "/"])
async def test_schema_version_number_existing_schema(registry_async_client: Client, trail: str) -> None:
"""
Expand Down

0 comments on commit 864875b

Please sign in to comment.