From 864875b71c05ab4eed34f76805acdf062ea7d275 Mon Sep 17 00:00:00 2001 From: Jarkko Jaakola Date: Wed, 20 Apr 2022 13:36:09 +0300 Subject: [PATCH] Correct schema comparison when posting to subject After refactoring schema storage to use non-parsed versions, a bug was introduced in the schema comparison done when posting to a subject. The original Avro schema string can differ from the parsed schema, e.g. it may carry names that are not necessary for comparison. Example below: Stored schema string: {"type":"int","name":"example_name"} -> parsed schema: "int" New schema posted to subject: {"type":"int"} -> parsed new schema: "int" --- karapace/schema_registry_apis.py | 11 ++++++---- tests/integration/test_schema.py | 35 ++++++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+), 4 deletions(-) diff --git a/karapace/schema_registry_apis.py b/karapace/schema_registry_apis.py index 5079c4976..936125028 100644 --- a/karapace/schema_registry_apis.py +++ b/karapace/schema_registry_apis.py @@ -741,19 +741,22 @@ async def subjects_schema_post(self, content_type, *, subject, request): status=HTTPStatus.INTERNAL_SERVER_ERROR, ) for schema in subject_data["schemas"].values(): - typed_schema = schema["schema"] - if typed_schema == new_schema: + validated_typed_schema = ValidatedTypedSchema.parse(schema["schema"].schema_type, schema["schema"].schema_str) + if ( + validated_typed_schema.schema_type == new_schema.schema_type + and validated_typed_schema.schema == new_schema.schema + ): ret = { "subject": subject, "version": schema["version"], "id": schema["id"], - "schema": typed_schema.schema_str, + "schema": validated_typed_schema.schema_str, } if schema_type is not SchemaType.AVRO: ret["schemaType"] = schema_type self.r(ret, content_type) else: - self.log.debug("Schema %r did not match %r", schema, typed_schema) + self.log.debug("Schema %r did not match %r", schema, validated_typed_schema) self.r( body={ "error_code": SchemaErrorCodes.SCHEMA_NOT_FOUND.value, diff --git 
a/tests/integration/test_schema.py b/tests/integration/test_schema.py index 477cd93f5..3b5ee370a 100644 --- a/tests/integration/test_schema.py +++ b/tests/integration/test_schema.py @@ -1526,6 +1526,41 @@ async def test_schema_same_subject(registry_async_client: Client, trail: str) -> assert json == {"id": schema_id, "subject": subject, "schema": ujson.loads(schema_str), "version": 1} +async def test_schema_same_subject_unnamed(registry_async_client: Client) -> None: + """ + The same schema JSON should be returned when checking the same schema str against the same subject + """ + subject_name_factory = create_subject_name_factory("test_schema_same_subject_unnamed") + schema_name = create_schema_name_factory("test_schema_same_subject_unnamed")() + + schema_str = ujson.dumps( + { + "type": "int", + "name": schema_name, + } + ) + subject = subject_name_factory() + res = await registry_async_client.post( + f"subjects/{subject}/versions", + json={"schema": schema_str}, + ) + assert res.status_code == 200 + schema_id = res.json()["id"] + + unnamed_schema_str = ujson.dumps({"type": "int"}) + + res = await registry_async_client.post( + f"subjects/{subject}", + json={"schema": unnamed_schema_str}, + ) + assert res.status_code == 200 + + # Switch the str schema to a dict for comparison + json = res.json() + json["schema"] = ujson.loads(json["schema"]) + assert json == {"id": schema_id, "subject": subject, "schema": ujson.loads(schema_str), "version": 1} + + @pytest.mark.parametrize("trail", ["", "/"]) async def test_schema_version_number_existing_schema(registry_async_client: Client, trail: str) -> None: """