diff --git a/.changelog/1738663351.md b/.changelog/1738663351.md index f7eb6719dd..f496e9f74a 100644 --- a/.changelog/1738663351.md +++ b/.changelog/1738663351.md @@ -8,4 +8,4 @@ breaking: false new_feature: true bug_fix: false --- -Allow invalid UTF-8 characters to be replaced with � +Enhanced UTF-8 handling: When `replaceInvalidUtf8` codegen flag is enabled, invalid UTF-8 sequences are now automatically replaced with the Unicode replacement character (U+FFFD) instead of causing errors diff --git a/codegen-core/src/main/kotlin/software/amazon/smithy/rust/codegen/core/testutil/CodegenIntegrationTest.kt b/codegen-core/src/main/kotlin/software/amazon/smithy/rust/codegen/core/testutil/CodegenIntegrationTest.kt index f8325ec5d3..282c663482 100644 --- a/codegen-core/src/main/kotlin/software/amazon/smithy/rust/codegen/core/testutil/CodegenIntegrationTest.kt +++ b/codegen-core/src/main/kotlin/software/amazon/smithy/rust/codegen/core/testutil/CodegenIntegrationTest.kt @@ -5,10 +5,19 @@ package software.amazon.smithy.rust.codegen.core.testutil +import software.amazon.smithy.aws.traits.protocols.AwsJson1_0Trait +import software.amazon.smithy.aws.traits.protocols.AwsJson1_1Trait +import software.amazon.smithy.aws.traits.protocols.RestJson1Trait +import software.amazon.smithy.aws.traits.protocols.RestXmlTrait import software.amazon.smithy.build.PluginContext import software.amazon.smithy.model.Model import software.amazon.smithy.model.node.ObjectNode import software.amazon.smithy.model.node.ToNode +import software.amazon.smithy.model.shapes.ServiceShape +import software.amazon.smithy.model.shapes.ShapeId +import software.amazon.smithy.model.traits.AbstractTrait +import software.amazon.smithy.model.transform.ModelTransformer +import software.amazon.smithy.protocol.traits.Rpcv2CborTrait import software.amazon.smithy.rust.codegen.core.smithy.RuntimeConfig import software.amazon.smithy.rust.codegen.core.util.runCommand import java.io.File @@ -153,3 +162,128 @@ fun codegenIntegrationTest( logger.fine(out.toString()) return testDir } + +/** + * Metadata associated with a protocol that provides additional information needed for testing. + * + * @property protocol The protocol enum value this metadata is associated with + * @property contentType The HTTP Content-Type header value associated with this protocol. + */ +data class ProtocolMetadata( + val protocol: ModelProtocol, + val contentType: String, +) + +/** + * Represents the supported protocol traits in Smithy models. + * + * @property trait The Smithy trait instance with which the service shape must be annotated. + */ +enum class ModelProtocol(val trait: AbstractTrait) { + AwsJson10(AwsJson1_0Trait.builder().build()), + AwsJson11(AwsJson1_1Trait.builder().build()), + RestJson(RestJson1Trait.builder().build()), + RestXml(RestXmlTrait.builder().build()), + Rpcv2Cbor(Rpcv2CborTrait.builder().build()), + ; + + // Create metadata after enum is initialized + val metadata: ProtocolMetadata by lazy { + when (this) { + AwsJson10 -> ProtocolMetadata(this, "application/x-amz-json-1.0") + AwsJson11 -> ProtocolMetadata(this, "application/x-amz-json-1.1") + RestJson -> ProtocolMetadata(this, "application/json") + RestXml -> ProtocolMetadata(this, "application/xml") + Rpcv2Cbor -> ProtocolMetadata(this, "application/cbor") + } + } + + companion object { + private val TRAIT_IDS = values().map { it.trait.toShapeId() }.toSet() + val ALL: Set = values().toSet() + + fun getTraitIds() = TRAIT_IDS + } +} + +/** + * Removes all existing protocol traits annotated on the given service, + * then sets the provided `protocol` as the sole protocol trait for the service. + */ +fun Model.replaceProtocolTraitOnServerShapeId( + serviceShapeId: ShapeId, + modelProtocol: ModelProtocol, +): Model { + val serviceShape = this.expectShape(serviceShapeId, ServiceShape::class.java) + return replaceProtocolTraitOnServiceShape(serviceShape, modelProtocol) +} + +/** + * Removes all existing protocol traits annotated on the given service shape, + * then sets the provided `protocol` as the sole protocol trait for the service. + */ +fun Model.replaceProtocolTraitOnServiceShape( + serviceShape: ServiceShape, + modelProtocol: ModelProtocol, +): Model { + val serviceBuilder = serviceShape.toBuilder() + ModelProtocol.getTraitIds().forEach { traitId -> + serviceBuilder.removeTrait(traitId) + } + val service = serviceBuilder.addTrait(modelProtocol.trait).build() + return ModelTransformer.create().replaceShapes(this, listOf(service)) +} + +/** + * Processes a Smithy model string by applying different protocol traits and invoking the tests block on the model. + * For each protocol, this function: + * 1. Parses the Smithy model string + * 2. Replaces any existing protocol traits on service shapes with the specified protocol + * 3. Runs the provided test with the transformed model and protocol metadata + * + * @param protocolTraitIds Set of protocols to test against + * @param test Function that receives the transformed model and protocol metadata for testing + */ +fun String.forProtocols( + protocolTraitIds: Set, + test: (Model, ProtocolMetadata) -> Unit, +) { + val baseModel = this.asSmithyModel(smithyVersion = "2") + val serviceShapes = baseModel.serviceShapes.toList() + + protocolTraitIds.forEach { protocol -> + val transformedModel = + serviceShapes.fold(baseModel) { acc, shape -> + acc.replaceProtocolTraitOnServiceShape(shape, protocol) + } + test(transformedModel, protocol.metadata) + } +} + +/** + * Convenience overload that accepts vararg protocols instead of a Set. + * + * @param protocols Variable number of protocols to test against + * @param test Function that receives the transformed model and protocol metadata for testing + * @see forProtocols + */ +fun String.forProtocols( + vararg protocols: ModelProtocol, + test: (Model, ProtocolMetadata) -> Unit, +) { + forProtocols(protocols.toSet(), test) +} + +/** + * Tests a Smithy model string against all supported protocols, with optional exclusions. + * + * @param exclude Set of protocols to exclude from testing (default is empty) + * @param test Function that receives the transformed model and protocol metadata for testing + * @see forProtocols + */ +fun String.forAllProtocols( + exclude: Set = emptySet(), + test: (Model, ProtocolMetadata) -> Unit, +) { + forProtocols(ModelProtocol.ALL - exclude, test) +} diff --git a/codegen-server/src/main/kotlin/software/amazon/smithy/rust/codegen/server/smithy/generators/protocol/ServerProtocol.kt b/codegen-server/src/main/kotlin/software/amazon/smithy/rust/codegen/server/smithy/generators/protocol/ServerProtocol.kt index 669f683a15..fadb40658d 100644 --- a/codegen-server/src/main/kotlin/software/amazon/smithy/rust/codegen/server/smithy/generators/protocol/ServerProtocol.kt +++ b/codegen-server/src/main/kotlin/software/amazon/smithy/rust/codegen/server/smithy/generators/protocol/ServerProtocol.kt @@ -135,7 +135,6 @@ fun jsonParserGenerator( httpBindingResolver: HttpBindingResolver, jsonName: (MemberShape) -> String, additionalParserCustomizations: List = listOf(), - smithyJson: RuntimeType = RuntimeType.smithyJson(codegenContext.runtimeConfig), ): JsonParserGenerator = JsonParserGenerator( codegenContext, @@ -153,7 +152,6 @@ fun jsonParserGenerator( } else { RuntimeType.smithyJson(codegenContext.runtimeConfig) }, - , ) class ServerAwsJsonProtocol( diff --git a/codegen-server/src/test/kotlin/software/amazon/smithy/rust/codegen/server/smithy/ConstraintsTest.kt b/codegen-server/src/test/kotlin/software/amazon/smithy/rust/codegen/server/smithy/ConstraintsTest.kt index 36e30230c6..ff0c990a1e 100644 --- a/codegen-server/src/test/kotlin/software/amazon/smithy/rust/codegen/server/smithy/ConstraintsTest.kt +++ b/codegen-server/src/test/kotlin/software/amazon/smithy/rust/codegen/server/smithy/ConstraintsTest.kt @@ -9,10 +9,6 @@ import io.kotest.inspectors.forAll import io.kotest.matchers.ints.shouldBeGreaterThan import io.kotest.matchers.shouldBe import org.junit.jupiter.api.Test -import software.amazon.smithy.aws.traits.protocols.AwsJson1_0Trait -import software.amazon.smithy.aws.traits.protocols.AwsJson1_1Trait -import software.amazon.smithy.aws.traits.protocols.RestJson1Trait -import software.amazon.smithy.aws.traits.protocols.RestXmlTrait import software.amazon.smithy.model.Model import software.amazon.smithy.model.shapes.BooleanShape import software.amazon.smithy.model.shapes.ListShape @@ -22,35 +18,27 @@ import software.amazon.smithy.model.shapes.ServiceShape import software.amazon.smithy.model.shapes.ShapeId import software.amazon.smithy.model.shapes.StringShape import software.amazon.smithy.model.shapes.StructureShape -import software.amazon.smithy.model.traits.AbstractTrait import software.amazon.smithy.model.transform.ModelTransformer -import software.amazon.smithy.protocol.traits.Rpcv2CborTrait import software.amazon.smithy.rust.codegen.core.rustlang.rustTemplate import software.amazon.smithy.rust.codegen.core.smithy.RustCrate import software.amazon.smithy.rust.codegen.core.testutil.IntegrationTestParams +import software.amazon.smithy.rust.codegen.core.testutil.ModelProtocol import software.amazon.smithy.rust.codegen.core.testutil.ServerAdditionalSettings import software.amazon.smithy.rust.codegen.core.testutil.asSmithyModel +import software.amazon.smithy.rust.codegen.core.testutil.replaceProtocolTraitOnServerShapeId import software.amazon.smithy.rust.codegen.core.testutil.unitTest import software.amazon.smithy.rust.codegen.core.util.lookup import software.amazon.smithy.rust.codegen.server.smithy.testutil.serverIntegrationTest import software.amazon.smithy.rust.codegen.server.smithy.testutil.serverTestSymbolProvider import java.io.File -enum class ModelProtocol(val trait: AbstractTrait) { - AwsJson10(AwsJson1_0Trait.builder().build()), - AwsJson11(AwsJson1_1Trait.builder().build()), - RestJson(RestJson1Trait.builder().build()), - RestXml(RestXmlTrait.builder().build()), - Rpcv2Cbor(Rpcv2CborTrait.builder().build()), -} - /** * Returns the Smithy constraints model from the common repository, with the specified protocol * applied to the service. */ fun loadSmithyConstraintsModelForProtocol(modelProtocol: ModelProtocol): Pair { val (model, serviceShapeId) = loadSmithyConstraintsModel() - return Pair(model.replaceProtocolTrait(serviceShapeId, modelProtocol), serviceShapeId) + return Pair(model.replaceProtocolTraitOnServerShapeId(serviceShapeId, modelProtocol), serviceShapeId) } /** @@ -65,23 +53,6 @@ fun loadSmithyConstraintsModel(): Pair { return Pair(model, serviceShapeId) } -/** - * Removes all existing protocol traits annotated on the given service, - * then sets the provided `protocol` as the sole protocol trait for the service. - */ -fun Model.replaceProtocolTrait( - serviceShapeId: ShapeId, - modelProtocol: ModelProtocol, -): Model { - val serviceBuilder = - this.expectShape(serviceShapeId, ServiceShape::class.java).toBuilder() - for (p in ModelProtocol.values()) { - serviceBuilder.removeTrait(p.trait.toShapeId()) - } - val service = serviceBuilder.addTrait(modelProtocol.trait).build() - return ModelTransformer.create().replaceShapes(this, listOf(service)) -} - fun List.containsAnyShapeId(ids: Collection): Boolean { return ids.any { id -> this.any { shape -> shape == id } } } diff --git a/codegen-server/src/test/kotlin/software/amazon/smithy/rust/codegen/server/smithy/ReplaceInvalidUtf8Test.kt b/codegen-server/src/test/kotlin/software/amazon/smithy/rust/codegen/server/smithy/ReplaceInvalidUtf8Test.kt index d5f7167234..09e653db29 100644 --- a/codegen-server/src/test/kotlin/software/amazon/smithy/rust/codegen/server/smithy/ReplaceInvalidUtf8Test.kt +++ b/codegen-server/src/test/kotlin/software/amazon/smithy/rust/codegen/server/smithy/ReplaceInvalidUtf8Test.kt @@ -1,11 +1,12 @@ package software.amazon.smithy.rust.codegen.server.smithy import org.junit.jupiter.api.Test -import software.amazon.smithy.rust.codegen.core.rustlang.rust import software.amazon.smithy.rust.codegen.core.rustlang.rustTemplate import software.amazon.smithy.rust.codegen.core.testutil.IntegrationTestParams +import software.amazon.smithy.rust.codegen.core.testutil.ModelProtocol import software.amazon.smithy.rust.codegen.core.testutil.ServerAdditionalSettings -import software.amazon.smithy.rust.codegen.core.testutil.asSmithyModel +import software.amazon.smithy.rust.codegen.core.testutil.forAllProtocols +import software.amazon.smithy.rust.codegen.core.testutil.forProtocols import software.amazon.smithy.rust.codegen.core.testutil.testModule import software.amazon.smithy.rust.codegen.server.smithy.testutil.serverIntegrationTest @@ -13,9 +14,7 @@ internal class ReplaceInvalidUtf8Test { val model = """ namespace test - use aws.protocols#restJson1 - @restJson1 service SampleService { operations: [SampleOperation] } @@ -26,74 +25,80 @@ internal class ReplaceInvalidUtf8Test { x : String } } - """.asSmithyModel(smithyVersion = "2") + """ @Test fun `invalid utf8 should be replaced if the codegen flag is set`() { - serverIntegrationTest( - model, - IntegrationTestParams( - additionalSettings = - ServerAdditionalSettings - .builder() - .replaceInvalidUtf8(true) - .toObjectNode(), - ), - ) { _, rustCrate -> - rustCrate.testModule { - rustTemplate( - """ - ##[tokio::test] - async fn test_utf8_replaced() { - let body = r##"{ "x" : "\ud800" }"##; - let request = http::Request::builder() - .method("POST") - .uri("/operation") - .header("content-type", "application/json") - .body(hyper::Body::from(body)) - .expect("failed to build request"); - let result = crate::protocol_serde::shape_sample_operation::de_sample_operation_http_request(request).await; - assert!( - result.is_ok(), - "Invalid utf8 should have been replaced. {result:?}" - ); - assert_eq!( - result.unwrap().x.unwrap(), - "�", - "payload should have been replaced with �." - ); - } - """, - ) + model.forProtocols(ModelProtocol.AwsJson10, ModelProtocol.AwsJson11, ModelProtocol.RestJson) { model, metadata -> + serverIntegrationTest( + model, + IntegrationTestParams( + additionalSettings = + ServerAdditionalSettings + .builder() + .replaceInvalidUtf8(true) + .toObjectNode(), + ), + ) { _, rustCrate -> + rustCrate.testModule { + rustTemplate( + """ + ##[tokio::test] + async fn test_utf8_replaced() { + let body = r##"{ "x" : "\ud800" }"##; + let request = http::Request::builder() + .method("POST") + .uri("/operation") + .header("content-type", "${metadata.contentType}") + .body(hyper::Body::from(body)) + .expect("failed to build request"); + let result = crate::protocol_serde::shape_sample_operation::de_sample_operation_http_request(request).await; + assert!( + result.is_ok(), + "Invalid utf8 should have been replaced. {result:?}" + ); + assert_eq!( + result.unwrap().x.unwrap(), + "�", + "payload should have been replaced with �." + ); + } + """, + ) + } } } } @Test fun `invalid utf8 should be rejected if the codegen flag is not set`() { - serverIntegrationTest( - model, - ) { _, rustCrate -> - rustCrate.testModule { - rustTemplate( - """ - ##[tokio::test] - async fn test_invalid_utf8_raises_an_error() { - let body = r##"{ "x" : "\ud800" }"##; - let request = http::Request::builder() - .method("POST") - .uri("/operation") - .header("content-type", "application/json") - .body(hyper::Body::from(body)) - .expect("failed to build request"); - let result = crate::protocol_serde::shape_sample_operation::de_sample_operation_http_request(request).await; - assert!( - result.is_err(), - "invalid utf8 characters should not be allowed by default {result:?}" - ); - } - """, - ) + model.forAllProtocols(exclude = setOf(ModelProtocol.RestXml, ModelProtocol.Rpcv2Cbor)) { model, metadata -> + serverIntegrationTest( + model, + ) { _, rustCrate -> + rustCrate.testModule { + rustTemplate( + """ + ##[tokio::test] + async fn test_invalid_utf8_raises_an_error() { + let body = r##"{ "x" : "\ud800" }"##; + let request = http::Request::builder() + .method("POST") + .uri("/operation") + .header("content-type", "${metadata.contentType}") + .body(hyper::Body::from(body)) + .expect("failed to build request"); + let result = crate::protocol_serde::shape_sample_operation::de_sample_operation_http_request(request).await; + assert!( + result.is_err(), + "invalid utf8 characters should not be allowed by default {result:?}" + ); + let error_msg = result.err().unwrap().to_string(); + assert!(error_msg.contains("failed to unescape JSON string")); + } + """, + ) + } } } } diff --git a/codegen-server/src/test/kotlin/software/amazon/smithy/rust/codegen/server/smithy/protocols/serialize/CborConstraintsIntegrationTest.kt b/codegen-server/src/test/kotlin/software/amazon/smithy/rust/codegen/server/smithy/protocols/serialize/CborConstraintsIntegrationTest.kt index 0a80a125d6..03f5ce71c7 100644 --- a/codegen-server/src/test/kotlin/software/amazon/smithy/rust/codegen/server/smithy/protocols/serialize/CborConstraintsIntegrationTest.kt +++ b/codegen-server/src/test/kotlin/software/amazon/smithy/rust/codegen/server/smithy/protocols/serialize/CborConstraintsIntegrationTest.kt @@ -6,8 +6,8 @@ package software.amazon.smithy.rust.codegen.server.smithy.protocols.serialize import org.junit.jupiter.api.Test import software.amazon.smithy.rust.codegen.core.testutil.IntegrationTestParams +import software.amazon.smithy.rust.codegen.core.testutil.ModelProtocol import software.amazon.smithy.rust.codegen.core.testutil.ServerAdditionalSettings -import software.amazon.smithy.rust.codegen.server.smithy.ModelProtocol import software.amazon.smithy.rust.codegen.server.smithy.loadSmithyConstraintsModelForProtocol import software.amazon.smithy.rust.codegen.server.smithy.testutil.serverIntegrationTest diff --git a/rust-runtime/aws-smithy-json/src/escape.rs b/rust-runtime/aws-smithy-json/src/escape.rs index cc208e0b7e..006eea89aa 100644 --- a/rust-runtime/aws-smithy-json/src/escape.rs +++ b/rust-runtime/aws-smithy-json/src/escape.rs @@ -233,7 +233,10 @@ fn read_unicode_escapes(bytes: &[u8], into: &mut Vec) -> Result into.push(chr as u8), _ => into.extend_from_slice(chr.encode_utf8(&mut [0; 4]).as_bytes()), }, - None => into.extend_from_slice(&[0xEF, 0xBF, 0xBD]), // &[0xEF, 0xBF, 0xBD] is the byte representation of the '�' (\uFFFD) replacement character. + None => { + const REPLACEMENT_BYTES: &[u8] = "\u{FFFD}".as_bytes(); + into.extend_from_slice(REPLACEMENT_BYTES) + } } Ok(bytes_read)