Skip to content

Commit

Permalink
Update toSnakeCase to better handle plurals, version numbers, and o…
Browse files Browse the repository at this point in the history
…ther pathological cases (#3037)

## Motivation and Context
There are currently a lot of fields in the SDK that are clearly
converted to snake-case wrong:
<img width="738" alt="Screenshot 2023-10-09 at 12 00 43 PM"
src="https://github.com/awslabs/smithy-rs/assets/492903/796b5dac-ca26-4a55-b9da-b88a976251d5">
<img width="417" alt="Screenshot 2023-10-09 at 12 02 48 PM"
src="https://github.com/awslabs/smithy-rs/assets/492903/b091bf50-2e6b-4ada-b2f8-8c7158f18f7d">


## Description
- Author a new splitWords algorithm
- Add snapshot test
- Add lots of individual tests.

## Testing
- snapshot testing, compared with current behavior.

## Checklist
<!--- If a checkbox below is not applicable, then please DELETE it
rather than leaving it unchecked -->
- [ ] I have updated `CHANGELOG.next.toml` if I made changes to the
smithy-rs codegen or runtime crates
- [ ] I have updated `CHANGELOG.next.toml` if I made changes to the AWS
SDK, generated SDK code, or SDK runtime crates

----

_By submitting this pull request, I confirm that you can use, modify,
copy, and redistribute this contribution, under the terms of your
choice._
  • Loading branch information
rcoh authored Oct 10, 2023
1 parent 8bfc4c6 commit e61fb6f
Show file tree
Hide file tree
Showing 7 changed files with 126,167 additions and 10 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.next.toml
Original file line number Diff line number Diff line change
Expand Up @@ -297,3 +297,9 @@ message = "The `customize()` method is now sync and infallible. Remove any `awai
references = ["smithy-rs#3039"]
meta = { "breaking" = true, "tada" = false, "bug" = false, "target" = "client" }
author = "jdisanti"

[[smithy-rs]]
message = "Our algorithm for converting identifiers to `snake_case` has been updated. This may result in a small change for some identifiers, particularly acronyms ending in `s`, e.g. `ACLs`."
references = ["smithy-rs#3037", "aws-sdk-rust#756"]
meta = { "breaking" = true, "tada" = false, "bug" = true, "target" = "all" }
author = "rcoh"
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ import software.amazon.smithy.rust.codegen.core.rustlang.docsTemplate
import software.amazon.smithy.rust.codegen.core.rustlang.rustTemplate
import software.amazon.smithy.rust.codegen.core.rustlang.writable
import software.amazon.smithy.rust.codegen.core.smithy.customize.writeCustomizationsOrElse
import software.amazon.smithy.rust.codegen.core.util.toSnakeCase

object AwsDocs {
/**
Expand All @@ -27,7 +26,7 @@ object AwsDocs {
).contains(codegenContext.serviceShape.id)

fun constructClient(codegenContext: ClientCodegenContext, indent: String): Writable {
val crateName = codegenContext.moduleName.toSnakeCase()
val crateName = codegenContext.moduleUseName()
return writable {
writeCustomizationsOrElse(
codegenContext.rootDecorator.extraSections(codegenContext),
Expand All @@ -48,7 +47,7 @@ object AwsDocs {

fun clientConstructionDocs(codegenContext: ClientCodegenContext): Writable = {
if (canRelyOnAwsConfig(codegenContext)) {
val crateName = codegenContext.moduleName.toSnakeCase()
val crateName = codegenContext.moduleUseName()
docsTemplate(
"""
#### Constructing a `Client`
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,20 +8,116 @@ package software.amazon.smithy.rust.codegen.core.util
import software.amazon.smithy.utils.CaseUtils
import software.amazon.smithy.utils.StringUtils

fun String.doubleQuote(): String = StringUtils.escapeJavaString(this, "").replace(Regex("""\\u([0-9a-f]{4})""")) { matchResult: MatchResult ->
"\\u{" + matchResult.groupValues[1] + "}" as CharSequence
}
fun String.doubleQuote(): String =
StringUtils.escapeJavaString(this, "").replace(Regex("""\\u([0-9a-f]{4})""")) { matchResult: MatchResult ->
"\\u{" + matchResult.groupValues[1] + "}" as CharSequence
}

/**
* Double quote a string, e.g. "abc" -> "\"abc\""
*/
fun String.dq(): String = this.doubleQuote()

// String extensions
private fun String.splitOnWordBoundaries(): List<String> {
val out = mutableListOf<String>()
// These are whole words but cased differently, e.g. `IPv4`, `MiB`, `GiB`, `TtL`
val completeWords = listOf("ipv4", "ipv6", "sigv4", "mib", "gib", "kib", "ttl")
var currentWord = ""

// emit the current word and update from the next character
val emit = { next: Char ->
if (currentWord.isNotEmpty()) {
out += currentWord.lowercase()
}
currentWord = if (next.isLetterOrDigit()) {
next.toString()
} else {
""
}
}
val allLowerCase = this.lowercase() == this
this.forEachIndexed { index, nextCharacter ->
val peek = this.getOrNull(index + 1)
val doublePeek = this.getOrNull(index + 2)
val completeWordInProgress = completeWords.any {
(currentWord + this.substring(index)).lowercase().startsWith(
it,
)
} && !completeWords.contains(currentWord.lowercase())
when {
// [C] in these docs indicates the value of nextCharacter
// A[_]B
!nextCharacter.isLetterOrDigit() -> emit(nextCharacter)

// If we have no letters so far, push the next letter (we already know it's a letter or digit)
currentWord.isEmpty() -> currentWord += nextCharacter.toString()

// Abc[D]ef or Ab2[D]ef
!completeWordInProgress && loweredFollowedByUpper(currentWord, nextCharacter) -> emit(nextCharacter)

// s3[k]ey
!completeWordInProgress && allLowerCase && digitFollowedByLower(currentWord, nextCharacter) -> emit(
nextCharacter,
)

// DB[P]roxy, or `IAM[U]ser` but not AC[L]s
endOfAcronym(currentWord, nextCharacter, peek, doublePeek) -> emit(nextCharacter)

// If we haven't found a word boundary, push it and keep going
else -> currentWord += nextCharacter.toString()
}
}
if (currentWord.isNotEmpty()) {
out += currentWord
}
return out
}

/**
* Handle cases like `DB[P]roxy`, `ARN[S]upport`, `AC[L]s`
*/
private fun endOfAcronym(current: String, nextChar: Char, peek: Char?, doublePeek: Char?): Boolean {
if (!current.last().isUpperCase()) {
// Not an acronym in progress
return false
}
if (!nextChar.isUpperCase()) {
// We aren't at the next word yet
return false
}

if (peek?.isLowerCase() != true) {
return false
}

// Skip cases like `AR[N]s`, `AC[L]s` but not `IAM[U]ser`
if (peek == 's' && (doublePeek == null || !doublePeek.isLowerCase())) {
return false
}

// Skip cases like `DynamoD[B]v2`
if (peek == 'v' && doublePeek?.isDigit() == true) {
return false
}
return true
}

private fun loweredFollowedByUpper(current: String, nextChar: Char): Boolean {
if (!nextChar.isUpperCase()) {
return false
}
return current.last().isLowerCase() || current.last().isDigit()
}

private fun digitFollowedByLower(current: String, nextChar: Char): Boolean {
return (current.last().isDigit() && nextChar.isLowerCase())
}

fun String.toSnakeCase(): String {
return CaseUtils.toSnakeCase(this)
return this.splitOnWordBoundaries().joinToString("_") { it.lowercase() }
}

fun String.toPascalCase(): String {
// TODO(https://github.com/awslabs/smithy-rs/issues/3047): consider using our updated toSnakeCase (but need to audit diff)
return CaseUtils.toSnakeCase(this).let { CaseUtils.toPascalCase(it) }
}
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,14 @@ package software.amazon.smithy.rust.codegen.core.util

import io.kotest.matchers.shouldBe
import org.junit.jupiter.api.Test
import org.junit.jupiter.api.extension.ExtensionContext
import org.junit.jupiter.api.fail
import org.junit.jupiter.params.ParameterizedTest
import org.junit.jupiter.params.provider.Arguments
import org.junit.jupiter.params.provider.ArgumentsProvider
import org.junit.jupiter.params.provider.ArgumentsSource
import java.io.File
import java.util.stream.Stream

internal class StringsTest {

Expand All @@ -18,4 +26,77 @@ internal class StringsTest {
"{\"nested\": \"{\\\"nested\\\": 5}\"}\"}"
""".trimIndent().trim()
}

@Test
fun correctlyConvertToSnakeCase() {
"NotificationARNs".toSnakeCase() shouldBe "notification_arns"
}

@Test
fun testAllNames() {
// Set this to true to write a new test expectation file
val publishUpdate = false
val allNames = this::class.java.getResource("/testOutput.txt")?.readText()!!
val errors = mutableListOf<String>()
val output = StringBuilder()
allNames.lines().filter { it.isNotBlank() }.forEach {
val split = it.split(',')
val input = split[0]
val expectation = split[1]
val actual = input.toSnakeCase()
if (input.toSnakeCase() != expectation) {
errors += "$it => $actual (expected $expectation)"
}
output.appendLine("$input,$actual")
}
if (publishUpdate) {
File("testOutput.txt").writeText(output.toString())
}
if (errors.isNotEmpty()) {
fail(errors.joinToString("\n"))
}
}

@ParameterizedTest
@ArgumentsSource(TestCasesProvider::class)
fun testSnakeCase(input: String, output: String) {
input.toSnakeCase() shouldBe output
}
}

class TestCasesProvider : ArgumentsProvider {
override fun provideArguments(context: ExtensionContext?): Stream<out Arguments> =
listOf(
"ACLs" to "acls",
"ACLsUpdateStatus" to "acls_update_status",
"AllowedAllVPCs" to "allowed_all_vpcs",
"BluePrimaryX" to "blue_primary_x",
"CIDRs" to "cidrs",
"AuthTtL" to "auth_ttl",
"CNAMEPrefix" to "cname_prefix",
"S3Location" to "s3_location",
"signatureS" to "signature_s",
"signatureR" to "signature_r",
"M3u8Settings" to "m3u8_settings",
"IAMUser" to "iam_user",
"OtaaV1_0_x" to "otaa_v1_0_x",
"DynamoDBv2Action" to "dynamo_dbv2_action",
"SessionKeyEmv2000" to "session_key_emv2000",
"SupportsClassB" to "supports_class_b",
"UnassignIpv6AddressesRequest" to "unassign_ipv6_addresses_request",
"TotalGpuMemoryInMiB" to "total_gpu_memory_in_mib",
"WriteIOs" to "write_ios",
"dynamoDBv2" to "dynamo_dbv2",
"ipv4Address" to "ipv4_address",
"sigv4" to "sigv4",
"s3key" to "s3_key",
"sha256sum" to "sha256_sum",
"Av1QvbrSettings" to "av1_qvbr_settings",
"Av1Settings" to "av1_settings",
"AwsElbv2LoadBalancer" to "aws_elbv2_load_balancer",
"SigV4Authorization" to "sigv4_authorization",
"IpV6Address" to "ipv6_address",
"IpV6Cidr" to "ipv6_cidr",
"IpV4Addresses" to "ipv4_addresses",
).map { Arguments.of(it.first, it.second) }.stream()
}
Loading

0 comments on commit e61fb6f

Please sign in to comment.