Skip to content

Commit

Permalink
[GLUTEN-6813][CH] Support soundex function (#7093)
Browse files Browse the repository at this point in the history
* support soundex function

* add uts

* fix style

* fix failed uts
  • Loading branch information
taiyang-li authored Sep 5, 2024
1 parent 8b5e3ce commit 138931e
Show file tree
Hide file tree
Showing 7 changed files with 19 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,6 @@ object CHExpressionUtil {
URL_ENCODE -> DefaultValidator(),
FORMAT_STRING -> FormatStringValidator(),
SKEWNESS -> DefaultValidator(),
SOUNDEX -> DefaultValidator(),
MAKE_YM_INTERVAL -> DefaultValidator(),
MAP_ZIP_WITH -> DefaultValidator(),
ZIP_WITH -> DefaultValidator(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2936,5 +2936,11 @@ class GlutenClickHouseTPCHSaltNullParquetSuite extends GlutenClickHouseTPCHAbstr
checkBHJWithIsNullAwareAntiJoin(df)
})
}

test("soundex") {
runQueryAndCompare("select soundex(c_mktsegment) from customer limit 50") {
checkGlutenOperatorMatch[ProjectExecTransformer]
}
}
}
// scalastyle:on line.size.limit
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,7 @@ REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Uuid, uuid, generateUUIDv4);
REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Levenshtein, levenshtein, editDistanceUTF8);
REGISTER_COMMON_SCALAR_FUNCTION_PARSER(FormatString, format_string, printf);
REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Concat, concat, concat);
REGISTER_COMMON_SCALAR_FUNCTION_PARSER(SoundEx, soundex, soundex);

REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Crc32, crc32, CRC32);
REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Murmur3Hash, murmur3hash, sparkMurmurHash3_32);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -908,6 +908,9 @@ class ClickHouseTestSettings extends BackendTestSettings {
.exclude("ParseUrl")
.exclude("SPARK-33468: ParseUrl in ANSI mode should fail if input string is not a valid url")
.exclude("FORMAT") // refer https://github.com/apache/incubator-gluten/issues/6765
.exclude(
"soundex unit test"
) // CH and spark returns different results when input non-ASCII characters
.excludeGlutenTest("SPARK-40213: ascii for Latin-1 Supplement characters")
enableSuite[GlutenTryCastSuite]
.exclude("null cast")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -904,6 +904,9 @@ class ClickHouseTestSettings extends BackendTestSettings {
.exclude("ParseUrl")
.exclude("SPARK-33468: ParseUrl in ANSI mode should fail if input string is not a valid url")
.exclude("FORMAT") // refer https://github.com/apache/incubator-gluten/issues/6765
.exclude(
"soundex unit test"
) // CH and spark returns different results when input non-ASCII characters
enableSuite[GlutenTryCastSuite]
.exclude("null cast")
.exclude("cast string to date")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -808,6 +808,9 @@ class ClickHouseTestSettings extends BackendTestSettings {
.exclude("ParseUrl")
.exclude("SPARK-33468: ParseUrl in ANSI mode should fail if input string is not a valid url")
.exclude("FORMAT") // refer https://github.com/apache/incubator-gluten/issues/6765
.exclude(
"soundex unit test"
) // CH and spark returns different results when input non-ASCII characters
enableSuite[GlutenDataSourceV2DataFrameSessionCatalogSuite]
enableSuite[GlutenDataSourceV2SQLSessionCatalogSuite]
enableSuite[GlutenDataSourceV2SQLSuiteV1Filter]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -808,6 +808,9 @@ class ClickHouseTestSettings extends BackendTestSettings {
.exclude("ParseUrl")
.exclude("SPARK-33468: ParseUrl in ANSI mode should fail if input string is not a valid url")
.exclude("FORMAT") // refer https://github.com/apache/incubator-gluten/issues/6765
.exclude(
"soundex unit test"
) // CH and spark returns different results when input non-ASCII characters
enableSuite[GlutenDataSourceV2DataFrameSessionCatalogSuite]
enableSuite[GlutenDataSourceV2SQLSessionCatalogSuite]
enableSuite[GlutenDataSourceV2SQLSuiteV1Filter]
Expand Down

0 comments on commit 138931e

Please sign in to comment.