Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[GLUTEN-6388][CH] Support function format #6716

Merged
merged 5 commits into from
Aug 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,13 @@ case class ArrayJoinValidator() extends FunctionValidator {
}
}

/**
 * Validator registered for the `format_string` function.
 *
 * Validation succeeds only when the first child of the [[FormatString]] expression is a
 * [[Literal]] (presumably the format pattern -- confirm against Spark's `FormatString`).
 * Any other kind of first child yields `false`.
 *
 * The incoming expression is cast to [[FormatString]] without a type check, so passing a
 * different expression type fails with a `ClassCastException`.
 */
case class FormatStringValidator() extends FunctionValidator {
  override def doValidate(expr: Expression): Boolean =
    expr.asInstanceOf[FormatString].children.head match {
      case _: Literal => true
      case _ => false
    }
}

object CHExpressionUtil {

final val CH_AGGREGATE_FUNC_BLACKLIST: Map[String, FunctionValidator] = Map(
Expand Down Expand Up @@ -199,6 +206,7 @@ object CHExpressionUtil {
SPARK_PARTITION_ID -> DefaultValidator(),
URL_DECODE -> DefaultValidator(),
URL_ENCODE -> DefaultValidator(),
FORMAT_STRING -> FormatStringValidator(),
SKEWNESS -> DefaultValidator(),
SOUNDEX -> DefaultValidator(),
MAKE_YM_INTERVAL -> DefaultValidator(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -740,4 +740,19 @@ class GlutenFunctionValidateSuite extends GlutenClickHouseWholeStageTransformerS
|""".stripMargin
runQueryAndCompare(sql)(checkGlutenOperatorMatch[ProjectExecTransformer])
}

// Runs format_string with a literal pattern over integer, string and float arguments,
// compares the result via runQueryAndCompare and checks for a ProjectExecTransformer.
test("test function format_string") {
  val query =
    """
      | SELECT
      | format_string(
      | 'hello world %d %d %s %f',
      | id,
      | id,
      | CAST(id AS STRING),
      | CAST(id AS float)
      | )
      |FROM range(10)
      |""".stripMargin
  runQueryAndCompare(query)(checkGlutenOperatorMatch[ProjectExecTransformer])
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ REGISTER_COMMON_SCALAR_FUNCTION_PARSER(GetTimestamp, get_timestamp, parseDateTim
REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Quarter, quarter, toQuarter);
REGISTER_COMMON_SCALAR_FUNCTION_PARSER(ToUnixTimestamp, to_unix_timestamp, parseDateTimeInJodaSyntaxOrNull);

// math functions
// NOTE(review): the first macro argument on the `positive` entry is spelled "Position";
// this looks like a typo for "Positive". The macro definition is not visible here, so
// confirm how that argument is used (and that the corrected name is free) before renaming.
REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Position, positive, identity);
REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Negative, negative, negate);
REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Pmod, pmod, pmod);
Expand Down Expand Up @@ -107,6 +108,7 @@ REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Rand, rand, randCanonical);
REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Bin, bin, sparkBin);
REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Rint, rint, sparkRint);

// string functions
REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Like, like, like);
REGISTER_COMMON_SCALAR_FUNCTION_PARSER(NotLike, not_like, notLike);
REGISTER_COMMON_SCALAR_FUNCTION_PARSER(StartsWith, starts_with, startsWithUTF8);
Expand All @@ -130,6 +132,7 @@ REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Initcap, initcap, initcapUTF8);
REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Conv, conv, sparkConv);
REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Uuid, uuid, generateUUIDv4);
REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Levenshtein, levenshtein, editDistanceUTF8);
// Registers `format_string` against `printf`.
// NOTE(review): presumably the second argument is the Spark/Substrait function name and the
// third is the ClickHouse function it is translated to -- confirm against the macro definition.
REGISTER_COMMON_SCALAR_FUNCTION_PARSER(FormatString, format_string, printf);

REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Crc32, crc32, CRC32);
REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Murmur3Hash, murmur3hash, sparkMurmurHash3_32);
Expand All @@ -150,7 +153,6 @@ REGISTER_COMMON_SCALAR_FUNCTION_PARSER(FloorDatetime, floor_datetime, dateTrunc)
REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Floor, floor, sparkFloor);
// NOTE(review): "MothsBetween" is probably a misspelling of "MonthsBetween" (the other two
// arguments use "months"). Confirm how the macro uses its first argument before renaming.
REGISTER_COMMON_SCALAR_FUNCTION_PARSER(MothsBetween, months_between, sparkMonthsBetween);


// array functions
REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Array, array, array);
REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Shuffle, shuffle, arrayShuffle);
Expand All @@ -165,7 +167,6 @@ REGISTER_COMMON_SCALAR_FUNCTION_PARSER(MapKeys, map_keys, mapKeys);
REGISTER_COMMON_SCALAR_FUNCTION_PARSER(MapValues, map_values, mapValues);
REGISTER_COMMON_SCALAR_FUNCTION_PARSER(MapFromArrays, map_from_arrays, mapFromArrays);


// json functions
REGISTER_COMMON_SCALAR_FUNCTION_PARSER(FlattenJsonStringOnRequired, flattenJSONStringOnRequired, flattenJSONStringOnRequired);
REGISTER_COMMON_SCALAR_FUNCTION_PARSER(ToJson, to_json, toJSONString);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ object ExpressionMappings {
Sig[Levenshtein](LEVENSHTEIN),
Sig[UnBase64](UNBASE64),
Sig[Base64](BASE64),
Sig[FormatString](FORMAT_STRING),

// URL functions
Sig[ParseUrl](PARSE_URL),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -912,6 +912,7 @@ class ClickHouseTestSettings extends BackendTestSettings {
.exclude("REPEAT")
.exclude("ParseUrl")
.exclude("SPARK-33468: ParseUrl in ANSI mode should fail if input string is not a valid url")
.exclude("FORMAT") // refer https://github.com/apache/incubator-gluten/issues/6765
.excludeGlutenTest("SPARK-40213: ascii for Latin-1 Supplement characters")
enableSuite[GlutenTryCastSuite]
.exclude("null cast")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -871,6 +871,7 @@ class ClickHouseTestSettings extends BackendTestSettings {
.exclude("REPEAT")
.exclude("ParseUrl")
.exclude("SPARK-33468: ParseUrl in ANSI mode should fail if input string is not a valid url")
.exclude("FORMAT") // refer https://github.com/apache/incubator-gluten/issues/6765
enableSuite[GlutenTryCastSuite]
.exclude("null cast")
.exclude("cast string to date")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -775,6 +775,7 @@ class ClickHouseTestSettings extends BackendTestSettings {
.exclude("REPEAT")
.exclude("ParseUrl")
.exclude("SPARK-33468: ParseUrl in ANSI mode should fail if input string is not a valid url")
.exclude("FORMAT") // refer https://github.com/apache/incubator-gluten/issues/6765
enableSuite[GlutenDataSourceV2DataFrameSessionCatalogSuite]
enableSuite[GlutenDataSourceV2SQLSessionCatalogSuite]
enableSuite[GlutenDataSourceV2SQLSuiteV1Filter]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -775,6 +775,7 @@ class ClickHouseTestSettings extends BackendTestSettings {
.exclude("REPEAT")
.exclude("ParseUrl")
.exclude("SPARK-33468: ParseUrl in ANSI mode should fail if input string is not a valid url")
.exclude("FORMAT") // refer https://github.com/apache/incubator-gluten/issues/6765
enableSuite[GlutenDataSourceV2DataFrameSessionCatalogSuite]
enableSuite[GlutenDataSourceV2SQLSessionCatalogSuite]
enableSuite[GlutenDataSourceV2SQLSuiteV1Filter]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ object ExpressionNames {
final val UNBASE64 = "unbase64"
final val BASE64 = "base64"
final val MASK = "mask"
final val FORMAT_STRING = "format_string"

// URL functions
final val PARSE_URL = "parse_url"
Expand Down
Loading