Skip to content

Commit

Permalink
[GLUTEN-6388][CH] Support function format (#6716)
Browse files Browse the repository at this point in the history
* support function printf

* support function format_string

* fix failed unit tests

* fix failed unit tests

* fix failed unit test
  • Loading branch information
taiyang-li authored Aug 9, 2024
1 parent f7e59be commit 58f1cf6
Show file tree
Hide file tree
Showing 9 changed files with 32 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,13 @@ case class ArrayJoinValidator() extends FunctionValidator {
}
}

/**
 * Validator for Spark's `FormatString` expression.
 *
 * The expression passes validation only when its first child (the format pattern argument)
 * is a `Literal`.
 */
case class FormatStringValidator() extends FunctionValidator {

  /**
   * @param expr
   *   the expression to validate
   * @return
   *   `true` when `expr` is a `FormatString` whose first child is a `Literal`; `false` for any
   *   other expression type or when the expression has no children
   */
  override def doValidate(expr: Expression): Boolean = expr match {
    case formatString: FormatString =>
      // headOption keeps an empty children list from raising NoSuchElementException.
      formatString.children.headOption.exists(_.isInstanceOf[Literal])
    case _ =>
      // Reject unexpected expression types instead of failing with ClassCastException.
      // NOTE(review): `false` is presumed to mean "not supported" for FunctionValidator —
      // confirm against the trait's contract.
      false
  }
}

object CHExpressionUtil {

final val CH_AGGREGATE_FUNC_BLACKLIST: Map[String, FunctionValidator] = Map(
Expand Down Expand Up @@ -199,6 +206,7 @@ object CHExpressionUtil {
SPARK_PARTITION_ID -> DefaultValidator(),
URL_DECODE -> DefaultValidator(),
URL_ENCODE -> DefaultValidator(),
FORMAT_STRING -> FormatStringValidator(),
SKEWNESS -> DefaultValidator(),
SOUNDEX -> DefaultValidator(),
MAKE_YM_INTERVAL -> DefaultValidator(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -740,4 +740,19 @@ class GlutenFunctionValidateSuite extends GlutenClickHouseWholeStageTransformerS
|""".stripMargin
runQueryAndCompare(sql)(checkGlutenOperatorMatch[ProjectExecTransformer])
}

test("test function format_string") {
  // The format pattern is a string literal followed by integer (%d), string (%s) and
  // float (%f) arguments derived from the `id` column of range(10).
  val formatStringQuery =
    """
      | SELECT
      | format_string(
      | 'hello world %d %d %s %f',
      | id,
      | id,
      | CAST(id AS STRING),
      | CAST(id AS float)
      | )
      |FROM range(10)
      |""".stripMargin
  // checkGlutenOperatorMatch[ProjectExecTransformer] is passed as the check callback.
  runQueryAndCompare(formatStringQuery)(checkGlutenOperatorMatch[ProjectExecTransformer])
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ REGISTER_COMMON_SCALAR_FUNCTION_PARSER(GetTimestamp, get_timestamp, parseDateTim
REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Quarter, quarter, toQuarter);
REGISTER_COMMON_SCALAR_FUNCTION_PARSER(ToUnixTimestamp, to_unix_timestamp, parseDateTimeInJodaSyntaxOrNull);

// math functions
// NOTE(review): the first macro argument `Position` below looks like a typo for `Positive`
// (the name registered in the second argument is `positive`) — confirm against the macro
// definition before renaming.
REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Position, positive, identity);
REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Negative, negative, negate);
REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Pmod, pmod, pmod);
Expand Down Expand Up @@ -107,6 +108,7 @@ REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Rand, rand, randCanonical);
REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Bin, bin, sparkBin);
REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Rint, rint, sparkRint);

// string functions
REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Like, like, like);
REGISTER_COMMON_SCALAR_FUNCTION_PARSER(NotLike, not_like, notLike);
REGISTER_COMMON_SCALAR_FUNCTION_PARSER(StartsWith, starts_with, startsWithUTF8);
Expand All @@ -130,6 +132,7 @@ REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Initcap, initcap, initcapUTF8);
REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Conv, conv, sparkConv);
REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Uuid, uuid, generateUUIDv4);
REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Levenshtein, levenshtein, editDistanceUTF8);
// `format_string` is registered against `printf`; presumably the third argument is the
// backend function that evaluates it — TODO confirm against the macro definition.
REGISTER_COMMON_SCALAR_FUNCTION_PARSER(FormatString, format_string, printf);

REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Crc32, crc32, CRC32);
REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Murmur3Hash, murmur3hash, sparkMurmurHash3_32);
Expand All @@ -150,7 +153,6 @@ REGISTER_COMMON_SCALAR_FUNCTION_PARSER(FloorDatetime, floor_datetime, dateTrunc)
REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Floor, floor, sparkFloor);
// NOTE(review): the first macro argument `MothsBetween` looks like a typo for `MonthsBetween`
// (the name registered in the second argument is `months_between`) — confirm before renaming.
REGISTER_COMMON_SCALAR_FUNCTION_PARSER(MothsBetween, months_between, sparkMonthsBetween);


// array functions
REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Array, array, array);
REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Shuffle, shuffle, arrayShuffle);
Expand All @@ -165,7 +167,6 @@ REGISTER_COMMON_SCALAR_FUNCTION_PARSER(MapKeys, map_keys, mapKeys);
REGISTER_COMMON_SCALAR_FUNCTION_PARSER(MapValues, map_values, mapValues);
REGISTER_COMMON_SCALAR_FUNCTION_PARSER(MapFromArrays, map_from_arrays, mapFromArrays);


// json functions
REGISTER_COMMON_SCALAR_FUNCTION_PARSER(FlattenJsonStringOnRequired, flattenJSONStringOnRequired, flattenJSONStringOnRequired);
REGISTER_COMMON_SCALAR_FUNCTION_PARSER(ToJson, to_json, toJSONString);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ object ExpressionMappings {
Sig[Levenshtein](LEVENSHTEIN),
Sig[UnBase64](UNBASE64),
Sig[Base64](BASE64),
Sig[FormatString](FORMAT_STRING),

// URL functions
Sig[ParseUrl](PARSE_URL),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -912,6 +912,7 @@ class ClickHouseTestSettings extends BackendTestSettings {
.exclude("REPEAT")
.exclude("ParseUrl")
.exclude("SPARK-33468: ParseUrl in ANSI mode should fail if input string is not a valid url")
.exclude("FORMAT") // refer https://github.com/apache/incubator-gluten/issues/6765
.excludeGlutenTest("SPARK-40213: ascii for Latin-1 Supplement characters")
enableSuite[GlutenTryCastSuite]
.exclude("null cast")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -871,6 +871,7 @@ class ClickHouseTestSettings extends BackendTestSettings {
.exclude("REPEAT")
.exclude("ParseUrl")
.exclude("SPARK-33468: ParseUrl in ANSI mode should fail if input string is not a valid url")
.exclude("FORMAT") // refer https://github.com/apache/incubator-gluten/issues/6765
enableSuite[GlutenTryCastSuite]
.exclude("null cast")
.exclude("cast string to date")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -775,6 +775,7 @@ class ClickHouseTestSettings extends BackendTestSettings {
.exclude("REPEAT")
.exclude("ParseUrl")
.exclude("SPARK-33468: ParseUrl in ANSI mode should fail if input string is not a valid url")
.exclude("FORMAT") // refer https://github.com/apache/incubator-gluten/issues/6765
enableSuite[GlutenDataSourceV2DataFrameSessionCatalogSuite]
enableSuite[GlutenDataSourceV2SQLSessionCatalogSuite]
enableSuite[GlutenDataSourceV2SQLSuiteV1Filter]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -775,6 +775,7 @@ class ClickHouseTestSettings extends BackendTestSettings {
.exclude("REPEAT")
.exclude("ParseUrl")
.exclude("SPARK-33468: ParseUrl in ANSI mode should fail if input string is not a valid url")
.exclude("FORMAT") // refer https://github.com/apache/incubator-gluten/issues/6765
enableSuite[GlutenDataSourceV2DataFrameSessionCatalogSuite]
enableSuite[GlutenDataSourceV2SQLSessionCatalogSuite]
enableSuite[GlutenDataSourceV2SQLSuiteV1Filter]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ object ExpressionNames {
final val UNBASE64 = "unbase64"
final val BASE64 = "base64"
final val MASK = "mask"
// Function name associated with Spark's FormatString expression via Sig[FormatString].
final val FORMAT_STRING = "format_string"

// URL functions
final val PARSE_URL = "parse_url"
Expand Down

0 comments on commit 58f1cf6

Please sign in to comment.