From b93718a5977fc1c6b7fc8070b77a5576309761c4 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Tue, 3 Sep 2024 18:25:26 +0800 Subject: [PATCH] support function timestamp_xxx --- .../gluten/utils/CHExpressionUtil.scala | 2 - .../GlutenFunctionValidateSuite.scala | 12 ++++ .../CommonScalarFunctionParser.cpp | 2 + .../timestampSeconds.cpp | 58 +++++++++++++++++++ .../expression/ExpressionMappings.scala | 1 + .../clickhouse/ClickHouseTestSettings.scala | 3 - .../clickhouse/ClickHouseTestSettings.scala | 3 - .../clickhouse/ClickHouseTestSettings.scala | 3 - .../clickhouse/ClickHouseTestSettings.scala | 3 - .../gluten/expression/ExpressionNames.scala | 1 + 10 files changed, 74 insertions(+), 14 deletions(-) create mode 100644 cpp-ch/local-engine/Parser/scalar_function_parser/timestampSeconds.cpp diff --git a/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHExpressionUtil.scala b/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHExpressionUtil.scala index 1d0f13055f6ac..f2b08e3bd6c69 100644 --- a/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHExpressionUtil.scala +++ b/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHExpressionUtil.scala @@ -216,8 +216,6 @@ object CHExpressionUtil { REGR_SXY -> DefaultValidator(), TO_UTC_TIMESTAMP -> UtcTimestampValidator(), FROM_UTC_TIMESTAMP -> UtcTimestampValidator(), - TIMESTAMP_MILLIS -> DefaultValidator(), - TIMESTAMP_MICROS -> DefaultValidator(), STACK -> DefaultValidator(), TRANSFORM_KEYS -> DefaultValidator(), TRANSFORM_VALUES -> DefaultValidator(), diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenFunctionValidateSuite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenFunctionValidateSuite.scala index 0b91522aedbcc..9cf545f5bd335 100644 --- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenFunctionValidateSuite.scala +++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenFunctionValidateSuite.scala @@ -780,4 +780,16 @@ class GlutenFunctionValidateSuite extends GlutenClickHouseWholeStageTransformerS |""".stripMargin runQueryAndCompare(sql)(checkGlutenOperatorMatch[ProjectExecTransformer]) } + + test("test function timestamp_seconds/timestamp_millis/timestamp_micros") { + val sql = """ + |SELECT + | id, + | timestamp_seconds(1725453790 + id) as ts_seconds, + | timestamp_millis(1725453790123 + id) as ts_millis, + | timestamp_micros(1725453790123456 + id) as ts_micros + |from range(10); + |""".stripMargin + runQueryAndCompare(sql)(checkGlutenOperatorMatch[ProjectExecTransformer]) + } } diff --git a/cpp-ch/local-engine/Parser/scalar_function_parser/CommonScalarFunctionParser.cpp b/cpp-ch/local-engine/Parser/scalar_function_parser/CommonScalarFunctionParser.cpp index ae654bd296ef6..429d57a8d52f7 100644 --- a/cpp-ch/local-engine/Parser/scalar_function_parser/CommonScalarFunctionParser.cpp +++ b/cpp-ch/local-engine/Parser/scalar_function_parser/CommonScalarFunctionParser.cpp @@ -158,6 +158,8 @@ REGISTER_COMMON_SCALAR_FUNCTION_PARSER(UnixSeconds, unix_seconds, toUnixTimestam REGISTER_COMMON_SCALAR_FUNCTION_PARSER(UnixDate, unix_date, toInt32); REGISTER_COMMON_SCALAR_FUNCTION_PARSER(UnixMillis, unix_millis, toUnixTimestamp64Milli); REGISTER_COMMON_SCALAR_FUNCTION_PARSER(UnixMicros, unix_micros, toUnixTimestamp64Micro); +REGISTER_COMMON_SCALAR_FUNCTION_PARSER(TimestampMillis, timestamp_millis, fromUnixTimestamp64Milli); +REGISTER_COMMON_SCALAR_FUNCTION_PARSER(TimestampMicros, timestamp_micros, fromUnixTimestamp64Micro); // array functions REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Array, array, array); diff --git a/cpp-ch/local-engine/Parser/scalar_function_parser/timestampSeconds.cpp b/cpp-ch/local-engine/Parser/scalar_function_parser/timestampSeconds.cpp new file mode 100644 index 0000000000000..a0d6f5de69761 --- /dev/null +++ b/cpp-ch/local-engine/Parser/scalar_function_parser/timestampSeconds.cpp @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +namespace DB +{ +namespace ErrorCodes +{ +extern const int BAD_ARGUMENTS; +extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +} +} + +namespace local_engine +{ + +class FunctionParserTimestampSeconds : public FunctionParser +{ +public: + explicit FunctionParserTimestampSeconds(SerializedPlanParser * plan_parser_) : FunctionParser(plan_parser_) { } + ~FunctionParserTimestampSeconds() override = default; + + static constexpr auto name = "timestamp_seconds"; + + String getName() const override { return name; } + + const ActionsDAG::Node * parse(const substrait::Expression_ScalarFunction & substrait_func, ActionsDAG & actions_dag) const override + { + /// Parse timestamp_seconds(expr) as toDateTime64(expr, 6) + auto parsed_args = parseFunctionArguments(substrait_func, actions_dag); + if (parsed_args.size() != 1) + throw Exception(DB::ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} requires exactly one argument", getName()); + + const auto * arg = parsed_args[0]; + const auto * precision_node = addColumnToActionsDAG(actions_dag, std::make_shared(), 6); + const auto * toDateTime64_node = toFunctionNode(actions_dag, "toDateTime64", {arg, precision_node}); + return convertNodeTypeIfNeeded(substrait_func, toDateTime64_node, actions_dag); + } +}; + +static FunctionParserRegister register_timestamp_seconds; +} diff --git a/gluten-substrait/src/main/scala/org/apache/gluten/expression/ExpressionMappings.scala b/gluten-substrait/src/main/scala/org/apache/gluten/expression/ExpressionMappings.scala index 38f9de629a169..e2f7971ad673e 100644 --- a/gluten-substrait/src/main/scala/org/apache/gluten/expression/ExpressionMappings.scala +++ b/gluten-substrait/src/main/scala/org/apache/gluten/expression/ExpressionMappings.scala @@ -199,6 +199,7 @@ object ExpressionMappings { Sig[UnixSeconds](UNIX_SECONDS), Sig[UnixMillis](UNIX_MILLIS), Sig[UnixMicros](UNIX_MICROS), + Sig[SecondsToTimestamp](TIMESTAMP_SECONDS), Sig[MillisToTimestamp](TIMESTAMP_MILLIS), Sig[MicrosToTimestamp](TIMESTAMP_MICROS), Sig[PreciseTimestampConversion](PRECYSE_TIMESTAMP_CONVERSION), diff --git a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala index fb9ce5afb7734..365863c5e9bdb 100644 --- a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala +++ b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala @@ -740,9 +740,6 @@ class ClickHouseTestSettings extends BackendTestSettings { .exclude("SPARK-31896: Handle am-pm timestamp parsing when hour is missing") .exclude("DATE_FROM_UNIX_DATE") .exclude("UNIX_SECONDS") - .exclude("TIMESTAMP_SECONDS") - .exclude("TIMESTAMP_MILLIS") - .exclude("TIMESTAMP_MICROS") .exclude("SPARK-33498: GetTimestamp,UnixTimestamp,ToUnixTimestamp with parseError") .exclude("SPARK-34739,SPARK-35889: add a year-month interval to a timestamp") .exclude("SPARK-34761,SPARK-35889: add a day-time interval to a timestamp") diff --git a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala index 705f5beaf3dc5..192190ca2abb4 100644 --- a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala +++ b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala @@ -769,9 +769,6 @@ class ClickHouseTestSettings extends BackendTestSettings { .exclude("SPARK-31896: Handle am-pm timestamp parsing when hour is missing") .exclude("DATE_FROM_UNIX_DATE") .exclude("UNIX_SECONDS") - .exclude("TIMESTAMP_SECONDS") - .exclude("TIMESTAMP_MILLIS") - .exclude("TIMESTAMP_MICROS") .exclude("SPARK-33498: GetTimestamp,UnixTimestamp,ToUnixTimestamp with parseError") .exclude("SPARK-34739,SPARK-35889: add a year-month interval to a timestamp") .exclude("SPARK-34761,SPARK-35889: add a day-time interval to a timestamp") diff --git a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala index 5f30dea84d399..e15e9631ae5be 100644 --- a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala +++ b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala @@ -657,9 +657,6 @@ class ClickHouseTestSettings extends BackendTestSettings { .exclude("SPARK-31896: Handle am-pm timestamp parsing when hour is missing") .exclude("DATE_FROM_UNIX_DATE") .exclude("UNIX_SECONDS") - .exclude("TIMESTAMP_SECONDS") - .exclude("TIMESTAMP_MILLIS") - .exclude("TIMESTAMP_MICROS") .exclude("SPARK-33498: GetTimestamp,UnixTimestamp,ToUnixTimestamp with parseError") .exclude("SPARK-34739,SPARK-35889: add a year-month interval to a timestamp") .exclude("SPARK-34761,SPARK-35889: add a day-time interval to a timestamp") diff --git a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala index 6a2241f7e4dd7..b9b5d7ca9223e 100644 --- a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala +++ b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala @@ -657,9 +657,6 @@ class ClickHouseTestSettings extends BackendTestSettings { .exclude("SPARK-31896: Handle am-pm timestamp parsing when hour is missing") .exclude("DATE_FROM_UNIX_DATE") .exclude("UNIX_SECONDS") - .exclude("TIMESTAMP_SECONDS") - .exclude("TIMESTAMP_MILLIS") - .exclude("TIMESTAMP_MICROS") .exclude("SPARK-33498: GetTimestamp,UnixTimestamp,ToUnixTimestamp with parseError") .exclude("SPARK-34739,SPARK-35889: add a year-month interval to a timestamp") .exclude("SPARK-34761,SPARK-35889: add a day-time interval to a timestamp") diff --git a/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala b/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala index 96a615615179c..e1a94487a9513 100644 --- a/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala +++ b/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala @@ -223,6 +223,7 @@ object ExpressionNames { final val UNIX_SECONDS = "unix_seconds" final val UNIX_MILLIS = "unix_millis" final val UNIX_MICROS = "unix_micros" + final val TIMESTAMP_SECONDS = "timestamp_seconds" final val TIMESTAMP_MILLIS = "timestamp_millis" final val TIMESTAMP_MICROS = "timestamp_micros" final val PRECYSE_TIMESTAMP_CONVERSION = "precise_timestamp_conversion"