Skip to content

Commit

Permalink
[GLUTEN-5827][CH]support utc timestamp transfrom (#5828)
Browse files Browse the repository at this point in the history
What changes were proposed in this pull request?
(Please fill in changes proposed in this fix)

(Fixes: #5827)

support to_utc_timestamp/from_utc_timestamp function;
convert timezone like '+08:00,-08:00' to GMT+8 , GMT-8 while this config set in spark.sql.session.timezone or in to_utc_timestamp / from_utc_timestamp's parameters
How was this patch tested?
spark ut
  • Loading branch information
KevinyhZou authored Jun 11, 2024
1 parent d3bd3d7 commit b37a6e4
Show file tree
Hide file tree
Showing 11 changed files with 156 additions and 17 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,15 @@ case class SequenceValidator() extends FunctionValidator {
}
}

case class UtcTimestampValidator() extends FunctionValidator {
override def doValidate(expr: Expression): Boolean = expr match {
// CH backend doest not support non-const timezone parameter
case t: ToUTCTimestamp => t.children(1).isInstanceOf[Literal]
case f: FromUTCTimestamp => f.children(1).isInstanceOf[Literal]
case _ => false
}
}

case class UnixTimeStampValidator() extends FunctionValidator {
final val DATE_TYPE = "date"

Expand Down Expand Up @@ -194,8 +203,8 @@ object CHExpressionUtil {
REGR_SLOPE -> DefaultValidator(),
REGR_INTERCEPT -> DefaultValidator(),
REGR_SXY -> DefaultValidator(),
TO_UTC_TIMESTAMP -> DefaultValidator(),
FROM_UTC_TIMESTAMP -> DefaultValidator(),
TO_UTC_TIMESTAMP -> UtcTimestampValidator(),
FROM_UTC_TIMESTAMP -> UtcTimestampValidator(),
UNIX_MILLIS -> DefaultValidator(),
UNIX_MICROS -> DefaultValidator(),
TIMESTAMP_MILLIS -> DefaultValidator(),
Expand Down
18 changes: 12 additions & 6 deletions cpp-ch/local-engine/Common/CHUtil.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -586,7 +586,7 @@ void BackendInitializerUtil::initEnvs(DB::Context::ConfigurationPtr config)
if (config->has("timezone"))
{
const std::string config_timezone = config->getString("timezone");
const String mapped_timezone = DateLUT::mappingForJavaTimezone(config_timezone);
const String mapped_timezone = DateTimeUtil::convertTimeZone(config_timezone);
if (0 != setenv("TZ", mapped_timezone.data(), 1)) // NOLINT(concurrency-mt-unsafe) // ok if not called concurrently with other setenv/getenv
throw Poco::Exception("Cannot setenv TZ variable");

Expand Down Expand Up @@ -659,11 +659,7 @@ void BackendInitializerUtil::initSettings(std::map<std::string, std::string> & b
}
else if (key == SPARK_SESSION_TIME_ZONE)
{
String time_zone_val = value;
/// Convert timezone ID like '+8:00' to GMT+8:00
if (value.starts_with("+") || value.starts_with("-"))
time_zone_val = "GMT" + value;
time_zone_val = DateLUT::mappingForJavaTimezone(time_zone_val);
String time_zone_val = DateTimeUtil::convertTimeZone(value);
settings.set("session_timezone", time_zone_val);
LOG_DEBUG(&Poco::Logger::get("CHUtil"), "Set settings key:{} value:{}", "session_timezone", time_zone_val);
}
Expand Down Expand Up @@ -937,6 +933,16 @@ Int64 DateTimeUtil::currentTimeMillis()
return timeInMilliseconds(std::chrono::system_clock::now());
}

String DateTimeUtil::convertTimeZone(const String & time_zone)
{
String res = time_zone;
/// Convert timezone ID like '+08:00' to GMT+8:00
if (time_zone.starts_with("+") || time_zone.starts_with("-"))
res = "GMT" + time_zone;
res = DateLUT::mappingForJavaTimezone(res);
return res;
}

UInt64 MemoryUtil::getCurrentMemoryUsage(size_t depth)
{
Int64 current_memory_usage = 0;
Expand Down
1 change: 1 addition & 0 deletions cpp-ch/local-engine/Common/CHUtil.h
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,7 @@ class DateTimeUtil
{
public:
static Int64 currentTimeMillis();
static String convertTimeZone(const String & time_zone);
};

class MemoryUtil
Expand Down
2 changes: 1 addition & 1 deletion cpp-ch/local-engine/Parser/SerializedPlanParser.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ static const std::map<std::string, std::string> SCALAR_FUNCTIONS
{"get_timestamp", "parseDateTimeInJodaSyntaxOrNull"}, // for spark function: to_date/to_timestamp
{"quarter", "toQuarter"},
{"to_unix_timestamp", "parseDateTimeInJodaSyntaxOrNull"},
// {"unix_timestamp", "toUnixTimestamp"},
//{"unix_timestamp", "toUnixTimestamp"},
{"date_format", "formatDateTimeInJodaSyntax"},
{"timestamp_add", "timestamp_add"},

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <Parser/scalar_function_parser/utcTimestampTransform.h>

namespace local_engine
{

class FunctionParserFromUtcTimestamp : public FunctionParserUtcTimestampTransform
{
public:
explicit FunctionParserFromUtcTimestamp(SerializedPlanParser * plan_parser_) : FunctionParserUtcTimestampTransform(plan_parser_) { }
~FunctionParserFromUtcTimestamp() = default;

static constexpr auto name = "from_utc_timestamp";
String getCHFunctionName(const substrait::Expression_ScalarFunction &) const override { return "from_utc_timestamp"; }
String getName() const override { return "from_utc_timestamp"; }
};

static FunctionParserRegister<FunctionParserFromUtcTimestamp> fromUtcTimestamp;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <Parser/scalar_function_parser/utcTimestampTransform.h>

namespace local_engine
{

class FunctionParserToUtcTimestamp : public FunctionParserUtcTimestampTransform
{
public:
explicit FunctionParserToUtcTimestamp(SerializedPlanParser * plan_parser_) : FunctionParserUtcTimestampTransform(plan_parser_) { }
~FunctionParserToUtcTimestamp() = default;

static constexpr auto name = "to_utc_timestamp";
String getCHFunctionName(const substrait::Expression_ScalarFunction &) const override { return "to_utc_timestamp"; }
String getName() const override { return "to_utc_timestamp"; }
};

static FunctionParserRegister<FunctionParserToUtcTimestamp> toUtcTimestamp;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypesNumber.h>
#include <Parser/FunctionParser.h>
#include <Common/CHUtil.h>

namespace DB
{

namespace ErrorCodes
{
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}
}

namespace local_engine
{

class FunctionParserUtcTimestampTransform : public FunctionParser
{
public:
explicit FunctionParserUtcTimestampTransform(SerializedPlanParser * plan_parser_) : FunctionParser(plan_parser_) { }
~FunctionParserUtcTimestampTransform() override = default;

const ActionsDAG::Node * parse(const substrait::Expression_ScalarFunction & substrait_func, ActionsDAGPtr & actions_dag) const override
{
/// Convert timezone value to clickhouse backend supported, i.e. GMT+8 -> Etc/GMT-8, +08:00 -> Etc/GMT-8
if (substrait_func.arguments_size() != 2)
throw DB::Exception(DB::ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {}'s must have 2 arguments", getName());

const substrait::Expression & arg1 = substrait_func.arguments()[1].value();
if (!arg1.has_literal() || !arg1.literal().has_string())
throw DB::Exception(DB::ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Function {}'s 2nd argument should be string literal", getName());

const String & arg1_literal = arg1.literal().string();
String time_zone_val = DateTimeUtil::convertTimeZone(arg1_literal);
auto parsed_args = parseFunctionArguments(substrait_func, "", actions_dag);
auto nullable_string_type = DB::makeNullable(std::make_shared<DB::DataTypeString>());
const auto * time_zone_node = addColumnToActionsDAG(actions_dag, nullable_string_type, time_zone_val);
const auto * result_node = toFunctionNode(actions_dag, getCHFunctionName(substrait_func), {parsed_args[0], time_zone_node});
return convertNodeTypeIfNeeded(substrait_func, result_node, actions_dag);
}
};
}
Original file line number Diff line number Diff line change
Expand Up @@ -300,9 +300,7 @@ class ClickHouseTestSettings extends BackendTestSettings {
.exclude("SPARK-30793: truncate timestamps before the epoch to seconds and minutes")
.excludeGlutenTest("unix_timestamp")
.excludeGlutenTest("to_unix_timestamp")
.exclude("to_utc_timestamp with literal zone")
.exclude("to_utc_timestamp with column zone")
.exclude("from_utc_timestamp with literal zone")
.exclude("from_utc_timestamp with column zone")
enableSuite[GlutenDeprecatedAPISuite]
enableSuite[GlutenDynamicPartitionPruningV1SuiteAEOff].excludeGlutenTest(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -326,9 +326,7 @@ class ClickHouseTestSettings extends BackendTestSettings {
.exclude("SPARK-30793: truncate timestamps before the epoch to seconds and minutes")
.excludeGlutenTest("unix_timestamp")
.excludeGlutenTest("to_unix_timestamp")
.exclude("to_utc_timestamp with literal zone")
.exclude("to_utc_timestamp with column zone")
.exclude("from_utc_timestamp with literal zone")
.exclude("from_utc_timestamp with column zone")
enableSuite[GlutenDeprecatedAPISuite]
enableSuite[GlutenDynamicPartitionPruningV1SuiteAEOff].excludeGlutenTest(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -324,9 +324,7 @@ class ClickHouseTestSettings extends BackendTestSettings {
.exclude("SPARK-30793: truncate timestamps before the epoch to seconds and minutes")
.excludeGlutenTest("unix_timestamp")
.excludeGlutenTest("to_unix_timestamp")
.exclude("to_utc_timestamp with literal zone")
.exclude("to_utc_timestamp with column zone")
.exclude("from_utc_timestamp with literal zone")
.exclude("from_utc_timestamp with column zone")
enableSuite[GlutenDeprecatedAPISuite]
enableSuite[GlutenDynamicPartitionPruningV1SuiteAEOff].excludeGlutenTest(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -324,9 +324,7 @@ class ClickHouseTestSettings extends BackendTestSettings {
.exclude("SPARK-30793: truncate timestamps before the epoch to seconds and minutes")
.excludeGlutenTest("unix_timestamp")
.excludeGlutenTest("to_unix_timestamp")
.exclude("to_utc_timestamp with literal zone")
.exclude("to_utc_timestamp with column zone")
.exclude("from_utc_timestamp with literal zone")
.exclude("from_utc_timestamp with column zone")
enableSuite[GlutenDeprecatedAPISuite]
enableSuite[GlutenDynamicPartitionPruningV1SuiteAEOff].excludeGlutenTest(
Expand Down

0 comments on commit b37a6e4

Please sign in to comment.