Skip to content

Commit

Permalink
[GLUTEN-4898][CH]Bug fix to date diff (apache#4900)
Browse files Browse the repository at this point in the history
What changes were proposed in this pull request?
(Please fill in changes proposed in this fix)

(Fixes: apache#4898)

Fix the result mismatch of the to_date function;
Fix the exception thrown from parseDateTimeBestEffort in SparkFunctionToDateTime;
Simplify the substring function code.
How was this patch tested?
Tested by unit tests (UT).
  • Loading branch information
KevinyhZou authored Mar 11, 2024
1 parent 3f30efd commit 0f19a86
Show file tree
Hide file tree
Showing 4 changed files with 23 additions and 21 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -2103,7 +2103,10 @@ class GlutenClickHouseTPCHParquetSuite extends GlutenClickHouseTPCHAbstractSuite
|(4, '2023-09-02 00:00:01.333-11'),
|(5, ' 2023-09-02 agdfegfew'),
|(6, 'afe2023-09-02 11:22:33'),
|(7, '1970-01-01 00:00:00')
|(7, '1970-01-01 00:00:00'),
|(8, '2024-3-2'),
|(9, '2024-03-2'),
|(10, '2024-03')
|""".stripMargin
spark.sql(create_table_sql)
spark.sql(insert_data_sql)
Expand Down
30 changes: 15 additions & 15 deletions cpp-ch/local-engine/Functions/SparkFunctionToDate.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include <Functions/FunctionFactory.h>
#include <IO/ReadBufferFromMemory.h>
#include <IO/ReadHelpers.h>
#include <IO/parseDateTimeBestEffort.h>

namespace DB
{
Expand Down Expand Up @@ -50,7 +51,9 @@ class SparkFunctionConvertToDate : public DB::FunctionToDate32OrNull
for (size_t i = start; i < start + length; ++i)
{
if (!isNumericASCII(*(rb.position() + i)))
{
return false;
}
}
return true;
};
Expand All @@ -63,7 +66,7 @@ class SparkFunctionConvertToDate : public DB::FunctionToDate32OrNull
};
if (!checkNumbericASCII(buf, 0, 4)
|| !checkDelimiter(buf, 4)
|| !checkNumbericASCII(buf, 5, 2)
|| !checkNumbericASCII(buf, 5, 2)
|| !checkDelimiter(buf, 7)
|| !checkNumbericASCII(buf, 8, 2))
return false;
Expand Down Expand Up @@ -112,19 +115,19 @@ class SparkFunctionConvertToDate : public DB::FunctionToDate32OrNull
throw DB::Exception(DB::ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Function {}'s return type must be date32.", name);

using ColVecTo = DB::DataTypeDate32::ColumnType;
typename ColVecTo::MutablePtr result_column = ColVecTo::create(size);
typename ColVecTo::MutablePtr result_column = ColVecTo::create(size, 0);
typename ColVecTo::Container & result_container = result_column->getData();
DB::ColumnUInt8::MutablePtr null_map = DB::ColumnUInt8::create(size);
DB::ColumnUInt8::MutablePtr null_map = DB::ColumnUInt8::create(size, 0);
typename DB::ColumnUInt8::Container & null_container = null_map->getData();
const DateLUTImpl * time_zone = &DateLUT::instance();
const DateLUTImpl * local_time_zone = &DateLUT::instance();
const DateLUTImpl * utc_time_zone = &DateLUT::instance("UTC");

for (size_t i = 0; i < size; ++i)
{
auto str = src_col->getDataAt(i);
if (str.size < 10)
if (str.size < 4)
{
null_container[i] = true;
result_container[i] = 0;
continue;
}
else
Expand All @@ -134,20 +137,17 @@ class SparkFunctionConvertToDate : public DB::FunctionToDate32OrNull
{
buf.position() ++;
}
if(buf.buffer().end() - buf.position() < 10)
if(buf.buffer().end() - buf.position() < 4)
{
null_container[i] = true;
result_container[i] = 0;
continue;
}
if (!checkAndGetDate32(buf, result_container[i], *time_zone))
{
null_container[i] = true;
result_container[i] = 0;
}
else
if (!checkAndGetDate32(buf, result_container[i], *local_time_zone))
{
null_container[i] = false;
time_t tmp = 0;
bool parsed = tryParseDateTimeBestEffort(tmp, buf, *local_time_zone, *utc_time_zone);
result_container[i] = local_time_zone->toDayNum<time_t>(tmp);
null_container[i] = !parsed;
}
}
}
Expand Down
4 changes: 2 additions & 2 deletions cpp-ch/local-engine/Functions/SparkFunctionToDateTime.h
Original file line number Diff line number Diff line change
Expand Up @@ -163,8 +163,8 @@ class SparkFunctionConvertToDateTime : public DB::FunctionToDateTime64OrNull
}
else
{
parseDateTime64BestEffort(dst_data[i], scale, buf, *local_time_zone, *utc_time_zone);
null_map_data[i] = 0;
bool parsed = tryParseDateTime64BestEffort(dst_data[i], scale, buf, *local_time_zone, *utc_time_zone);
null_map_data[i] = !parsed;
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,15 +51,14 @@ class FunctionParserSubstring : public FunctionParser
/**
parse substring(str, start_index, length) as
if (start_index == 0)
substring(str, start_index+1, length)
substring(str, 1, length)
else
substring(str, start_index, length)
*/
auto * const_zero_node = addColumnToActionsDAG(actions_dag, start_index_data_type, Field(0));
auto * const_one_node = addColumnToActionsDAG(actions_dag, start_index_data_type, Field(1));
auto * equals_zero_node = toFunctionNode(actions_dag, "equals", {parsed_args[1], const_zero_node});
auto * index_plus_node = toFunctionNode(actions_dag, "plus", {parsed_args[1], const_one_node});
auto * if_node = toFunctionNode(actions_dag, "if", {equals_zero_node, index_plus_node, parsed_args[1]});
auto * if_node = toFunctionNode(actions_dag, "if", {equals_zero_node, const_one_node, parsed_args[1]});
const DB::ActionsDAG::Node * substring_func_node;
if (parsed_args.size() == 2)
substring_func_node = toFunctionNode(actions_dag, "substringUTF8", {parsed_args[0], if_node});
Expand Down

0 comments on commit 0f19a86

Please sign in to comment.