From b5ab38848600be639d5a40fac637ed026d91c585 Mon Sep 17 00:00:00 2001 From: lwz9103 Date: Fri, 6 Sep 2024 11:24:51 +0800 Subject: [PATCH] [GLUTEN-7032][CH] Fix incorrect result using timestamp in-filter --- .../resources/csv-data/filter_timestamp.csv | 4 ++ .../GlutenClickHouseFileFormatSuite.scala | 40 +++++++++++++++++++ cpp-ch/clickhouse.version | 2 +- cpp-ch/local-engine/Common/CHUtil.cpp | 1 + 4 files changed, 46 insertions(+), 1 deletion(-) create mode 100644 backends-clickhouse/src/test/resources/csv-data/filter_timestamp.csv diff --git a/backends-clickhouse/src/test/resources/csv-data/filter_timestamp.csv b/backends-clickhouse/src/test/resources/csv-data/filter_timestamp.csv new file mode 100644 index 000000000000..dabed83da0d1 --- /dev/null +++ b/backends-clickhouse/src/test/resources/csv-data/filter_timestamp.csv @@ -0,0 +1,4 @@ +account_id,record_time,account_user_country,account_date,account_time +20201001,2020-10-01,shanghai,2020-10-01,2020-10-01 10:10:10 +20201005,2020-10-05,shanghai,2020-10-05,2020-10-05 10:10:10 +20201008,2020-10-08,beijing,2020-10-08,2020-10-08 10:10:10 \ No newline at end of file diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseFileFormatSuite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseFileFormatSuite.scala index c10b11290baf..5b499026f81c 100644 --- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseFileFormatSuite.scala +++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseFileFormatSuite.scala @@ -173,6 +173,46 @@ class GlutenClickHouseFileFormatSuite ) } + // scalastyle:off line.size.limit + test("GLUTEN-7032 timestamp in-filter test") { + val filePath = rootPath + "/csv-data/filter_timestamp.csv" + val schema = StructType.apply( + Seq( + StructField.apply("account_id", IntegerType, nullable = false), + StructField.apply("record_time", DateType, nullable = false), + 
StructField.apply("account_user_country", StringType, nullable = false), + StructField.apply("account_date", DateType, nullable = false), + StructField.apply("account_time", TimestampType, nullable = false) + )) + + val options = new util.HashMap[String, String]() + options.put("delimiter", ",") + options.put("header", "false") + options.put("nullValue", "null") + + val df = spark.read + .options(options) + .schema(schema) + .csv(filePath) + .toDF() + df.createTempView("filter_timestamp") + val sql1: String = + "select * from filter_timestamp where account_time in ('2020-10-01 10:10:10', '2020-10-01 10:10:11')" + val sql2: String = + "select * from filter_timestamp where account_time in (timestamp'2020-10-01 10:10:10', timestamp'2020-10-01 10:10:11')" + val sql3: String = "select * from filter_timestamp where account_time = '2020-10-01 10:10:10'" + val sql4: String = + "select * from filter_timestamp where account_time = timestamp'2020-10-01 10:10:10'" + val sql5: String = + "select * from filter_timestamp where account_date in ('2020-10-01', '2020-10-02')" + runAndCompare(sql1) {} + runAndCompare(sql2) {} + runAndCompare(sql3) {} + runAndCompare(sql4) {} + runAndCompare(sql5) {} + } + // scalastyle:on line.size.limit + test("read data from csv file format with filter") { val filePath = basePath + "/csv_test_filter.csv" val csvFileFormat = "csv" diff --git a/cpp-ch/clickhouse.version b/cpp-ch/clickhouse.version index 8859c650a85c..93bf97d78cee 100644 --- a/cpp-ch/clickhouse.version +++ b/cpp-ch/clickhouse.version @@ -1,3 +1,3 @@ CH_ORG=Kyligence CH_BRANCH=rebase_ch/20240830 -CH_COMMIT=d239aeff645 +CH_COMMIT=5e2eaab52ac diff --git a/cpp-ch/local-engine/Common/CHUtil.cpp b/cpp-ch/local-engine/Common/CHUtil.cpp index 9558bf957d4a..cbb3217bbb14 100644 --- a/cpp-ch/local-engine/Common/CHUtil.cpp +++ b/cpp-ch/local-engine/Common/CHUtil.cpp @@ -799,6 +799,7 @@ void BackendInitializerUtil::initSettings(std::map<std::string, std::string> & b 
settings.set("function_json_value_return_type_allow_nullable", true); settings.set("precise_float_parsing", true); settings.set("enable_named_columns_in_function_tuple", false); + settings.set("datetime64_trim_suffix_zeros", true); if (backend_conf_map.contains(GLUTEN_TASK_OFFHEAP)) {