Skip to content

Commit

Permalink
[GLUTEN-7032][CH] Fix incorrect result using timestamp in-filter
Browse files Browse the repository at this point in the history
  • Loading branch information
lwz9103 committed Sep 6, 2024
1 parent 3ce3ab9 commit b5ab388
Show file tree
Hide file tree
Showing 4 changed files with 46 additions and 1 deletion.
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
account_id,record_time,account_user_country,account_date,account_time
20201001,2020-10-01,shanghai,2020-10-01,2020-10-01 10:10:10
20201005,2020-10-05,shanghai,2020-10-05,2020-10-05 10:10:10
20201008,2020-10-08,beijing,2020-10-08,2020-10-08 10:10:10
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,46 @@ class GlutenClickHouseFileFormatSuite
)
}

// scalastyle:off line.size.limit
// Regression test for GLUTEN-7032: filters on a timestamp column (both IN-lists and
// equality, using plain string literals as well as typed `timestamp'...'` literals),
// plus an IN-list on a date column, are each executed through `runAndCompare`.
test("GLUTEN-7032 timestamp in-filter test") {
  val filePath = rootPath + "/csv-data/filter_timestamp.csv"
  val schema = StructType(
    Seq(
      StructField("account_id", IntegerType, nullable = false),
      StructField("record_time", DateType, nullable = false),
      StructField("account_user_country", StringType, nullable = false),
      StructField("account_date", DateType, nullable = false),
      StructField("account_time", TimestampType, nullable = false)
    ))

  // NOTE(review): the fixture appears to start with a column-name line while "header"
  // is "false" here - confirm that reading that line as data is intended.
  val options = new util.HashMap[String, String]()
  options.put("delimiter", ",")
  options.put("header", "false")
  options.put("nullValue", "null")

  val viewName = "filter_timestamp"
  // `csv` already yields a DataFrame, so no extra `toDF()` is needed.
  val df = spark.read
    .options(options)
    .schema(schema)
    .csv(filePath)
  // Replace rather than create: `createTempView` throws when a view with this name
  // is already registered in the session (e.g. left over from an earlier run).
  df.createOrReplaceTempView(viewName)

  val queries = Seq(
    // IN-list on the timestamp column with string literals
    "select * from filter_timestamp where account_time in ('2020-10-01 10:10:10', '2020-10-01 10:10:11')",
    // IN-list on the timestamp column with typed timestamp literals
    "select * from filter_timestamp where account_time in (timestamp'2020-10-01 10:10:10', timestamp'2020-10-01 10:10:11')",
    // equality on the timestamp column with a string literal
    "select * from filter_timestamp where account_time = '2020-10-01 10:10:10'",
    // equality on the timestamp column with a typed timestamp literal
    "select * from filter_timestamp where account_time = timestamp'2020-10-01 10:10:10'",
    // IN-list on the date column
    "select * from filter_timestamp where account_date in ('2020-10-01', '2020-10-02')"
  )

  try {
    queries.foreach(query => runAndCompare(query) {})
  } finally {
    // Do not leak the view into other tests that share this session.
    spark.catalog.dropTempView(viewName)
  }
}
// scalastyle:on line.size.limit

test("read data from csv file format with filter") {
val filePath = basePath + "/csv_test_filter.csv"
val csvFileFormat = "csv"
Expand Down
2 changes: 1 addition & 1 deletion cpp-ch/clickhouse.version
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
CH_ORG=Kyligence
CH_BRANCH=rebase_ch/20240830
CH_COMMIT=d239aeff645
CH_COMMIT=5e2eaab52ac
1 change: 1 addition & 0 deletions cpp-ch/local-engine/Common/CHUtil.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -799,6 +799,7 @@ void BackendInitializerUtil::initSettings(std::map<std::string, std::string> & b
settings.set("function_json_value_return_type_allow_nullable", true);
settings.set("precise_float_parsing", true);
settings.set("enable_named_columns_in_function_tuple", false);
settings.set("datetime64_trim_suffix_zeros", true);

if (backend_conf_map.contains(GLUTEN_TASK_OFFHEAP))
{
Expand Down

0 comments on commit b5ab388

Please sign in to comment.