diff --git a/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHListenerApi.scala b/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHListenerApi.scala
index 60dc3dad0b87b..3c20a659e7ff2 100644
--- a/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHListenerApi.scala
+++ b/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHListenerApi.scala
@@ -79,14 +79,35 @@ class CHListenerApi extends ListenerApi with Logging {
       val executorLibPath = conf.get(GlutenConfig.GLUTEN_EXECUTOR_LIB_PATH, libPath)
       JniLibLoader.loadFromPath(executorLibPath, true)
     }
+
+    setDefaultConfigs(conf)
+
+    // Load supported hive/python/scala udfs
+    UDFMappings.loadFromSparkConf(conf)
+
+    CHNativeExpressionEvaluator.initNative(conf)
+
+    // inject backend-specific implementations to override spark classes
+    // FIXME: The following set instances twice in local mode?
+    GlutenParquetWriterInjects.setInstance(new CHParquetWriterInjects())
+    GlutenOrcWriterInjects.setInstance(new CHOrcWriterInjects())
+    GlutenMergeTreeWriterInjects.setInstance(new CHMergeTreeWriterInjects())
+    GlutenRowSplitter.setInstance(new CHRowSplitter())
+  }
+
+  private def setDefaultConfigs(conf: SparkConf): Unit = {
     // Add configs
-    conf.set(
+    conf.setIfMissing(
       s"${CHBackendSettings.getBackendConfigPrefix}.runtime_config.timezone",
       conf.get("spark.sql.session.timeZone", TimeZone.getDefault.getID))
-    conf.set(
-      s"${CHBackendSettings.getBackendConfigPrefix}.runtime_config" +
-        s".local_engine.settings.log_processors_profiles",
-      "true")
+
+    val logProfileKey = s"${CHBackendSettings.getBackendConfigPrefix}.runtime_config" +
+      s".local_engine.settings.log_processors_profiles"
+    conf.setIfMissing(logProfileKey, "true")
+
+    val trimDatetime64ZerosKey = s"${CHBackendSettings.getBackendConfigPrefix}.runtime_settings" +
+      s".datetime64_trim_suffix_zeros"
+    conf.setIfMissing(trimDatetime64ZerosKey, "true")
 
     // add memory limit for external sort
     val externalSortKey = s"${CHBackendSettings.getBackendConfigPrefix}.runtime_settings" +
@@ -102,18 +123,6 @@ class CHListenerApi extends ListenerApi with Logging {
         }
       }
     }
-
-    // Load supported hive/python/scala udfs
-    UDFMappings.loadFromSparkConf(conf)
-
-    CHNativeExpressionEvaluator.initNative(conf)
-
-    // inject backend-specific implementations to override spark classes
-    // FIXME: The following set instances twice in local mode?
-    GlutenParquetWriterInjects.setInstance(new CHParquetWriterInjects())
-    GlutenOrcWriterInjects.setInstance(new CHOrcWriterInjects())
-    GlutenMergeTreeWriterInjects.setInstance(new CHMergeTreeWriterInjects())
-    GlutenRowSplitter.setInstance(new CHRowSplitter())
   }
 
   private def shutdown(): Unit = {
diff --git a/backends-clickhouse/src/test/resources/csv-data/filter_timestamp.csv b/backends-clickhouse/src/test/resources/csv-data/filter_timestamp.csv
new file mode 100644
index 0000000000000..dabed83da0d1f
--- /dev/null
+++ b/backends-clickhouse/src/test/resources/csv-data/filter_timestamp.csv
@@ -0,0 +1,4 @@
+account_id,record_time,account_user_country,account_date,account_time
+20201001,2020-10-01,shanghai,2020-10-01,2020-10-01 10:10:10
+20201005,2020-10-05,shanghai,2020-10-05,2020-10-05 10:10:10
+20201008,2020-10-08,beijing,2020-10-08,2020-10-08 10:10:10
\ No newline at end of file
diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseFileFormatSuite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseFileFormatSuite.scala
index c10b11290bafe..5b499026f81ce 100644
--- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseFileFormatSuite.scala
+++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseFileFormatSuite.scala
@@ -173,6 +173,46 @@ class GlutenClickHouseFileFormatSuite
     )
   }
 
+  // scalastyle:off line.size.limit
+  test("GLUTEN-7032 timestamp in-filter test") {
+    val filePath = rootPath + "/csv-data/filter_timestamp.csv"
+    val schema = StructType.apply(
+      Seq(
+        StructField.apply("account_id", IntegerType, nullable = false),
+        StructField.apply("record_time", DateType, nullable = false),
+        StructField.apply("account_user_country", StringType, nullable = false),
+        StructField.apply("account_date", DateType, nullable = false),
+        StructField.apply("account_time", TimestampType, nullable = false)
+      ))
+
+    val options = new util.HashMap[String, String]()
+    options.put("delimiter", ",")
+    options.put("header", "false")
+    options.put("nullValue", "null")
+
+    val df = spark.read
+      .options(options)
+      .schema(schema)
+      .csv(filePath)
+      .toDF()
+    df.createTempView("filter_timestamp")
+    val sql1: String =
+      "select * from filter_timestamp where account_time in ('2020-10-01 10:10:10', '2020-10-01 10:10:11')"
+    val sql2: String =
+      "select * from filter_timestamp where account_time in (timestamp'2020-10-01 10:10:10', timestamp'2020-10-01 10:10:11')"
+    val sql3: String = "select * from filter_timestamp where account_time = '2020-10-01 10:10:10'"
+    val sql4: String =
+      "select * from filter_timestamp where account_time = timestamp'2020-10-01 10:10:10'"
+    val sql5: String =
+      "select * from filter_timestamp where account_date in ('2020-10-01', '2020-10-02')"
+    runAndCompare(sql1) {}
+    runAndCompare(sql2) {}
+    runAndCompare(sql3) {}
+    runAndCompare(sql4) {}
+    runAndCompare(sql5) {}
+  }
+  // scalastyle:on line.size.limit
+
   test("read data from csv file format with filter") {
     val filePath = basePath + "/csv_test_filter.csv"
     val csvFileFormat = "csv"
diff --git a/cpp-ch/clickhouse.version b/cpp-ch/clickhouse.version
index 8859c650a85cf..93bf97d78cee8 100644
--- a/cpp-ch/clickhouse.version
+++ b/cpp-ch/clickhouse.version
@@ -1,3 +1,3 @@
 CH_ORG=Kyligence
 CH_BRANCH=rebase_ch/20240830
-CH_COMMIT=d239aeff645
+CH_COMMIT=5e2eaab52ac
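
The driver-side change above switches the backend defaults (timezone, log_processors_profiles, and the new datetime64_trim_suffix_zeros runtime setting) from conf.set to conf.setIfMissing, so values supplied by the user are no longer clobbered at startup. Below is a minimal, self-contained Scala sketch of that semantics, runnable outside Gluten; the literal prefix "spark.gluten.sql.columnar.backend.ch" stands in for CHBackendSettings.getBackendConfigPrefix and is an assumption of this example, not taken from the patch.

import org.apache.spark.SparkConf

object SetIfMissingSketch {
  // Assumed literal standing in for CHBackendSettings.getBackendConfigPrefix.
  private val prefix = "spark.gluten.sql.columnar.backend.ch"

  def main(args: Array[String]): Unit = {
    // loadDefaults = false keeps the sketch independent of system properties.
    val conf = new SparkConf(false)
      // Pretend the user disabled suffix-zero trimming via --conf.
      .set(s"$prefix.runtime_settings.datetime64_trim_suffix_zeros", "false")

    // Mirrors what setDefaultConfigs now does: fill in only the absent keys.
    conf.setIfMissing(s"$prefix.runtime_settings.datetime64_trim_suffix_zeros", "true")
    conf.setIfMissing(
      s"$prefix.runtime_config.timezone",
      java.util.TimeZone.getDefault.getID)

    // The user's explicit "false" survives; the timezone default is applied.
    assert(conf.get(s"$prefix.runtime_settings.datetime64_trim_suffix_zeros") == "false")
    println(conf.get(s"$prefix.runtime_config.timezone"))
  }
}

With the previous conf.set behavior, the assertion above would fail: the backend default "true" would overwrite the user's value during listener initialization.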