From 190d377e312366163b21e3116f3caf24c6cedfe6 Mon Sep 17 00:00:00 2001 From: loneylee Date: Fri, 26 Jul 2024 12:25:00 +0800 Subject: [PATCH] Fix 6604 --- .../utils/MergeTreePartsPartitionsUtil.scala | 2 +- .../MergeTreeFileFormatDataWriter.scala | 3 +- .../GlutenClickHouseMergeTreeWriteSuite.scala | 28 +++++++++++++++++++ 3 files changed, 31 insertions(+), 2 deletions(-) diff --git a/backends-clickhouse/src/main/scala/org/apache/spark/sql/execution/datasources/utils/MergeTreePartsPartitionsUtil.scala b/backends-clickhouse/src/main/scala/org/apache/spark/sql/execution/datasources/utils/MergeTreePartsPartitionsUtil.scala index 64e41778cb9b..03199f7ffe83 100644 --- a/backends-clickhouse/src/main/scala/org/apache/spark/sql/execution/datasources/utils/MergeTreePartsPartitionsUtil.scala +++ b/backends-clickhouse/src/main/scala/org/apache/spark/sql/execution/datasources/utils/MergeTreePartsPartitionsUtil.scala @@ -166,7 +166,7 @@ object MergeTreePartsPartitionsUtil extends Logging { partition => partition.files.map( fs => { - val path = fs.getPath.toString + val path = fs.getPath.toUri.toString val ret = ClickhouseSnapshot.pathToAddMTPCache.getIfPresent(path) if (ret == null) { diff --git a/backends-clickhouse/src/main/scala/org/apache/spark/sql/execution/datasources/v1/clickhouse/MergeTreeFileFormatDataWriter.scala b/backends-clickhouse/src/main/scala/org/apache/spark/sql/execution/datasources/v1/clickhouse/MergeTreeFileFormatDataWriter.scala index 712afb3788d1..4f522e218659 100644 --- a/backends-clickhouse/src/main/scala/org/apache/spark/sql/execution/datasources/v1/clickhouse/MergeTreeFileFormatDataWriter.scala +++ b/backends-clickhouse/src/main/scala/org/apache/spark/sql/execution/datasources/v1/clickhouse/MergeTreeFileFormatDataWriter.scala @@ -306,7 +306,8 @@ abstract class MergeTreeBaseDynamicPartitionDataWriter( releaseCurrentWriter() } - val partDir = partitionValues.map(getPartitionPath(_)) + val partDir = + partitionValues.map(getPartitionPath(_)).map(str => new Path(str).toUri.toASCIIString) partDir.foreach(updatedPartitions.add) val bucketIdStr = bucketId.map(id => f"$id%05d").getOrElse("") diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseMergeTreeWriteSuite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseMergeTreeWriteSuite.scala index 2563d792b040..77d7f37c0369 100644 --- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseMergeTreeWriteSuite.scala +++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseMergeTreeWriteSuite.scala @@ -2022,5 +2022,33 @@ class GlutenClickHouseMergeTreeWriteSuite |""".stripMargin runTPCHQueryBySQL(6, sqlStr) { _ => } } + + test("test mergetree with partition with whitespace") { + spark.sql(s""" + |DROP TABLE IF EXISTS lineitem_mergetree_partition_with_whitespace; + |""".stripMargin) + + spark.sql(s""" + |CREATE TABLE IF NOT EXISTS lineitem_mergetree_partition_with_whitespace + |( + | l_returnflag string, + | l_linestatus string + |) + |USING clickhouse + |PARTITIONED BY (l_returnflag) + |LOCATION '$basePath/lineitem_mergetree_partition_with_whitespace' + |""".stripMargin) + + spark.sql(s""" + | insert into table lineitem_mergetree_partition_with_whitespace + | (l_returnflag, l_linestatus) values ('a A', 'abc') + |""".stripMargin) + + val sqlStr = + s""" + |SELECT * from lineitem_mergetree_partition_with_whitespace + |""".stripMargin + runSql(sqlStr) { _ => } + } } // scalastyle:off line.size.limit