Skip to content

Commit

Permalink
Fix bucket table create error (apache#7156)
Browse files (browse the repository at this point in the history)
  • Loading branch information
loneylee authored and hengzhen.sq committed Sep 11, 2024
1 parent 092e139 commit 42a8c5c
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -165,13 +165,14 @@ object MergeTreeFileFormatWriter extends Logging {
if (writerBucketSpec.isDefined) {
// We need to append the bucket ID expression to the output of the sort plan
// so that the backend can calculate the bucket ID for each row.
wrapped = ProjectExec(
wrapped.output :+ Alias(writerBucketSpec.get.bucketIdExpression, "__bucket_value__")(),
wrapped)
val bucketValueExpr = bindReferences(
Seq(writerBucketSpec.get.bucketIdExpression),
finalOutputSpec.outputColumns)
wrapped =
ProjectExec(wrapped.output :+ Alias(bucketValueExpr.head, "__bucket_value__")(), wrapped)
// TODO: optimize this; the bucket value is currently computed twice here.
}

val nativeFormat = sparkSession.sparkContext.getLocalProperty("nativeFormat")
(GlutenMergeTreeWriterInjects.getInstance().executeWriterWrappedSparkPlan(wrapped), None)
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1291,6 +1291,30 @@ class GlutenClickHouseHiveTableSuite
.mode(SaveMode.Overwrite)
.save(dataPath3)
assert(new File(dataPath3).listFiles().nonEmpty)

val dataPath4 = s"$basePath/lineitem_mergetree_bucket2"
val df4 = spark
.sql(s"""
|select
| INT_FIELD ,
| STRING_FIELD,
| LONG_FIELD ,
| DATE_FIELD
| from $txt_table_name
| order by INT_FIELD
|""".stripMargin)
.toDF("INT_FIELD", "STRING_FIELD", "LONG_FIELD", "DATE_FIELD")

df4.write
.format("clickhouse")
.partitionBy("DATE_FIELD")
.option("clickhouse.numBuckets", "3")
.option("clickhouse.bucketColumnNames", "STRING_FIELD")
.option("clickhouse.orderByKey", "INT_FIELD,LONG_FIELD")
.option("clickhouse.primaryKey", "INT_FIELD")
.mode(SaveMode.Append)
.save(dataPath4)
assert(new File(dataPath4).listFiles().nonEmpty)
}

test("GLUTEN-6506: Orc read time zone") {
Expand Down

0 comments on commit 42a8c5c

Please sign in to comment.