From 623870bcb4e17640c288e92c56d52e833389bf12 Mon Sep 17 00:00:00 2001
From: Zhichao Zhang <zhangzc@apache.org>
Date: Fri, 7 Jun 2024 17:02:11 +0800
Subject: [PATCH 1/2] [GLUTEN-6067][CH] Support CH backend with Spark3.5 (Task
 1 and Taks 2)

Support CH backend with Spark3.5:

1. Upgrade Spark version to 3.5 and compile passed;
2. Upgrade Delta version to 3.2 and compile passed;
3. CH backend UT passed: (now only the MergeTree + Delta UT passed);
4. Parquet native write passed;
5. Gluten UT passed;
6. Support to run Gluten CH CI with Spark 3.5
---
 backends-clickhouse/pom.xml                   |   33 +
 .../ClickhouseOptimisticTransaction.scala     |   16 +-
 .../spark/sql/delta/DeltaAdapter.scala}       |    8 +-
 .../sql/delta/catalog/ClickHouseTableV2.scala |  164 +--
 .../OptimizeTableCommandOverwrites.scala      |    0
 .../v2/clickhouse/ClickHouseDataSource.scala  |    0
 .../clickhouse/ClickHouseSparkCatalog.scala   |    0
 .../ClickhouseOptimisticTransaction.scala     |  183 +++
 .../apache/spark/sql/delta/DeltaAdapter.scala |   21 +
 .../sql/delta/catalog/ClickHouseTableV2.scala |  186 +++
 .../OptimizeTableCommandOverwrites.scala      |  333 +++++
 .../v2/clickhouse/ClickHouseDataSource.scala  |  145 +++
 .../clickhouse/ClickHouseSparkCatalog.scala   |  662 ++++++++++
 .../io/delta/tables/ClickhouseTable.scala     |  134 ++
 .../ClickhouseOptimisticTransaction.scala     |  193 +++
 .../apache/spark/sql/delta/DeltaAdapter.scala |   21 +
 .../org/apache/spark/sql/delta/DeltaLog.scala | 1106 +++++++++++++++++
 .../org/apache/spark/sql/delta/Snapshot.scala |  663 ++++++++++
 .../sql/delta/catalog/ClickHouseTableV2.scala |  220 ++++
 .../sql/delta/commands/DeleteCommand.scala    |  557 +++++++++
 .../delta/commands/OptimizeTableCommand.scala |  608 +++++++++
 .../OptimizeTableCommandOverwrites.scala      |  345 +++++
 .../sql/delta/commands/UpdateCommand.scala    |  556 +++++++++
 .../sql/delta/commands/VacuumCommand.scala    |  735 +++++++++++
 .../commands/merge/ClassicMergeExecutor.scala |  571 +++++++++
 .../delta/files/MergeTreeCommitProtocol.scala |  255 ++++
 .../v2/clickhouse/ClickHouseDataSource.scala  |  144 +++
 .../clickhouse/ClickHouseSparkCatalog.scala   |  734 +++++++++++
 .../source/DeltaMergeTreeFileFormat.scala     |  133 ++
 .../clickhouse/CHIteratorApi.scala            |    2 +-
 .../clickhouse/CHSparkPlanExecApi.scala       |    2 +-
 .../gluten/utils/CHInputPartitionsUtil.scala  |   18 +-
 .../spark/sql/delta/DeltaAdapterTrait.scala}  |    7 +-
 .../delta/catalog/ClickHouseTableV2Base.scala |  185 +++
 .../v1/CHMergeTreeWriterInjects.scala         |    4 +-
 .../MergeTreeFileFormatWriter.scala           |    7 +-
 .../sql/execution/utils/CHExecUtil.scala      |    9 +-
 .../sql/execution/utils/PushDownUtil.scala    |   30 +-
 ...utenClickHouseDeltaParquetWriteSuite.scala |   54 +-
 ...utenClickHouseMergeTreeOptimizeSuite.scala |    8 +-
 ...ickHouseMergeTreePathBasedWriteSuite.scala |   15 +-
 .../GlutenClickHouseMergeTreeWriteSuite.scala |    9 +-
 ...utenClickHouseTPCHParquetBucketSuite.scala |    2 +-
 .../expressions/aggregate/CustomSum.scala     |   10 +-
 .../apache/gluten/sql/shims/SparkShims.scala  |   20 +-
 .../sql/shims/spark32/Spark32Shims.scala      |   21 +
 .../sql/shims/spark33/Spark33Shims.scala      |   21 +
 .../sql/shims/spark34/Spark34Shims.scala      |   20 +
 .../sql/shims/spark35/Spark35Shims.scala      |   22 +-
 49 files changed, 8913 insertions(+), 279 deletions(-)
 rename backends-clickhouse/src/main/{scala => delta-20}/org/apache/spark/sql/delta/ClickhouseOptimisticTransaction.scala (92%)
 rename backends-clickhouse/src/main/{delta-23/org/apache/spark/sql/execution/datasources/v2/clickhouse/DeltaLogAdapter.scala => delta-20/org/apache/spark/sql/delta/DeltaAdapter.scala} (78%)
 rename backends-clickhouse/src/main/{scala => delta-20}/org/apache/spark/sql/delta/catalog/ClickHouseTableV2.scala (55%)
 rename backends-clickhouse/src/main/{scala => delta-20}/org/apache/spark/sql/delta/commands/OptimizeTableCommandOverwrites.scala (100%)
 rename backends-clickhouse/src/main/{scala => delta-20}/org/apache/spark/sql/execution/datasources/v2/clickhouse/ClickHouseDataSource.scala (100%)
 rename backends-clickhouse/src/main/{scala => delta-20}/org/apache/spark/sql/execution/datasources/v2/clickhouse/ClickHouseSparkCatalog.scala (100%)
 create mode 100644 backends-clickhouse/src/main/delta-23/org/apache/spark/sql/delta/ClickhouseOptimisticTransaction.scala
 create mode 100644 backends-clickhouse/src/main/delta-23/org/apache/spark/sql/delta/DeltaAdapter.scala
 create mode 100644 backends-clickhouse/src/main/delta-23/org/apache/spark/sql/delta/catalog/ClickHouseTableV2.scala
 create mode 100644 backends-clickhouse/src/main/delta-23/org/apache/spark/sql/delta/commands/OptimizeTableCommandOverwrites.scala
 create mode 100644 backends-clickhouse/src/main/delta-23/org/apache/spark/sql/execution/datasources/v2/clickhouse/ClickHouseDataSource.scala
 create mode 100644 backends-clickhouse/src/main/delta-23/org/apache/spark/sql/execution/datasources/v2/clickhouse/ClickHouseSparkCatalog.scala
 create mode 100644 backends-clickhouse/src/main/delta-32/io/delta/tables/ClickhouseTable.scala
 create mode 100644 backends-clickhouse/src/main/delta-32/org/apache/spark/sql/delta/ClickhouseOptimisticTransaction.scala
 create mode 100644 backends-clickhouse/src/main/delta-32/org/apache/spark/sql/delta/DeltaAdapter.scala
 create mode 100644 backends-clickhouse/src/main/delta-32/org/apache/spark/sql/delta/DeltaLog.scala
 create mode 100644 backends-clickhouse/src/main/delta-32/org/apache/spark/sql/delta/Snapshot.scala
 create mode 100644 backends-clickhouse/src/main/delta-32/org/apache/spark/sql/delta/catalog/ClickHouseTableV2.scala
 create mode 100644 backends-clickhouse/src/main/delta-32/org/apache/spark/sql/delta/commands/DeleteCommand.scala
 create mode 100644 backends-clickhouse/src/main/delta-32/org/apache/spark/sql/delta/commands/OptimizeTableCommand.scala
 create mode 100644 backends-clickhouse/src/main/delta-32/org/apache/spark/sql/delta/commands/OptimizeTableCommandOverwrites.scala
 create mode 100644 backends-clickhouse/src/main/delta-32/org/apache/spark/sql/delta/commands/UpdateCommand.scala
 create mode 100644 backends-clickhouse/src/main/delta-32/org/apache/spark/sql/delta/commands/VacuumCommand.scala
 create mode 100644 backends-clickhouse/src/main/delta-32/org/apache/spark/sql/delta/commands/merge/ClassicMergeExecutor.scala
 create mode 100644 backends-clickhouse/src/main/delta-32/org/apache/spark/sql/delta/files/MergeTreeCommitProtocol.scala
 create mode 100644 backends-clickhouse/src/main/delta-32/org/apache/spark/sql/execution/datasources/v2/clickhouse/ClickHouseDataSource.scala
 create mode 100644 backends-clickhouse/src/main/delta-32/org/apache/spark/sql/execution/datasources/v2/clickhouse/ClickHouseSparkCatalog.scala
 create mode 100644 backends-clickhouse/src/main/delta-32/org/apache/spark/sql/execution/datasources/v2/clickhouse/source/DeltaMergeTreeFileFormat.scala
 rename backends-clickhouse/src/main/{delta-20/org/apache/spark/sql/execution/datasources/v2/clickhouse/DeltaLogAdapter.scala => scala/org/apache/spark/sql/delta/DeltaAdapterTrait.scala} (79%)
 create mode 100644 backends-clickhouse/src/main/scala/org/apache/spark/sql/delta/catalog/ClickHouseTableV2Base.scala

diff --git a/backends-clickhouse/pom.xml b/backends-clickhouse/pom.xml
index a37734b150bb..27ebd75f2d59 100644
--- a/backends-clickhouse/pom.xml
+++ b/backends-clickhouse/pom.xml
@@ -215,6 +215,38 @@
       <version>8.5.9</version>
       <scope>test</scope>
     </dependency>
+    <dependency>
+      <groupId>org.apache.arrow</groupId>
+      <artifactId>arrow-memory-core</artifactId>
+      <version>${arrow.version}</version>
+      <scope>provided</scope>
+      <exclusions>
+        <exclusion>
+          <groupId>io.netty</groupId>
+          <artifactId>netty-common</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>io.netty</groupId>
+          <artifactId>netty-buffer</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.arrow</groupId>
+      <artifactId>arrow-vector</artifactId>
+      <version>${arrow.version}</version>
+      <scope>provided</scope>
+      <exclusions>
+        <exclusion>
+          <groupId>io.netty</groupId>
+          <artifactId>netty-common</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>io.netty</groupId>
+          <artifactId>netty-buffer</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
   </dependencies>
 
   <build>
@@ -272,6 +304,7 @@
             </includes>
             <excludes>
               <exclude>src/main/delta-${delta.binary.version}/org/apache/spark/sql/delta/commands/*.scala</exclude>
+              <exclude>src/main/delta-${delta.binary.version}/org/apache/spark/sql/delta/commands/merge/*.scala</exclude>
               <exclude>src/main/delta-${delta.binary.version}/org/apache/spark/sql/delta/files/*.scala</exclude>
               <exclude>src/main/delta-${delta.binary.version}/org/apache/spark/sql/delta/DeltaLog.scala</exclude>
               <exclude>src/main/delta-${delta.binary.version}/org/apache/spark/sql/delta/Snapshot.scala</exclude>
diff --git a/backends-clickhouse/src/main/scala/org/apache/spark/sql/delta/ClickhouseOptimisticTransaction.scala b/backends-clickhouse/src/main/delta-20/org/apache/spark/sql/delta/ClickhouseOptimisticTransaction.scala
similarity index 92%
rename from backends-clickhouse/src/main/scala/org/apache/spark/sql/delta/ClickhouseOptimisticTransaction.scala
rename to backends-clickhouse/src/main/delta-20/org/apache/spark/sql/delta/ClickhouseOptimisticTransaction.scala
index d59467e11293..0794b45158e6 100644
--- a/backends-clickhouse/src/main/scala/org/apache/spark/sql/delta/ClickhouseOptimisticTransaction.scala
+++ b/backends-clickhouse/src/main/delta-20/org/apache/spark/sql/delta/ClickhouseOptimisticTransaction.scala
@@ -32,7 +32,6 @@ import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec
 import org.apache.spark.sql.execution.datasources.{BasicWriteJobStatsTracker, FakeRowAdaptor, FileFormatWriter, WriteJobStatsTracker}
 import org.apache.spark.sql.execution.datasources.v1.clickhouse.MergeTreeFileFormatWriter
 import org.apache.spark.sql.execution.datasources.v2.clickhouse.ClickHouseConfig
-import org.apache.spark.sql.execution.datasources.v2.clickhouse.source.DeltaMergeTreeFileFormat
 import org.apache.spark.util.{Clock, SerializableConfiguration}
 
 import org.apache.commons.lang3.exception.ExceptionUtils
@@ -139,20 +138,7 @@ class ClickhouseOptimisticTransaction(
           MergeTreeFileFormatWriter.write(
             sparkSession = spark,
             plan = newQueryPlan,
-            fileFormat = new DeltaMergeTreeFileFormat(
-              metadata,
-              tableV2.dataBaseName,
-              tableV2.tableName,
-              ClickhouseSnapshot.genSnapshotId(tableV2.snapshot),
-              tableV2.orderByKeyOption,
-              tableV2.lowCardKeyOption,
-              tableV2.minmaxIndexKeyOption,
-              tableV2.bfIndexKeyOption,
-              tableV2.setIndexKeyOption,
-              tableV2.primaryKeyOption,
-              tableV2.clickhouseTableConfigs,
-              tableV2.partitionColumns
-            ),
+            fileFormat = tableV2.getFileFormat(metadata),
             // formats.
             committer = committer,
             outputSpec = outputSpec,
diff --git a/backends-clickhouse/src/main/delta-23/org/apache/spark/sql/execution/datasources/v2/clickhouse/DeltaLogAdapter.scala b/backends-clickhouse/src/main/delta-20/org/apache/spark/sql/delta/DeltaAdapter.scala
similarity index 78%
rename from backends-clickhouse/src/main/delta-23/org/apache/spark/sql/execution/datasources/v2/clickhouse/DeltaLogAdapter.scala
rename to backends-clickhouse/src/main/delta-20/org/apache/spark/sql/delta/DeltaAdapter.scala
index dd6ad383e0d7..b6d4c04844c4 100644
--- a/backends-clickhouse/src/main/delta-23/org/apache/spark/sql/execution/datasources/v2/clickhouse/DeltaLogAdapter.scala
+++ b/backends-clickhouse/src/main/delta-20/org/apache/spark/sql/delta/DeltaAdapter.scala
@@ -14,10 +14,8 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.spark.sql.execution.datasources.v2.clickhouse
+package org.apache.spark.sql.delta
 
-import org.apache.spark.sql.delta.{DeltaLog, Snapshot}
-
-object DeltaLogAdapter {
-  def snapshot(deltaLog: DeltaLog): Snapshot = deltaLog.unsafeVolatileSnapshot
+object DeltaAdapter extends DeltaAdapterTrait {
+  override def snapshot(deltaLog: DeltaLog): Snapshot = deltaLog.snapshot
 }
diff --git a/backends-clickhouse/src/main/scala/org/apache/spark/sql/delta/catalog/ClickHouseTableV2.scala b/backends-clickhouse/src/main/delta-20/org/apache/spark/sql/delta/catalog/ClickHouseTableV2.scala
similarity index 55%
rename from backends-clickhouse/src/main/scala/org/apache/spark/sql/delta/catalog/ClickHouseTableV2.scala
rename to backends-clickhouse/src/main/delta-20/org/apache/spark/sql/delta/catalog/ClickHouseTableV2.scala
index d5cd4f984ca6..90370f0b1d99 100644
--- a/backends-clickhouse/src/main/scala/org/apache/spark/sql/delta/catalog/ClickHouseTableV2.scala
+++ b/backends-clickhouse/src/main/delta-20/org/apache/spark/sql/delta/catalog/ClickHouseTableV2.scala
@@ -17,11 +17,11 @@
 package org.apache.spark.sql.delta.catalog
 import org.apache.spark.internal.Logging
 import org.apache.spark.sql.SparkSession
-import org.apache.spark.sql.catalyst.catalog.{BucketSpec, CatalogTable}
+import org.apache.spark.sql.catalyst.catalog.CatalogTable
 import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression}
 import org.apache.spark.sql.connector.read.InputPartition
 import org.apache.spark.sql.connector.write.{LogicalWriteInfo, WriteBuilder}
-import org.apache.spark.sql.delta.{ClickhouseSnapshot, DeltaErrors, DeltaLog, DeltaTimeTravelSpec}
+import org.apache.spark.sql.delta.{ClickhouseSnapshot, DeltaErrors, DeltaLog, DeltaTimeTravelSpec, Snapshot}
 import org.apache.spark.sql.delta.actions.Metadata
 import org.apache.spark.sql.delta.catalog.ClickHouseTableV2.deltaLog2Table
 import org.apache.spark.sql.delta.sources.DeltaDataSource
@@ -54,8 +54,8 @@ class ClickHouseTableV2(
     tableIdentifier,
     timeTravelOpt,
     options,
-    cdcOptions) {
-  protected def getMetadata: Metadata = if (snapshot == null) Metadata() else snapshot.metadata
+    cdcOptions)
+  with ClickHouseTableV2Base {
 
   lazy val (rootPath, partitionFilters, timeTravelByPath) = {
     if (catalogTable.isDefined) {
@@ -93,126 +93,6 @@ class ClickHouseTableV2(
     new WriteIntoDeltaBuilder(deltaLog, info.options)
   }
 
-  lazy val dataBaseName = catalogTable
-    .map(_.identifier.database.getOrElse("default"))
-    .getOrElse("clickhouse")
-
-  lazy val tableName = catalogTable
-    .map(_.identifier.table)
-    .getOrElse(path.toUri.getPath)
-
-  lazy val bucketOption: Option[BucketSpec] = {
-    val tableProperties = properties()
-    if (tableProperties.containsKey("numBuckets")) {
-      val numBuckets = tableProperties.get("numBuckets").trim.toInt
-      val bucketColumnNames: Seq[String] =
-        tableProperties.get("bucketColumnNames").split(",").map(_.trim).toSeq
-      val sortColumnNames: Seq[String] = if (tableProperties.containsKey("orderByKey")) {
-        tableProperties.get("orderByKey").split(",").map(_.trim).toSeq
-      } else Seq.empty[String]
-      Some(BucketSpec(numBuckets, bucketColumnNames, sortColumnNames))
-    } else {
-      None
-    }
-  }
-
-  lazy val lowCardKeyOption: Option[Seq[String]] = {
-    getCommaSeparatedColumns("lowCardKey")
-  }
-
-  lazy val minmaxIndexKeyOption: Option[Seq[String]] = {
-    getCommaSeparatedColumns("minmaxIndexKey")
-  }
-
-  lazy val bfIndexKeyOption: Option[Seq[String]] = {
-    getCommaSeparatedColumns("bloomfilterIndexKey")
-  }
-
-  lazy val setIndexKeyOption: Option[Seq[String]] = {
-    getCommaSeparatedColumns("setIndexKey")
-  }
-
-  private def getCommaSeparatedColumns(keyName: String) = {
-    val tableProperties = properties()
-    if (tableProperties.containsKey(keyName)) {
-      if (tableProperties.get(keyName).nonEmpty) {
-        val keys = tableProperties.get(keyName).split(",").map(_.trim).toSeq
-        keys.foreach(
-          s => {
-            if (s.contains(".")) {
-              throw new IllegalStateException(
-                s"$keyName $s can not contain '.' (not support nested column yet)")
-            }
-          })
-        Some(keys.map(s => s.toLowerCase()))
-      } else {
-        None
-      }
-    } else {
-      None
-    }
-  }
-
-  lazy val orderByKeyOption: Option[Seq[String]] = {
-    if (bucketOption.isDefined && bucketOption.get.sortColumnNames.nonEmpty) {
-      val orderByKes = bucketOption.get.sortColumnNames
-      val invalidKeys = orderByKes.intersect(partitionColumns)
-      if (invalidKeys.nonEmpty) {
-        throw new IllegalStateException(
-          s"partition cols $invalidKeys can not be in the order by keys.")
-      }
-      Some(orderByKes)
-    } else {
-      val tableProperties = properties()
-      if (tableProperties.containsKey("orderByKey")) {
-        if (tableProperties.get("orderByKey").nonEmpty) {
-          val orderByKes = tableProperties.get("orderByKey").split(",").map(_.trim).toSeq
-          val invalidKeys = orderByKes.intersect(partitionColumns)
-          if (invalidKeys.nonEmpty) {
-            throw new IllegalStateException(
-              s"partition cols $invalidKeys can not be in the order by keys.")
-          }
-          Some(orderByKes)
-        } else {
-          None
-        }
-      } else {
-        None
-      }
-    }
-  }
-
-  lazy val primaryKeyOption: Option[Seq[String]] = {
-    if (orderByKeyOption.isDefined) {
-      val tableProperties = properties()
-      if (tableProperties.containsKey("primaryKey")) {
-        if (tableProperties.get("primaryKey").nonEmpty) {
-          val primaryKeys = tableProperties.get("primaryKey").split(",").map(_.trim).toSeq
-          if (!orderByKeyOption.get.mkString(",").startsWith(primaryKeys.mkString(","))) {
-            throw new IllegalStateException(
-              s"Primary key $primaryKeys must be a prefix of the sorting key")
-          }
-          Some(primaryKeys)
-        } else {
-          None
-        }
-      } else {
-        None
-      }
-    } else {
-      None
-    }
-  }
-
-  lazy val partitionColumns = snapshot.metadata.partitionColumns
-
-  lazy val clickhouseTableConfigs: Map[String, String] = {
-    val tableProperties = properties()
-    val configs = scala.collection.mutable.Map[String, String]()
-    configs += ("storage_policy" -> tableProperties.getOrDefault("storage_policy", "default"))
-    configs.toMap
-  }
-
   def getFileFormat(meta: Metadata): DeltaMergeTreeFileFormat = {
     new DeltaMergeTreeFileFormat(
       meta,
@@ -230,41 +110,19 @@ class ClickHouseTableV2(
     )
   }
 
-  def cacheThis(): Unit = {
-    deltaLog2Table.put(deltaLog, this)
-  }
+  override def deltaProperties(): ju.Map[String, String] = properties()
 
-  cacheThis()
+  override def deltaCatalog(): Option[CatalogTable] = catalogTable
 
-  def primaryKey(): String = primaryKeyOption match {
-    case Some(keys) => keys.mkString(",")
-    case None => ""
-  }
-
-  def orderByKey(): String = orderByKeyOption match {
-    case Some(keys) => keys.mkString(",")
-    case None => "tuple()"
-  }
-
-  def lowCardKey(): String = lowCardKeyOption match {
-    case Some(keys) => keys.mkString(",")
-    case None => ""
-  }
+  override def deltaPath(): Path = path
 
-  def minmaxIndexKey(): String = minmaxIndexKeyOption match {
-    case Some(keys) => keys.mkString(",")
-    case None => ""
-  }
+  override def deltaSnapshot(): Snapshot = snapshot
 
-  def bfIndexKey(): String = bfIndexKeyOption match {
-    case Some(keys) => keys.mkString(",")
-    case None => ""
+  def cacheThis(): Unit = {
+    deltaLog2Table.put(deltaLog, this)
   }
 
-  def setIndexKey(): String = setIndexKeyOption match {
-    case Some(keys) => keys.mkString(",")
-    case None => ""
-  }
+  cacheThis()
 }
 
 @SuppressWarnings(Array("io.github.zhztheplayer.scalawarts.InheritFromCaseClass"))
diff --git a/backends-clickhouse/src/main/scala/org/apache/spark/sql/delta/commands/OptimizeTableCommandOverwrites.scala b/backends-clickhouse/src/main/delta-20/org/apache/spark/sql/delta/commands/OptimizeTableCommandOverwrites.scala
similarity index 100%
rename from backends-clickhouse/src/main/scala/org/apache/spark/sql/delta/commands/OptimizeTableCommandOverwrites.scala
rename to backends-clickhouse/src/main/delta-20/org/apache/spark/sql/delta/commands/OptimizeTableCommandOverwrites.scala
diff --git a/backends-clickhouse/src/main/scala/org/apache/spark/sql/execution/datasources/v2/clickhouse/ClickHouseDataSource.scala b/backends-clickhouse/src/main/delta-20/org/apache/spark/sql/execution/datasources/v2/clickhouse/ClickHouseDataSource.scala
similarity index 100%
rename from backends-clickhouse/src/main/scala/org/apache/spark/sql/execution/datasources/v2/clickhouse/ClickHouseDataSource.scala
rename to backends-clickhouse/src/main/delta-20/org/apache/spark/sql/execution/datasources/v2/clickhouse/ClickHouseDataSource.scala
diff --git a/backends-clickhouse/src/main/scala/org/apache/spark/sql/execution/datasources/v2/clickhouse/ClickHouseSparkCatalog.scala b/backends-clickhouse/src/main/delta-20/org/apache/spark/sql/execution/datasources/v2/clickhouse/ClickHouseSparkCatalog.scala
similarity index 100%
rename from backends-clickhouse/src/main/scala/org/apache/spark/sql/execution/datasources/v2/clickhouse/ClickHouseSparkCatalog.scala
rename to backends-clickhouse/src/main/delta-20/org/apache/spark/sql/execution/datasources/v2/clickhouse/ClickHouseSparkCatalog.scala
diff --git a/backends-clickhouse/src/main/delta-23/org/apache/spark/sql/delta/ClickhouseOptimisticTransaction.scala b/backends-clickhouse/src/main/delta-23/org/apache/spark/sql/delta/ClickhouseOptimisticTransaction.scala
new file mode 100644
index 000000000000..0794b45158e6
--- /dev/null
+++ b/backends-clickhouse/src/main/delta-23/org/apache/spark/sql/delta/ClickhouseOptimisticTransaction.scala
@@ -0,0 +1,183 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.delta
+
+import org.apache.gluten.backendsapi.clickhouse.CHBackendSettings
+import org.apache.gluten.execution.ColumnarToRowExecBase
+
+import org.apache.spark.SparkException
+import org.apache.spark.sql.Dataset
+import org.apache.spark.sql.delta.actions._
+import org.apache.spark.sql.delta.catalog.ClickHouseTableV2
+import org.apache.spark.sql.delta.constraints.{Constraint, Constraints}
+import org.apache.spark.sql.delta.files.MergeTreeCommitProtocol
+import org.apache.spark.sql.delta.schema.InvariantViolationException
+import org.apache.spark.sql.delta.sources.DeltaSQLConf
+import org.apache.spark.sql.execution.{SparkPlan, SQLExecution}
+import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec
+import org.apache.spark.sql.execution.datasources.{BasicWriteJobStatsTracker, FakeRowAdaptor, FileFormatWriter, WriteJobStatsTracker}
+import org.apache.spark.sql.execution.datasources.v1.clickhouse.MergeTreeFileFormatWriter
+import org.apache.spark.sql.execution.datasources.v2.clickhouse.ClickHouseConfig
+import org.apache.spark.util.{Clock, SerializableConfiguration}
+
+import org.apache.commons.lang3.exception.ExceptionUtils
+
+import scala.collection.mutable.ListBuffer
+
+object ClickhouseOptimisticTransaction {}
+class ClickhouseOptimisticTransaction(
+    override val deltaLog: DeltaLog,
+    override val snapshot: Snapshot)(implicit override val clock: Clock)
+  extends OptimisticTransaction(deltaLog, snapshot) {
+
+  def this(deltaLog: DeltaLog, snapshotOpt: Option[Snapshot] = None)(implicit clock: Clock) {
+    this(
+      deltaLog,
+      snapshotOpt.getOrElse(deltaLog.update())
+    )
+  }
+
+  def insertFakeRowAdaptor(queryPlan: SparkPlan): SparkPlan = queryPlan match {
+    // if the child is columnar, we can just wrap&transfer the columnar data
+    case c2r: ColumnarToRowExecBase =>
+      FakeRowAdaptor(c2r.child)
+    // If the child is aqe, we make aqe "support columnar",
+    // then aqe itself will guarantee to generate columnar outputs.
+    // So FakeRowAdaptor will always consumes columnar data,
+    // thus avoiding the case of c2r->aqe->r2c->writer
+    case aqe: AdaptiveSparkPlanExec =>
+      FakeRowAdaptor(
+        AdaptiveSparkPlanExec(
+          aqe.inputPlan,
+          aqe.context,
+          aqe.preprocessingRules,
+          aqe.isSubquery,
+          supportsColumnar = true
+        ))
+    case other => FakeRowAdaptor(other)
+  }
+
+  override def writeFiles(
+      inputData: Dataset[_],
+      writeOptions: Option[DeltaOptions],
+      additionalConstraints: Seq[Constraint]): Seq[FileAction] = {
+    if (ClickHouseConfig.isMergeTreeFormatEngine(metadata.configuration)) {
+      hasWritten = true
+
+      val spark = inputData.sparkSession
+      val (data, partitionSchema) = performCDCPartition(inputData)
+      val outputPath = deltaLog.dataPath
+
+      val (queryExecution, output, generatedColumnConstraints, _) =
+        normalizeData(deltaLog, data)
+      val partitioningColumns = getPartitioningColumns(partitionSchema, output)
+
+      val committer = new MergeTreeCommitProtocol("delta-mergetree", outputPath.toString, None)
+
+      // val (optionalStatsTracker, _) =
+      //   getOptionalStatsTrackerAndStatsCollection(output, outputPath, partitionSchema, data)
+      val (optionalStatsTracker, _) = (None, None)
+
+      val constraints =
+        Constraints.getAll(metadata, spark) ++ generatedColumnConstraints ++ additionalConstraints
+
+      SQLExecution.withNewExecutionId(queryExecution, Option("deltaTransactionalWrite")) {
+        val outputSpec = FileFormatWriter.OutputSpec(outputPath.toString, Map.empty, output)
+
+        val queryPlan = queryExecution.executedPlan
+        val newQueryPlan = insertFakeRowAdaptor(queryPlan)
+
+        val statsTrackers: ListBuffer[WriteJobStatsTracker] = ListBuffer()
+
+        if (spark.conf.get(DeltaSQLConf.DELTA_HISTORY_METRICS_ENABLED)) {
+          val basicWriteJobStatsTracker = new BasicWriteJobStatsTracker(
+            new SerializableConfiguration(deltaLog.newDeltaHadoopConf()),
+            BasicWriteJobStatsTracker.metrics)
+          //        registerSQLMetrics(spark, basicWriteJobStatsTracker.driverSideMetrics)
+          statsTrackers.append(basicWriteJobStatsTracker)
+        }
+
+        // Retain only a minimal selection of Spark writer options to avoid any potential
+        // compatibility issues
+        var options = writeOptions match {
+          case None => Map.empty[String, String]
+          case Some(writeOptions) =>
+            writeOptions.options.filterKeys {
+              key =>
+                key.equalsIgnoreCase(DeltaOptions.MAX_RECORDS_PER_FILE) ||
+                key.equalsIgnoreCase(DeltaOptions.COMPRESSION)
+            }.toMap
+        }
+
+        spark.conf.getAll.foreach(
+          entry => {
+            if (
+              entry._1.startsWith(s"${CHBackendSettings.getBackendConfigPrefix}.runtime_settings")
+              || entry._1.equalsIgnoreCase(DeltaSQLConf.DELTA_OPTIMIZE_MIN_FILE_SIZE.key)
+            ) {
+              options += (entry._1 -> entry._2)
+            }
+          })
+
+        try {
+          val tableV2 = ClickHouseTableV2.getTable(deltaLog)
+          MergeTreeFileFormatWriter.write(
+            sparkSession = spark,
+            plan = newQueryPlan,
+            fileFormat = tableV2.getFileFormat(metadata),
+            // formats.
+            committer = committer,
+            outputSpec = outputSpec,
+            // scalastyle:off deltahadoopconfiguration
+            hadoopConf = spark.sessionState
+              .newHadoopConfWithOptions(metadata.configuration ++ deltaLog.options),
+            // scalastyle:on deltahadoopconfiguration
+            orderByKeyOption = tableV2.orderByKeyOption,
+            lowCardKeyOption = tableV2.lowCardKeyOption,
+            minmaxIndexKeyOption = tableV2.minmaxIndexKeyOption,
+            bfIndexKeyOption = tableV2.bfIndexKeyOption,
+            setIndexKeyOption = tableV2.setIndexKeyOption,
+            primaryKeyOption = tableV2.primaryKeyOption,
+            partitionColumns = partitioningColumns,
+            bucketSpec = tableV2.bucketOption,
+            statsTrackers = optionalStatsTracker.toSeq ++ statsTrackers,
+            options = options,
+            constraints = constraints
+          )
+        } catch {
+          case s: SparkException =>
+            // Pull an InvariantViolationException up to the top level if it was the root cause.
+            val violationException = ExceptionUtils.getRootCause(s)
+            if (violationException.isInstanceOf[InvariantViolationException]) {
+              throw violationException
+            } else {
+              throw s
+            }
+        }
+      }
+      committer.addedStatuses.toSeq ++ committer.changeFiles
+    } else {
+      // TODO: support native delta parquet write
+      // 1. insert FakeRowAdaptor
+      // 2. DeltaInvariantCheckerExec transform
+      // 3. DeltaTaskStatisticsTracker collect null count / min values / max values
+      // 4. set the parameters 'staticPartitionWriteOnly', 'isNativeAppliable',
+      //    'nativeFormat' in the LocalProperty of the sparkcontext
+      super.writeFiles(inputData, writeOptions, additionalConstraints)
+    }
+  }
+}
diff --git a/backends-clickhouse/src/main/delta-23/org/apache/spark/sql/delta/DeltaAdapter.scala b/backends-clickhouse/src/main/delta-23/org/apache/spark/sql/delta/DeltaAdapter.scala
new file mode 100644
index 000000000000..8a9c5585e888
--- /dev/null
+++ b/backends-clickhouse/src/main/delta-23/org/apache/spark/sql/delta/DeltaAdapter.scala
@@ -0,0 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.delta
+
+object DeltaAdapter extends DeltaAdapterTrait {
+  override def snapshot(deltaLog: DeltaLog): Snapshot = deltaLog.unsafeVolatileSnapshot
+}
diff --git a/backends-clickhouse/src/main/delta-23/org/apache/spark/sql/delta/catalog/ClickHouseTableV2.scala b/backends-clickhouse/src/main/delta-23/org/apache/spark/sql/delta/catalog/ClickHouseTableV2.scala
new file mode 100644
index 000000000000..90370f0b1d99
--- /dev/null
+++ b/backends-clickhouse/src/main/delta-23/org/apache/spark/sql/delta/catalog/ClickHouseTableV2.scala
@@ -0,0 +1,186 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.delta.catalog
+import org.apache.spark.internal.Logging
+import org.apache.spark.sql.SparkSession
+import org.apache.spark.sql.catalyst.catalog.CatalogTable
+import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression}
+import org.apache.spark.sql.connector.read.InputPartition
+import org.apache.spark.sql.connector.write.{LogicalWriteInfo, WriteBuilder}
+import org.apache.spark.sql.delta.{ClickhouseSnapshot, DeltaErrors, DeltaLog, DeltaTimeTravelSpec, Snapshot}
+import org.apache.spark.sql.delta.actions.Metadata
+import org.apache.spark.sql.delta.catalog.ClickHouseTableV2.deltaLog2Table
+import org.apache.spark.sql.delta.sources.DeltaDataSource
+import org.apache.spark.sql.execution.datasources.{HadoopFsRelation, PartitionDirectory}
+import org.apache.spark.sql.execution.datasources.utils.MergeTreePartsPartitionsUtil
+import org.apache.spark.sql.execution.datasources.v2.clickhouse.source.DeltaMergeTreeFileFormat
+import org.apache.spark.sql.util.CaseInsensitiveStringMap
+import org.apache.spark.util.collection.BitSet
+
+import org.apache.hadoop.fs.Path
+
+import java.{util => ju}
+
+import scala.collection.JavaConverters._
+
+@SuppressWarnings(Array("io.github.zhztheplayer.scalawarts.InheritFromCaseClass"))
+class ClickHouseTableV2(
+    override val spark: SparkSession,
+    override val path: Path,
+    override val catalogTable: Option[CatalogTable] = None,
+    override val tableIdentifier: Option[String] = None,
+    override val timeTravelOpt: Option[DeltaTimeTravelSpec] = None,
+    override val options: Map[String, String] = Map.empty,
+    override val cdcOptions: CaseInsensitiveStringMap = CaseInsensitiveStringMap.empty(),
+    val clickhouseExtensionOptions: Map[String, String] = Map.empty)
+  extends DeltaTableV2(
+    spark,
+    path,
+    catalogTable,
+    tableIdentifier,
+    timeTravelOpt,
+    options,
+    cdcOptions)
+  with ClickHouseTableV2Base {
+
+  lazy val (rootPath, partitionFilters, timeTravelByPath) = {
+    if (catalogTable.isDefined) {
+      // Fast path for reducing path munging overhead
+      (new Path(catalogTable.get.location), Nil, None)
+    } else {
+      DeltaDataSource.parsePathIdentifier(spark, path.toString, options)
+    }
+  }
+
+  private lazy val timeTravelSpec: Option[DeltaTimeTravelSpec] = {
+    if (timeTravelOpt.isDefined && timeTravelByPath.isDefined) {
+      throw DeltaErrors.multipleTimeTravelSyntaxUsed
+    }
+    timeTravelOpt.orElse(timeTravelByPath)
+  }
+
+  override def name(): String =
+    catalogTable
+      .map(_.identifier.unquotedString)
+      .orElse(tableIdentifier)
+      .getOrElse(s"clickhouse.`${deltaLog.dataPath}`")
+
+  override def properties(): ju.Map[String, String] = {
+    val ret = super.properties()
+
+    // for file path based write
+    if (snapshot.version < 0 && clickhouseExtensionOptions.nonEmpty) {
+      ret.putAll(clickhouseExtensionOptions.asJava)
+    }
+    ret
+  }
+
+  override def newWriteBuilder(info: LogicalWriteInfo): WriteBuilder = {
+    new WriteIntoDeltaBuilder(deltaLog, info.options)
+  }
+
+  def getFileFormat(meta: Metadata): DeltaMergeTreeFileFormat = {
+    new DeltaMergeTreeFileFormat(
+      meta,
+      dataBaseName,
+      tableName,
+      ClickhouseSnapshot.genSnapshotId(snapshot),
+      orderByKeyOption,
+      lowCardKeyOption,
+      minmaxIndexKeyOption,
+      bfIndexKeyOption,
+      setIndexKeyOption,
+      primaryKeyOption,
+      clickhouseTableConfigs,
+      partitionColumns
+    )
+  }
+
+  override def deltaProperties(): ju.Map[String, String] = properties()
+
+  override def deltaCatalog(): Option[CatalogTable] = catalogTable
+
+  override def deltaPath(): Path = path
+
+  override def deltaSnapshot(): Snapshot = snapshot
+
+  def cacheThis(): Unit = {
+    deltaLog2Table.put(deltaLog, this)
+  }
+
+  cacheThis()
+}
+
+@SuppressWarnings(Array("io.github.zhztheplayer.scalawarts.InheritFromCaseClass"))
+class TempClickHouseTableV2(
+    override val spark: SparkSession,
+    override val catalogTable: Option[CatalogTable] = None)
+  extends ClickHouseTableV2(spark, null, catalogTable) {
+  import collection.JavaConverters._
+  override def properties(): ju.Map[String, String] = catalogTable.get.properties.asJava
+  override lazy val partitionColumns: Seq[String] = catalogTable.get.partitionColumnNames
+  override def cacheThis(): Unit = {}
+}
+
+object ClickHouseTableV2 extends Logging {
+  private val deltaLog2Table =
+    new scala.collection.concurrent.TrieMap[DeltaLog, ClickHouseTableV2]()
+  // for CTAS use
+  val temporalThreadLocalCHTable = new ThreadLocal[ClickHouseTableV2]()
+
+  def getTable(deltaLog: DeltaLog): ClickHouseTableV2 = {
+    if (deltaLog2Table.contains(deltaLog)) {
+      deltaLog2Table(deltaLog)
+    } else if (temporalThreadLocalCHTable.get() != null) {
+      temporalThreadLocalCHTable.get()
+    } else {
+      throw new IllegalStateException(
+        s"Can not find ClickHouseTableV2 for deltalog ${deltaLog.dataPath}")
+    }
+  }
+
+  def clearCache(): Unit = {
+    deltaLog2Table.clear()
+    temporalThreadLocalCHTable.remove()
+  }
+
+  def partsPartitions(
+      deltaLog: DeltaLog,
+      relation: HadoopFsRelation,
+      selectedPartitions: Array[PartitionDirectory],
+      output: Seq[Attribute],
+      bucketedScan: Boolean,
+      optionalBucketSet: Option[BitSet],
+      optionalNumCoalescedBuckets: Option[Int],
+      disableBucketedScan: Boolean,
+      filterExprs: Seq[Expression]): Seq[InputPartition] = {
+    val tableV2 = ClickHouseTableV2.getTable(deltaLog)
+
+    MergeTreePartsPartitionsUtil.getMergeTreePartsPartitions(
+      relation,
+      selectedPartitions,
+      output,
+      bucketedScan,
+      tableV2.spark,
+      tableV2,
+      optionalBucketSet,
+      optionalNumCoalescedBuckets,
+      disableBucketedScan,
+      filterExprs)
+
+  }
+}
diff --git a/backends-clickhouse/src/main/delta-23/org/apache/spark/sql/delta/commands/OptimizeTableCommandOverwrites.scala b/backends-clickhouse/src/main/delta-23/org/apache/spark/sql/delta/commands/OptimizeTableCommandOverwrites.scala
new file mode 100644
index 000000000000..f7a180b6a239
--- /dev/null
+++ b/backends-clickhouse/src/main/delta-23/org/apache/spark/sql/delta/commands/OptimizeTableCommandOverwrites.scala
@@ -0,0 +1,333 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.delta.commands
+
+import org.apache.gluten.expression.ConverterUtils
+
+import org.apache.spark.{TaskContext, TaskOutputFileAlreadyExistException}
+import org.apache.spark.internal.Logging
+import org.apache.spark.internal.io.FileCommitProtocol.TaskCommitMessage
+import org.apache.spark.internal.io.SparkHadoopWriterUtils
+import org.apache.spark.shuffle.FetchFailedException
+import org.apache.spark.sql.SparkSession
+import org.apache.spark.sql.catalyst.{InternalRow, TableIdentifier}
+import org.apache.spark.sql.catalyst.catalog.CatalogTableType
+import org.apache.spark.sql.delta._
+import org.apache.spark.sql.delta.actions.{AddFile, FileAction}
+import org.apache.spark.sql.delta.catalog.ClickHouseTableV2
+import org.apache.spark.sql.errors.QueryExecutionErrors
+import org.apache.spark.sql.execution.datasources.CHDatasourceJniWrapper
+import org.apache.spark.sql.execution.datasources.v1.CHMergeTreeWriterInjects
+import org.apache.spark.sql.execution.datasources.v1.clickhouse._
+import org.apache.spark.sql.execution.datasources.v2.clickhouse.metadata.{AddFileTags, AddMergeTreeParts}
+import org.apache.spark.sql.execution.datasources.v2.clickhouse.utils.CHDataSourceUtils
+import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.types.StructType
+import org.apache.spark.util.{SerializableConfiguration, SystemClock, Utils}
+
+import org.apache.hadoop.fs.{FileAlreadyExistsException, Path}
+import org.apache.hadoop.mapreduce.{TaskAttemptContext, TaskAttemptID, TaskID, TaskType}
+import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl
+
+import java.util.{Date, UUID}
+
+import scala.collection.mutable.ArrayBuffer
+
+object OptimizeTableCommandOverwrites extends Logging {
+
+  case class TaskDescription(
+      path: String,
+      database: String,
+      tableName: String,
+      snapshotId: String,
+      orderByKeyOption: Option[Seq[String]],
+      lowCardKeyOption: Option[Seq[String]],
+      minmaxIndexKeyOption: Option[Seq[String]],
+      bfIndexKeyOption: Option[Seq[String]],
+      setIndexKeyOption: Option[Seq[String]],
+      primaryKeyOption: Option[Seq[String]],
+      partitionColumns: Seq[String],
+      partList: Seq[String],
+      tableSchema: StructType,
+      clickhouseTableConfigs: Map[String, String],
+      serializableHadoopConf: SerializableConfiguration,
+      jobIdInstant: Long,
+      partitionDir: Option[String],
+      bucketDir: Option[String]
+  )
+
+  private def executeTask(
+      description: TaskDescription,
+      sparkStageId: Int,
+      sparkPartitionId: Int,
+      sparkAttemptNumber: Int
+  ): MergeTreeWriteTaskResult = {
+
+    val jobId = SparkHadoopWriterUtils.createJobID(new Date(description.jobIdInstant), sparkStageId)
+    val taskId = new TaskID(jobId, TaskType.MAP, sparkPartitionId)
+    val taskAttemptId = new TaskAttemptID(taskId, sparkAttemptNumber)
+
+    // Set up the attempt context required to use in the output committer.
+    val taskAttemptContext: TaskAttemptContext = {
+      // Set up the configuration object
+      val hadoopConf = description.serializableHadoopConf.value
+      hadoopConf.set("mapreduce.job.id", jobId.toString)
+      hadoopConf.set("mapreduce.task.id", taskAttemptId.getTaskID.toString)
+      hadoopConf.set("mapreduce.task.attempt.id", taskAttemptId.toString)
+      hadoopConf.setBoolean("mapreduce.task.ismap", true)
+      hadoopConf.setInt("mapreduce.task.partition", 0)
+
+      new TaskAttemptContextImpl(hadoopConf, taskAttemptId)
+    }
+
+    try {
+      Utils.tryWithSafeFinallyAndFailureCallbacks(block = {
+
+        val uuid = UUID.randomUUID.toString
+
+        val planWithSplitInfo = CHMergeTreeWriterInjects.genMergeTreeWriteRel(
+          description.path,
+          description.database,
+          description.tableName,
+          description.snapshotId,
+          description.orderByKeyOption,
+          description.lowCardKeyOption,
+          description.minmaxIndexKeyOption,
+          description.bfIndexKeyOption,
+          description.setIndexKeyOption,
+          description.primaryKeyOption,
+          description.partitionColumns,
+          description.partList,
+          ConverterUtils.convertNamedStructJson(description.tableSchema),
+          description.clickhouseTableConfigs,
+          description.tableSchema.toAttributes
+        )
+
+        val datasourceJniWrapper = new CHDatasourceJniWrapper()
+        val returnedMetrics =
+          datasourceJniWrapper.nativeMergeMTParts(
+            planWithSplitInfo.plan,
+            planWithSplitInfo.splitInfo,
+            uuid,
+            taskId.getId.toString,
+            description.partitionDir.getOrElse(""),
+            description.bucketDir.getOrElse("")
+          )
+        if (returnedMetrics != null && returnedMetrics.nonEmpty) {
+          val addFiles = AddFileTags.partsMetricsToAddFile(
+            description.database,
+            description.tableName,
+            description.path,
+            returnedMetrics,
+            Seq(Utils.localHostName()))
+
+          val (taskCommitMessage, taskCommitTime) = Utils.timeTakenMs {
+            // committer.commitTask(taskAttemptContext)
+            new TaskCommitMessage(addFiles.toSeq)
+          }
+
+//          val summary = MergeTreeExecutedWriteSummary(
+//            updatedPartitions = updatedPartitions.toSet,
+//            stats = statsTrackers.map(_.getFinalStats(taskCommitTime)))
+          MergeTreeWriteTaskResult(taskCommitMessage, null)
+        } else {
+          throw new IllegalStateException()
+        }
+      })(
+        catchBlock = {
+          // If there is an error, abort the task
+          logError(s"Job $jobId aborted.")
+        },
+        finallyBlock = {})
+    } catch {
+      case e: FetchFailedException =>
+        throw e
+      case f: FileAlreadyExistsException if SQLConf.get.fastFailFileFormatOutput =>
+        // If any output file to write already exists, it does not make sense to re-run this task.
+        // We throw the exception and let Executor throw ExceptionFailure to abort the job.
+        throw new TaskOutputFileAlreadyExistException(f)
+      case t: Throwable =>
+        throw QueryExecutionErrors.taskFailedWhileWritingRowsError(t)
+    }
+
+  }
+
+  def runOptimizeBinJobClickhouse(
+      txn: OptimisticTransaction,
+      partitionValues: Map[String, String],
+      bucketNum: String,
+      bin: Seq[AddFile],
+      maxFileSize: Long): Seq[FileAction] = {
+    val tableV2 = ClickHouseTableV2.getTable(txn.deltaLog);
+
+    val sparkSession = SparkSession.getActiveSession.get
+
+    val rddWithNonEmptyPartitions =
+      sparkSession.sparkContext.parallelize(Array.empty[InternalRow], 1)
+
+    val jobIdInstant = new Date().getTime
+    val ret = new Array[MergeTreeWriteTaskResult](rddWithNonEmptyPartitions.partitions.length)
+
+    val serializableHadoopConf = new SerializableConfiguration(
+      sparkSession.sessionState.newHadoopConfWithOptions(
+        txn.metadata.configuration ++ txn.deltaLog.options))
+
+    val partitionDir = if (tableV2.partitionColumns.isEmpty) {
+      None
+    } else {
+      Some(tableV2.partitionColumns.map(c => c + "=" + partitionValues(c)).mkString("/"))
+    }
+
+    val bucketDir = if (tableV2.bucketOption.isEmpty) {
+      None
+    } else {
+      Some(bucketNum)
+    }
+
+    val description = TaskDescription.apply(
+      txn.deltaLog.dataPath.toString,
+      tableV2.dataBaseName,
+      tableV2.tableName,
+      ClickhouseSnapshot.genSnapshotId(tableV2.snapshot),
+      tableV2.orderByKeyOption,
+      tableV2.lowCardKeyOption,
+      tableV2.minmaxIndexKeyOption,
+      tableV2.bfIndexKeyOption,
+      tableV2.setIndexKeyOption,
+      tableV2.primaryKeyOption,
+      tableV2.partitionColumns,
+      bin.map(_.asInstanceOf[AddMergeTreeParts].name),
+      tableV2.schema(),
+      tableV2.clickhouseTableConfigs,
+      serializableHadoopConf,
+      jobIdInstant,
+      partitionDir,
+      bucketDir
+    )
+    sparkSession.sparkContext.runJob(
+      rddWithNonEmptyPartitions,
+      (taskContext: TaskContext, _: Iterator[InternalRow]) => {
+        executeTask(
+          description,
+          taskContext.stageId(),
+          taskContext.partitionId(),
+          taskContext.taskAttemptId().toInt & Integer.MAX_VALUE
+        )
+      },
+      rddWithNonEmptyPartitions.partitions.indices,
+      (index, res: MergeTreeWriteTaskResult) => {
+        ret(index) = res
+      }
+    )
+
+    val addFiles = ret
+      .flatMap(_.commitMsg.obj.asInstanceOf[Seq[AddFile]])
+      .toSeq
+
+    val removeFiles =
+      bin.map(f => f.removeWithTimestamp(new SystemClock().getTimeMillis(), dataChange = false))
+    addFiles ++ removeFiles
+
+  }
+
+  def getDeltaLogClickhouse(
+      spark: SparkSession,
+      path: Option[String],
+      tableIdentifier: Option[TableIdentifier],
+      operationName: String,
+      hadoopConf: Map[String, String] = Map.empty): DeltaLog = {
+    val tablePath =
+      if (path.nonEmpty) {
+        new Path(path.get)
+      } else if (tableIdentifier.nonEmpty) {
+        val sessionCatalog = spark.sessionState.catalog
+        lazy val metadata = sessionCatalog.getTableMetadata(tableIdentifier.get)
+
+        if (CHDataSourceUtils.isClickhousePath(spark, tableIdentifier.get)) {
+          new Path(tableIdentifier.get.table)
+        } else if (CHDataSourceUtils.isClickHouseTable(spark, tableIdentifier.get)) {
+          new Path(metadata.location)
+        } else {
+          DeltaTableIdentifier(spark, tableIdentifier.get) match {
+            case Some(id) if id.path.nonEmpty =>
+              new Path(id.path.get)
+            case Some(id) if id.table.nonEmpty =>
+              new Path(metadata.location)
+            case _ =>
+              if (metadata.tableType == CatalogTableType.VIEW) {
+                throw DeltaErrors.viewNotSupported(operationName)
+              }
+              throw DeltaErrors.notADeltaTableException(operationName)
+          }
+        }
+      } else {
+        throw DeltaErrors.missingTableIdentifierException(operationName)
+      }
+
+    val startTime = Some(System.currentTimeMillis)
+    val deltaLog = DeltaLog.forTable(spark, tablePath, hadoopConf)
+    if (deltaLog.update(checkIfUpdatedSinceTs = startTime).version < 0) {
+      throw DeltaErrors.notADeltaTableException(
+        operationName,
+        DeltaTableIdentifier(path, tableIdentifier))
+    }
+    deltaLog
+  }
+
+  def groupFilesIntoBinsClickhouse(
+      partitionsToCompact: Seq[((String, Map[String, String]), Seq[AddFile])],
+      maxTargetFileSize: Long): Seq[((String, Map[String, String]), Seq[AddFile])] = {
+    partitionsToCompact.flatMap {
+      case (partition, files) =>
+        val bins = new ArrayBuffer[Seq[AddFile]]()
+
+        val currentBin = new ArrayBuffer[AddFile]()
+        var currentBinSize = 0L
+
+        files.sortBy(_.size).foreach {
+          file =>
+            // Generally, a bin is a group of existing files, whose total size does not exceed the
+            // desired maxFileSize. They will be coalesced into a single output file.
+            // However, if isMultiDimClustering = true, all files in a partition will be read by the
+            // same job, the data will be range-partitioned and
+            // numFiles = totalFileSize / maxFileSize
+            // will be produced. See below.
+
+            // isMultiDimClustering is always false for Gluten Clickhouse for now
+            if (file.size + currentBinSize > maxTargetFileSize /* && !isMultiDimClustering */ ) {
+              bins += currentBin.toVector
+              currentBin.clear()
+              currentBin += file
+              currentBinSize = file.size
+            } else {
+              currentBin += file
+              currentBinSize += file.size
+            }
+        }
+
+        if (currentBin.nonEmpty) {
+          bins += currentBin.toVector
+        }
+
+        bins
+          .map(b => (partition, b))
+          // select bins that have at least two files or in case of multi-dim clustering
+          // select all bins
+          .filter(_._2.size > 1 /* || isMultiDimClustering */ )
+    }
+  }
+}
diff --git a/backends-clickhouse/src/main/delta-23/org/apache/spark/sql/execution/datasources/v2/clickhouse/ClickHouseDataSource.scala b/backends-clickhouse/src/main/delta-23/org/apache/spark/sql/execution/datasources/v2/clickhouse/ClickHouseDataSource.scala
new file mode 100644
index 000000000000..8c1062f4c7b6
--- /dev/null
+++ b/backends-clickhouse/src/main/delta-23/org/apache/spark/sql/execution/datasources/v2/clickhouse/ClickHouseDataSource.scala
@@ -0,0 +1,145 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.execution.datasources.v2.clickhouse
+
+import org.apache.spark.sql._
+import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap
+import org.apache.spark.sql.connector.catalog.Table
+import org.apache.spark.sql.connector.expressions.Transform
+import org.apache.spark.sql.delta._
+import org.apache.spark.sql.delta.catalog.ClickHouseTableV2
+import org.apache.spark.sql.delta.commands.WriteIntoDelta
+import org.apache.spark.sql.delta.commands.cdc.CDCReader
+import org.apache.spark.sql.delta.sources.{DeltaDataSource, DeltaSourceUtils, DeltaSQLConf}
+import org.apache.spark.sql.sources.BaseRelation
+import org.apache.spark.sql.types.StructType
+import org.apache.spark.sql.util.CaseInsensitiveStringMap
+
+import org.apache.hadoop.fs.Path
+
+import scala.collection.JavaConverters._
+import scala.collection.mutable
+
+/** A DataSource V1 for integrating Delta into Spark SQL batch and Streaming APIs. */
+class ClickHouseDataSource extends DeltaDataSource {
+
+  override def shortName(): String = {
+    ClickHouseConfig.NAME
+  }
+
+  override def getTable(
+      schema: StructType,
+      partitioning: Array[Transform],
+      properties: java.util.Map[String, String]): Table = {
+    val options = new CaseInsensitiveStringMap(properties)
+    val path = options.get("path")
+    if (path == null) throw DeltaErrors.pathNotSpecifiedException
+    new ClickHouseTableV2(
+      SparkSession.active,
+      new Path(path),
+      options = properties.asScala.toMap,
+      clickhouseExtensionOptions = ClickHouseConfig
+        .createMergeTreeConfigurations(
+          ClickHouseConfig
+            .getMergeTreeConfigurations(properties)
+            .asJava)
+    )
+  }
+
+  override def createRelation(
+      sqlContext: SQLContext,
+      mode: SaveMode,
+      parameters: Map[String, String],
+      data: DataFrame): BaseRelation = {
+    val path = parameters.getOrElse("path", throw DeltaErrors.pathNotSpecifiedException)
+    val partitionColumns = parameters
+      .get(DeltaSourceUtils.PARTITIONING_COLUMNS_KEY)
+      .map(DeltaDataSource.decodePartitioningColumns)
+      .getOrElse(Nil)
+
+    val deltaLog = DeltaLog.forTable(sqlContext.sparkSession, path, parameters)
+    // need to use the latest snapshot
+    val configs = if (deltaLog.update().version < 0) {
+      // when creating table, save the clickhouse config to the delta metadata
+      val clickHouseTableV2 = ClickHouseTableV2.getTable(deltaLog)
+      clickHouseTableV2.properties().asScala.toMap ++ DeltaConfigs
+        .validateConfigurations(parameters.filterKeys(_.startsWith("delta.")).toMap)
+    } else {
+      DeltaConfigs.validateConfigurations(parameters.filterKeys(_.startsWith("delta.")).toMap)
+    }
+    WriteIntoDelta(
+      deltaLog = deltaLog,
+      mode = mode,
+      new DeltaOptions(parameters, sqlContext.sparkSession.sessionState.conf),
+      partitionColumns = partitionColumns,
+      configuration = configs,
+      data = data
+    ).run(sqlContext.sparkSession)
+
+    deltaLog.createRelation()
+  }
+
+  override def createRelation(
+      sqlContext: SQLContext,
+      parameters: Map[String, String]): BaseRelation = {
+    recordFrameProfile("Delta", "DeltaDataSource.createRelation") {
+      val maybePath = parameters.getOrElse("path", throw DeltaErrors.pathNotSpecifiedException)
+
+      // Log any invalid options that are being passed in
+      DeltaOptions.verifyOptions(CaseInsensitiveMap(parameters))
+
+      val timeTravelByParams = DeltaDataSource.getTimeTravelVersion(parameters)
+      var cdcOptions: mutable.Map[String, String] = mutable.Map.empty
+      val caseInsensitiveParams = new CaseInsensitiveStringMap(parameters.asJava)
+      if (CDCReader.isCDCRead(caseInsensitiveParams)) {
+        cdcOptions = mutable.Map[String, String](DeltaDataSource.CDC_ENABLED_KEY -> "true")
+        if (caseInsensitiveParams.containsKey(DeltaDataSource.CDC_START_VERSION_KEY)) {
+          cdcOptions(DeltaDataSource.CDC_START_VERSION_KEY) =
+            caseInsensitiveParams.get(DeltaDataSource.CDC_START_VERSION_KEY)
+        }
+        if (caseInsensitiveParams.containsKey(DeltaDataSource.CDC_START_TIMESTAMP_KEY)) {
+          cdcOptions(DeltaDataSource.CDC_START_TIMESTAMP_KEY) =
+            caseInsensitiveParams.get(DeltaDataSource.CDC_START_TIMESTAMP_KEY)
+        }
+        if (caseInsensitiveParams.containsKey(DeltaDataSource.CDC_END_VERSION_KEY)) {
+          cdcOptions(DeltaDataSource.CDC_END_VERSION_KEY) =
+            caseInsensitiveParams.get(DeltaDataSource.CDC_END_VERSION_KEY)
+        }
+        if (caseInsensitiveParams.containsKey(DeltaDataSource.CDC_END_TIMESTAMP_KEY)) {
+          cdcOptions(DeltaDataSource.CDC_END_TIMESTAMP_KEY) =
+            caseInsensitiveParams.get(DeltaDataSource.CDC_END_TIMESTAMP_KEY)
+        }
+      }
+      val dfOptions: Map[String, String] =
+        if (
+          sqlContext.sparkSession.sessionState.conf.getConf(
+            DeltaSQLConf.LOAD_FILE_SYSTEM_CONFIGS_FROM_DATAFRAME_OPTIONS)
+        ) {
+          parameters
+        } else {
+          Map.empty
+        }
+      (new ClickHouseTableV2(
+        sqlContext.sparkSession,
+        new Path(maybePath),
+        timeTravelOpt = timeTravelByParams,
+        options = dfOptions,
+        cdcOptions = new CaseInsensitiveStringMap(cdcOptions.asJava)
+      )).toBaseRelation
+    }
+  }
+}
diff --git a/backends-clickhouse/src/main/delta-23/org/apache/spark/sql/execution/datasources/v2/clickhouse/ClickHouseSparkCatalog.scala b/backends-clickhouse/src/main/delta-23/org/apache/spark/sql/execution/datasources/v2/clickhouse/ClickHouseSparkCatalog.scala
new file mode 100644
index 000000000000..61e1da44d0af
--- /dev/null
+++ b/backends-clickhouse/src/main/delta-23/org/apache/spark/sql/execution/datasources/v2/clickhouse/ClickHouseSparkCatalog.scala
@@ -0,0 +1,662 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.execution.datasources.v2.clickhouse
+
+import org.apache.gluten.sql.shims.SparkShimLoader
+
+import org.apache.spark.sql.{AnalysisException, DataFrame, SparkSession}
+import org.apache.spark.sql.catalyst.TableIdentifier
+import org.apache.spark.sql.catalyst.analysis.{NoSuchDatabaseException, NoSuchNamespaceException, NoSuchTableException}
+import org.apache.spark.sql.catalyst.catalog._
+import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
+import org.apache.spark.sql.connector.catalog._
+import org.apache.spark.sql.connector.catalog.TableCapability.V1_BATCH_WRITE
+import org.apache.spark.sql.connector.expressions.Transform
+import org.apache.spark.sql.connector.write.{LogicalWriteInfo, V1Write, WriteBuilder}
+import org.apache.spark.sql.delta.{DeltaConfigs, DeltaErrors, DeltaLog, DeltaOptions, DeltaTableUtils}
+import org.apache.spark.sql.delta.DeltaTableIdentifier.gluePermissionError
+import org.apache.spark.sql.delta.catalog.{ClickHouseTableV2, DeltaTableV2, TempClickHouseTableV2}
+import org.apache.spark.sql.delta.commands.{CreateDeltaTableCommand, TableCreationModes, WriteIntoDelta}
+import org.apache.spark.sql.delta.metering.DeltaLogging
+import org.apache.spark.sql.delta.sources.{DeltaSourceUtils, DeltaSQLConf}
+import org.apache.spark.sql.execution.datasources.{DataSource, PartitioningUtils}
+import org.apache.spark.sql.execution.datasources.v2.clickhouse.utils.CHDataSourceUtils
+import org.apache.spark.sql.sources.InsertableRelation
+import org.apache.spark.sql.types.StructType
+
+import org.apache.hadoop.fs.Path
+
+import java.util
+import java.util.Locale
+
+import scala.collection.JavaConverters._
+
+class ClickHouseSparkCatalog
+  extends DelegatingCatalogExtension
+  with StagingTableCatalog
+  with SupportsPathIdentifier
+  with DeltaLogging {
+
+  val spark = SparkSession.active
+
+  private def createCatalogTable(
+      ident: Identifier,
+      schema: StructType,
+      partitions: Array[Transform],
+      properties: util.Map[String, String]
+  ): Table = {
+    super.createTable(ident, schema, partitions, properties)
+  }
+
+  override def createTable(
+      ident: Identifier,
+      schema: StructType,
+      partitions: Array[Transform],
+      properties: util.Map[String, String]): Table = {
+    if (CHDataSourceUtils.isClickHouseDataSourceName(getProvider(properties))) {
+      createClickHouseTable(
+        ident,
+        schema,
+        partitions,
+        properties,
+        Map.empty,
+        sourceQuery = None,
+        TableCreationModes.Create)
+    } else if (DeltaSourceUtils.isDeltaDataSourceName(getProvider(properties))) {
+      createDeltaTable(
+        ident,
+        schema,
+        partitions,
+        properties,
+        Map.empty,
+        sourceQuery = None,
+        TableCreationModes.Create
+      )
+    } else {
+      createCatalogTable(ident, schema, partitions, properties)
+    }
+  }
+
+  /**
+   * Creates a ClickHouse table
+   *
+   * @param ident
+   *   The identifier of the table
+   * @param schema
+   *   The schema of the table
+   * @param partitions
+   *   The partition transforms for the table
+   * @param allTableProperties
+   *   The table properties that configure the behavior of the table or provide information about
+   *   the table
+   * @param writeOptions
+   *   Options specific to the write during table creation or replacement
+   * @param sourceQuery
+   *   A query if this CREATE request came from a CTAS or RTAS
+   * @param operation
+   *   The specific table creation mode, whether this is a Create/Replace/Create or Replace
+   */
+  private def createClickHouseTable(
+      ident: Identifier,
+      schema: StructType,
+      partitions: Array[Transform],
+      allTableProperties: util.Map[String, String],
+      writeOptions: Map[String, String],
+      sourceQuery: Option[DataFrame],
+      operation: TableCreationModes.CreationMode): Table = {
+    val (partitionColumns, maybeBucketSpec) =
+      SparkShimLoader.getSparkShims.convertPartitionTransforms(partitions)
+    var newSchema = schema
+    var newPartitionColumns = partitionColumns
+    var newBucketSpec = maybeBucketSpec
+
+    // Delta does not support bucket feature, so save the bucket infos into properties if exists.
+    val tableProperties =
+      ClickHouseConfig.createMergeTreeConfigurations(allTableProperties, newBucketSpec)
+
+    val isByPath = isPathIdentifier(ident)
+    val location = if (isByPath) {
+      Option(ident.name())
+    } else {
+      Option(allTableProperties.get("location"))
+    }
+    val locUriOpt = location.map(CatalogUtils.stringToURI)
+    val storage = DataSource
+      .buildStorageFormatFromOptions(writeOptions)
+      .copy(locationUri = locUriOpt)
+    val tableType =
+      if (location.isDefined) CatalogTableType.EXTERNAL else CatalogTableType.MANAGED
+    val id = {
+      TableIdentifier(ident.name(), ident.namespace().lastOption)
+    }
+    val existingTableOpt = getExistingTableIfExists(id)
+    val loc = new Path(locUriOpt.getOrElse(spark.sessionState.catalog.defaultTablePath(id)))
+    val commentOpt = Option(allTableProperties.get("comment"))
+
+    val tableDesc = new CatalogTable(
+      identifier = id,
+      tableType = tableType,
+      storage = storage,
+      schema = newSchema,
+      provider = Some(ClickHouseConfig.ALT_NAME),
+      partitionColumnNames = newPartitionColumns,
+      bucketSpec = newBucketSpec,
+      properties = tableProperties,
+      comment = commentOpt
+    )
+
+    val withDb = verifyTableAndSolidify(tableDesc, None, true)
+
+    val writer = sourceQuery.map {
+      df =>
+        WriteIntoDelta(
+          DeltaLog.forTable(spark, loc),
+          operation.mode,
+          new DeltaOptions(withDb.storage.properties, spark.sessionState.conf),
+          withDb.partitionColumnNames,
+          withDb.properties ++ commentOpt.map("comment" -> _),
+          df,
+          schemaInCatalog = if (newSchema != schema) Some(newSchema) else None
+        )
+    }
+    try {
+      ClickHouseTableV2.temporalThreadLocalCHTable.set(
+        new TempClickHouseTableV2(spark, Some(withDb)))
+
+      CreateDeltaTableCommand(
+        withDb,
+        existingTableOpt,
+        operation.mode,
+        writer,
+        operation = operation,
+        tableByPath = isByPath).run(spark)
+    } finally {
+      ClickHouseTableV2.temporalThreadLocalCHTable.remove()
+    }
+
+    logInfo(s"create table ${ident.toString} successfully.")
+    loadTable(ident)
+  }
+
+  /**
+   * Creates a Delta table
+   *
+   * @param ident
+   *   The identifier of the table
+   * @param schema
+   *   The schema of the table
+   * @param partitions
+   *   The partition transforms for the table
+   * @param allTableProperties
+   *   The table properties that configure the behavior of the table or provide information about
+   *   the table
+   * @param writeOptions
+   *   Options specific to the write during table creation or replacement
+   * @param sourceQuery
+   *   A query if this CREATE request came from a CTAS or RTAS
+   * @param operation
+   *   The specific table creation mode, whether this is a Create/Replace/Create or Replace
+   */
+  private def createDeltaTable(
+      ident: Identifier,
+      schema: StructType,
+      partitions: Array[Transform],
+      allTableProperties: util.Map[String, String],
+      writeOptions: Map[String, String],
+      sourceQuery: Option[DataFrame],
+      operation: TableCreationModes.CreationMode
+  ): Table = {
+    // These two keys are tableProperties in data source v2 but not in v1, so we have to filter
+    // them out. Otherwise property consistency checks will fail.
+    val tableProperties = allTableProperties.asScala.filterKeys {
+      case TableCatalog.PROP_LOCATION => false
+      case TableCatalog.PROP_PROVIDER => false
+      case TableCatalog.PROP_COMMENT => false
+      case TableCatalog.PROP_OWNER => false
+      case TableCatalog.PROP_EXTERNAL => false
+      case "path" => false
+      case _ => true
+    }.toMap
+    val (partitionColumns, maybeBucketSpec) =
+      SparkShimLoader.getSparkShims.convertPartitionTransforms(partitions)
+    var newSchema = schema
+    var newPartitionColumns = partitionColumns
+    var newBucketSpec = maybeBucketSpec
+    val conf = spark.sessionState.conf
+
+    val isByPath = isPathIdentifier(ident)
+    if (
+      isByPath && !conf.getConf(DeltaSQLConf.DELTA_LEGACY_ALLOW_AMBIGUOUS_PATHS)
+      && allTableProperties.containsKey("location")
+      // The location property can be qualified and different from the path in the identifier, so
+      // we check `endsWith` here.
+      && Option(allTableProperties.get("location")).exists(!_.endsWith(ident.name()))
+    ) {
+      throw DeltaErrors.ambiguousPathsInCreateTableException(
+        ident.name(),
+        allTableProperties.get("location"))
+    }
+    val location = if (isByPath) {
+      Option(ident.name())
+    } else {
+      Option(allTableProperties.get("location"))
+    }
+    val id = {
+      TableIdentifier(ident.name(), ident.namespace().lastOption)
+    }
+    var locUriOpt = location.map(CatalogUtils.stringToURI)
+    val existingTableOpt = getExistingTableIfExists(id)
+    val loc = locUriOpt
+      .orElse(existingTableOpt.flatMap(_.storage.locationUri))
+      .getOrElse(spark.sessionState.catalog.defaultTablePath(id))
+    val storage = DataSource
+      .buildStorageFormatFromOptions(writeOptions)
+      .copy(locationUri = Option(loc))
+    val tableType =
+      if (location.isDefined) CatalogTableType.EXTERNAL else CatalogTableType.MANAGED
+    val commentOpt = Option(allTableProperties.get("comment"))
+
+    var tableDesc = new CatalogTable(
+      identifier = id,
+      tableType = tableType,
+      storage = storage,
+      schema = newSchema,
+      provider = Some(DeltaSourceUtils.ALT_NAME),
+      partitionColumnNames = newPartitionColumns,
+      bucketSpec = newBucketSpec,
+      properties = tableProperties,
+      comment = commentOpt
+    )
+
+    val withDb = verifyTableAndSolidify(tableDesc, None)
+
+    val writer = sourceQuery.map {
+      df =>
+        WriteIntoDelta(
+          DeltaLog.forTable(spark, new Path(loc)),
+          operation.mode,
+          new DeltaOptions(withDb.storage.properties, spark.sessionState.conf),
+          withDb.partitionColumnNames,
+          withDb.properties ++ commentOpt.map("comment" -> _),
+          df,
+          schemaInCatalog = if (newSchema != schema) Some(newSchema) else None
+        )
+    }
+
+    CreateDeltaTableCommand(
+      withDb,
+      existingTableOpt,
+      operation.mode,
+      writer,
+      operation,
+      tableByPath = isByPath).run(spark)
+
+    loadTable(ident)
+  }
+
+  /** Performs checks on the parameters provided for table creation for a ClickHouse table. */
+  private def verifyTableAndSolidify(
+      tableDesc: CatalogTable,
+      query: Option[LogicalPlan],
+      isMergeTree: Boolean = false): CatalogTable = {
+
+    if (!isMergeTree && tableDesc.bucketSpec.isDefined) {
+      throw DeltaErrors.operationNotSupportedException("Bucketing", tableDesc.identifier)
+    }
+
+    val schema = query
+      .map {
+        plan =>
+          assert(tableDesc.schema.isEmpty, "Can't specify table schema in CTAS.")
+          plan.schema.asNullable
+      }
+      .getOrElse(tableDesc.schema)
+
+    PartitioningUtils.validatePartitionColumn(
+      schema,
+      tableDesc.partitionColumnNames,
+      caseSensitive = false
+    ) // Delta is case insensitive
+
+    val validatedConfigurations = if (isMergeTree) {
+      tableDesc.properties
+    } else {
+      DeltaConfigs.validateConfigurations(tableDesc.properties)
+    }
+
+    val db = tableDesc.identifier.database.getOrElse(catalog.getCurrentDatabase)
+    val tableIdentWithDB = tableDesc.identifier.copy(database = Some(db))
+    tableDesc.copy(
+      identifier = tableIdentWithDB,
+      schema = schema,
+      properties = validatedConfigurations)
+  }
+
+  /** Checks if a table already exists for the provided identifier. */
+  def getExistingTableIfExists(table: TableIdentifier): Option[CatalogTable] = {
+    // If this is a path identifier, we cannot return an existing CatalogTable. The Create command
+    // will check the file system itself
+    if (isPathIdentifier(table)) return None
+    val tableExists = catalog.tableExists(table)
+    if (tableExists) {
+      val oldTable = catalog.getTableMetadata(table)
+      if (oldTable.tableType == CatalogTableType.VIEW) {
+        throw new AnalysisException(s"$table is a view. You may not write data into a view.")
+      }
+      if (
+        !DeltaSourceUtils.isDeltaTable(oldTable.provider) &&
+        !CHDataSourceUtils.isClickHouseTable(oldTable.provider)
+      ) {
+        throw DeltaErrors.notADeltaTable(table.table)
+      }
+      Some(oldTable)
+    } else {
+      None
+    }
+  }
+
+  private def getProvider(properties: util.Map[String, String]): String = {
+    Option(properties.get("provider")).getOrElse(ClickHouseConfig.NAME)
+  }
+
+  override def loadTable(ident: Identifier): Table = {
+    try {
+      super.loadTable(ident) match {
+        case v1: V1Table if CHDataSourceUtils.isClickHouseTable(v1.catalogTable) =>
+          new ClickHouseTableV2(
+            spark,
+            new Path(v1.catalogTable.location),
+            catalogTable = Some(v1.catalogTable),
+            tableIdentifier = Some(ident.toString))
+        case v1: V1Table if DeltaTableUtils.isDeltaTable(v1.catalogTable) =>
+          DeltaTableV2(
+            spark,
+            new Path(v1.catalogTable.location),
+            catalogTable = Some(v1.catalogTable),
+            tableIdentifier = Some(ident.toString))
+        case o =>
+          o
+      }
+    } catch {
+      case _: NoSuchDatabaseException | _: NoSuchNamespaceException | _: NoSuchTableException
+          if isPathIdentifier(ident) =>
+        newDeltaPathTable(ident)
+      case e: AnalysisException if gluePermissionError(e) && isPathIdentifier(ident) =>
+        logWarning(
+          "Received an access denied error from Glue. Assuming this " +
+            s"identifier ($ident) is path based.",
+          e)
+        newDeltaPathTable(ident)
+    }
+  }
+
+  private def newDeltaPathTable(ident: Identifier): DeltaTableV2 = {
+    if (hasClickHouseNamespace(ident)) {
+      new ClickHouseTableV2(spark, new Path(ident.name()))
+    } else {
+      DeltaTableV2(spark, new Path(ident.name()))
+    }
+  }
+
+  /** support to delete mergetree data from the external table */
+  override def purgeTable(ident: Identifier): Boolean = {
+    try {
+      loadTable(ident) match {
+        case t: ClickHouseTableV2 =>
+          val tableType = t.properties().getOrDefault("Type", "")
+          // file-based or external table
+          val isExternal = tableType.isEmpty || tableType.equalsIgnoreCase("external")
+          val tablePath = t.rootPath
+          // first delete the table metadata
+          val deletedTable = super.dropTable(ident)
+          if (deletedTable && isExternal) {
+            val fs = tablePath.getFileSystem(spark.sessionState.newHadoopConf())
+            // delete all data if there is a external table
+            fs.delete(tablePath, true)
+          }
+          true
+        case _ => super.purgeTable(ident)
+      }
+    } catch {
+      case _: Exception =>
+        false
+    }
+  }
+
+  override def stageReplace(
+      ident: Identifier,
+      schema: StructType,
+      partitions: Array[Transform],
+      properties: util.Map[String, String]): StagedTable =
+    recordFrameProfile("DeltaCatalog", "stageReplace") {
+      if (
+        CHDataSourceUtils.isClickHouseDataSourceName(getProvider(properties)) ||
+        DeltaSourceUtils.isDeltaDataSourceName(getProvider(properties))
+      ) {
+        new StagedDeltaTableV2(ident, schema, partitions, properties, TableCreationModes.Replace)
+      } else {
+        super.dropTable(ident)
+        val table = createCatalogTable(ident, schema, partitions, properties)
+        BestEffortStagedTable(ident, table, this)
+      }
+    }
+
+  override def stageCreateOrReplace(
+      ident: Identifier,
+      schema: StructType,
+      partitions: Array[Transform],
+      properties: util.Map[String, String]): StagedTable =
+    recordFrameProfile("DeltaCatalog", "stageCreateOrReplace") {
+      if (
+        CHDataSourceUtils.isClickHouseDataSourceName(getProvider(properties)) ||
+        DeltaSourceUtils.isDeltaDataSourceName(getProvider(properties))
+      ) {
+        new StagedDeltaTableV2(
+          ident,
+          schema,
+          partitions,
+          properties,
+          TableCreationModes.CreateOrReplace)
+      } else {
+        try super.dropTable(ident)
+        catch {
+          case _: NoSuchDatabaseException => // this is fine
+          case _: NoSuchTableException => // this is fine
+        }
+        val table = createCatalogTable(ident, schema, partitions, properties)
+        BestEffortStagedTable(ident, table, this)
+      }
+    }
+
+  override def stageCreate(
+      ident: Identifier,
+      schema: StructType,
+      partitions: Array[Transform],
+      properties: util.Map[String, String]): StagedTable =
+    recordFrameProfile("DeltaCatalog", "stageCreate") {
+      if (
+        CHDataSourceUtils.isClickHouseDataSourceName(getProvider(properties)) ||
+        DeltaSourceUtils.isDeltaDataSourceName(getProvider(properties))
+      ) {
+        new StagedDeltaTableV2(ident, schema, partitions, properties, TableCreationModes.Create)
+      } else {
+        val table = createCatalogTable(ident, schema, partitions, properties)
+        BestEffortStagedTable(ident, table, this)
+      }
+    }
+
+  /**
+   * A staged delta table, which creates a HiveMetaStore entry and appends data if this was a
+   * CTAS/RTAS command. We have a ugly way of using this API right now, but it's the best way to
+   * maintain old behavior compatibility between Databricks Runtime and OSS Delta Lake.
+   */
+  private class StagedDeltaTableV2(
+      ident: Identifier,
+      override val schema: StructType,
+      val partitions: Array[Transform],
+      override val properties: util.Map[String, String],
+      operation: TableCreationModes.CreationMode)
+    extends StagedTable
+    with SupportsWrite {
+
+    private var asSelectQuery: Option[DataFrame] = None
+    private var writeOptions: Map[String, String] = Map.empty
+
+    override def commitStagedChanges(): Unit =
+      recordFrameProfile("DeltaCatalog", "commitStagedChanges") {
+        val conf = spark.sessionState.conf
+        val props = new util.HashMap[String, String]()
+        // Options passed in through the SQL API will show up both with an "option." prefix and
+        // without in Spark 3.1, so we need to remove those from the properties
+        val optionsThroughProperties = properties.asScala.collect {
+          case (k, _) if k.startsWith("option.") => k.stripPrefix("option.")
+        }.toSet
+        val sqlWriteOptions = new util.HashMap[String, String]()
+        properties.asScala.foreach {
+          case (k, v) =>
+            if (!k.startsWith("option.") && !optionsThroughProperties.contains(k)) {
+              // Do not add to properties
+              props.put(k, v)
+            } else if (optionsThroughProperties.contains(k)) {
+              sqlWriteOptions.put(k, v)
+            }
+        }
+        if (writeOptions.isEmpty && !sqlWriteOptions.isEmpty) {
+          writeOptions = sqlWriteOptions.asScala.toMap
+        }
+        if (conf.getConf(DeltaSQLConf.DELTA_LEGACY_STORE_WRITER_OPTIONS_AS_PROPS)) {
+          // Legacy behavior
+          writeOptions.foreach { case (k, v) => props.put(k, v) }
+        } else {
+          writeOptions.foreach {
+            case (k, v) =>
+              // Continue putting in Delta prefixed options to avoid breaking workloads
+              if (k.toLowerCase(Locale.ROOT).startsWith("delta.")) {
+                props.put(k, v)
+              }
+          }
+        }
+        if (CHDataSourceUtils.isClickHouseDataSourceName(getProvider(properties))) {
+          createClickHouseTable(
+            ident,
+            schema,
+            partitions,
+            props,
+            writeOptions,
+            asSelectQuery,
+            operation)
+        } else {
+          createDeltaTable(ident, schema, partitions, props, writeOptions, asSelectQuery, operation)
+        }
+      }
+
+    override def name(): String = ident.name()
+
+    override def abortStagedChanges(): Unit = {}
+
+    override def capabilities(): util.Set[TableCapability] = Set(V1_BATCH_WRITE).asJava
+
+    override def newWriteBuilder(info: LogicalWriteInfo): WriteBuilder = {
+      writeOptions = info.options.asCaseSensitiveMap().asScala.toMap
+      new DeltaV1WriteBuilder
+    }
+
+    /*
+     * WriteBuilder for creating a Delta table.
+     */
+    private class DeltaV1WriteBuilder extends WriteBuilder {
+      override def build(): V1Write = new V1Write {
+        override def toInsertableRelation(): InsertableRelation = {
+          new InsertableRelation {
+            override def insert(data: DataFrame, overwrite: Boolean): Unit = {
+              asSelectQuery = Option(data)
+            }
+          }
+        }
+      }
+    }
+  }
+
+  private case class BestEffortStagedTable(ident: Identifier, table: Table, catalog: TableCatalog)
+    extends StagedTable
+    with SupportsWrite {
+    override def abortStagedChanges(): Unit = catalog.dropTable(ident)
+
+    override def commitStagedChanges(): Unit = {}
+
+    // Pass through
+    override def name(): String = table.name()
+    override def schema(): StructType = table.schema()
+    override def partitioning(): Array[Transform] = table.partitioning()
+    override def capabilities(): util.Set[TableCapability] = table.capabilities()
+    override def properties(): util.Map[String, String] = table.properties()
+
+    override def newWriteBuilder(info: LogicalWriteInfo): WriteBuilder = table match {
+      case supportsWrite: SupportsWrite => supportsWrite.newWriteBuilder(info)
+      case _ => throw DeltaErrors.unsupportedWriteStagedTable(name)
+    }
+  }
+}
+
+/**
+ * A trait for handling table access through clickhouse.`/some/path`. This is a stop-gap solution
+ * until PathIdentifiers are implemented in Apache Spark.
+ */
+trait SupportsPathIdentifier extends TableCatalog {
+  self: ClickHouseSparkCatalog =>
+
+  protected lazy val catalog: SessionCatalog = spark.sessionState.catalog
+
+  override def tableExists(ident: Identifier): Boolean = {
+    if (isPathIdentifier(ident)) {
+      val path = new Path(ident.name())
+      val fs = path.getFileSystem(spark.sessionState.newHadoopConf())
+      fs.exists(path) && fs.listStatus(path).nonEmpty
+    } else {
+      super.tableExists(ident)
+    }
+  }
+
+  protected def isPathIdentifier(ident: Identifier): Boolean = {
+    // Should be a simple check of a special PathIdentifier class in the future
+    try {
+      supportSQLOnFile && (hasClickHouseNamespace(ident) || hasDeltaNamespace(ident)) &&
+      new Path(ident.name()).isAbsolute
+    } catch {
+      case _: IllegalArgumentException => false
+    }
+  }
+
+  protected def isPathIdentifier(table: CatalogTable): Boolean = {
+    isPathIdentifier(table.identifier)
+  }
+
+  protected def isPathIdentifier(tableIdentifier: TableIdentifier): Boolean = {
+    isPathIdentifier(Identifier.of(tableIdentifier.database.toArray, tableIdentifier.table))
+  }
+
+  private def supportSQLOnFile: Boolean = spark.sessionState.conf.runSQLonFile
+
+  protected def hasClickHouseNamespace(ident: Identifier): Boolean = {
+    ident.namespace().length == 1 &&
+    CHDataSourceUtils.isClickHouseDataSourceName(ident.namespace().head)
+  }
+
+  protected def hasDeltaNamespace(ident: Identifier): Boolean = {
+    ident.namespace().length == 1 && DeltaSourceUtils.isDeltaDataSourceName(ident.namespace().head)
+  }
+}
diff --git a/backends-clickhouse/src/main/delta-32/io/delta/tables/ClickhouseTable.scala b/backends-clickhouse/src/main/delta-32/io/delta/tables/ClickhouseTable.scala
new file mode 100644
index 000000000000..790b4c1f8a37
--- /dev/null
+++ b/backends-clickhouse/src/main/delta-32/io/delta/tables/ClickhouseTable.scala
@@ -0,0 +1,134 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.delta.tables
+
+import org.apache.spark.sql.{Dataset, Row, SparkSession}
+import org.apache.spark.sql.delta.{DeltaErrors, DeltaTableIdentifier, DeltaTableUtils}
+import org.apache.spark.sql.delta.catalog.ClickHouseTableV2
+
+import org.apache.hadoop.fs.Path
+
+import scala.collection.JavaConverters._
+
+class ClickhouseTable(
+    @transient private val _df: Dataset[Row],
+    @transient private val table: ClickHouseTableV2)
+  extends DeltaTable(_df, table) {
+
+  override def optimize(): DeltaOptimizeBuilder = {
+    DeltaOptimizeBuilder(table)
+  }
+}
+
+object ClickhouseTable {
+
+  /**
+   * Instantiate a [[DeltaTable]] object representing the data at the given path, If the given path
+   * is invalid (i.e. either no table exists or an existing table is not a Delta table), it throws a
+   * `not a Delta table` error.
+   *
+   * Note: This uses the active SparkSession in the current thread to read the table data. Hence,
+   * this throws error if active SparkSession has not been set, that is,
+   * `SparkSession.getActiveSession()` is empty.
+   *
+   * @since 0.3.0
+   */
+  def forPath(path: String): DeltaTable = {
+    val sparkSession = SparkSession.getActiveSession.getOrElse {
+      throw DeltaErrors.activeSparkSessionNotFound()
+    }
+    forPath(sparkSession, path)
+  }
+
+  /**
+   * Instantiate a [[DeltaTable]] object representing the data at the given path, If the given path
+   * is invalid (i.e. either no table exists or an existing table is not a Delta table), it throws a
+   * `not a Delta table` error.
+   *
+   * @since 0.3.0
+   */
+  def forPath(sparkSession: SparkSession, path: String): DeltaTable = {
+    forPath(sparkSession, path, Map.empty[String, String])
+  }
+
+  /**
+   * Instantiate a [[DeltaTable]] object representing the data at the given path, If the given path
+   * is invalid (i.e. either no table exists or an existing table is not a Delta table), it throws a
+   * `not a Delta table` error.
+   *
+   * @param hadoopConf
+   *   Hadoop configuration starting with "fs." or "dfs." will be picked up by `DeltaTable` to
+   *   access the file system when executing queries. Other configurations will not be allowed.
+   *
+   * {{{
+   *   val hadoopConf = Map(
+   *     "fs.s3a.access.key" -> "<access-key>",
+   *     "fs.s3a.secret.key" -> "<secret-key>"
+   *   )
+   *   DeltaTable.forPath(spark, "/path/to/table", hadoopConf)
+   * }}}
+   * @since 2.2.0
+   */
+  def forPath(
+      sparkSession: SparkSession,
+      path: String,
+      hadoopConf: scala.collection.Map[String, String]): DeltaTable = {
+    // We only pass hadoopConf so that we won't pass any unsafe options to Delta.
+    val badOptions = hadoopConf.filterKeys {
+      k => !DeltaTableUtils.validDeltaTableHadoopPrefixes.exists(k.startsWith)
+    }.toMap
+    if (!badOptions.isEmpty) {
+      throw DeltaErrors.unsupportedDeltaTableForPathHadoopConf(badOptions)
+    }
+    val fileSystemOptions: Map[String, String] = hadoopConf.toMap
+    val hdpPath = new Path(path)
+    if (DeltaTableUtils.isDeltaTable(sparkSession, hdpPath, fileSystemOptions)) {
+      new ClickhouseTable(
+        sparkSession.read.format("clickhouse").options(fileSystemOptions).load(path),
+        new ClickHouseTableV2(spark = sparkSession, path = hdpPath, options = fileSystemOptions)
+      )
+    } else {
+      throw DeltaErrors.notADeltaTableException(DeltaTableIdentifier(path = Some(path)))
+    }
+  }
+
+  /**
+   * Java friendly API to instantiate a [[DeltaTable]] object representing the data at the given
+   * path, If the given path is invalid (i.e. either no table exists or an existing table is not a
+   * Delta table), it throws a `not a Delta table` error.
+   *
+   * @param hadoopConf
+   *   Hadoop configuration starting with "fs." or "dfs." will be picked up by `DeltaTable` to
+   *   access the file system when executing queries. Other configurations will be ignored.
+   *
+   * {{{
+   *   val hadoopConf = Map(
+   *     "fs.s3a.access.key" -> "<access-key>",
+   *     "fs.s3a.secret.key", "<secret-key>"
+   *   )
+   *   DeltaTable.forPath(spark, "/path/to/table", hadoopConf)
+   * }}}
+   * @since 2.2.0
+   */
+  def forPath(
+      sparkSession: SparkSession,
+      path: String,
+      hadoopConf: java.util.Map[String, String]): DeltaTable = {
+    val fsOptions = hadoopConf.asScala.toMap
+    forPath(sparkSession, path, fsOptions)
+  }
+}
diff --git a/backends-clickhouse/src/main/delta-32/org/apache/spark/sql/delta/ClickhouseOptimisticTransaction.scala b/backends-clickhouse/src/main/delta-32/org/apache/spark/sql/delta/ClickhouseOptimisticTransaction.scala
new file mode 100644
index 000000000000..d8ab2c1d078c
--- /dev/null
+++ b/backends-clickhouse/src/main/delta-32/org/apache/spark/sql/delta/ClickhouseOptimisticTransaction.scala
@@ -0,0 +1,193 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.delta
+
+import org.apache.gluten.backendsapi.clickhouse.CHBackendSettings
+import org.apache.gluten.execution.ColumnarToRowExecBase
+
+import org.apache.spark.SparkException
+import org.apache.spark.sql.Dataset
+import org.apache.spark.sql.catalyst.catalog.CatalogTable
+import org.apache.spark.sql.delta.actions._
+import org.apache.spark.sql.delta.catalog.ClickHouseTableV2
+import org.apache.spark.sql.delta.constraints.{Constraint, Constraints}
+import org.apache.spark.sql.delta.files.MergeTreeCommitProtocol
+import org.apache.spark.sql.delta.schema.InvariantViolationException
+import org.apache.spark.sql.delta.sources.DeltaSQLConf
+import org.apache.spark.sql.execution.{SparkPlan, SQLExecution}
+import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec
+import org.apache.spark.sql.execution.datasources.{BasicWriteJobStatsTracker, FakeRowAdaptor, FileFormatWriter, WriteJobStatsTracker}
+import org.apache.spark.sql.execution.datasources.v1.clickhouse.MergeTreeFileFormatWriter
+import org.apache.spark.sql.execution.datasources.v2.clickhouse.ClickHouseConfig
+import org.apache.spark.util.SerializableConfiguration
+
+import org.apache.commons.lang3.exception.ExceptionUtils
+
+import scala.collection.mutable.ListBuffer
+
+object ClickhouseOptimisticTransaction {}
+
+class ClickhouseOptimisticTransaction(
+    override val deltaLog: DeltaLog,
+    override val catalogTable: Option[CatalogTable],
+    override val snapshot: Snapshot)
+  extends OptimisticTransaction(deltaLog, catalogTable, snapshot) {
+
+  def this(
+      deltaLog: DeltaLog,
+      catalogTable: Option[CatalogTable],
+      snapshotOpt: Option[Snapshot] = None) {
+    this(
+      deltaLog,
+      catalogTable,
+      snapshotOpt.getOrElse(deltaLog.update())
+    )
+  }
+
+  def insertFakeRowAdaptor(queryPlan: SparkPlan): SparkPlan = queryPlan match {
+    // if the child is columnar, we can just wrap&transfer the columnar data
+    case c2r: ColumnarToRowExecBase =>
+      FakeRowAdaptor(c2r.child)
+    // If the child is aqe, we make aqe "support columnar",
+    // then aqe itself will guarantee to generate columnar outputs.
+    // So FakeRowAdaptor will always consumes columnar data,
+    // thus avoiding the case of c2r->aqe->r2c->writer
+    case aqe: AdaptiveSparkPlanExec =>
+      FakeRowAdaptor(
+        AdaptiveSparkPlanExec(
+          aqe.inputPlan,
+          aqe.context,
+          aqe.preprocessingRules,
+          aqe.isSubquery,
+          supportsColumnar = true
+        ))
+    case other => FakeRowAdaptor(other)
+  }
+
+  override def writeFiles(
+      inputData: Dataset[_],
+      writeOptions: Option[DeltaOptions],
+      additionalConstraints: Seq[Constraint]): Seq[FileAction] = {
+    if (ClickHouseConfig.isMergeTreeFormatEngine(metadata.configuration)) {
+      hasWritten = true
+
+      val spark = inputData.sparkSession
+      val (data, partitionSchema) = performCDCPartition(inputData)
+      val outputPath = deltaLog.dataPath
+
+      val (queryExecution, output, generatedColumnConstraints, _) =
+        normalizeData(deltaLog, writeOptions, data)
+      val partitioningColumns = getPartitioningColumns(partitionSchema, output)
+
+      val committer =
+        new MergeTreeCommitProtocol("delta-mergetree", outputPath.toString, None, None)
+
+      // val (optionalStatsTracker, _) =
+      //   getOptionalStatsTrackerAndStatsCollection(output, outputPath, partitionSchema, data)
+      val (optionalStatsTracker, _) = (None, None)
+
+      val constraints =
+        Constraints.getAll(metadata, spark) ++ generatedColumnConstraints ++ additionalConstraints
+
+      SQLExecution.withNewExecutionId(queryExecution, Option("deltaTransactionalWrite")) {
+        val outputSpec = FileFormatWriter.OutputSpec(outputPath.toString, Map.empty, output)
+
+        val queryPlan = queryExecution.executedPlan
+        val newQueryPlan = insertFakeRowAdaptor(queryPlan)
+
+        val statsTrackers: ListBuffer[WriteJobStatsTracker] = ListBuffer()
+
+        if (spark.conf.get(DeltaSQLConf.DELTA_HISTORY_METRICS_ENABLED)) {
+          val basicWriteJobStatsTracker = new BasicWriteJobStatsTracker(
+            new SerializableConfiguration(deltaLog.newDeltaHadoopConf()),
+            BasicWriteJobStatsTracker.metrics)
+          //        registerSQLMetrics(spark, basicWriteJobStatsTracker.driverSideMetrics)
+          statsTrackers.append(basicWriteJobStatsTracker)
+        }
+
+        // Iceberg spec requires partition columns in data files
+        val writePartitionColumns = IcebergCompat.isAnyEnabled(metadata)
+        // Retain only a minimal selection of Spark writer options to avoid any potential
+        // compatibility issues
+        var options = (writeOptions match {
+          case None => Map.empty[String, String]
+          case Some(writeOptions) =>
+            writeOptions.options.filterKeys {
+              key =>
+                key.equalsIgnoreCase(DeltaOptions.MAX_RECORDS_PER_FILE) ||
+                key.equalsIgnoreCase(DeltaOptions.COMPRESSION)
+            }.toMap
+        }) + (DeltaOptions.WRITE_PARTITION_COLUMNS -> writePartitionColumns.toString)
+
+        spark.conf.getAll.foreach(
+          entry => {
+            if (
+              entry._1.startsWith(s"${CHBackendSettings.getBackendConfigPrefix}.runtime_settings")
+              || entry._1.equalsIgnoreCase(DeltaSQLConf.DELTA_OPTIMIZE_MIN_FILE_SIZE.key)
+            ) {
+              options += (entry._1 -> entry._2)
+            }
+          })
+
+        try {
+          val tableV2 = ClickHouseTableV2.getTable(deltaLog)
+          MergeTreeFileFormatWriter.write(
+            sparkSession = spark,
+            plan = newQueryPlan,
+            fileFormat = tableV2.getFileFormat(protocol, metadata),
+            // formats.
+            committer = committer,
+            outputSpec = outputSpec,
+            // scalastyle:off deltahadoopconfiguration
+            hadoopConf = spark.sessionState
+              .newHadoopConfWithOptions(metadata.configuration ++ deltaLog.options),
+            // scalastyle:on deltahadoopconfiguration
+            orderByKeyOption = tableV2.orderByKeyOption,
+            lowCardKeyOption = tableV2.lowCardKeyOption,
+            minmaxIndexKeyOption = tableV2.minmaxIndexKeyOption,
+            bfIndexKeyOption = tableV2.bfIndexKeyOption,
+            setIndexKeyOption = tableV2.setIndexKeyOption,
+            primaryKeyOption = tableV2.primaryKeyOption,
+            partitionColumns = partitioningColumns,
+            bucketSpec = tableV2.bucketOption,
+            statsTrackers = optionalStatsTracker.toSeq ++ statsTrackers,
+            options = options,
+            constraints = constraints
+          )
+        } catch {
+          case s: SparkException =>
+            // Pull an InvariantViolationException up to the top level if it was the root cause.
+            val violationException = ExceptionUtils.getRootCause(s)
+            if (violationException.isInstanceOf[InvariantViolationException]) {
+              throw violationException
+            } else {
+              throw s
+            }
+        }
+      }
+      committer.addedStatuses.toSeq ++ committer.changeFiles
+    } else {
+      // TODO: support native delta parquet write
+      // 1. insert FakeRowAdaptor
+      // 2. DeltaInvariantCheckerExec transform
+      // 3. DeltaTaskStatisticsTracker collect null count / min values / max values
+      // 4. set the parameters 'staticPartitionWriteOnly', 'isNativeAppliable',
+      //    'nativeFormat' in the LocalProperty of the sparkcontext
+      super.writeFiles(inputData, writeOptions, additionalConstraints)
+    }
+  }
+}
diff --git a/backends-clickhouse/src/main/delta-32/org/apache/spark/sql/delta/DeltaAdapter.scala b/backends-clickhouse/src/main/delta-32/org/apache/spark/sql/delta/DeltaAdapter.scala
new file mode 100644
index 000000000000..8a9c5585e888
--- /dev/null
+++ b/backends-clickhouse/src/main/delta-32/org/apache/spark/sql/delta/DeltaAdapter.scala
@@ -0,0 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.delta
+
+object DeltaAdapter extends DeltaAdapterTrait {
+  override def snapshot(deltaLog: DeltaLog): Snapshot = deltaLog.unsafeVolatileSnapshot
+}
diff --git a/backends-clickhouse/src/main/delta-32/org/apache/spark/sql/delta/DeltaLog.scala b/backends-clickhouse/src/main/delta-32/org/apache/spark/sql/delta/DeltaLog.scala
new file mode 100644
index 000000000000..dca14d7fb1fb
--- /dev/null
+++ b/backends-clickhouse/src/main/delta-32/org/apache/spark/sql/delta/DeltaLog.scala
@@ -0,0 +1,1106 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.delta
+
+// scalastyle:off import.ordering.noEmptyLine
+import java.io.IOException
+import java.lang.ref.WeakReference
+import java.net.URI
+import java.util.concurrent.TimeUnit
+
+import scala.collection.JavaConverters._
+import scala.collection.mutable
+import scala.util.control.NonFatal
+
+import com.databricks.spark.util.TagDefinitions._
+import org.apache.spark.sql.delta.actions._
+import org.apache.spark.sql.delta.catalog.ClickHouseTableV2
+import org.apache.spark.sql.delta.commands.WriteIntoDelta
+import org.apache.spark.sql.delta.files.{TahoeBatchFileIndex, TahoeLogFileIndex}
+import org.apache.spark.sql.delta.managedcommit.ManagedCommitUtils
+import org.apache.spark.sql.delta.metering.DeltaLogging
+import org.apache.spark.sql.delta.schema.{SchemaMergingUtils, SchemaUtils}
+import org.apache.spark.sql.delta.sources._
+import org.apache.spark.sql.delta.storage.LogStoreProvider
+import org.apache.spark.sql.delta.util.FileNames
+import com.google.common.cache.{Cache, CacheBuilder, RemovalNotification}
+import org.apache.hadoop.conf.Configuration
+import org.apache.hadoop.fs.{FileStatus, FileSystem, Path}
+
+import org.apache.spark.sql._
+import org.apache.spark.sql.catalyst.{FileSourceOptions, TableIdentifier}
+import org.apache.spark.sql.catalyst.analysis.{Resolver, UnresolvedAttribute}
+import org.apache.spark.sql.catalyst.catalog.{BucketSpec, CatalogTable}
+import org.apache.spark.sql.catalyst.expressions.{And, Attribute, Cast, Expression, Literal}
+import org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper
+import org.apache.spark.sql.catalyst.util.FailFastMode
+import org.apache.spark.sql.execution.datasources._
+import org.apache.spark.sql.execution.datasources.v2.clickhouse.ClickHouseConfig
+import org.apache.spark.sql.expressions.UserDefinedFunction
+import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.sources.{BaseRelation, InsertableRelation}
+import org.apache.spark.sql.types.{StructField, StructType}
+import org.apache.spark.util._
+
+/**
+ * Gluten overwrite Delta:
+ *
+ * This file is copied from Delta 3.2.0, it is modified to overcome the following issues:
+ *   1. return ClickhouseOptimisticTransaction
+ *   2. return DeltaMergeTreeFileFormat
+ *   3. create HadoopFsRelation with the bucket options
+ */
+
+/**
+ * Used to query the current state of the log as well as modify it by adding
+ * new atomic collections of actions.
+ *
+ * Internally, this class implements an optimistic concurrency control
+ * algorithm to handle multiple readers or writers. Any single read
+ * is guaranteed to see a consistent snapshot of the table.
+ *
+ * @param logPath Path of the Delta log JSONs.
+ * @param dataPath Path of the data files.
+ * @param options Filesystem options filtered from `allOptions`.
+ * @param allOptions All options provided by the user, for example via `df.write.option()`. This
+ *                   includes but not limited to filesystem and table properties.
+ * @param clock Clock to be used when starting a new transaction.
+ */
+class DeltaLog private(
+    val logPath: Path,
+    val dataPath: Path,
+    val options: Map[String, String],
+    val allOptions: Map[String, String],
+    val clock: Clock
+  ) extends Checkpoints
+  with MetadataCleanup
+  with LogStoreProvider
+  with SnapshotManagement
+  with DeltaFileFormat
+  with ProvidesUniFormConverters
+  with ReadChecksum {
+
+  import org.apache.spark.sql.delta.files.TahoeFileIndex
+
+  /**
+   * Path to sidecar directory.
+   * This is intentionally kept `lazy val` as otherwise any other constructor codepaths in DeltaLog
+   * (e.g. SnapshotManagement etc) will see it as null as they are executed before this line is
+   * called.
+   */
+  lazy val sidecarDirPath: Path = FileNames.sidecarDirPath(logPath)
+
+
+  protected def spark = SparkSession.active
+
+  checkRequiredConfigurations()
+
+  /**
+   * Keep a reference to `SparkContext` used to create `DeltaLog`. `DeltaLog` cannot be used when
+   * `SparkContext` is stopped. We keep the reference so that we can check whether the cache is
+   * still valid and drop invalid `DeltaLog`` objects.
+   */
+  private val sparkContext = new WeakReference(spark.sparkContext)
+
+  /**
+   * Returns the Hadoop [[Configuration]] object which can be used to access the file system. All
+   * Delta code should use this method to create the Hadoop [[Configuration]] object, so that the
+   * hadoop file system configurations specified in DataFrame options will come into effect.
+   */
+  // scalastyle:off deltahadoopconfiguration
+  final def newDeltaHadoopConf(): Configuration =
+    spark.sessionState.newHadoopConfWithOptions(options)
+  // scalastyle:on deltahadoopconfiguration
+
+  /** Used to read and write physical log files and checkpoints. */
+  lazy val store = createLogStore(spark)
+
+  /** Delta History Manager containing version and commit history. */
+  lazy val history = new DeltaHistoryManager(
+    this, spark.sessionState.conf.getConf(DeltaSQLConf.DELTA_HISTORY_PAR_SEARCH_THRESHOLD))
+
+  /* --------------- *
+   |  Configuration  |
+   * --------------- */
+
+  /**
+   * The max lineage length of a Snapshot before Delta forces to build a Snapshot from scratch.
+   * Delta will build a Snapshot on top of the previous one if it doesn't see a checkpoint.
+   * However, there is a race condition that when two writers are writing at the same time,
+   * a writer may fail to pick up checkpoints written by another one, and the lineage will grow
+   * and finally cause StackOverflowError. Hence we have to force to build a Snapshot from scratch
+   * when the lineage length is too large to avoid hitting StackOverflowError.
+   */
+  def maxSnapshotLineageLength: Int =
+    spark.sessionState.conf.getConf(DeltaSQLConf.DELTA_MAX_SNAPSHOT_LINEAGE_LENGTH)
+
+  /** The unique identifier for this table. */
+  def tableId: String = unsafeVolatileMetadata.id // safe because table id never changes
+
+  /**
+   * Combines the tableId with the path of the table to ensure uniqueness. Normally `tableId`
+   * should be globally unique, but nothing stops users from copying a Delta table directly to
+   * a separate location, where the transaction log is copied directly, causing the tableIds to
+   * match. When users mutate the copied table, and then try to perform some checks joining the
+   * two tables, optimizations that depend on `tableId` alone may not be correct. Hence we use a
+   * composite id.
+   */
+  private[delta] def compositeId: (String, Path) = tableId -> dataPath
+
+  /**
+   * Creates a [[LogicalRelation]] for a given [[DeltaLogFileIndex]], with all necessary file source
+   * options taken from the Delta Log. All reads of Delta metadata files should use this method.
+   */
+  def indexToRelation(
+      index: DeltaLogFileIndex,
+      schema: StructType = Action.logSchema): LogicalRelation = {
+    DeltaLog.indexToRelation(spark, index, options, schema)
+  }
+
+  /**
+   * Load the data using the FileIndex. This allows us to skip many checks that add overhead, e.g.
+   * file existence checks, partitioning schema inference.
+   */
+  def loadIndex(
+      index: DeltaLogFileIndex,
+      schema: StructType = Action.logSchema): DataFrame = {
+    Dataset.ofRows(spark, indexToRelation(index, schema))
+  }
+
+  /* ------------------ *
+   |  Delta Management  |
+   * ------------------ */
+
+  /**
+   * Returns a new [[OptimisticTransaction]] that can be used to read the current state of the log
+   * and then commit updates. The reads and updates will be checked for logical conflicts with any
+   * concurrent writes to the log, and post-commit hooks can be used to notify the table's catalog
+   * of schema changes, etc.
+   *
+   * Note that all reads in a transaction must go through the returned transaction object, and not
+   * directly to the [[DeltaLog]] otherwise they will not be checked for conflicts.
+   *
+   * @param catalogTableOpt The [[CatalogTable]] for the table this transaction updates. Passing
+   * None asserts this is a path-based table with no catalog entry.
+   *
+   * @param snapshotOpt THe [[Snapshot]] this transaction should use, if not latest.
+   */
+  def startTransaction(
+      catalogTableOpt: Option[CatalogTable],
+      snapshotOpt: Option[Snapshot] = None): OptimisticTransaction = {
+    TransactionExecutionObserver.threadObserver.get().startingTransaction {
+      // --- modified start
+      new ClickhouseOptimisticTransaction(this, catalogTableOpt, snapshotOpt)
+      // --- modified end
+    }
+  }
+
+  /** Legacy/compat overload that does not require catalog table information. Avoid prod use. */
+  // --- modified start
+  // @deprecated("Please use the CatalogTable overload instead", "3.0")
+  // --- modified end
+  def startTransaction(): OptimisticTransaction = {
+    startTransaction(catalogTableOpt = None, snapshotOpt = None)
+  }
+
+  /**
+   * Execute a piece of code within a new [[OptimisticTransaction]]. Reads/write sets will
+   * be recorded for this table, and all other tables will be read
+   * at a snapshot that is pinned on the first access.
+   *
+   * @param catalogTableOpt The [[CatalogTable]] for the table this transaction updates. Passing
+   * None asserts this is a path-based table with no catalog entry.
+   *
+   * @param snapshotOpt THe [[Snapshot]] this transaction should use, if not latest.
+   * @note This uses thread-local variable to make the active transaction visible. So do not use
+   *       multi-threaded code in the provided thunk.
+   */
+  def withNewTransaction[T](
+      catalogTableOpt: Option[CatalogTable],
+      snapshotOpt: Option[Snapshot] = None)(
+      thunk: OptimisticTransaction => T): T = {
+    try {
+      val txn = startTransaction(catalogTableOpt, snapshotOpt)
+      OptimisticTransaction.setActive(txn)
+      thunk(txn)
+    } finally {
+      OptimisticTransaction.clearActive()
+    }
+  }
+
+  /** Legacy/compat overload that does not require catalog table information. Avoid prod use. */
+  @deprecated("Please use the CatalogTable overload instead", "3.0")
+  def withNewTransaction[T](thunk: OptimisticTransaction => T): T = {
+    try {
+      val txn = startTransaction()
+      OptimisticTransaction.setActive(txn)
+      thunk(txn)
+    } finally {
+      OptimisticTransaction.clearActive()
+    }
+  }
+
+
+  /**
+   * Upgrade the table's protocol version, by default to the maximum recognized reader and writer
+   * versions in this Delta release. This method only upgrades protocol version, and will fail if
+   * the new protocol version is not a superset of the original one used by the snapshot.
+   */
+  def upgradeProtocol(
+      catalogTable: Option[CatalogTable],
+      snapshot: Snapshot,
+      newVersion: Protocol): Unit = {
+    val currentVersion = snapshot.protocol
+    if (newVersion == currentVersion) {
+      logConsole(s"Table $dataPath is already at protocol version $newVersion.")
+      return
+    }
+    if (!currentVersion.canUpgradeTo(newVersion)) {
+      throw new ProtocolDowngradeException(currentVersion, newVersion)
+    }
+
+    val txn = startTransaction(catalogTable, Some(snapshot))
+    try {
+      SchemaMergingUtils.checkColumnNameDuplication(txn.metadata.schema, "in the table schema")
+    } catch {
+      case e: AnalysisException =>
+        throw DeltaErrors.duplicateColumnsOnUpdateTable(e)
+    }
+    txn.commit(Seq(newVersion), DeltaOperations.UpgradeProtocol(newVersion))
+    logConsole(s"Upgraded table at $dataPath to $newVersion.")
+  }
+
+  /**
+   * Get all actions starting from "startVersion" (inclusive). If `startVersion` doesn't exist,
+   * return an empty Iterator.
+   * Callers are encouraged to use the other override which takes the endVersion if available to
+   * avoid I/O and improve performance of this method.
+   */
+  def getChanges(
+      startVersion: Long,
+      failOnDataLoss: Boolean = false): Iterator[(Long, Seq[Action])] = {
+    getChangeLogFiles(startVersion, failOnDataLoss).map { case (version, status) =>
+      (version, store.read(status, newDeltaHadoopConf()).map(Action.fromJson(_)))
+    }
+  }
+
+  private[sql] def getChanges(
+      startVersion: Long,
+      endVersion: Long,
+      failOnDataLoss: Boolean): Iterator[(Long, Seq[Action])] = {
+    getChangeLogFiles(startVersion, endVersion, failOnDataLoss).map { case (version, status) =>
+      (version, store.read(status, newDeltaHadoopConf()).map(Action.fromJson(_)))
+    }
+  }
+
+  private[sql] def getChangeLogFiles(
+      startVersion: Long,
+      endVersion: Long,
+      failOnDataLoss: Boolean): Iterator[(Long, FileStatus)] = {
+    implicit class IteratorWithStopAtHelper[T](underlying: Iterator[T]) {
+      // This method is used to stop the iterator when the condition is met.
+      def stopAt(stopAtFunc: (T) => Boolean): Iterator[T] = new Iterator[T] {
+        var shouldStop = false
+
+        override def hasNext: Boolean = !shouldStop && underlying.hasNext
+
+        override def next(): T = {
+          val v = underlying.next()
+          shouldStop = stopAtFunc(v)
+          v
+        }
+      }
+    }
+
+    getChangeLogFiles(startVersion, failOnDataLoss)
+      // takeWhile always looks at one extra item, which can trigger unnecessary work. Instead, we
+      // stop if we've seen the item we believe should be the last interesting item, without
+      // examining the one that follows.
+      .stopAt { case (version, _) => version >= endVersion }
+      // The last element in this iterator may not be <= endVersion, so we need to filter it out.
+      .takeWhile { case (version, _) => version <= endVersion }
+  }
+
+  /**
+   * Get access to all actions starting from "startVersion" (inclusive) via [[FileStatus]].
+   * If `startVersion` doesn't exist, return an empty Iterator.
+   * Callers are encouraged to use the other override which takes the endVersion if available to
+   * avoid I/O and improve performance of this method.
+   */
+  def getChangeLogFiles(
+      startVersion: Long,
+      failOnDataLoss: Boolean = false): Iterator[(Long, FileStatus)] = {
+    val deltasWithVersion = ManagedCommitUtils.commitFilesIterator(this, startVersion)
+    // Subtract 1 to ensure that we have the same check for the inclusive startVersion
+    var lastSeenVersion = startVersion - 1
+    deltasWithVersion.map { case (status, version) =>
+      if (failOnDataLoss && version > lastSeenVersion + 1) {
+        throw DeltaErrors.failOnDataLossException(lastSeenVersion + 1, version)
+      }
+      lastSeenVersion = version
+      (version, status)
+    }
+  }
+
+  /* --------------------- *
+   |  Protocol validation  |
+   * --------------------- */
+
+  /**
+   * Asserts the highest protocol supported by this client is not less than what required by the
+   * table for performing read or write operations. This ensures the client to support a
+   * greater-or-equal protocol versions and recognizes/supports all features enabled by the table.
+   *
+   * The operation type to be checked is passed as a string in `readOrWrite`. Valid values are
+   * `read` and `write`.
+   */
+  private def protocolCheck(tableProtocol: Protocol, readOrWrite: String): Unit = {
+    val clientSupportedProtocol = Action.supportedProtocolVersion()
+    // Depending on the operation, pull related protocol versions out of Protocol objects.
+    // `getEnabledFeatures` is a pointer to pull reader/writer features out of a Protocol.
+    val (clientSupportedVersions, tableRequiredVersion, getEnabledFeatures) = readOrWrite match {
+      case "read" => (
+        Action.supportedReaderVersionNumbers,
+        tableProtocol.minReaderVersion,
+        (f: Protocol) => f.readerFeatureNames)
+      case "write" => (
+        Action.supportedWriterVersionNumbers,
+        tableProtocol.minWriterVersion,
+        (f: Protocol) => f.writerFeatureNames)
+      case _ =>
+        throw new IllegalArgumentException("Table operation must be either `read` or `write`.")
+    }
+
+    // Check is complete when both the protocol version and all referenced features are supported.
+    val clientSupportedFeatureNames = getEnabledFeatures(clientSupportedProtocol)
+    val tableEnabledFeatureNames = getEnabledFeatures(tableProtocol)
+    if (tableEnabledFeatureNames.subsetOf(clientSupportedFeatureNames) &&
+      clientSupportedVersions.contains(tableRequiredVersion)) {
+      return
+    }
+
+    // Otherwise, either the protocol version, or few features referenced by the table, is
+    // unsupported.
+    val clientUnsupportedFeatureNames =
+      tableEnabledFeatureNames.diff(clientSupportedFeatureNames)
+    // Prepare event log constants and the appropriate error message handler.
+    val (opType, versionKey, unsupportedFeaturesException) = readOrWrite match {
+      case "read" => (
+          "delta.protocol.failure.read",
+          "minReaderVersion",
+          DeltaErrors.unsupportedReaderTableFeaturesInTableException _)
+      case "write" => (
+          "delta.protocol.failure.write",
+          "minWriterVersion",
+          DeltaErrors.unsupportedWriterTableFeaturesInTableException _)
+    }
+    recordDeltaEvent(
+      this,
+      opType,
+      data = Map(
+        "clientVersion" -> clientSupportedVersions.max,
+        versionKey -> tableRequiredVersion,
+        "clientFeatures" -> clientSupportedFeatureNames.mkString(","),
+        "clientUnsupportedFeatures" -> clientUnsupportedFeatureNames.mkString(",")))
+    if (!clientSupportedVersions.contains(tableRequiredVersion)) {
+      throw new InvalidProtocolVersionException(
+        dataPath.toString(),
+        tableProtocol.minReaderVersion,
+        tableProtocol.minWriterVersion,
+        Action.supportedReaderVersionNumbers.toSeq,
+        Action.supportedWriterVersionNumbers.toSeq)
+    } else {
+      throw unsupportedFeaturesException(dataPath.toString(), clientUnsupportedFeatureNames)
+    }
+  }
+
+  /**
+   * Asserts that the table's protocol enabled all features that are active in the metadata.
+   *
+   * A mismatch shouldn't happen when the table has gone through a proper write process because we
+   * require all active features during writes. However, other clients may void this guarantee.
+   */
+  def assertTableFeaturesMatchMetadata(
+      targetProtocol: Protocol,
+      targetMetadata: Metadata): Unit = {
+    if (!targetProtocol.supportsReaderFeatures && !targetProtocol.supportsWriterFeatures) return
+
+    val protocolEnabledFeatures = targetProtocol.writerFeatureNames
+      .flatMap(TableFeature.featureNameToFeature)
+    val activeFeatures =
+      Protocol.extractAutomaticallyEnabledFeatures(spark, targetMetadata, Some(targetProtocol))
+    val activeButNotEnabled = activeFeatures.diff(protocolEnabledFeatures)
+    if (activeButNotEnabled.nonEmpty) {
+      throw DeltaErrors.tableFeatureMismatchException(activeButNotEnabled.map(_.name))
+    }
+  }
+
+  /**
+   * Asserts that the client is up to date with the protocol and allowed to read the table that is
+   * using the given `protocol`.
+   */
+  def protocolRead(protocol: Protocol): Unit = {
+    protocolCheck(protocol, "read")
+  }
+
+  /**
+   * Asserts that the client is up to date with the protocol and allowed to write to the table
+   * that is using the given `protocol`.
+   */
+  def protocolWrite(protocol: Protocol): Unit = {
+    protocolCheck(protocol, "write")
+  }
+
+  /* ---------------------------------------- *
+   |  Log Directory Management and Retention  |
+   * ---------------------------------------- */
+
+  /**
+   * Whether a Delta table exists at this directory.
+   * It is okay to use the cached volatile snapshot here, since the worst case is that the table
+   * has recently started existing which hasn't been picked up here. If so, any subsequent command
+   * that updates the table will see the right value.
+   */
+  def tableExists: Boolean = unsafeVolatileSnapshot.version >= 0
+
+  def isSameLogAs(otherLog: DeltaLog): Boolean = this.compositeId == otherLog.compositeId
+
+  /** Creates the log directory if it does not exist. */
+  def ensureLogDirectoryExist(): Unit = {
+    val fs = logPath.getFileSystem(newDeltaHadoopConf())
+    def createDirIfNotExists(path: Path): Unit = {
+      // Optimistically attempt to create the directory first without checking its existence.
+      // This is efficient because we're assuming it's more likely that the directory doesn't
+      // exist and it saves an filesystem existence check in that case.
+      val (success, mkdirsIOExceptionOpt) = try {
+        // Return value of false should mean the directory already existed (not an error) but
+        // we will verify below because we're paranoid about buggy FileSystem implementations.
+        (fs.mkdirs(path), None)
+      } catch {
+        // A FileAlreadyExistsException is expected if a non-directory object exists but an explicit
+        // check is needed because buggy Hadoop FileSystem.mkdir wrongly throws the exception even
+        // on existing directories.
+        case io: IOException =>
+          val dirExists =
+            try {
+              fs.getFileStatus(path).isDirectory
+            } catch {
+              case NonFatal(_) => false
+            }
+          (dirExists, Some(io))
+      }
+      if (!success) {
+        throw DeltaErrors.cannotCreateLogPathException(
+          logPath = logPath.toString,
+          cause = mkdirsIOExceptionOpt.orNull)
+      }
+    }
+    createDirIfNotExists(FileNames.commitDirPath(logPath))
+  }
+
+  /**
+   * Create the log directory. Unlike `ensureLogDirectoryExist`, this method doesn't check whether
+   * the log directory exists and it will ignore the return value of `mkdirs`.
+   */
+  def createLogDirectory(): Unit = {
+    logPath.getFileSystem(newDeltaHadoopConf()).mkdirs(logPath)
+  }
+
+  /* ------------  *
+   |  Integration  |
+   * ------------  */
+
+  /**
+   * Returns a [[org.apache.spark.sql.DataFrame]] containing the new files within the specified
+   * version range.
+   */
+  def createDataFrame(
+      snapshot: SnapshotDescriptor,
+      addFiles: Seq[AddFile],
+      isStreaming: Boolean = false,
+      actionTypeOpt: Option[String] = None): DataFrame = {
+    val actionType = actionTypeOpt.getOrElse(if (isStreaming) "streaming" else "batch")
+    // It's ok to not pass down the partitionSchema to TahoeBatchFileIndex. Schema evolution will
+    // ensure any partitionSchema changes will be captured, and upon restart, the new snapshot will
+    // be initialized with the correct partition schema again.
+    val fileIndex = new TahoeBatchFileIndex(spark, actionType, addFiles, this, dataPath, snapshot)
+    // --- modified start
+    // TODO: Don't add the bucketOption here, it will cause the OOM when the merge into update
+    // key is the bucket column, fix later
+    // --- modified end
+    val relation = buildHadoopFsRelationWithFileIndex(snapshot, fileIndex, bucketSpec = None)
+    Dataset.ofRows(spark, LogicalRelation(relation, isStreaming = isStreaming))
+  }
+
+  /**
+   * Returns a [[BaseRelation]] that contains all of the data present
+   * in the table. This relation will be continually updated
+   * as files are added or removed from the table. However, new [[BaseRelation]]
+   * must be requested in order to see changes to the schema.
+   */
+  def createRelation(
+      partitionFilters: Seq[Expression] = Nil,
+      snapshotToUseOpt: Option[Snapshot] = None,
+      catalogTableOpt: Option[CatalogTable] = None,
+      isTimeTravelQuery: Boolean = false): BaseRelation = {
+
+    /** Used to link the files present in the table into the query planner. */
+    // TODO: If snapshotToUse is unspecified, get the correct snapshot from update()
+    val snapshotToUse = snapshotToUseOpt.getOrElse(unsafeVolatileSnapshot)
+    if (snapshotToUse.version < 0) {
+      // A negative version here means the dataPath is an empty directory. Read query should error
+      // out in this case.
+      throw DeltaErrors.pathNotExistsException(dataPath.toString)
+    }
+
+    val fileIndex = TahoeLogFileIndex(
+      spark, this, dataPath, snapshotToUse, partitionFilters, isTimeTravelQuery)
+    // --- modified start
+    var bucketSpec: Option[BucketSpec] =
+      if (ClickHouseConfig.isMergeTreeFormatEngine(snapshotToUse.metadata.configuration)) {
+        ClickHouseTableV2.getTable(this).bucketOption
+      } else {
+        None
+      }
+
+    val r = buildHadoopFsRelationWithFileIndex(snapshotToUse, fileIndex, bucketSpec = bucketSpec)
+    new DeltaLog.DeltaHadoopFsRelation(
+      r.location,
+      r.partitionSchema,
+      r.dataSchema,
+      r.bucketSpec,
+      r.fileFormat,
+      r.options
+    )(spark, this, catalogTableOpt)
+    // --- modified end
+  }
+
+  def buildHadoopFsRelationWithFileIndex(snapshot: SnapshotDescriptor, fileIndex: TahoeFileIndex,
+      bucketSpec: Option[BucketSpec]): HadoopFsRelation = {
+    HadoopFsRelation(
+      fileIndex,
+      partitionSchema = DeltaColumnMapping.dropColumnMappingMetadata(
+        snapshot.metadata.partitionSchema),
+      // We pass all table columns as `dataSchema` so that Spark will preserve the partition
+      // column locations. Otherwise, for any partition columns not in `dataSchema`, Spark would
+      // just append them to the end of `dataSchema`.
+      dataSchema = DeltaColumnMapping.dropColumnMappingMetadata(
+        DeltaTableUtils.removeInternalMetadata(spark,
+          SchemaUtils.dropNullTypeColumns(snapshot.metadata.schema))),
+      bucketSpec = bucketSpec,
+      fileFormat(snapshot.protocol, snapshot.metadata),
+      // `metadata.format.options` is not set today. Even if we support it in future, we shouldn't
+      // store any file system options since they may contain credentials. Hence, it will never
+      // conflict with `DeltaLog.options`.
+      snapshot.metadata.format.options ++ options)(spark)
+  }
+
+  /**
+   * Verify the required Spark conf for delta
+   * Throw `DeltaErrors.configureSparkSessionWithExtensionAndCatalog` exception if
+   * `spark.sql.catalog.spark_catalog` config is missing. We do not check for
+   * `spark.sql.extensions` because DeltaSparkSessionExtension can alternatively
+   * be activated using the `.withExtension()` API. This check can be disabled
+   * by setting DELTA_CHECK_REQUIRED_SPARK_CONF to false.
+   */
+  protected def checkRequiredConfigurations(): Unit = {
+    if (spark.sessionState.conf.getConf(DeltaSQLConf.DELTA_REQUIRED_SPARK_CONFS_CHECK)) {
+      if (spark.conf.getOption(
+        SQLConf.V2_SESSION_CATALOG_IMPLEMENTATION.key).isEmpty) {
+        throw DeltaErrors.configureSparkSessionWithExtensionAndCatalog(None)
+      }
+    }
+  }
+
+  /**
+   * Returns a proper path canonicalization function for the current Delta log.
+   *
+   * If `runsOnExecutors` is true, the returned method will use a broadcast Hadoop Configuration
+   * so that the method is suitable for execution on executors. Otherwise, the returned method
+   * will use a local Hadoop Configuration and the method can only be executed on the driver.
+   */
+  private[delta] def getCanonicalPathFunction(runsOnExecutors: Boolean): String => String = {
+    val hadoopConf = newDeltaHadoopConf()
+    // Wrap `hadoopConf` with a method to delay the evaluation to run on executors.
+    val getHadoopConf = if (runsOnExecutors) {
+      val broadcastHadoopConf =
+        spark.sparkContext.broadcast(new SerializableConfiguration(hadoopConf))
+      () => broadcastHadoopConf.value.value
+    } else {
+      () => hadoopConf
+    }
+
+    new DeltaLog.CanonicalPathFunction(getHadoopConf)
+  }
+
+  /**
+   * Returns a proper path canonicalization UDF for the current Delta log.
+   *
+   * If `runsOnExecutors` is true, the returned UDF will use a broadcast Hadoop Configuration.
+   * Otherwise, the returned UDF will use a local Hadoop Configuration and the UDF can
+   * only be executed on the driver.
+   */
+  private[delta] def getCanonicalPathUdf(runsOnExecutors: Boolean = true): UserDefinedFunction = {
+    DeltaUDF.stringFromString(getCanonicalPathFunction(runsOnExecutors))
+  }
+
+  // --- modified start
+  override def fileFormat(protocol: Protocol, metadata: Metadata): FileFormat = {
+    if (ClickHouseConfig.isMergeTreeFormatEngine(metadata.configuration)) {
+      ClickHouseTableV2.getTable(this).getFileFormat(protocol, metadata)
+    } else {
+      super.fileFormat(protocol, metadata)
+    }
+  }
+  // --- modified end
+}
+
+object DeltaLog extends DeltaLogging {
+
+  // --- modified start
+  @SuppressWarnings(Array("io.github.zhztheplayer.scalawarts.InheritFromCaseClass"))
+  private class DeltaHadoopFsRelation(
+      location: FileIndex,
+      partitionSchema: StructType,
+      // The top-level columns in `dataSchema` should match the actual physical file schema,
+      // otherwise the ORC data source may not work with the by-ordinal mode.
+      dataSchema: StructType,
+      bucketSpec: Option[BucketSpec],
+      fileFormat: FileFormat,
+      options: Map[String, String]
+    )(spark: SparkSession, deltaLog: DeltaLog, catalogTableOpt: Option[CatalogTable])
+    extends HadoopFsRelation(
+      location,
+      partitionSchema,
+      dataSchema,
+      bucketSpec,
+      fileFormat,
+      options)(spark)
+      with InsertableRelation {
+    def insert(data: DataFrame, overwrite: Boolean): Unit = {
+      val mode = if (overwrite) SaveMode.Overwrite else SaveMode.Append
+      WriteIntoDelta(
+        deltaLog = deltaLog,
+        mode = mode,
+        new DeltaOptions(Map.empty[String, String], spark.sessionState.conf),
+        partitionColumns = Seq.empty,
+        configuration = Map.empty,
+        data = data,
+        catalogTableOpt = catalogTableOpt).run(spark)
+    }
+  }
+  // --- modified end
+
+  /**
+   * The key type of `DeltaLog` cache. It's a pair of the canonicalized table path and the file
+   * system options (options starting with "fs." or "dfs." prefix) passed into
+   * `DataFrameReader/Writer`
+   */
+  private type DeltaLogCacheKey = (Path, Map[String, String])
+
+  /** The name of the subdirectory that holds Delta metadata files */
+  private[delta] val LOG_DIR_NAME = "_delta_log"
+
+  private[delta] def logPathFor(dataPath: String): Path = logPathFor(new Path(dataPath))
+  private[delta] def logPathFor(dataPath: Path): Path =
+    DeltaTableUtils.safeConcatPaths(dataPath, LOG_DIR_NAME)
+
+  /**
+   * We create only a single [[DeltaLog]] for any given `DeltaLogCacheKey` to avoid wasted work
+   * in reconstructing the log.
+   */
+  type CacheKey = (Path, Map[String, String])
+  private[delta] def getOrCreateCache(conf: SQLConf):
+      Cache[CacheKey, DeltaLog] = synchronized {
+    deltaLogCache match {
+      case Some(c) => c
+      case None =>
+        val builder = createCacheBuilder(conf)
+          .removalListener(
+              (removalNotification: RemovalNotification[DeltaLogCacheKey, DeltaLog]) => {
+            val log = removalNotification.getValue
+            // TODO: We should use ref-counting to uncache snapshots instead of a manual timed op
+            try log.unsafeVolatileSnapshot.uncache() catch {
+              case _: java.lang.NullPointerException =>
+              // Various layers will throw null pointer if the RDD is already gone.
+            }
+          })
+        deltaLogCache = Some(builder.build[CacheKey, DeltaLog]())
+        deltaLogCache.get
+    }
+  }
+
+  private var deltaLogCache: Option[Cache[CacheKey, DeltaLog]] = None
+
+  /**
+   * Helper to create delta log caches
+   */
+  private def createCacheBuilder(conf: SQLConf): CacheBuilder[AnyRef, AnyRef] = {
+    val cacheRetention = conf.getConf(DeltaSQLConf.DELTA_LOG_CACHE_RETENTION_MINUTES)
+    val cacheSize = conf
+      .getConf(DeltaSQLConf.DELTA_LOG_CACHE_SIZE)
+      .max(sys.props.get("delta.log.cacheSize").map(_.toLong).getOrElse(0L))
+
+    CacheBuilder
+      .newBuilder()
+      .expireAfterAccess(cacheRetention, TimeUnit.MINUTES)
+      .maximumSize(cacheSize)
+  }
+
+
+  /**
+   * Creates a [[LogicalRelation]] for a given [[DeltaLogFileIndex]], with all necessary file source
+   * options taken from the Delta Log. All reads of Delta metadata files should use this method.
+   */
+  def indexToRelation(
+      spark: SparkSession,
+      index: DeltaLogFileIndex,
+      additionalOptions: Map[String, String],
+      schema: StructType = Action.logSchema): LogicalRelation = {
+    val formatSpecificOptions: Map[String, String] = index.format match {
+      case DeltaLogFileIndex.COMMIT_FILE_FORMAT =>
+        jsonCommitParseOption
+      case _ => Map.empty
+    }
+    // Delta should NEVER ignore missing or corrupt metadata files, because doing so can render the
+    // entire table unusable. Hard-wire that into the file source options so the user can't override
+    // it by setting spark.sql.files.ignoreCorruptFiles or spark.sql.files.ignoreMissingFiles.
+    val allOptions = additionalOptions ++ formatSpecificOptions ++ Map(
+      FileSourceOptions.IGNORE_CORRUPT_FILES -> "false",
+      FileSourceOptions.IGNORE_MISSING_FILES -> "false"
+    )
+    // --- modified start
+    // Don't need to add the bucketOption here, it handles the delta log meta json file
+    // --- modified end
+    val fsRelation = HadoopFsRelation(
+      index, index.partitionSchema, schema, None, index.format, allOptions)(spark)
+    LogicalRelation(fsRelation)
+  }
+
+  // Don't tolerate malformed JSON when parsing Delta log actions (default is PERMISSIVE)
+  val jsonCommitParseOption = Map("mode" -> FailFastMode.name)
+
+  /** Helper for creating a log when it stored at the root of the data. */
+  def forTable(spark: SparkSession, dataPath: String): DeltaLog = {
+    apply(spark, logPathFor(dataPath), Map.empty, new SystemClock)
+  }
+
+  /** Helper for creating a log when it stored at the root of the data. */
+  def forTable(spark: SparkSession, dataPath: Path): DeltaLog = {
+    apply(spark, logPathFor(dataPath), new SystemClock)
+  }
+
+  /** Helper for creating a log when it stored at the root of the data. */
+  def forTable(spark: SparkSession, dataPath: Path, options: Map[String, String]): DeltaLog = {
+    apply(spark, logPathFor(dataPath), options, new SystemClock)
+  }
+
+  /** Helper for creating a log when it stored at the root of the data. */
+  def forTable(spark: SparkSession, dataPath: Path, clock: Clock): DeltaLog = {
+    apply(spark, logPathFor(dataPath), clock)
+  }
+
+  /** Helper for creating a log for the table. */
+  def forTable(spark: SparkSession, tableName: TableIdentifier): DeltaLog = {
+    forTable(spark, tableName, new SystemClock)
+  }
+
+  /** Helper for creating a log for the table. */
+  def forTable(spark: SparkSession, table: CatalogTable): DeltaLog = {
+    forTable(spark, table, new SystemClock)
+  }
+
+  /** Helper for creating a log for the table. */
+  def forTable(spark: SparkSession, tableName: TableIdentifier, clock: Clock): DeltaLog = {
+    if (DeltaTableIdentifier.isDeltaPath(spark, tableName)) {
+      forTable(spark, new Path(tableName.table))
+    } else {
+      forTable(spark, spark.sessionState.catalog.getTableMetadata(tableName), clock)
+    }
+  }
+
+  /** Helper for creating a log for the table. */
+  def forTable(spark: SparkSession, table: CatalogTable, clock: Clock): DeltaLog = {
+    apply(spark, logPathFor(new Path(table.location)), clock)
+  }
+
+  private def apply(spark: SparkSession, rawPath: Path, clock: Clock = new SystemClock): DeltaLog =
+    apply(spark, rawPath, Map.empty, clock)
+
+
+  /** Helper for getting a log, as well as the latest snapshot, of the table */
+  def forTableWithSnapshot(spark: SparkSession, dataPath: String): (DeltaLog, Snapshot) =
+    withFreshSnapshot { forTable(spark, new Path(dataPath), _) }
+
+  /** Helper for getting a log, as well as the latest snapshot, of the table */
+  def forTableWithSnapshot(spark: SparkSession, dataPath: Path): (DeltaLog, Snapshot) =
+    withFreshSnapshot { forTable(spark, dataPath, _) }
+
+  /** Helper for getting a log, as well as the latest snapshot, of the table */
+  def forTableWithSnapshot(
+      spark: SparkSession,
+      tableName: TableIdentifier): (DeltaLog, Snapshot) =
+    withFreshSnapshot { forTable(spark, tableName, _) }
+
+  /** Helper for getting a log, as well as the latest snapshot, of the table */
+  def forTableWithSnapshot(
+      spark: SparkSession,
+      dataPath: Path,
+      options: Map[String, String]): (DeltaLog, Snapshot) =
+    withFreshSnapshot { apply(spark, logPathFor(dataPath), options, _) }
+
+  /**
+   * Helper function to be used with the forTableWithSnapshot calls. Thunk is a
+   * partially applied DeltaLog.forTable call, which we can then wrap around with a
+   * snapshot update. We use the system clock to avoid back-to-back updates.
+   */
+  private[delta] def withFreshSnapshot(thunk: Clock => DeltaLog): (DeltaLog, Snapshot) = {
+    val clock = new SystemClock
+    val ts = clock.getTimeMillis()
+    val deltaLog = thunk(clock)
+    val snapshot = deltaLog.update(checkIfUpdatedSinceTs = Some(ts))
+    (deltaLog, snapshot)
+  }
+
+  private def apply(
+      spark: SparkSession,
+      rawPath: Path,
+      options: Map[String, String],
+      clock: Clock
+  ): DeltaLog = {
+    val fileSystemOptions: Map[String, String] =
+      if (spark.sessionState.conf.getConf(
+          DeltaSQLConf.LOAD_FILE_SYSTEM_CONFIGS_FROM_DATAFRAME_OPTIONS)) {
+        // We pick up only file system options so that we don't pass any parquet or json options to
+        // the code that reads Delta transaction logs.
+        options.filterKeys { k =>
+          DeltaTableUtils.validDeltaTableHadoopPrefixes.exists(k.startsWith)
+        }.toMap
+      } else {
+        Map.empty
+      }
+    // scalastyle:off deltahadoopconfiguration
+    val hadoopConf = spark.sessionState.newHadoopConfWithOptions(fileSystemOptions)
+    // scalastyle:on deltahadoopconfiguration
+    val fs = rawPath.getFileSystem(hadoopConf)
+    val path = fs.makeQualified(rawPath)
+    def createDeltaLog(): DeltaLog = recordDeltaOperation(
+      null,
+      "delta.log.create",
+      Map(TAG_TAHOE_PATH -> path.getParent.toString)) {
+        AnalysisHelper.allowInvokingTransformsInAnalyzer {
+          new DeltaLog(
+            logPath = path,
+            dataPath = path.getParent,
+            options = fileSystemOptions,
+            allOptions = options,
+            clock = clock
+          )
+        }
+    }
+    def getDeltaLogFromCache(): DeltaLog = {
+      // The following cases will still create a new ActionLog even if there is a cached
+      // ActionLog using a different format path:
+      // - Different `scheme`
+      // - Different `authority` (e.g., different user tokens in the path)
+      // - Different mount point.
+      try {
+        getOrCreateCache(spark.sessionState.conf)
+          .get(path -> fileSystemOptions, () => {
+            createDeltaLog()
+          }
+        )
+      } catch {
+        case e: com.google.common.util.concurrent.UncheckedExecutionException => throw e.getCause
+        case e: java.util.concurrent.ExecutionException => throw e.getCause
+      }
+    }
+
+    val deltaLog = getDeltaLogFromCache()
+    if (Option(deltaLog.sparkContext.get).map(_.isStopped).getOrElse(true)) {
+      // Invalid the cached `DeltaLog` and create a new one because the `SparkContext` of the cached
+      // `DeltaLog` has been stopped.
+      getOrCreateCache(spark.sessionState.conf).invalidate(path -> fileSystemOptions)
+      getDeltaLogFromCache()
+    } else {
+      deltaLog
+    }
+  }
+
+  /** Invalidate the cached DeltaLog object for the given `dataPath`. */
+  def invalidateCache(spark: SparkSession, dataPath: Path): Unit = {
+    try {
+      val rawPath = logPathFor(dataPath)
+      // scalastyle:off deltahadoopconfiguration
+      // This method cannot be called from DataFrameReader/Writer so it's safe to assume the user
+      // has set the correct file system configurations in the session configs.
+      val fs = rawPath.getFileSystem(spark.sessionState.newHadoopConf())
+      // scalastyle:on deltahadoopconfiguration
+      val path = fs.makeQualified(rawPath)
+
+      val deltaLogCache = getOrCreateCache(spark.sessionState.conf)
+      if (spark.sessionState.conf.getConf(
+          DeltaSQLConf.LOAD_FILE_SYSTEM_CONFIGS_FROM_DATAFRAME_OPTIONS)) {
+        // We rely on the fact that accessing the key set doesn't modify the entry access time. See
+        // `CacheBuilder.expireAfterAccess`.
+        val keysToBeRemoved = mutable.ArrayBuffer[DeltaLogCacheKey]()
+        val iter = deltaLogCache.asMap().keySet().iterator()
+        while (iter.hasNext) {
+          val key = iter.next()
+          if (key._1 == path) {
+            keysToBeRemoved += key
+          }
+        }
+        deltaLogCache.invalidateAll(keysToBeRemoved.asJava)
+      } else {
+        deltaLogCache.invalidate(path -> Map.empty)
+      }
+    } catch {
+      case NonFatal(e) => logWarning(e.getMessage, e)
+    }
+  }
+
+  def clearCache(): Unit = {
+    deltaLogCache.foreach(_.invalidateAll())
+  }
+
+  /** Unset the caches. Exposing for testing */
+  private[delta] def unsetCache(): Unit = {
+    synchronized {
+      deltaLogCache = None
+    }
+  }
+
+  /** Return the number of cached `DeltaLog`s. Exposing for testing */
+  private[delta] def cacheSize: Long = {
+    deltaLogCache.map(_.size()).getOrElse(0L)
+  }
+
+  /**
+   * Filters the given [[Dataset]] by the given `partitionFilters`, returning those that match.
+   * @param files The active files in the DeltaLog state, which contains the partition value
+   *              information
+   * @param partitionFilters Filters on the partition columns
+   * @param partitionColumnPrefixes The path to the `partitionValues` column, if it's nested
+   * @param shouldRewritePartitionFilters Whether to rewrite `partitionFilters` to be over the
+   *                                      [[AddFile]] schema
+   */
+  def filterFileList(
+      partitionSchema: StructType,
+      files: DataFrame,
+      partitionFilters: Seq[Expression],
+      partitionColumnPrefixes: Seq[String] = Nil,
+      shouldRewritePartitionFilters: Boolean = true): DataFrame = {
+
+    val rewrittenFilters = if (shouldRewritePartitionFilters) {
+      rewritePartitionFilters(
+        partitionSchema,
+        files.sparkSession.sessionState.conf.resolver,
+        partitionFilters,
+        partitionColumnPrefixes)
+    } else {
+      partitionFilters
+    }
+    val expr = rewrittenFilters.reduceLeftOption(And).getOrElse(Literal.TrueLiteral)
+    val columnFilter = new Column(expr)
+    files.filter(columnFilter)
+  }
+
+  /**
+   * Rewrite the given `partitionFilters` to be used for filtering partition values.
+   * We need to explicitly resolve the partitioning columns here because the partition columns
+   * are stored as keys of a Map type instead of attributes in the AddFile schema (below) and thus
+   * cannot be resolved automatically.
+   *
+   * @param partitionFilters Filters on the partition columns
+   * @param partitionColumnPrefixes The path to the `partitionValues` column, if it's nested
+   */
+  def rewritePartitionFilters(
+      partitionSchema: StructType,
+      resolver: Resolver,
+      partitionFilters: Seq[Expression],
+      partitionColumnPrefixes: Seq[String] = Nil): Seq[Expression] = {
+    partitionFilters
+      .map(_.transformUp {
+      case a: Attribute =>
+        // If we have a special column name, e.g. `a.a`, then an UnresolvedAttribute returns
+        // the column name as '`a.a`' instead of 'a.a', therefore we need to strip the backticks.
+        val unquoted = a.name.stripPrefix("`").stripSuffix("`")
+        val partitionCol = partitionSchema.find { field => resolver(field.name, unquoted) }
+        partitionCol match {
+          case Some(f: StructField) =>
+            val name = DeltaColumnMapping.getPhysicalName(f)
+            Cast(
+              UnresolvedAttribute(partitionColumnPrefixes ++ Seq("partitionValues", name)),
+              f.dataType)
+          case None =>
+            // This should not be able to happen, but the case was present in the original code so
+            // we kept it to be safe.
+            log.error(s"Partition filter referenced column ${a.name} not in the partition schema")
+            UnresolvedAttribute(partitionColumnPrefixes ++ Seq("partitionValues", a.name))
+        }
+    })
+  }
+
+
+  /**
+   * Checks whether this table only accepts appends. If so it will throw an error in operations that
+   * can remove data such as DELETE/UPDATE/MERGE.
+   */
+  def assertRemovable(snapshot: Snapshot): Unit = {
+    val metadata = snapshot.metadata
+    if (DeltaConfigs.IS_APPEND_ONLY.fromMetaData(metadata)) {
+      throw DeltaErrors.modifyAppendOnlyTableException(metadata.name)
+    }
+  }
+
+  /** How long to keep around SetTransaction actions before physically deleting them. */
+  def minSetTransactionRetentionInterval(metadata: Metadata): Option[Long] = {
+    DeltaConfigs.TRANSACTION_ID_RETENTION_DURATION
+      .fromMetaData(metadata)
+      .map(DeltaConfigs.getMilliSeconds)
+  }
+  /** How long to keep around logically deleted files before physically deleting them. */
+  def tombstoneRetentionMillis(metadata: Metadata): Long = {
+    DeltaConfigs.getMilliSeconds(DeltaConfigs.TOMBSTONE_RETENTION.fromMetaData(metadata))
+  }
+
+  /** Get a function that canonicalizes a given `path`. */
+  private[delta] class CanonicalPathFunction(getHadoopConf: () => Configuration)
+      extends Function[String, String] with Serializable {
+    // Mark it `@transient lazy val` so that de-serialization happens only once on every executor.
+    @transient
+    private lazy val fs = {
+      // scalastyle:off FileSystemGet
+      FileSystem.get(getHadoopConf())
+      // scalastyle:on FileSystemGet
+    }
+
+    override def apply(path: String): String = {
+      // scalastyle:off pathfromuri
+      val hadoopPath = new Path(new URI(path))
+      // scalastyle:on pathfromuri
+      if (hadoopPath.isAbsoluteAndSchemeAuthorityNull) {
+        fs.makeQualified(hadoopPath).toUri.toString
+      } else {
+        // return untouched if it is a relative path or is already fully qualified
+        hadoopPath.toUri.toString
+      }
+    }
+  }
+}
diff --git a/backends-clickhouse/src/main/delta-32/org/apache/spark/sql/delta/Snapshot.scala b/backends-clickhouse/src/main/delta-32/org/apache/spark/sql/delta/Snapshot.scala
new file mode 100644
index 000000000000..13a91f051dae
--- /dev/null
+++ b/backends-clickhouse/src/main/delta-32/org/apache/spark/sql/delta/Snapshot.scala
@@ -0,0 +1,663 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.delta
+
+// scalastyle:off import.ordering.noEmptyLine
+import scala.collection.mutable
+
+import org.apache.spark.sql.delta.actions._
+import org.apache.spark.sql.delta.actions.Action.logSchema
+import org.apache.spark.sql.delta.managedcommit.{CommitOwnerProvider, TableCommitOwnerClient}
+import org.apache.spark.sql.delta.metering.DeltaLogging
+import org.apache.spark.sql.delta.schema.SchemaUtils
+import org.apache.spark.sql.delta.sources.DeltaSQLConf
+import org.apache.spark.sql.delta.stats.DataSkippingReader
+import org.apache.spark.sql.delta.stats.DeltaScan
+import org.apache.spark.sql.delta.stats.DeltaStatsColumnSpec
+import org.apache.spark.sql.delta.stats.StatisticsCollection
+import org.apache.spark.sql.delta.util.DeltaCommitFileProvider
+import org.apache.spark.sql.delta.util.FileNames
+import org.apache.spark.sql.delta.util.StateCache
+import org.apache.spark.sql.util.ScalaExtensions._
+import org.apache.hadoop.fs.{FileStatus, Path}
+
+import org.apache.spark.sql._
+import org.apache.spark.sql.catalyst.expressions.Expression
+import org.apache.spark.sql.execution.datasources.v2.clickhouse.ClickHouseConfig
+import org.apache.spark.sql.functions._
+import org.apache.spark.sql.types.StructType
+import org.apache.spark.util.Utils
+
+/**
+ * Gluten overwrite Delta:
+ *
+ * This file is copied from Delta 3.2.0. It is modified to overcome the following issues:
+ *   1. filesForScan() will cache the DeltaScan by the FilterExprsAsKey
+ *   2. filesForScan() should return DeltaScan of AddMergeTreeParts instead of AddFile
+ */
+
+/**
+ * A description of a Delta [[Snapshot]], including basic information such its [[DeltaLog]]
+ * metadata, protocol, and version.
+ */
+trait SnapshotDescriptor {
+  def deltaLog: DeltaLog
+  def version: Long
+  def metadata: Metadata
+  def protocol: Protocol
+
+  def schema: StructType = metadata.schema
+
+  protected[delta] def numOfFilesIfKnown: Option[Long]
+  protected[delta] def sizeInBytesIfKnown: Option[Long]
+}
+
+/**
+ * An immutable snapshot of the state of the log at some delta version. Internally
+ * this class manages the replay of actions stored in checkpoint or delta files.
+ *
+ * After resolving any new actions, it caches the result and collects the
+ * following basic information to the driver:
+ *  - Protocol Version
+ *  - Metadata
+ *  - Transaction state
+ *
+ * @param inCommitTimestampOpt The in-commit-timestamp of the latest commit in milliseconds. Can
+ *                  be set to None if
+ *                   1. The timestamp has not been read yet - generally the case for cold tables.
+ *                   2. Or the table has not been initialized, i.e. `version = -1`.
+ *                   3. Or the table does not have [[InCommitTimestampTableFeature]] enabled.
+ *
+ */
+class Snapshot(
+    val path: Path,
+    override val version: Long,
+    val logSegment: LogSegment,
+    override val deltaLog: DeltaLog,
+    val checksumOpt: Option[VersionChecksum]
+  )
+  extends SnapshotDescriptor
+  with SnapshotStateManager
+  with StateCache
+  with StatisticsCollection
+  with DataSkippingReader
+  with DeltaLogging {
+
+  import Snapshot._
+  import DeltaLogFileIndex.COMMIT_VERSION_COLUMN
+  // For implicits which re-use Encoder:
+  import org.apache.spark.sql.delta.implicits._
+
+  protected def spark = SparkSession.active
+
+  /** Snapshot to scan by the DeltaScanGenerator for metadata query optimizations */
+  override val snapshotToScan: Snapshot = this
+
+  override def columnMappingMode: DeltaColumnMappingMode = metadata.columnMappingMode
+
+  /**
+   * Returns the timestamp of the latest commit of this snapshot.
+   * For an uninitialized snapshot, this returns -1.
+   *
+   * When InCommitTimestampTableFeature is enabled, the timestamp
+   * is retrieved from the CommitInfo of the latest commit which
+   * can result in an IO operation.
+   */
+  def timestamp: Long =
+    getInCommitTimestampOpt.getOrElse(logSegment.lastCommitFileModificationTimestamp)
+
+  /**
+   * Returns the inCommitTimestamp if ICT is enabled, otherwise returns None.
+   * This potentially triggers an IO operation to read the inCommitTimestamp.
+   * This is a lazy val, so repeated calls will not trigger multiple IO operations.
+   */
+  protected lazy val getInCommitTimestampOpt: Option[Long] =
+    Option.when(DeltaConfigs.IN_COMMIT_TIMESTAMPS_ENABLED.fromMetaData(metadata)) {
+      _reconstructedProtocolMetadataAndICT.inCommitTimestamp
+        .getOrElse {
+          val startTime = System.currentTimeMillis()
+          var exception = Option.empty[Throwable]
+          try {
+            val commitInfoOpt = DeltaHistoryManager.getCommitInfoOpt(
+              deltaLog.store,
+              DeltaCommitFileProvider(this).deltaFile(version),
+              deltaLog.newDeltaHadoopConf())
+            CommitInfo.getRequiredInCommitTimestamp(commitInfoOpt, version.toString)
+          } catch {
+            case e: Throwable =>
+              exception = Some(e)
+              throw e
+          } finally {
+            recordDeltaEvent(
+              deltaLog,
+              "delta.inCommitTimestamp.read",
+              data = Map(
+                "version" -> version,
+                "callSite" -> "Snapshot.getInCommitTimestampOpt",
+                "checkpointVersion" -> logSegment.checkpointProvider.version,
+                "durationMs" -> (System.currentTimeMillis() - startTime),
+                "exceptionMessage" -> exception.map(_.getMessage).getOrElse(""),
+                "exceptionStackTrace" -> exception.map(_.getStackTrace.mkString("\n")).getOrElse("")
+              )
+            )
+          }
+        }
+    }
+
+
+  private[delta] lazy val nonFileActions: Seq[Action] = {
+    Seq(protocol, metadata) ++
+      setTransactions ++
+      domainMetadata
+  }
+
+  @volatile private[delta] var stateReconstructionTriggered = false
+
+  /**
+   * Use [[stateReconstruction]] to create a representation of the actions in this table.
+   * Cache the resultant output.
+   */
+  private lazy val cachedState = recordFrameProfile("Delta", "snapshot.cachedState") {
+    stateReconstructionTriggered = true
+    cacheDS(stateReconstruction, s"Delta Table State #$version - $redactedPath")
+  }
+
+  /**
+   * Given the list of files from `LogSegment`, create respective file indices to help create
+   * a DataFrame and short-circuit the many file existence and partition schema inference checks
+   * that exist in DataSource.resolveRelation().
+   */
+  protected[delta] lazy val deltaFileIndexOpt: Option[DeltaLogFileIndex] = {
+    assertLogFilesBelongToTable(path, logSegment.deltas)
+    DeltaLogFileIndex(DeltaLogFileIndex.COMMIT_FILE_FORMAT, logSegment.deltas)
+  }
+
+  protected lazy val fileIndices: Seq[DeltaLogFileIndex] = {
+    val checkpointFileIndexes = checkpointProvider.allActionsFileIndexes()
+    checkpointFileIndexes ++ deltaFileIndexOpt.toSeq
+  }
+
+  /**
+   * Protocol, Metadata, and In-Commit Timestamp retrieved through
+   * `protocolMetadataAndICTReconstruction` which skips a full state reconstruction.
+   */
+  case class ReconstructedProtocolMetadataAndICT(
+      protocol: Protocol,
+      metadata: Metadata,
+      inCommitTimestamp: Option[Long])
+
+  /**
+   * Generate the protocol and metadata for this snapshot. This is usually cheaper than a
+   * full state reconstruction, but still only compute it when necessary.
+   */
+  private lazy val _reconstructedProtocolMetadataAndICT: ReconstructedProtocolMetadataAndICT =
+      {
+    // Should be small. At most 'checkpointInterval' rows, unless new commits are coming
+    // in before a checkpoint can be written
+    var protocol: Protocol = null
+    var metadata: Metadata = null
+    var inCommitTimestamp: Option[Long] = None
+    protocolMetadataAndICTReconstruction().foreach {
+      case ReconstructedProtocolMetadataAndICT(p: Protocol, _, _) => protocol = p
+      case ReconstructedProtocolMetadataAndICT(_, m: Metadata, _) => metadata = m
+      case ReconstructedProtocolMetadataAndICT(_, _, ict: Option[Long]) => inCommitTimestamp = ict
+    }
+
+    if (protocol == null) {
+      recordDeltaEvent(
+        deltaLog,
+        opType = "delta.assertions.missingAction",
+        data = Map(
+          "version" -> version.toString, "action" -> "Protocol", "source" -> "Snapshot"))
+      throw DeltaErrors.actionNotFoundException("protocol", version)
+    }
+
+    if (metadata == null) {
+      recordDeltaEvent(
+        deltaLog,
+        opType = "delta.assertions.missingAction",
+        data = Map(
+          "version" -> version.toString, "action" -> "Metadata", "source" -> "Snapshot"))
+      throw DeltaErrors.actionNotFoundException("metadata", version)
+    }
+
+    ReconstructedProtocolMetadataAndICT(protocol, metadata, inCommitTimestamp)
+  }
+
+  /**
+   * [[CommitOwnerClient]] for the given delta table as of this snapshot.
+   * - This must be present when managed commit is enabled.
+   * - This must be None when managed commit is disabled.
+   */
+  val tableCommitOwnerClientOpt: Option[TableCommitOwnerClient] = initializeTableCommitOwner()
+  protected def initializeTableCommitOwner(): Option[TableCommitOwnerClient] = {
+    CommitOwnerProvider.getTableCommitOwner(this)
+  }
+
+  /** Number of columns to collect stats on for data skipping */
+  override lazy val statsColumnSpec: DeltaStatsColumnSpec =
+    StatisticsCollection.configuredDeltaStatsColumnSpec(metadata)
+
+  /** Performs validations during initialization */
+  protected def init(): Unit = {
+    deltaLog.protocolRead(protocol)
+    deltaLog.assertTableFeaturesMatchMetadata(protocol, metadata)
+    SchemaUtils.recordUndefinedTypes(deltaLog, metadata.schema)
+  }
+
+  /** The current set of actions in this [[Snapshot]] as plain Rows */
+  def stateDF: DataFrame = recordFrameProfile("Delta", "stateDF") {
+    cachedState.getDF
+  }
+
+  /** The current set of actions in this [[Snapshot]] as a typed Dataset. */
+  def stateDS: Dataset[SingleAction] = recordFrameProfile("Delta", "stateDS") {
+    cachedState.getDS
+  }
+
+  private[delta] def allFilesViaStateReconstruction: Dataset[AddFile] = {
+    stateDS.where("add IS NOT NULL").select(col("add").as[AddFile])
+  }
+
+  // Here we need to bypass the ACL checks for SELECT anonymous function permissions.
+  /** All of the files present in this [[Snapshot]]. */
+  def allFiles: Dataset[AddFile] = allFilesViaStateReconstruction
+
+  /** All unexpired tombstones. */
+  def tombstones: Dataset[RemoveFile] = {
+    stateDS.where("remove IS NOT NULL").select(col("remove").as[RemoveFile])
+  }
+
+  def deltaFileSizeInBytes(): Long = deltaFileIndexOpt.map(_.sizeInBytes).getOrElse(0L)
+
+  def checkpointSizeInBytes(): Long = checkpointProvider.effectiveCheckpointSizeInBytes()
+
+  override def metadata: Metadata = _reconstructedProtocolMetadataAndICT.metadata
+
+  override def protocol: Protocol = _reconstructedProtocolMetadataAndICT.protocol
+
+  /**
+   * Pulls the protocol and metadata of the table from the files that are used to compute the
+   * Snapshot directly--without triggering a full state reconstruction. This is important, because
+   * state reconstruction depends on protocol and metadata for correctness.
+   * If the current table version does not have a checkpoint, this function will also return the
+   * in-commit-timestamp of the latest commit if available.
+   *
+   * Also this method should only access methods defined in [[UninitializedCheckpointProvider]]
+   * which are not present in [[CheckpointProvider]]. This is because initialization of
+   * [[Snapshot.checkpointProvider]] depends on [[Snapshot.protocolMetadataAndICTReconstruction()]]
+   * and so if [[Snapshot.protocolMetadataAndICTReconstruction()]] starts depending on
+   * [[Snapshot.checkpointProvider]] then there will be cyclic dependency.
+   */
+  protected def protocolMetadataAndICTReconstruction():
+      Array[ReconstructedProtocolMetadataAndICT] = {
+    import implicits._
+
+    val schemaToUse = Action.logSchema(Set("protocol", "metaData", "commitInfo"))
+    val checkpointOpt = checkpointProvider.topLevelFileIndex.map { index =>
+      deltaLog.loadIndex(index, schemaToUse)
+        .withColumn(COMMIT_VERSION_COLUMN, lit(checkpointProvider.version))
+    }
+    (checkpointOpt ++ deltaFileIndexOpt.map(deltaLog.loadIndex(_, schemaToUse)).toSeq)
+      .reduceOption(_.union(_)).getOrElse(emptyDF)
+      .select("protocol", "metaData", "commitInfo.inCommitTimestamp", COMMIT_VERSION_COLUMN)
+      .where("protocol.minReaderVersion is not null or metaData.id is not null " +
+        s"or (commitInfo.inCommitTimestamp is not null and version = $version)")
+      .as[(Protocol, Metadata, Option[Long], Long)]
+      .collect()
+      .sortBy(_._4)
+      .map {
+        case (p, m, ict, _) => ReconstructedProtocolMetadataAndICT(p, m, ict)
+      }
+  }
+
+  // Reconstruct the state by applying deltas in order to the checkpoint.
+  // We partition by path as it is likely the bulk of the data is add/remove.
+  // Non-path based actions will be collocated to a single partition.
+  protected def stateReconstruction: Dataset[SingleAction] = {
+    recordFrameProfile("Delta", "snapshot.stateReconstruction") {
+      // for serializability
+      val localMinFileRetentionTimestamp = minFileRetentionTimestamp
+      val localMinSetTransactionRetentionTimestamp = minSetTransactionRetentionTimestamp
+
+      val canonicalPath = deltaLog.getCanonicalPathUdf()
+
+      // Canonicalize the paths so we can repartition the actions correctly, but only rewrite the
+      // add/remove actions themselves after partitioning and sorting are complete. Otherwise, the
+      // optimizer can generate a really bad plan that re-evaluates _EVERY_ field of the rewritten
+      // struct(...)  projection every time we touch _ANY_ field of the rewritten struct.
+      //
+      // NOTE: We sort by [[COMMIT_VERSION_COLUMN]] (provided by [[loadActions]]), to ensure that
+      // actions are presented to InMemoryLogReplay in the ascending version order it expects.
+      val ADD_PATH_CANONICAL_COL_NAME = "add_path_canonical"
+      val REMOVE_PATH_CANONICAL_COL_NAME = "remove_path_canonical"
+      loadActions
+        .withColumn(ADD_PATH_CANONICAL_COL_NAME, when(
+          col("add.path").isNotNull, canonicalPath(col("add.path"))))
+        .withColumn(REMOVE_PATH_CANONICAL_COL_NAME, when(
+          col("remove.path").isNotNull, canonicalPath(col("remove.path"))))
+        .repartition(
+          getNumPartitions,
+          coalesce(col(ADD_PATH_CANONICAL_COL_NAME), col(REMOVE_PATH_CANONICAL_COL_NAME)))
+        .sortWithinPartitions(COMMIT_VERSION_COLUMN)
+        .withColumn("add", when(
+          col("add.path").isNotNull,
+          struct(
+            col(ADD_PATH_CANONICAL_COL_NAME).as("path"),
+            col("add.partitionValues"),
+            col("add.size"),
+            col("add.modificationTime"),
+            col("add.dataChange"),
+            col(ADD_STATS_TO_USE_COL_NAME).as("stats"),
+            col("add.tags"),
+            col("add.deletionVector"),
+            col("add.baseRowId"),
+            col("add.defaultRowCommitVersion"),
+            col("add.clusteringProvider")
+          )))
+        .withColumn("remove", when(
+          col("remove.path").isNotNull,
+          col("remove").withField("path", col(REMOVE_PATH_CANONICAL_COL_NAME))))
+        .as[SingleAction]
+        .mapPartitions { iter =>
+          val state: LogReplay =
+            new InMemoryLogReplay(
+              localMinFileRetentionTimestamp,
+              localMinSetTransactionRetentionTimestamp)
+          state.append(0, iter.map(_.unwrap))
+          state.checkpoint.map(_.wrap)
+        }
+    }
+  }
+
+  /**
+   * Loads the file indices into a DataFrame that can be used for LogReplay.
+   *
+   * In addition to the usual nested columns provided by the SingleAction schema, it should provide
+   * two additional columns to simplify the log replay process: [[COMMIT_VERSION_COLUMN]] (which,
+   * when sorted in ascending order, will order older actions before newer ones, as required by
+   * [[InMemoryLogReplay]]); and [[ADD_STATS_TO_USE_COL_NAME]] (to handle certain combinations of
+   * config settings for delta.checkpoint.writeStatsAsJson and delta.checkpoint.writeStatsAsStruct).
+   */
+  protected def loadActions: DataFrame = {
+    fileIndices.map(deltaLog.loadIndex(_))
+      .reduceOption(_.union(_)).getOrElse(emptyDF)
+      .withColumn(ADD_STATS_TO_USE_COL_NAME, col("add.stats"))
+  }
+
+  /**
+   * Tombstones before the [[minFileRetentionTimestamp]] timestamp will be dropped from the
+   * checkpoint.
+   */
+  private[delta] def minFileRetentionTimestamp: Long = {
+    deltaLog.clock.getTimeMillis() - DeltaLog.tombstoneRetentionMillis(metadata)
+  }
+
+  /**
+   * [[SetTransaction]]s before [[minSetTransactionRetentionTimestamp]] will be considered expired
+   * and dropped from the snapshot.
+   */
+  private[delta] def minSetTransactionRetentionTimestamp: Option[Long] = {
+    DeltaLog.minSetTransactionRetentionInterval(metadata).map(deltaLog.clock.getTimeMillis() - _)
+  }
+
+  private[delta] def getNumPartitions: Int = {
+    spark.sessionState.conf.getConf(DeltaSQLConf.DELTA_SNAPSHOT_PARTITIONS)
+      .getOrElse(Snapshot.defaultNumSnapshotPartitions)
+  }
+
+  /**
+   * Computes all the information that is needed by the checksum for the current snapshot.
+   * May kick off state reconstruction if needed by any of the underlying fields.
+   * Note that it's safe to set txnId to none, since the snapshot doesn't always have a txn
+   * attached. E.g. if a snapshot is created by reading a checkpoint, then no txnId is present.
+   */
+  def computeChecksum: VersionChecksum = VersionChecksum(
+    txnId = None,
+    tableSizeBytes = sizeInBytes,
+    numFiles = numOfFiles,
+    numMetadata = numOfMetadata,
+    numProtocol = numOfProtocol,
+    inCommitTimestampOpt = getInCommitTimestampOpt,
+    setTransactions = checksumOpt.flatMap(_.setTransactions),
+    domainMetadata = domainMetadatasIfKnown,
+    metadata = metadata,
+    protocol = protocol,
+    histogramOpt = fileSizeHistogram,
+    allFiles = checksumOpt.flatMap(_.allFiles))
+
+  /** Returns the data schema of the table, used for reading stats */
+  def tableSchema: StructType = metadata.dataSchema
+
+  def outputTableStatsSchema: StructType = metadata.dataSchema
+
+  def outputAttributeSchema: StructType = metadata.dataSchema
+
+  /** Returns the schema of the columns written out to file (overridden in write path) */
+  def dataSchema: StructType = metadata.dataSchema
+
+  /** Return the set of properties of the table. */
+  def getProperties: mutable.Map[String, String] = {
+    val base = new mutable.LinkedHashMap[String, String]()
+    metadata.configuration.foreach { case (k, v) =>
+      if (k != "path") {
+        base.put(k, v)
+      }
+    }
+    base.put(Protocol.MIN_READER_VERSION_PROP, protocol.minReaderVersion.toString)
+    base.put(Protocol.MIN_WRITER_VERSION_PROP, protocol.minWriterVersion.toString)
+    if (protocol.supportsReaderFeatures || protocol.supportsWriterFeatures) {
+      val features = protocol.readerAndWriterFeatureNames.map(name =>
+        s"${TableFeatureProtocolUtils.FEATURE_PROP_PREFIX}$name" ->
+          TableFeatureProtocolUtils.FEATURE_PROP_SUPPORTED)
+      base ++ features.toSeq.sorted
+    } else {
+      base
+    }
+  }
+
+  /** The [[CheckpointProvider]] for the underlying checkpoint */
+  lazy val checkpointProvider: CheckpointProvider = logSegment.checkpointProvider match {
+    case cp: CheckpointProvider => cp
+    case uninitializedProvider: UninitializedCheckpointProvider =>
+      CheckpointProvider(spark, this, checksumOpt, uninitializedProvider)
+    case o => throw new IllegalStateException(s"Unknown checkpoint provider: ${o.getClass.getName}")
+  }
+
+  def redactedPath: String =
+    Utils.redact(spark.sessionState.conf.stringRedactionPattern, path.toUri.toString)
+
+  /**
+   * Ensures that commit files are backfilled up to the current version in the snapshot.
+   *
+   * This method checks if there are any un-backfilled versions up to the current version and
+   * triggers the backfilling process using the commit-owner. It verifies that the delta file for
+   * the current version exists after the backfilling process.
+   *
+   * @throws IllegalStateException
+   *   if the delta file for the current version is not found after backfilling.
+   */
+  def ensureCommitFilesBackfilled(): Unit = {
+    val tableCommitOwnerClient = tableCommitOwnerClientOpt.getOrElse {
+      return
+    }
+    val minUnbackfilledVersion = DeltaCommitFileProvider(this).minUnbackfilledVersion
+    if (minUnbackfilledVersion <= version) {
+      val hadoopConf = deltaLog.newDeltaHadoopConf()
+      tableCommitOwnerClient.backfillToVersion(
+        startVersion = minUnbackfilledVersion, endVersion = Some(version))
+      val fs = deltaLog.logPath.getFileSystem(hadoopConf)
+      val expectedBackfilledDeltaFile = FileNames.unsafeDeltaFile(deltaLog.logPath, version)
+      if (!fs.exists(expectedBackfilledDeltaFile)) {
+        throw new IllegalStateException("Backfilling of commit files failed. " +
+          s"Expected delta file $expectedBackfilledDeltaFile not found.")
+      }
+    }
+  }
+
+
+  protected def emptyDF: DataFrame =
+    spark.createDataFrame(spark.sparkContext.emptyRDD[Row], logSchema)
+
+
+  override def logInfo(msg: => String): Unit = {
+    super.logInfo(s"[tableId=${deltaLog.tableId}] " + msg)
+  }
+
+  override def logWarning(msg: => String): Unit = {
+    super.logWarning(s"[tableId=${deltaLog.tableId}] " + msg)
+  }
+
+  override def logWarning(msg: => String, throwable: Throwable): Unit = {
+    super.logWarning(s"[tableId=${deltaLog.tableId}] " + msg, throwable)
+  }
+
+  override def logError(msg: => String): Unit = {
+    super.logError(s"[tableId=${deltaLog.tableId}] " + msg)
+  }
+
+  override def logError(msg: => String, throwable: Throwable): Unit = {
+    super.logError(s"[tableId=${deltaLog.tableId}] " + msg, throwable)
+  }
+
+  override def toString: String =
+    s"${getClass.getSimpleName}(path=$path, version=$version, metadata=$metadata, " +
+      s"logSegment=$logSegment, checksumOpt=$checksumOpt)"
+
+  // --- modified start
+  override def filesForScan(filters: Seq[Expression], keepNumRecords: Boolean): DeltaScan = {
+    val deltaScan = ClickhouseSnapshot.deltaScanCache.get(
+      FilterExprsAsKey(path, ClickhouseSnapshot.genSnapshotId(this), filters, None),
+      () => {
+        super.filesForScan(filters, keepNumRecords)
+      })
+
+    replaceWithAddMergeTreeParts(deltaScan)
+  }
+
+  override def filesForScan(limit: Long, partitionFilters: Seq[Expression]): DeltaScan = {
+    val deltaScan = ClickhouseSnapshot.deltaScanCache.get(
+      FilterExprsAsKey(path, ClickhouseSnapshot.genSnapshotId(this), partitionFilters, Some(limit)),
+      () => {
+        super.filesForScan(limit, partitionFilters)
+      })
+
+    replaceWithAddMergeTreeParts(deltaScan)
+  }
+
+  private def replaceWithAddMergeTreeParts(deltaScan: DeltaScan) = {
+    if (ClickHouseConfig.isMergeTreeFormatEngine(metadata.configuration)) {
+      DeltaScan.apply(
+        deltaScan.version,
+        deltaScan.files
+          .map(
+            addFile => {
+              val addFileAsKey = AddFileAsKey(addFile)
+
+              val ret = ClickhouseSnapshot.addFileToAddMTPCache.get(addFileAsKey)
+              // this is for later use
+              ClickhouseSnapshot.pathToAddMTPCache.put(ret.fullPartPath(), ret)
+              ret
+            }),
+        deltaScan.total,
+        deltaScan.partition,
+        deltaScan.scanned
+      )(
+        deltaScan.scannedSnapshot,
+        deltaScan.partitionFilters,
+        deltaScan.dataFilters,
+        deltaScan.unusedFilters,
+        deltaScan.scanDurationMs,
+        deltaScan.dataSkippingType
+      )
+    } else {
+      deltaScan
+    }
+  }
+  // --- modified end
+
+  logInfo(s"Created snapshot $this")
+  init()
+}
+
+object Snapshot extends DeltaLogging {
+
+  // Used by [[loadActions]] and [[stateReconstruction]]
+  val ADD_STATS_TO_USE_COL_NAME = "add_stats_to_use"
+
+  private val defaultNumSnapshotPartitions: Int = 50
+
+  /** Verifies that a set of delta or checkpoint files to be read actually belongs to this table. */
+  private def assertLogFilesBelongToTable(logBasePath: Path, files: Seq[FileStatus]): Unit = {
+    val logPath = new Path(logBasePath.toUri)
+    val commitDirPath = FileNames.commitDirPath(logPath)
+    files.map(_.getPath).foreach { filePath =>
+      val commitParent = new Path(filePath.toUri).getParent
+      if (commitParent != logPath && commitParent != commitDirPath) {
+        // scalastyle:off throwerror
+        throw new AssertionError(s"File ($filePath) doesn't belong in the " +
+          s"transaction log at $logBasePath.")
+        // scalastyle:on throwerror
+      }
+    }
+  }
+}
+
+/**
+ * An initial snapshot with only metadata specified. Useful for creating a DataFrame from an
+ * existing parquet table during its conversion to delta.
+ *
+ * @param logPath the path to transaction log
+ * @param deltaLog the delta log object
+ * @param metadata the metadata of the table
+ */
+class InitialSnapshot(
+    val logPath: Path,
+    override val deltaLog: DeltaLog,
+    override val metadata: Metadata)
+  extends Snapshot(
+    path = logPath,
+    version = -1,
+    logSegment = LogSegment.empty(logPath),
+    deltaLog = deltaLog,
+    checksumOpt = None
+  ) {
+
+  def this(logPath: Path, deltaLog: DeltaLog) = this(
+    logPath,
+    deltaLog,
+    Metadata(
+      configuration = DeltaConfigs.mergeGlobalConfigs(
+        sqlConfs = SparkSession.active.sessionState.conf,
+        tableConf = Map.empty,
+        ignoreProtocolConfsOpt = Some(
+          DeltaConfigs.ignoreProtocolDefaultsIsSet(
+            sqlConfs = SparkSession.active.sessionState.conf,
+            tableConf = deltaLog.allOptions))),
+      createdTime = Some(System.currentTimeMillis())))
+
+  override def stateDS: Dataset[SingleAction] = emptyDF.as[SingleAction]
+  override def stateDF: DataFrame = emptyDF
+  override protected lazy val computedState: SnapshotState = initialState(metadata)
+  override def protocol: Protocol = computedState.protocol
+  override protected lazy val getInCommitTimestampOpt: Option[Long] = None
+
+  // The [[InitialSnapshot]] is not backed by any external commit-owner.
+  override def initializeTableCommitOwner(): Option[TableCommitOwnerClient] = None
+  override def timestamp: Long = -1L
+}
diff --git a/backends-clickhouse/src/main/delta-32/org/apache/spark/sql/delta/catalog/ClickHouseTableV2.scala b/backends-clickhouse/src/main/delta-32/org/apache/spark/sql/delta/catalog/ClickHouseTableV2.scala
new file mode 100644
index 000000000000..8b4a13a30a69
--- /dev/null
+++ b/backends-clickhouse/src/main/delta-32/org/apache/spark/sql/delta/catalog/ClickHouseTableV2.scala
@@ -0,0 +1,220 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.delta.catalog
+
+import org.apache.spark.internal.Logging
+import org.apache.spark.sql.SparkSession
+import org.apache.spark.sql.catalyst.TableIdentifier
+import org.apache.spark.sql.catalyst.analysis.{ResolvedTable, UnresolvedTable}
+import org.apache.spark.sql.catalyst.catalog.CatalogTable
+import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression}
+import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
+import org.apache.spark.sql.connector.catalog.V1Table
+import org.apache.spark.sql.connector.read.InputPartition
+import org.apache.spark.sql.connector.write.{LogicalWriteInfo, WriteBuilder}
+import org.apache.spark.sql.delta.{ClickhouseSnapshot, DeltaErrors, DeltaLog, DeltaTableUtils, DeltaTimeTravelSpec, Snapshot, UnresolvedPathBasedDeltaTable}
+import org.apache.spark.sql.delta.actions.{Metadata, Protocol}
+import org.apache.spark.sql.delta.sources.DeltaDataSource
+import org.apache.spark.sql.execution.datasources.{HadoopFsRelation, PartitionDirectory}
+import org.apache.spark.sql.execution.datasources.utils.MergeTreePartsPartitionsUtil
+import org.apache.spark.sql.execution.datasources.v2.clickhouse.source.DeltaMergeTreeFileFormat
+import org.apache.spark.sql.execution.datasources.v2.clickhouse.utils.CHDataSourceUtils
+import org.apache.spark.sql.util.CaseInsensitiveStringMap
+import org.apache.spark.util.collection.BitSet
+
+import org.apache.hadoop.fs.Path
+
+import java.{util => ju}
+
+import scala.collection.JavaConverters._
+
+@SuppressWarnings(Array("io.github.zhztheplayer.scalawarts.InheritFromCaseClass"))
+class ClickHouseTableV2(
+    override val spark: SparkSession,
+    override val path: Path,
+    override val catalogTable: Option[CatalogTable] = None,
+    override val tableIdentifier: Option[String] = None,
+    override val timeTravelOpt: Option[DeltaTimeTravelSpec] = None,
+    override val options: Map[String, String] = Map.empty,
+    val clickhouseExtensionOptions: Map[String, String] = Map.empty)
+  extends DeltaTableV2(spark, path, catalogTable, tableIdentifier, timeTravelOpt, options)
+  with ClickHouseTableV2Base {
+
+  lazy val (rootPath, partitionFilters, timeTravelByPath) = {
+    if (catalogTable.isDefined) {
+      // Fast path for reducing path munging overhead
+      (new Path(catalogTable.get.location), Nil, None)
+    } else {
+      DeltaDataSource.parsePathIdentifier(spark, path.toString, options)
+    }
+  }
+
+  override def name(): String =
+    catalogTable
+      .map(_.identifier.unquotedString)
+      .orElse(tableIdentifier)
+      .getOrElse(s"clickhouse.`${deltaLog.dataPath}`")
+
+  private lazy val timeTravelSpec: Option[DeltaTimeTravelSpec] = {
+    if (timeTravelOpt.isDefined && timeTravelByPath.isDefined) {
+      throw DeltaErrors.multipleTimeTravelSyntaxUsed
+    }
+    timeTravelOpt.orElse(timeTravelByPath)
+  }
+
+  private lazy val caseInsensitiveOptions = new CaseInsensitiveStringMap(options.asJava)
+
+  override def properties(): ju.Map[String, String] = {
+    val ret = super.properties()
+
+    // for file path based write
+    if (initialSnapshot.version < 0 && clickhouseExtensionOptions.nonEmpty) {
+      ret.putAll(clickhouseExtensionOptions.asJava)
+    }
+    ret
+  }
+
+  override def newWriteBuilder(info: LogicalWriteInfo): WriteBuilder = {
+    new WriteIntoDeltaBuilder(
+      this,
+      info.options,
+      spark.sessionState.conf.useNullsForMissingDefaultColumnValues)
+  }
+
+  def getFileFormat(protocol: Protocol, meta: Metadata): DeltaMergeTreeFileFormat = {
+    new DeltaMergeTreeFileFormat(
+      protocol,
+      meta,
+      dataBaseName,
+      tableName,
+      ClickhouseSnapshot.genSnapshotId(initialSnapshot),
+      orderByKeyOption,
+      lowCardKeyOption,
+      minmaxIndexKeyOption,
+      bfIndexKeyOption,
+      setIndexKeyOption,
+      primaryKeyOption,
+      clickhouseTableConfigs,
+      partitionColumns
+    )
+  }
+
+  override def deltaProperties(): ju.Map[String, String] = properties()
+
+  override def deltaCatalog(): Option[CatalogTable] = catalogTable
+
+  override def deltaPath(): Path = path
+
+  override def deltaSnapshot(): Snapshot = initialSnapshot
+
+  def cacheThis(): Unit = {
+    ClickHouseTableV2.deltaLog2Table.put(deltaLog, this)
+  }
+
+  cacheThis()
+}
+
+@SuppressWarnings(Array("io.github.zhztheplayer.scalawarts.InheritFromCaseClass"))
+class TempClickHouseTableV2(
+    override val spark: SparkSession,
+    override val catalogTable: Option[CatalogTable] = None)
+  extends ClickHouseTableV2(spark, null, catalogTable) {
+  import collection.JavaConverters._
+  override def properties(): ju.Map[String, String] = catalogTable.get.properties.asJava
+  override lazy val partitionColumns: Seq[String] = catalogTable.get.partitionColumnNames
+  override def cacheThis(): Unit = {}
+}
+
+object ClickHouseTableV2 extends Logging {
+  private val deltaLog2Table =
+    new scala.collection.concurrent.TrieMap[DeltaLog, ClickHouseTableV2]()
+  // for CTAS use
+  val temporalThreadLocalCHTable = new ThreadLocal[ClickHouseTableV2]()
+
+  def getTable(deltaLog: DeltaLog): ClickHouseTableV2 = {
+    if (deltaLog2Table.contains(deltaLog)) {
+      deltaLog2Table(deltaLog)
+    } else if (temporalThreadLocalCHTable.get() != null) {
+      temporalThreadLocalCHTable.get()
+    } else {
+      throw new IllegalStateException(
+        s"Can not find ClickHouseTableV2 for deltalog ${deltaLog.dataPath}")
+    }
+  }
+
+  def clearCache(): Unit = {
+    deltaLog2Table.clear()
+    temporalThreadLocalCHTable.remove()
+  }
+
+  def partsPartitions(
+      deltaLog: DeltaLog,
+      relation: HadoopFsRelation,
+      selectedPartitions: Array[PartitionDirectory],
+      output: Seq[Attribute],
+      bucketedScan: Boolean,
+      optionalBucketSet: Option[BitSet],
+      optionalNumCoalescedBuckets: Option[Int],
+      disableBucketedScan: Boolean,
+      filterExprs: Seq[Expression]): Seq[InputPartition] = {
+    val tableV2 = ClickHouseTableV2.getTable(deltaLog)
+
+    MergeTreePartsPartitionsUtil.getMergeTreePartsPartitions(
+      relation,
+      selectedPartitions,
+      output,
+      bucketedScan,
+      tableV2.spark,
+      tableV2,
+      optionalBucketSet,
+      optionalNumCoalescedBuckets,
+      disableBucketedScan,
+      filterExprs)
+  }
+
+  /** Resolves a path into a DeltaTableV2, leveraging standard v2 table resolution. */
+  def apply(
+      spark: SparkSession,
+      tablePath: Path,
+      options: Map[String, String],
+      cmd: String): DeltaTableV2 =
+    resolve(spark, UnresolvedPathBasedDeltaTable(tablePath.toString, options, cmd), cmd)
+
+  /** Resolves a table identifier into a DeltaTableV2, leveraging standard v2 table resolution. */
+  def apply(spark: SparkSession, tableId: TableIdentifier, cmd: String): DeltaTableV2 = {
+    resolve(spark, UnresolvedTable(tableId.nameParts, cmd, None), cmd)
+  }
+
+  /** Applies standard v2 table resolution to an unresolved Delta table plan node */
+  def resolve(spark: SparkSession, unresolved: LogicalPlan, cmd: String): DeltaTableV2 =
+    extractFrom(spark.sessionState.analyzer.ResolveRelations(unresolved), cmd)
+
+  /**
+   * Extracts the DeltaTableV2 from a resolved Delta table plan node, throwing "table not found" if
+   * the node does not actually represent a resolved Delta table.
+   */
+  def extractFrom(plan: LogicalPlan, cmd: String): DeltaTableV2 = plan match {
+    case ResolvedTable(_, _, d: ClickHouseTableV2, _) => d
+    case ResolvedTable(_, _, d: DeltaTableV2, _) => d
+    case ResolvedTable(_, _, t: V1Table, _)
+        if CHDataSourceUtils.isClickHouseTable(t.catalogTable) =>
+      new ClickHouseTableV2(SparkSession.active, new Path(t.v1Table.location), Some(t.v1Table))
+    case ResolvedTable(_, _, t: V1Table, _) if DeltaTableUtils.isDeltaTable(t.catalogTable) =>
+      DeltaTableV2(SparkSession.active, new Path(t.v1Table.location), Some(t.v1Table))
+    case _ => throw DeltaErrors.notADeltaTableException(cmd)
+  }
+}
diff --git a/backends-clickhouse/src/main/delta-32/org/apache/spark/sql/delta/commands/DeleteCommand.scala b/backends-clickhouse/src/main/delta-32/org/apache/spark/sql/delta/commands/DeleteCommand.scala
new file mode 100644
index 000000000000..dec1f4b9c3f5
--- /dev/null
+++ b/backends-clickhouse/src/main/delta-32/org/apache/spark/sql/delta/commands/DeleteCommand.scala
@@ -0,0 +1,557 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.delta.commands
+
+import java.util.concurrent.TimeUnit
+
+import org.apache.spark.sql.delta.metric.IncrementMetric
+import org.apache.spark.sql.delta._
+import org.apache.spark.sql.delta.actions.{Action, AddCDCFile, AddFile, FileAction}
+import org.apache.spark.sql.delta.commands.DeleteCommand.{rewritingFilesMsg, FINDING_TOUCHED_FILES_MSG}
+import org.apache.spark.sql.delta.commands.MergeIntoCommandBase.totalBytesAndDistinctPartitionValues
+import org.apache.spark.sql.delta.files.TahoeBatchFileIndex
+import org.apache.spark.sql.delta.sources.DeltaSQLConf
+import com.fasterxml.jackson.databind.annotation.JsonDeserialize
+
+import org.apache.spark.SparkContext
+import org.apache.spark.sql.{Column, DataFrame, Dataset, Row, SparkSession}
+import org.apache.spark.sql.catalyst.analysis.EliminateSubqueryAliases
+import org.apache.spark.sql.catalyst.catalog.CatalogTable
+import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, EqualNullSafe, Expression, If, Literal, Not}
+import org.apache.spark.sql.catalyst.expressions.Literal.TrueLiteral
+import org.apache.spark.sql.catalyst.plans.QueryPlan
+import org.apache.spark.sql.catalyst.plans.logical.{DeltaDelete, LogicalPlan}
+import org.apache.spark.sql.execution.command.LeafRunnableCommand
+import org.apache.spark.sql.execution.metric.SQLMetric
+import org.apache.spark.sql.execution.metric.SQLMetrics.{createMetric, createTimingMetric}
+import org.apache.spark.sql.functions.{col, explode, input_file_name, split}
+import org.apache.spark.sql.types.LongType
+
+/**
+ * Gluten overwrite Delta:
+ *
+ * This file is copied from Delta 3.2.0. It is modified to overcome the following issues:
+ *   1. In Clickhouse backend, we can't implement input_file_name() correctly, we can only implement
+ *      it so that it return a a list of filenames (concated by ',').
+ */
+
+trait DeleteCommandMetrics { self: LeafRunnableCommand =>
+  @transient private lazy val sc: SparkContext = SparkContext.getOrCreate()
+
+  def createMetrics: Map[String, SQLMetric] = Map[String, SQLMetric](
+    "numRemovedFiles" -> createMetric(sc, "number of files removed."),
+    "numAddedFiles" -> createMetric(sc, "number of files added."),
+    "numDeletedRows" -> createMetric(sc, "number of rows deleted."),
+    "numFilesBeforeSkipping" -> createMetric(sc, "number of files before skipping"),
+    "numBytesBeforeSkipping" -> createMetric(sc, "number of bytes before skipping"),
+    "numFilesAfterSkipping" -> createMetric(sc, "number of files after skipping"),
+    "numBytesAfterSkipping" -> createMetric(sc, "number of bytes after skipping"),
+    "numPartitionsAfterSkipping" -> createMetric(sc, "number of partitions after skipping"),
+    "numPartitionsAddedTo" -> createMetric(sc, "number of partitions added"),
+    "numPartitionsRemovedFrom" -> createMetric(sc, "number of partitions removed"),
+    "numCopiedRows" -> createMetric(sc, "number of rows copied"),
+    "numAddedBytes" -> createMetric(sc, "number of bytes added"),
+    "numRemovedBytes" -> createMetric(sc, "number of bytes removed"),
+    "executionTimeMs" ->
+      createTimingMetric(sc, "time taken to execute the entire operation"),
+    "scanTimeMs" ->
+      createTimingMetric(sc, "time taken to scan the files for matches"),
+    "rewriteTimeMs" ->
+      createTimingMetric(sc, "time taken to rewrite the matched files"),
+    "numAddedChangeFiles" -> createMetric(sc, "number of change data capture files generated"),
+    "changeFileBytes" -> createMetric(sc, "total size of change data capture files generated"),
+    "numTouchedRows" -> createMetric(sc, "number of rows touched"),
+    "numDeletionVectorsAdded" -> createMetric(sc, "number of deletion vectors added"),
+    "numDeletionVectorsRemoved" -> createMetric(sc, "number of deletion vectors removed"),
+    "numDeletionVectorsUpdated" -> createMetric(sc, "number of deletion vectors updated")
+  )
+
+  def getDeletedRowsFromAddFilesAndUpdateMetrics(files: Seq[AddFile]) : Option[Long] = {
+    if (!conf.getConf(DeltaSQLConf.DELTA_DML_METRICS_FROM_METADATA)) {
+      return None;
+    }
+    // No file to get metadata, return none to be consistent with metadata stats disabled
+    if (files.isEmpty) {
+      return None
+    }
+    // Return None if any file does not contain numLogicalRecords status
+    var count: Long = 0
+    for (file <- files) {
+      if (file.numLogicalRecords.isEmpty) {
+        return None
+      }
+      count += file.numLogicalRecords.get
+    }
+    metrics("numDeletedRows").set(count)
+    return Some(count)
+  }
+}
+
+/**
+ * Performs a Delete based on the search condition
+ *
+ * Algorithm:
+ *   1) Scan all the files and determine which files have
+ *      the rows that need to be deleted.
+ *   2) Traverse the affected files and rebuild the touched files.
+ *   3) Use the Delta protocol to atomically write the remaining rows to new files and remove
+ *      the affected files that are identified in step 1.
+ */
+case class DeleteCommand(
+    deltaLog: DeltaLog,
+    catalogTable: Option[CatalogTable],
+    target: LogicalPlan,
+    condition: Option[Expression])
+  extends LeafRunnableCommand with DeltaCommand with DeleteCommandMetrics {
+
+  override def innerChildren: Seq[QueryPlan[_]] = Seq(target)
+
+  override val output: Seq[Attribute] = Seq(AttributeReference("num_affected_rows", LongType)())
+
+  override lazy val metrics = createMetrics
+
+  final override def run(sparkSession: SparkSession): Seq[Row] = {
+    recordDeltaOperation(deltaLog, "delta.dml.delete") {
+      deltaLog.withNewTransaction(catalogTable) { txn =>
+        DeltaLog.assertRemovable(txn.snapshot)
+        if (hasBeenExecuted(txn, sparkSession)) {
+          sendDriverMetrics(sparkSession, metrics)
+          return Seq.empty
+        }
+
+        val deleteActions = performDelete(sparkSession, deltaLog, txn)
+        txn.commitIfNeeded(actions = deleteActions,
+          op = DeltaOperations.Delete(condition.toSeq),
+          tags = RowTracking.addPreservedRowTrackingTagIfNotSet(txn.snapshot))
+      }
+      // Re-cache all cached plans(including this relation itself, if it's cached) that refer to
+      // this data source relation.
+      sparkSession.sharedState.cacheManager.recacheByPlan(sparkSession, target)
+    }
+
+    // Adjust for deletes at partition boundaries. Deletes at partition boundaries is a metadata
+    // operation, therefore we don't actually have any information around how many rows were deleted
+    // While this info may exist in the file statistics, it's not guaranteed that we have these
+    // statistics. To avoid any performance regressions, we currently just return a -1 in such cases
+    if (metrics("numRemovedFiles").value > 0 && metrics("numDeletedRows").value == 0) {
+      Seq(Row(-1L))
+    } else {
+      Seq(Row(metrics("numDeletedRows").value))
+    }
+  }
+
+  def performDelete(
+      sparkSession: SparkSession,
+      deltaLog: DeltaLog,
+      txn: OptimisticTransaction): Seq[Action] = {
+    import org.apache.spark.sql.delta.implicits._
+
+    var numRemovedFiles: Long = 0
+    var numAddedFiles: Long = 0
+    var numAddedChangeFiles: Long = 0
+    var scanTimeMs: Long = 0
+    var rewriteTimeMs: Long = 0
+    var numAddedBytes: Long = 0
+    var changeFileBytes: Long = 0
+    var numRemovedBytes: Long = 0
+    var numFilesBeforeSkipping: Long = 0
+    var numBytesBeforeSkipping: Long = 0
+    var numFilesAfterSkipping: Long = 0
+    var numBytesAfterSkipping: Long = 0
+    var numPartitionsAfterSkipping: Option[Long] = None
+    var numPartitionsRemovedFrom: Option[Long] = None
+    var numPartitionsAddedTo: Option[Long] = None
+    var numDeletedRows: Option[Long] = None
+    var numCopiedRows: Option[Long] = None
+    var numDeletionVectorsAdded: Long = 0
+    var numDeletionVectorsRemoved: Long = 0
+    var numDeletionVectorsUpdated: Long = 0
+
+    val startTime = System.nanoTime()
+    val numFilesTotal = txn.snapshot.numOfFiles
+
+    val deleteActions: Seq[Action] = condition match {
+      case None =>
+        // Case 1: Delete the whole table if the condition is true
+        val reportRowLevelMetrics = conf.getConf(DeltaSQLConf.DELTA_DML_METRICS_FROM_METADATA)
+        val allFiles = txn.filterFiles(Nil, keepNumRecords = reportRowLevelMetrics)
+
+        numRemovedFiles = allFiles.size
+        numDeletionVectorsRemoved = allFiles.count(_.deletionVector != null)
+        scanTimeMs = (System.nanoTime() - startTime) / 1000 / 1000
+        val (numBytes, numPartitions) = totalBytesAndDistinctPartitionValues(allFiles)
+        numRemovedBytes = numBytes
+        numFilesBeforeSkipping = numRemovedFiles
+        numBytesBeforeSkipping = numBytes
+        numFilesAfterSkipping = numRemovedFiles
+        numBytesAfterSkipping = numBytes
+        numDeletedRows = getDeletedRowsFromAddFilesAndUpdateMetrics(allFiles)
+
+        if (txn.metadata.partitionColumns.nonEmpty) {
+          numPartitionsAfterSkipping = Some(numPartitions)
+          numPartitionsRemovedFrom = Some(numPartitions)
+          numPartitionsAddedTo = Some(0)
+        }
+        val operationTimestamp = System.currentTimeMillis()
+        allFiles.map(_.removeWithTimestamp(operationTimestamp))
+      case Some(cond) =>
+        val (metadataPredicates, otherPredicates) =
+          DeltaTableUtils.splitMetadataAndDataPredicates(
+            cond, txn.metadata.partitionColumns, sparkSession)
+
+        numFilesBeforeSkipping = txn.snapshot.numOfFiles
+        numBytesBeforeSkipping = txn.snapshot.sizeInBytes
+
+        if (otherPredicates.isEmpty) {
+          // Case 2: The condition can be evaluated using metadata only.
+          //         Delete a set of files without the need of scanning any data files.
+          val operationTimestamp = System.currentTimeMillis()
+          val reportRowLevelMetrics = conf.getConf(DeltaSQLConf.DELTA_DML_METRICS_FROM_METADATA)
+          val candidateFiles =
+            txn.filterFiles(metadataPredicates, keepNumRecords = reportRowLevelMetrics)
+
+          scanTimeMs = (System.nanoTime() - startTime) / 1000 / 1000
+          numRemovedFiles = candidateFiles.size
+          numRemovedBytes = candidateFiles.map(_.size).sum
+          numFilesAfterSkipping = candidateFiles.size
+          numDeletionVectorsRemoved = candidateFiles.count(_.deletionVector != null)
+          val (numCandidateBytes, numCandidatePartitions) =
+            totalBytesAndDistinctPartitionValues(candidateFiles)
+          numBytesAfterSkipping = numCandidateBytes
+          numDeletedRows = getDeletedRowsFromAddFilesAndUpdateMetrics(candidateFiles)
+
+          if (txn.metadata.partitionColumns.nonEmpty) {
+            numPartitionsAfterSkipping = Some(numCandidatePartitions)
+            numPartitionsRemovedFrom = Some(numCandidatePartitions)
+            numPartitionsAddedTo = Some(0)
+          }
+          candidateFiles.map(_.removeWithTimestamp(operationTimestamp))
+        } else {
+          // Case 3: Delete the rows based on the condition.
+
+          // Should we write the DVs to represent the deleted rows?
+          val shouldWriteDVs = shouldWritePersistentDeletionVectors(sparkSession, txn)
+
+          val candidateFiles = txn.filterFiles(
+            metadataPredicates ++ otherPredicates,
+            keepNumRecords = shouldWriteDVs)
+          // `candidateFiles` contains the files filtered using statistics and delete condition
+          // They may or may not contains any rows that need to be deleted.
+
+          numFilesAfterSkipping = candidateFiles.size
+          val (numCandidateBytes, numCandidatePartitions) =
+            totalBytesAndDistinctPartitionValues(candidateFiles)
+          numBytesAfterSkipping = numCandidateBytes
+          if (txn.metadata.partitionColumns.nonEmpty) {
+            numPartitionsAfterSkipping = Some(numCandidatePartitions)
+          }
+
+          val nameToAddFileMap = generateCandidateFileMap(deltaLog.dataPath, candidateFiles)
+
+          val fileIndex = new TahoeBatchFileIndex(
+            sparkSession, "delete", candidateFiles, deltaLog, deltaLog.dataPath, txn.snapshot)
+          if (shouldWriteDVs) {
+            val targetDf = DMLWithDeletionVectorsHelper.createTargetDfForScanningForMatches(
+              sparkSession,
+              target,
+              fileIndex)
+
+            // Does the target table already has DVs enabled? If so, we need to read the table
+            // with deletion vectors.
+            val mustReadDeletionVectors = DeletionVectorUtils.deletionVectorsReadable(txn.snapshot)
+
+            val touchedFiles = DMLWithDeletionVectorsHelper.findTouchedFiles(
+              sparkSession,
+              txn,
+              mustReadDeletionVectors,
+              deltaLog,
+              targetDf,
+              fileIndex,
+              cond,
+              opName = "DELETE")
+
+            if (touchedFiles.nonEmpty) {
+              val (actions, metricMap) = DMLWithDeletionVectorsHelper.processUnmodifiedData(
+                sparkSession,
+                touchedFiles,
+                txn.snapshot)
+              metrics("numDeletedRows").set(metricMap("numModifiedRows"))
+              numDeletionVectorsAdded = metricMap("numDeletionVectorsAdded")
+              numDeletionVectorsRemoved = metricMap("numDeletionVectorsRemoved")
+              numDeletionVectorsUpdated = metricMap("numDeletionVectorsUpdated")
+              numRemovedFiles = metricMap("numRemovedFiles")
+              actions
+            } else {
+              Nil // Nothing to update
+            }
+          } else {
+            // Keep everything from the resolved target except a new TahoeFileIndex
+            // that only involves the affected files instead of all files.
+            val newTarget = DeltaTableUtils.replaceFileIndex(target, fileIndex)
+            val data = Dataset.ofRows(sparkSession, newTarget)
+            val incrDeletedCountExpr = IncrementMetric(TrueLiteral, metrics("numDeletedRows"))
+            val filesToRewrite =
+              withStatusCode("DELTA", FINDING_TOUCHED_FILES_MSG) {
+                if (candidateFiles.isEmpty) {
+                  Array.empty[String]
+                } else {
+                  // --- modified start
+                  data.filter(new Column(cond))
+                    .select(input_file_name().as("input_files"))
+                    .filter(new Column(incrDeletedCountExpr))
+                    .select(explode(split(col("input_files"), ",")))
+                    .distinct()
+                    .as[String]
+                    .collect()
+                  // --- modified end
+                }
+              }
+
+            numRemovedFiles = filesToRewrite.length
+            scanTimeMs = (System.nanoTime() - startTime) / 1000 / 1000
+            if (filesToRewrite.isEmpty) {
+              // Case 3.1: no row matches and no delete will be triggered
+              if (txn.metadata.partitionColumns.nonEmpty) {
+                numPartitionsRemovedFrom = Some(0)
+                numPartitionsAddedTo = Some(0)
+              }
+              Nil
+            } else {
+              // Case 3.2: some files need an update to remove the deleted files
+              // Do the second pass and just read the affected files
+              val baseRelation = buildBaseRelation(
+                sparkSession, txn, "delete", deltaLog.dataPath, filesToRewrite, nameToAddFileMap)
+              // Keep everything from the resolved target except a new TahoeFileIndex
+              // that only involves the affected files instead of all files.
+              val newTarget = DeltaTableUtils.replaceFileIndex(target, baseRelation.location)
+              val targetDF = RowTracking.preserveRowTrackingColumns(
+                dfWithoutRowTrackingColumns = Dataset.ofRows(sparkSession, newTarget),
+                snapshot = txn.snapshot)
+              val filterCond = Not(EqualNullSafe(cond, Literal.TrueLiteral))
+              val rewrittenActions = rewriteFiles(txn, targetDF, filterCond, filesToRewrite.length)
+              val (changeFiles, rewrittenFiles) = rewrittenActions
+                .partition(_.isInstanceOf[AddCDCFile])
+              numAddedFiles = rewrittenFiles.size
+              val removedFiles = filesToRewrite.map(f =>
+                getTouchedFile(deltaLog.dataPath, f, nameToAddFileMap))
+              val (removedBytes, removedPartitions) =
+                totalBytesAndDistinctPartitionValues(removedFiles)
+              numRemovedBytes = removedBytes
+              val (rewrittenBytes, rewrittenPartitions) =
+                totalBytesAndDistinctPartitionValues(rewrittenFiles)
+              numAddedBytes = rewrittenBytes
+              if (txn.metadata.partitionColumns.nonEmpty) {
+                numPartitionsRemovedFrom = Some(removedPartitions)
+                numPartitionsAddedTo = Some(rewrittenPartitions)
+              }
+              numAddedChangeFiles = changeFiles.size
+              changeFileBytes = changeFiles.collect { case f: AddCDCFile => f.size }.sum
+              rewriteTimeMs =
+                TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startTime) - scanTimeMs
+              numDeletedRows = Some(metrics("numDeletedRows").value)
+              numCopiedRows =
+                Some(metrics("numTouchedRows").value - metrics("numDeletedRows").value)
+              numDeletionVectorsRemoved = removedFiles.count(_.deletionVector != null)
+              val operationTimestamp = System.currentTimeMillis()
+              removeFilesFromPaths(
+                deltaLog, nameToAddFileMap, filesToRewrite, operationTimestamp) ++ rewrittenActions
+            }
+          }
+        }
+    }
+    metrics("numRemovedFiles").set(numRemovedFiles)
+    metrics("numAddedFiles").set(numAddedFiles)
+    val executionTimeMs = (System.nanoTime() - startTime) / 1000 / 1000
+    metrics("executionTimeMs").set(executionTimeMs)
+    metrics("scanTimeMs").set(scanTimeMs)
+    metrics("rewriteTimeMs").set(rewriteTimeMs)
+    metrics("numAddedChangeFiles").set(numAddedChangeFiles)
+    metrics("changeFileBytes").set(changeFileBytes)
+    metrics("numAddedBytes").set(numAddedBytes)
+    metrics("numRemovedBytes").set(numRemovedBytes)
+    metrics("numFilesBeforeSkipping").set(numFilesBeforeSkipping)
+    metrics("numBytesBeforeSkipping").set(numBytesBeforeSkipping)
+    metrics("numFilesAfterSkipping").set(numFilesAfterSkipping)
+    metrics("numBytesAfterSkipping").set(numBytesAfterSkipping)
+    metrics("numDeletionVectorsAdded").set(numDeletionVectorsAdded)
+    metrics("numDeletionVectorsRemoved").set(numDeletionVectorsRemoved)
+    metrics("numDeletionVectorsUpdated").set(numDeletionVectorsUpdated)
+    numPartitionsAfterSkipping.foreach(metrics("numPartitionsAfterSkipping").set)
+    numPartitionsAddedTo.foreach(metrics("numPartitionsAddedTo").set)
+    numPartitionsRemovedFrom.foreach(metrics("numPartitionsRemovedFrom").set)
+    numCopiedRows.foreach(metrics("numCopiedRows").set)
+    txn.registerSQLMetrics(sparkSession, metrics)
+    sendDriverMetrics(sparkSession, metrics)
+
+    recordDeltaEvent(
+      deltaLog,
+      "delta.dml.delete.stats",
+      data = DeleteMetric(
+        condition = condition.map(_.sql).getOrElse("true"),
+        numFilesTotal,
+        numFilesAfterSkipping,
+        numAddedFiles,
+        numRemovedFiles,
+        numAddedFiles,
+        numAddedChangeFiles = numAddedChangeFiles,
+        numFilesBeforeSkipping,
+        numBytesBeforeSkipping,
+        numFilesAfterSkipping,
+        numBytesAfterSkipping,
+        numPartitionsAfterSkipping,
+        numPartitionsAddedTo,
+        numPartitionsRemovedFrom,
+        numCopiedRows,
+        numDeletedRows,
+        numAddedBytes,
+        numRemovedBytes,
+        changeFileBytes = changeFileBytes,
+        scanTimeMs,
+        rewriteTimeMs,
+        numDeletionVectorsAdded,
+        numDeletionVectorsRemoved,
+        numDeletionVectorsUpdated)
+    )
+
+    if (deleteActions.nonEmpty) {
+      createSetTransaction(sparkSession, deltaLog).toSeq ++ deleteActions
+    } else {
+      Seq.empty
+    }
+  }
+
+  /**
+   * Returns the list of [[AddFile]]s and [[AddCDCFile]]s that have been re-written.
+   */
+  private def rewriteFiles(
+      txn: OptimisticTransaction,
+      baseData: DataFrame,
+      filterCondition: Expression,
+      numFilesToRewrite: Long): Seq[FileAction] = {
+    val shouldWriteCdc = DeltaConfigs.CHANGE_DATA_FEED.fromMetaData(txn.metadata)
+
+    // number of total rows that we have seen / are either copying or deleting (sum of both).
+    val incrTouchedCountExpr = IncrementMetric(TrueLiteral, metrics("numTouchedRows"))
+
+    withStatusCode(
+      "DELTA", rewritingFilesMsg(numFilesToRewrite)) {
+      val dfToWrite = if (shouldWriteCdc) {
+        import org.apache.spark.sql.delta.commands.cdc.CDCReader._
+        // The logic here ends up being surprisingly elegant, with all source rows ending up in
+        // the output. Recall that we flipped the user-provided delete condition earlier, before the
+        // call to `rewriteFiles`. All rows which match this latest `filterCondition` are retained
+        // as table data, while all rows which don't match are removed from the rewritten table data
+        // but do get included in the output as CDC events.
+        baseData
+          .filter(new Column(incrTouchedCountExpr))
+          .withColumn(
+            CDC_TYPE_COLUMN_NAME,
+            new Column(If(filterCondition, CDC_TYPE_NOT_CDC, CDC_TYPE_DELETE))
+          )
+      } else {
+        baseData
+          .filter(new Column(incrTouchedCountExpr))
+          .filter(new Column(filterCondition))
+      }
+
+      txn.writeFiles(dfToWrite)
+    }
+  }
+
+  def shouldWritePersistentDeletionVectors(
+      spark: SparkSession, txn: OptimisticTransaction): Boolean = {
+    spark.conf.get(DeltaSQLConf.DELETE_USE_PERSISTENT_DELETION_VECTORS) &&
+      DeletionVectorUtils.deletionVectorsWritable(txn.snapshot)
+  }
+}
+
+object DeleteCommand {
+  def apply(delete: DeltaDelete): DeleteCommand = {
+    EliminateSubqueryAliases(delete.child) match {
+      case DeltaFullTable(relation, fileIndex) =>
+        DeleteCommand(fileIndex.deltaLog, relation.catalogTable, delete.child, delete.condition)
+      case o =>
+        throw DeltaErrors.notADeltaSourceException("DELETE", Some(o))
+    }
+  }
+
+  val FILE_NAME_COLUMN: String = "_input_file_name_"
+  val FINDING_TOUCHED_FILES_MSG: String = "Finding files to rewrite for DELETE operation"
+
+  def rewritingFilesMsg(numFilesToRewrite: Long): String =
+    s"Rewriting $numFilesToRewrite files for DELETE operation"
+}
+
+/**
+ * Used to report details about delete.
+ *
+ * @param condition: what was the delete condition
+ * @param numFilesTotal: how big is the table
+ * @param numTouchedFiles: how many files did we touch. Alias for `numFilesAfterSkipping`
+ * @param numRewrittenFiles: how many files had to be rewritten. Alias for `numAddedFiles`
+ * @param numRemovedFiles: how many files we removed. Alias for `numTouchedFiles`
+ * @param numAddedFiles: how many files we added. Alias for `numRewrittenFiles`
+ * @param numAddedChangeFiles: how many change files were generated
+ * @param numFilesBeforeSkipping: how many candidate files before skipping
+ * @param numBytesBeforeSkipping: how many candidate bytes before skipping
+ * @param numFilesAfterSkipping: how many candidate files after skipping
+ * @param numBytesAfterSkipping: how many candidate bytes after skipping
+ * @param numPartitionsAfterSkipping: how many candidate partitions after skipping
+ * @param numPartitionsAddedTo: how many new partitions were added
+ * @param numPartitionsRemovedFrom: how many partitions were removed
+ * @param numCopiedRows: how many rows were copied
+ * @param numDeletedRows: how many rows were deleted
+ * @param numBytesAdded: how many bytes were added
+ * @param numBytesRemoved: how many bytes were removed
+ * @param changeFileBytes: total size of change files generated
+ * @param scanTimeMs: how long did finding take
+ * @param rewriteTimeMs: how long did rewriting take
+ * @param numDeletionVectorsAdded: how many deletion vectors were added
+ * @param numDeletionVectorsRemoved: how many deletion vectors were removed
+ * @param numDeletionVectorsUpdated: how many deletion vectors were updated
+ *
+ * @note All the time units are milliseconds.
+ */
+case class DeleteMetric(
+    condition: String,
+    numFilesTotal: Long,
+    numTouchedFiles: Long,
+    numRewrittenFiles: Long,
+    numRemovedFiles: Long,
+    numAddedFiles: Long,
+    numAddedChangeFiles: Long,
+    numFilesBeforeSkipping: Long,
+    numBytesBeforeSkipping: Long,
+    numFilesAfterSkipping: Long,
+    numBytesAfterSkipping: Long,
+    numPartitionsAfterSkipping: Option[Long],
+    numPartitionsAddedTo: Option[Long],
+    numPartitionsRemovedFrom: Option[Long],
+    @JsonDeserialize(contentAs = classOf[java.lang.Long])
+    numCopiedRows: Option[Long],
+    @JsonDeserialize(contentAs = classOf[java.lang.Long])
+    numDeletedRows: Option[Long],
+    numBytesAdded: Long,
+    numBytesRemoved: Long,
+    changeFileBytes: Long,
+    scanTimeMs: Long,
+    rewriteTimeMs: Long,
+    numDeletionVectorsAdded: Long,
+    numDeletionVectorsRemoved: Long,
+    numDeletionVectorsUpdated: Long
+)
diff --git a/backends-clickhouse/src/main/delta-32/org/apache/spark/sql/delta/commands/OptimizeTableCommand.scala b/backends-clickhouse/src/main/delta-32/org/apache/spark/sql/delta/commands/OptimizeTableCommand.scala
new file mode 100644
index 000000000000..5b2170220228
--- /dev/null
+++ b/backends-clickhouse/src/main/delta-32/org/apache/spark/sql/delta/commands/OptimizeTableCommand.scala
@@ -0,0 +1,608 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.delta.commands
+
+import java.util.ConcurrentModificationException
+
+import scala.collection.mutable.ArrayBuffer
+
+import org.apache.spark.sql.delta.skipping.MultiDimClustering
+import org.apache.spark.sql.delta.skipping.clustering.{ClusteredTableUtils, ClusteringColumnInfo}
+import org.apache.spark.sql.delta._
+import org.apache.spark.sql.delta.DeltaOperations.Operation
+import org.apache.spark.sql.delta.actions.{Action, AddFile, DeletionVectorDescriptor, FileAction, RemoveFile}
+import org.apache.spark.sql.delta.commands.optimize._
+import org.apache.spark.sql.delta.files.SQLMetricsReporting
+import org.apache.spark.sql.delta.schema.SchemaUtils
+import org.apache.spark.sql.delta.sources.DeltaSQLConf
+
+import org.apache.spark.SparkContext
+import org.apache.spark.SparkContext.SPARK_JOB_GROUP_ID
+import org.apache.spark.sql.{AnalysisException, Encoders, Row, SparkSession}
+import org.apache.spark.sql.catalyst.TableIdentifier
+import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute
+import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, Expression}
+import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, UnaryNode}
+import org.apache.spark.sql.execution.command.RunnableCommand
+import org.apache.spark.sql.execution.datasources.v2.clickhouse.ClickHouseConfig
+import org.apache.spark.sql.execution.datasources.v2.clickhouse.metadata.AddMergeTreeParts
+import org.apache.spark.sql.execution.metric.SQLMetric
+import org.apache.spark.sql.execution.metric.SQLMetrics.createMetric
+import org.apache.spark.sql.types._
+import org.apache.spark.util.{SystemClock, ThreadUtils}
+
+/**
+ * Gluten overwrite Delta:
+ *
+ * This file is copied from Delta 3.2.0. It is modified in:
+ *   1. getDeltaTable supports to get ClickHouseTableV2
+ *   2. runOptimizeBinJobClickhouse
+ *   3. groupFilesIntoBinsClickhouse
+ */
+
+/** Base class defining abstract optimize command */
+abstract class OptimizeTableCommandBase extends RunnableCommand with DeltaCommand {
+
+  override val output: Seq[Attribute] = Seq(
+    AttributeReference("path", StringType)(),
+    AttributeReference("metrics", Encoders.product[OptimizeMetrics].schema)())
+
+  /**
+   * Validates ZOrderBy columns
+   * - validates that partitions columns are not used in `unresolvedZOrderByCols`
+   * - validates that we already collect stats for all the columns used in `unresolvedZOrderByCols`
+   *
+   * @param spark [[SparkSession]] to use
+   * @param txn the [[OptimisticTransaction]] being used to optimize
+   * @param unresolvedZOrderByCols Seq of [[UnresolvedAttribute]] corresponding to zOrderBy columns
+   */
+  def validateZorderByColumns(
+      spark: SparkSession,
+      txn: OptimisticTransaction,
+      unresolvedZOrderByCols: Seq[UnresolvedAttribute]): Unit = {
+    if (unresolvedZOrderByCols.isEmpty) return
+    val metadata = txn.snapshot.metadata
+    val partitionColumns = metadata.partitionColumns.toSet
+    val dataSchema =
+      StructType(metadata.schema.filterNot(c => partitionColumns.contains(c.name)))
+    val df = spark.createDataFrame(new java.util.ArrayList[Row](), dataSchema)
+    val checkColStat = spark.sessionState.conf.getConf(
+      DeltaSQLConf.DELTA_OPTIMIZE_ZORDER_COL_STAT_CHECK)
+    val statCollectionSchema = txn.snapshot.statCollectionLogicalSchema
+    val colsWithoutStats = ArrayBuffer[String]()
+
+    unresolvedZOrderByCols.foreach { colAttribute =>
+      val colName = colAttribute.name
+      if (checkColStat) {
+        try {
+          SchemaUtils.findColumnPosition(colAttribute.nameParts, statCollectionSchema)
+        } catch {
+          case e: AnalysisException if e.getMessage.contains("Couldn't find column") =>
+            colsWithoutStats.append(colName)
+        }
+      }
+      val isNameEqual = spark.sessionState.conf.resolver
+      if (partitionColumns.find(isNameEqual(_, colName)).nonEmpty) {
+        throw DeltaErrors.zOrderingOnPartitionColumnException(colName)
+      }
+      if (df.queryExecution.analyzed.resolve(colAttribute.nameParts, isNameEqual).isEmpty) {
+        throw DeltaErrors.zOrderingColumnDoesNotExistException(colName)
+      }
+    }
+    if (checkColStat && colsWithoutStats.nonEmpty) {
+      throw DeltaErrors.zOrderingOnColumnWithNoStatsException(
+        colsWithoutStats.toSeq, spark)
+    }
+  }
+}
+
+object OptimizeTableCommand {
+  /**
+   * Alternate constructor that converts a provided path or table identifier into the
+   * correct child LogicalPlan node. If both path and tableIdentifier are specified (or
+   * if both are None), this method will throw an exception. If a table identifier is
+   * specified, the child LogicalPlan will be an [[UnresolvedTable]] whereas if a path
+   * is specified, it will be an [[UnresolvedPathBasedDeltaTable]].
+   *
+   * Note that the returned OptimizeTableCommand will have an *unresolved* child table
+   * and hence, the command needs to be analyzed before it can be executed.
+   */
+  def apply(
+      path: Option[String],
+      tableIdentifier: Option[TableIdentifier],
+      userPartitionPredicates: Seq[String],
+      optimizeContext: DeltaOptimizeContext = DeltaOptimizeContext())(
+      zOrderBy: Seq[UnresolvedAttribute]): OptimizeTableCommand = {
+    val plan = UnresolvedDeltaPathOrIdentifier(path, tableIdentifier, "OPTIMIZE")
+    OptimizeTableCommand(plan, userPartitionPredicates, optimizeContext)(zOrderBy)
+  }
+}
+
+/**
+ * The `optimize` command implementation for Spark SQL. Example SQL:
+ * {{{
+ *    OPTIMIZE ('/path/to/dir' | delta.table) [WHERE part = 25];
+ * }}}
+ */
+case class OptimizeTableCommand(
+    override val child: LogicalPlan,
+    userPartitionPredicates: Seq[String],
+    optimizeContext: DeltaOptimizeContext
+)(val zOrderBy: Seq[UnresolvedAttribute])
+  extends OptimizeTableCommandBase with RunnableCommand with UnaryNode {
+
+  override val otherCopyArgs: Seq[AnyRef] = zOrderBy :: Nil
+
+  override protected def withNewChildInternal(newChild: LogicalPlan): OptimizeTableCommand =
+    copy(child = newChild)(zOrderBy)
+
+  override def run(sparkSession: SparkSession): Seq[Row] = {
+    // --- modified start
+    val table = OptimizeTableCommandOverwrites.getDeltaTable(child, "OPTIMIZE")
+    // --- modified end
+
+    val txn = table.startTransaction()
+    if (txn.readVersion == -1) {
+      throw DeltaErrors.notADeltaTableException(table.deltaLog.dataPath.toString)
+    }
+
+    if (ClusteredTableUtils.isSupported(txn.protocol)) {
+      if (userPartitionPredicates.nonEmpty) {
+        throw DeltaErrors.clusteringWithPartitionPredicatesException(userPartitionPredicates)
+      }
+      if (zOrderBy.nonEmpty) {
+        throw DeltaErrors.clusteringWithZOrderByException(zOrderBy)
+      }
+    }
+
+    val partitionColumns = txn.snapshot.metadata.partitionColumns
+    // Parse the predicate expression into Catalyst expression and verify only simple filters
+    // on partition columns are present
+
+    val partitionPredicates = userPartitionPredicates.flatMap { predicate =>
+        val predicates = parsePredicates(sparkSession, predicate)
+        verifyPartitionPredicates(
+          sparkSession,
+          partitionColumns,
+          predicates)
+        predicates
+    }
+
+    validateZorderByColumns(sparkSession, txn, zOrderBy)
+    val zOrderByColumns = zOrderBy.map(_.name).toSeq
+
+    new OptimizeExecutor(
+      sparkSession,
+      txn,
+      partitionPredicates,
+      zOrderByColumns,
+      isAutoCompact = false,
+      optimizeContext
+    ).optimize()
+  }
+}
+
+/**
+ * Stored all runtime context information that can control the execution of optimize.
+ *
+ * @param reorg The REORG operation that triggered the rewriting task, if any.
+ * @param minFileSize Files which are smaller than this threshold will be selected for compaction.
+ *                    If not specified, [[DeltaSQLConf.DELTA_OPTIMIZE_MIN_FILE_SIZE]] will be used.
+ *                    This parameter must be set to `0` when [[reorg]] is set.
+ * @param maxDeletedRowsRatio Files with a ratio of soft-deleted rows to the total rows larger than
+ *                            this threshold will be rewritten by the OPTIMIZE command. If not
+ *                            specified, [[DeltaSQLConf.DELTA_OPTIMIZE_MAX_DELETED_ROWS_RATIO]]
+ *                            will be used. This parameter must be set to `0` when [[reorg]] is set.
+ */
+case class DeltaOptimizeContext(
+    reorg: Option[DeltaReorgOperation] = None,
+    minFileSize: Option[Long] = None,
+    maxFileSize: Option[Long] = None,
+    maxDeletedRowsRatio: Option[Double] = None) {
+  if (reorg.nonEmpty) {
+    require(
+      minFileSize.contains(0L) && maxDeletedRowsRatio.contains(0d),
+      "minFileSize and maxDeletedRowsRatio must be 0 when running REORG TABLE.")
+  }
+}
+
+/**
+ * Optimize job which compacts small files into larger files to reduce
+ * the number of files and potentially allow more efficient reads.
+ *
+ * @param sparkSession Spark environment reference.
+ * @param txn The transaction used to optimize this table
+ * @param partitionPredicate List of partition predicates to select subset of files to optimize.
+ */
+class OptimizeExecutor(
+    sparkSession: SparkSession,
+    txn: OptimisticTransaction,
+    partitionPredicate: Seq[Expression],
+    zOrderByColumns: Seq[String],
+    isAutoCompact: Boolean,
+    optimizeContext: DeltaOptimizeContext)
+  extends DeltaCommand with SQLMetricsReporting with Serializable {
+
+  /**
+   * In which mode the Optimize command is running. There are three valid modes:
+   * 1. Compaction
+   * 2. ZOrder
+   * 3. Clustering
+   */
+  private val optimizeStrategy =
+    OptimizeTableStrategy(sparkSession, txn.snapshot, optimizeContext, zOrderByColumns)
+
+  /** Timestamp to use in [[FileAction]] */
+  private val operationTimestamp = new SystemClock().getTimeMillis()
+
+  private val isClusteredTable = ClusteredTableUtils.isSupported(txn.snapshot.protocol)
+
+  private val isMultiDimClustering =
+    optimizeStrategy.isInstanceOf[ClusteringStrategy] ||
+    optimizeStrategy.isInstanceOf[ZOrderStrategy]
+
+  private val clusteringColumns: Seq[String] = {
+    if (zOrderByColumns.nonEmpty) {
+      zOrderByColumns
+    } else if (isClusteredTable) {
+      ClusteringColumnInfo.extractLogicalNames(txn.snapshot)
+    } else {
+      Nil
+    }
+  }
+
+  def optimize(): Seq[Row] = {
+    recordDeltaOperation(txn.deltaLog, "delta.optimize") {
+
+      // --- modified start
+      val isMergeTreeFormat = ClickHouseConfig
+        .isMergeTreeFormatEngine(txn.deltaLog.unsafeVolatileMetadata.configuration)
+      // --- modified end
+
+      val minFileSize = optimizeContext.minFileSize.getOrElse(
+        sparkSession.sessionState.conf.getConf(DeltaSQLConf.DELTA_OPTIMIZE_MIN_FILE_SIZE))
+      val maxFileSize = optimizeContext.maxFileSize.getOrElse(
+        sparkSession.sessionState.conf.getConf(DeltaSQLConf.DELTA_OPTIMIZE_MAX_FILE_SIZE))
+      val maxDeletedRowsRatio = optimizeContext.maxDeletedRowsRatio.getOrElse(
+        sparkSession.sessionState.conf.getConf(DeltaSQLConf.DELTA_OPTIMIZE_MAX_DELETED_ROWS_RATIO))
+
+      val candidateFiles = txn.filterFiles(partitionPredicate, keepNumRecords = true)
+      val partitionSchema = txn.metadata.partitionSchema
+
+      val filesToProcess = optimizeContext.reorg match {
+        case Some(reorgOperation) => reorgOperation.filterFilesToReorg(txn.snapshot, candidateFiles)
+        case None => filterCandidateFileList(minFileSize, maxDeletedRowsRatio, candidateFiles)
+      }
+      // --- modified start
+      val maxThreads =
+        sparkSession.sessionState.conf.getConf(DeltaSQLConf.DELTA_OPTIMIZE_MAX_THREADS)
+      val (updates, jobs) = if (isMergeTreeFormat) {
+        val partitionsToCompact = filesToProcess
+          .groupBy(file => (file.asInstanceOf[AddMergeTreeParts].bucketNum, file.partitionValues))
+          .toSeq
+        val jobs = OptimizeTableCommandOverwrites
+          .groupFilesIntoBinsClickhouse(partitionsToCompact, maxFileSize)
+        val updates = ThreadUtils.parmap(jobs, "OptimizeJob", maxThreads) {
+          partitionBinGroup =>
+          // --- modified start
+            OptimizeTableCommandOverwrites.runOptimizeBinJobClickhouse(
+            txn,
+            partitionBinGroup._1._2,
+            partitionBinGroup._1._1,
+            partitionBinGroup._2,
+            maxFileSize)
+          // --- modified end
+        }.flatten
+        // uniform the jobs type
+        (updates, jobs.map(v => (v._1._2 ++ Map("bucketNum" -> v._1.toString()), v._2)))
+      } else {
+        val partitionsToCompact = filesToProcess.groupBy(_.partitionValues).toSeq
+
+        val jobs = groupFilesIntoBins(partitionsToCompact)
+
+        val updates = ThreadUtils.parmap(jobs, "OptimizeJob", maxThreads) { partitionBinGroup =>
+          runOptimizeBinJob(txn, partitionBinGroup._1, partitionBinGroup._2, maxFileSize)
+        }.flatten
+        (updates, jobs)
+      }
+      // --- modified end
+
+      val addedFiles = updates.collect { case a: AddFile => a }
+      val removedFiles = updates.collect { case r: RemoveFile => r }
+      val removedDVs = filesToProcess.filter(_.deletionVector != null).map(_.deletionVector).toSeq
+      if (addedFiles.size > 0) {
+        val metrics = createMetrics(sparkSession.sparkContext, addedFiles, removedFiles, removedDVs)
+        commitAndRetry(txn, getOperation(), updates, metrics) { newTxn =>
+          val newPartitionSchema = newTxn.metadata.partitionSchema
+          val candidateSetOld = candidateFiles.map(_.path).toSet
+          val candidateSetNew = newTxn.filterFiles(partitionPredicate).map(_.path).toSet
+
+          // As long as all of the files that we compacted are still part of the table,
+          // and the partitioning has not changed it is valid to continue to try
+          // and commit this checkpoint.
+          if (candidateSetOld.subsetOf(candidateSetNew) && partitionSchema == newPartitionSchema) {
+            true
+          } else {
+            val deleted = candidateSetOld -- candidateSetNew
+            logWarning(s"The following compacted files were delete " +
+              s"during checkpoint ${deleted.mkString(",")}. Aborting the compaction.")
+            false
+          }
+        }
+      }
+
+      val optimizeStats = OptimizeStats()
+      optimizeStats.addedFilesSizeStats.merge(addedFiles)
+      optimizeStats.removedFilesSizeStats.merge(removedFiles)
+      optimizeStats.numPartitionsOptimized = jobs.map(j => j._1).distinct.size
+      optimizeStats.numBatches = jobs.size
+      optimizeStats.totalConsideredFiles = candidateFiles.size
+      optimizeStats.totalFilesSkipped = optimizeStats.totalConsideredFiles - removedFiles.size
+      optimizeStats.totalClusterParallelism = sparkSession.sparkContext.defaultParallelism
+      val numTableColumns = txn.snapshot.metadata.schema.size
+      optimizeStats.numTableColumns = numTableColumns
+      optimizeStats.numTableColumnsWithStats =
+        DeltaConfigs.DATA_SKIPPING_NUM_INDEXED_COLS.fromMetaData(txn.snapshot.metadata)
+          .min(numTableColumns)
+      if (removedDVs.size > 0) {
+        optimizeStats.deletionVectorStats = Some(DeletionVectorStats(
+          numDeletionVectorsRemoved = removedDVs.size,
+          numDeletionVectorRowsRemoved = removedDVs.map(_.cardinality).sum))
+      }
+
+      optimizeStrategy.updateOptimizeStats(optimizeStats, removedFiles, jobs)
+
+      return Seq(Row(txn.deltaLog.dataPath.toString, optimizeStats.toOptimizeMetrics))
+    }
+  }
+
+  /**
+   * Helper method to prune the list of selected files based on fileSize and ratio of
+   * deleted rows according to the deletion vector in [[AddFile]].
+   */
+  private def filterCandidateFileList(
+      minFileSize: Long, maxDeletedRowsRatio: Double, files: Seq[AddFile]): Seq[AddFile] = {
+
+    // Select all files in case of multi-dimensional clustering
+    if (isMultiDimClustering) return files
+
+    def shouldCompactBecauseOfDeletedRows(file: AddFile): Boolean = {
+      // Always compact files with DVs but without numRecords stats.
+      // This may be overly aggressive, but it fixes the problem in the long-term,
+      // as the compacted files will have stats.
+      (file.deletionVector != null && file.numPhysicalRecords.isEmpty) ||
+          file.deletedToPhysicalRecordsRatio.getOrElse(0d) > maxDeletedRowsRatio
+    }
+
+    // Select files that are small or have too many deleted rows
+    files.filter(
+      addFile => addFile.size < minFileSize || shouldCompactBecauseOfDeletedRows(addFile))
+  }
+
+  /**
+   * Utility methods to group files into bins for optimize.
+   *
+   * @param partitionsToCompact List of files to compact group by partition.
+   *                            Partition is defined by the partition values (partCol -> partValue)
+   * @return Sequence of bins. Each bin contains one or more files from the same
+   *         partition and targeted for one output file.
+   */
+  private def groupFilesIntoBins(
+      partitionsToCompact: Seq[(Map[String, String], Seq[AddFile])])
+  : Seq[(Map[String, String], Seq[AddFile])] = {
+    val maxBinSize = optimizeStrategy.maxBinSize
+    partitionsToCompact.flatMap {
+      case (partition, files) =>
+        val bins = new ArrayBuffer[Seq[AddFile]]()
+
+        val currentBin = new ArrayBuffer[AddFile]()
+        var currentBinSize = 0L
+
+        val preparedFiles = optimizeStrategy.prepareFilesPerPartition(files)
+        preparedFiles.foreach { file =>
+          // Generally, a bin is a group of existing files, whose total size does not exceed the
+          // desired maxBinSize. The output file size depends on the mode:
+          // 1. Compaction: Files in a bin will be coalesced into a single output file.
+          // 2. ZOrder:  all files in a partition will be read by the
+          //    same job, the data will be range-partitioned and
+          //    numFiles = totalFileSize / maxFileSize will be produced.
+          // 3. Clustering: Files in a bin belongs to one ZCUBE, the data will be
+          //    range-partitioned and numFiles = totalFileSize / maxFileSize.
+          if (file.size + currentBinSize > maxBinSize) {
+            bins += currentBin.toVector
+            currentBin.clear()
+            currentBin += file
+            currentBinSize = file.size
+          } else {
+            currentBin += file
+            currentBinSize += file.size
+          }
+        }
+
+        if (currentBin.nonEmpty) {
+          bins += currentBin.toVector
+        }
+
+        bins.filter { bin =>
+          bin.size > 1 || // bin has more than one file or
+          bin.size == 1 && optimizeContext.reorg.nonEmpty || // always rewrite files during reorg
+          isMultiDimClustering // multi-clustering
+        }.map(b => (partition, b))
+    }
+  }
+
+  /**
+   * Utility method to run a Spark job to compact the files in given bin
+   *
+   * @param txn [[OptimisticTransaction]] instance in use to commit the changes to DeltaLog.
+   * @param partition Partition values of the partition that files in [[bin]] belongs to.
+   * @param bin List of files to compact into one large file.
+   * @param maxFileSize Targeted output file size in bytes
+   */
+  private def runOptimizeBinJob(
+      txn: OptimisticTransaction,
+      partition: Map[String, String],
+      bin: Seq[AddFile],
+      maxFileSize: Long): Seq[FileAction] = {
+    val baseTablePath = txn.deltaLog.dataPath
+
+    var input = txn.deltaLog.createDataFrame(txn.snapshot, bin, actionTypeOpt = Some("Optimize"))
+    input = RowTracking.preserveRowTrackingColumns(input, txn.snapshot)
+    val repartitionDF = if (isMultiDimClustering) {
+      val totalSize = bin.map(_.size).sum
+      val approxNumFiles = Math.max(1, totalSize / maxFileSize).toInt
+      MultiDimClustering.cluster(
+        input,
+        approxNumFiles,
+        clusteringColumns,
+        optimizeStrategy.curve)
+    } else {
+      val useRepartition = sparkSession.sessionState.conf.getConf(
+        DeltaSQLConf.DELTA_OPTIMIZE_REPARTITION_ENABLED)
+      if (useRepartition) {
+        input.repartition(numPartitions = 1)
+      } else {
+        input.coalesce(numPartitions = 1)
+      }
+    }
+
+    val partitionDesc = partition.toSeq.map(entry => entry._1 + "=" + entry._2).mkString(",")
+
+    val partitionName = if (partition.isEmpty) "" else s" in partition ($partitionDesc)"
+    val description = s"$baseTablePath<br/>Optimizing ${bin.size} files" + partitionName
+    sparkSession.sparkContext.setJobGroup(
+      sparkSession.sparkContext.getLocalProperty(SPARK_JOB_GROUP_ID),
+      description)
+
+    val binInfo = optimizeStrategy.initNewBin
+    val addFiles = txn.writeFiles(repartitionDF, None, isOptimize = true, Nil).collect {
+      case a: AddFile => optimizeStrategy.tagAddFile(a, binInfo)
+      case other =>
+        throw new IllegalStateException(
+          s"Unexpected action $other with type ${other.getClass}. File compaction job output" +
+              s"should only have AddFiles")
+    }
+    val removeFiles = bin.map(f => f.removeWithTimestamp(operationTimestamp, dataChange = false))
+    val updates = addFiles ++ removeFiles
+    updates
+  }
+
+  /**
+   * Attempts to commit the given actions to the log. In the case of a concurrent update,
+   * the given function will be invoked with a new transaction to allow custom conflict
+   * detection logic to indicate it is safe to try again, by returning `true`.
+   *
+   * This function will continue to try to commit to the log as long as `f` returns `true`,
+   * otherwise throws a subclass of [[ConcurrentModificationException]].
+   */
+  private def commitAndRetry(
+      txn: OptimisticTransaction,
+      optimizeOperation: Operation,
+      actions: Seq[Action],
+      metrics: Map[String, SQLMetric])(f: OptimisticTransaction => Boolean): Unit = {
+    try {
+      txn.registerSQLMetrics(sparkSession, metrics)
+      txn.commit(actions, optimizeOperation,
+        RowTracking.addPreservedRowTrackingTagIfNotSet(txn.snapshot))
+    } catch {
+      case e: ConcurrentModificationException =>
+        val newTxn = txn.deltaLog.startTransaction(txn.catalogTable)
+        if (f(newTxn)) {
+          logInfo("Retrying commit after checking for semantic conflicts with concurrent updates.")
+          commitAndRetry(newTxn, optimizeOperation, actions, metrics)(f)
+        } else {
+          logWarning("Semantic conflicts detected. Aborting operation.")
+          throw e
+        }
+    }
+  }
+
+  /** Create the appropriate [[Operation]] object for txn commit history */
+  private def getOperation(): Operation = {
+    if (optimizeContext.reorg.nonEmpty) {
+      DeltaOperations.Reorg(partitionPredicate)
+    } else {
+      DeltaOperations.Optimize(
+        predicate = partitionPredicate,
+        zOrderBy = zOrderByColumns,
+        auto = isAutoCompact,
+        clusterBy = if (isClusteredTable) Option(clusteringColumns).filter(_.nonEmpty) else None)
+    }
+  }
+
+  /** Create a map of SQL metrics for adding to the commit history. */
+  private def createMetrics(
+      sparkContext: SparkContext,
+      addedFiles: Seq[AddFile],
+      removedFiles: Seq[RemoveFile],
+      removedDVs: Seq[DeletionVectorDescriptor]): Map[String, SQLMetric] = {
+
+    def setAndReturnMetric(description: String, value: Long) = {
+      val metric = createMetric(sparkContext, description)
+      metric.set(value)
+      metric
+    }
+
+    def totalSize(actions: Seq[FileAction]): Long = {
+      var totalSize = 0L
+      actions.foreach { file =>
+        val fileSize = file match {
+          case addFile: AddFile => addFile.size
+          case removeFile: RemoveFile => removeFile.size.getOrElse(0L)
+          case default =>
+            throw new IllegalArgumentException(s"Unknown FileAction type: ${default.getClass}")
+        }
+        totalSize += fileSize
+      }
+      totalSize
+    }
+
+    val (deletionVectorRowsRemoved, deletionVectorBytesRemoved) =
+      removedDVs.map(dv => (dv.cardinality, dv.sizeInBytes.toLong))
+        .reduceLeftOption((dv1, dv2) => (dv1._1 + dv2._1, dv1._2 + dv2._2))
+        .getOrElse((0L, 0L))
+
+    val dvMetrics: Map[String, SQLMetric] = Map(
+      "numDeletionVectorsRemoved" ->
+        setAndReturnMetric(
+          "total number of deletion vectors removed",
+          removedDVs.size),
+      "numDeletionVectorRowsRemoved" ->
+        setAndReturnMetric(
+          "total number of deletion vector rows removed",
+          deletionVectorRowsRemoved),
+      "numDeletionVectorBytesRemoved" ->
+        setAndReturnMetric(
+          "total number of bytes of removed deletion vectors",
+          deletionVectorBytesRemoved))
+
+    val sizeStats = FileSizeStatsWithHistogram.create(addedFiles.map(_.size).sorted)
+    Map[String, SQLMetric](
+      "minFileSize" -> setAndReturnMetric("minimum file size", sizeStats.get.min),
+      "p25FileSize" -> setAndReturnMetric("25th percentile file size", sizeStats.get.p25),
+      "p50FileSize" -> setAndReturnMetric("50th percentile file size", sizeStats.get.p50),
+      "p75FileSize" -> setAndReturnMetric("75th percentile file size", sizeStats.get.p75),
+      "maxFileSize" -> setAndReturnMetric("maximum file size", sizeStats.get.max),
+      "numAddedFiles" -> setAndReturnMetric("total number of files added.", addedFiles.size),
+      "numRemovedFiles" -> setAndReturnMetric("total number of files removed.", removedFiles.size),
+      "numAddedBytes" -> setAndReturnMetric("total number of bytes added", totalSize(addedFiles)),
+      "numRemovedBytes" ->
+        setAndReturnMetric("total number of bytes removed", totalSize(removedFiles))
+    ) ++ dvMetrics
+  }
+}
diff --git a/backends-clickhouse/src/main/delta-32/org/apache/spark/sql/delta/commands/OptimizeTableCommandOverwrites.scala b/backends-clickhouse/src/main/delta-32/org/apache/spark/sql/delta/commands/OptimizeTableCommandOverwrites.scala
new file mode 100644
index 000000000000..7b4c3231b8c3
--- /dev/null
+++ b/backends-clickhouse/src/main/delta-32/org/apache/spark/sql/delta/commands/OptimizeTableCommandOverwrites.scala
@@ -0,0 +1,345 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.delta.commands
+
+import org.apache.gluten.expression.ConverterUtils
+
+import org.apache.spark.{TaskContext, TaskOutputFileAlreadyExistException}
+import org.apache.spark.internal.Logging
+import org.apache.spark.internal.io.FileCommitProtocol.TaskCommitMessage
+import org.apache.spark.internal.io.SparkHadoopWriterUtils
+import org.apache.spark.shuffle.FetchFailedException
+import org.apache.spark.sql.SparkSession
+import org.apache.spark.sql.catalyst.{InternalRow, TableIdentifier}
+import org.apache.spark.sql.catalyst.catalog.CatalogTableType
+import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
+import org.apache.spark.sql.catalyst.types.DataTypeUtils
+import org.apache.spark.sql.delta._
+import org.apache.spark.sql.delta.actions.{AddFile, FileAction}
+import org.apache.spark.sql.delta.catalog.{ClickHouseTableV2, DeltaTableV2}
+import org.apache.spark.sql.errors.QueryExecutionErrors
+import org.apache.spark.sql.execution.datasources.CHDatasourceJniWrapper
+import org.apache.spark.sql.execution.datasources.v1.CHMergeTreeWriterInjects
+import org.apache.spark.sql.execution.datasources.v1.clickhouse._
+import org.apache.spark.sql.execution.datasources.v2.clickhouse.metadata.{AddFileTags, AddMergeTreeParts}
+import org.apache.spark.sql.execution.datasources.v2.clickhouse.utils.CHDataSourceUtils
+import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.types.StructType
+import org.apache.spark.util.{SerializableConfiguration, SystemClock, Utils}
+
+import org.apache.hadoop.fs.{FileAlreadyExistsException, Path}
+import org.apache.hadoop.mapreduce.{TaskAttemptContext, TaskAttemptID, TaskID, TaskType}
+import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl
+
+import java.util.{Date, UUID}
+
+import scala.collection.mutable.ArrayBuffer
+
+object OptimizeTableCommandOverwrites extends Logging {
+
+  case class TaskDescription(
+      path: String,
+      database: String,
+      tableName: String,
+      snapshotId: String,
+      orderByKeyOption: Option[Seq[String]],
+      lowCardKeyOption: Option[Seq[String]],
+      minmaxIndexKeyOption: Option[Seq[String]],
+      bfIndexKeyOption: Option[Seq[String]],
+      setIndexKeyOption: Option[Seq[String]],
+      primaryKeyOption: Option[Seq[String]],
+      partitionColumns: Seq[String],
+      partList: Seq[String],
+      tableSchema: StructType,
+      clickhouseTableConfigs: Map[String, String],
+      serializableHadoopConf: SerializableConfiguration,
+      jobIdInstant: Long,
+      partitionDir: Option[String],
+      bucketDir: Option[String]
+  )
+
+  private def executeTask(
+      description: TaskDescription,
+      sparkStageId: Int,
+      sparkPartitionId: Int,
+      sparkAttemptNumber: Int
+  ): MergeTreeWriteTaskResult = {
+
+    val jobId = SparkHadoopWriterUtils.createJobID(new Date(description.jobIdInstant), sparkStageId)
+    val taskId = new TaskID(jobId, TaskType.MAP, sparkPartitionId)
+    val taskAttemptId = new TaskAttemptID(taskId, sparkAttemptNumber)
+
+    // Set up the attempt context required to use in the output committer.
+    val taskAttemptContext: TaskAttemptContext = {
+      // Set up the configuration object
+      val hadoopConf = description.serializableHadoopConf.value
+      hadoopConf.set("mapreduce.job.id", jobId.toString)
+      hadoopConf.set("mapreduce.task.id", taskAttemptId.getTaskID.toString)
+      hadoopConf.set("mapreduce.task.attempt.id", taskAttemptId.toString)
+      hadoopConf.setBoolean("mapreduce.task.ismap", true)
+      hadoopConf.setInt("mapreduce.task.partition", 0)
+
+      new TaskAttemptContextImpl(hadoopConf, taskAttemptId)
+    }
+
+    try {
+      Utils.tryWithSafeFinallyAndFailureCallbacks(block = {
+
+        val uuid = UUID.randomUUID.toString
+
+        val planWithSplitInfo = CHMergeTreeWriterInjects.genMergeTreeWriteRel(
+          description.path,
+          description.database,
+          description.tableName,
+          description.snapshotId,
+          description.orderByKeyOption,
+          description.lowCardKeyOption,
+          description.minmaxIndexKeyOption,
+          description.bfIndexKeyOption,
+          description.setIndexKeyOption,
+          description.primaryKeyOption,
+          description.partitionColumns,
+          description.partList,
+          ConverterUtils.convertNamedStructJson(description.tableSchema),
+          description.clickhouseTableConfigs,
+          DataTypeUtils.toAttributes(description.tableSchema)
+        )
+
+        val datasourceJniWrapper = new CHDatasourceJniWrapper()
+        val returnedMetrics =
+          datasourceJniWrapper.nativeMergeMTParts(
+            planWithSplitInfo.plan,
+            planWithSplitInfo.splitInfo,
+            uuid,
+            taskId.getId.toString,
+            description.partitionDir.getOrElse(""),
+            description.bucketDir.getOrElse("")
+          )
+        if (returnedMetrics != null && returnedMetrics.nonEmpty) {
+          val addFiles = AddFileTags.partsMetricsToAddFile(
+            description.database,
+            description.tableName,
+            description.path,
+            returnedMetrics,
+            Seq(Utils.localHostName()))
+
+          val (taskCommitMessage, taskCommitTime) = Utils.timeTakenMs {
+            // committer.commitTask(taskAttemptContext)
+            new TaskCommitMessage(addFiles.toSeq)
+          }
+
+//          val summary = MergeTreeExecutedWriteSummary(
+//            updatedPartitions = updatedPartitions.toSet,
+//            stats = statsTrackers.map(_.getFinalStats(taskCommitTime)))
+          MergeTreeWriteTaskResult(taskCommitMessage, null)
+        } else {
+          throw new IllegalStateException()
+        }
+      })(
+        catchBlock = {
+          // If there is an error, abort the task
+          logError(s"Job $jobId aborted.")
+        },
+        finallyBlock = {})
+    } catch {
+      case e: FetchFailedException =>
+        throw e
+      case f: FileAlreadyExistsException if SQLConf.get.fastFailFileFormatOutput =>
+        // If any output file to write already exists, it does not make sense to re-run this task.
+        // We throw the exception and let Executor throw ExceptionFailure to abort the job.
+        throw new TaskOutputFileAlreadyExistException(f)
+      case t: Throwable =>
+        throw QueryExecutionErrors.taskFailedWhileWritingRowsError(description.path, t)
+    }
+
+  }
+
+  def runOptimizeBinJobClickhouse(
+      txn: OptimisticTransaction,
+      partitionValues: Map[String, String],
+      bucketNum: String,
+      bin: Seq[AddFile],
+      maxFileSize: Long): Seq[FileAction] = {
+    val tableV2 = ClickHouseTableV2.getTable(txn.deltaLog);
+
+    val sparkSession = SparkSession.getActiveSession.get
+
+    val rddWithNonEmptyPartitions =
+      sparkSession.sparkContext.parallelize(Array.empty[InternalRow], 1)
+
+    val jobIdInstant = new Date().getTime
+    val ret = new Array[MergeTreeWriteTaskResult](rddWithNonEmptyPartitions.partitions.length)
+
+    val serializableHadoopConf = new SerializableConfiguration(
+      sparkSession.sessionState.newHadoopConfWithOptions(
+        txn.metadata.configuration ++ txn.deltaLog.options))
+
+    val partitionDir = if (tableV2.partitionColumns.isEmpty) {
+      None
+    } else {
+      Some(tableV2.partitionColumns.map(c => c + "=" + partitionValues(c)).mkString("/"))
+    }
+
+    val bucketDir = if (tableV2.bucketOption.isEmpty) {
+      None
+    } else {
+      Some(bucketNum)
+    }
+
+    val description = TaskDescription.apply(
+      txn.deltaLog.dataPath.toString,
+      tableV2.dataBaseName,
+      tableV2.tableName,
+      ClickhouseSnapshot.genSnapshotId(tableV2.initialSnapshot),
+      tableV2.orderByKeyOption,
+      tableV2.lowCardKeyOption,
+      tableV2.minmaxIndexKeyOption,
+      tableV2.bfIndexKeyOption,
+      tableV2.setIndexKeyOption,
+      tableV2.primaryKeyOption,
+      tableV2.partitionColumns,
+      bin.map(_.asInstanceOf[AddMergeTreeParts].name),
+      tableV2.schema(),
+      tableV2.clickhouseTableConfigs,
+      serializableHadoopConf,
+      jobIdInstant,
+      partitionDir,
+      bucketDir
+    )
+    sparkSession.sparkContext.runJob(
+      rddWithNonEmptyPartitions,
+      (taskContext: TaskContext, _: Iterator[InternalRow]) => {
+        executeTask(
+          description,
+          taskContext.stageId(),
+          taskContext.partitionId(),
+          taskContext.taskAttemptId().toInt & Integer.MAX_VALUE
+        )
+      },
+      rddWithNonEmptyPartitions.partitions.indices,
+      (index, res: MergeTreeWriteTaskResult) => {
+        ret(index) = res
+      }
+    )
+
+    val addFiles = ret
+      .flatMap(_.commitMsg.obj.asInstanceOf[Seq[AddFile]])
+      .toSeq
+
+    val removeFiles =
+      bin.map(f => f.removeWithTimestamp(new SystemClock().getTimeMillis(), dataChange = false))
+    addFiles ++ removeFiles
+
+  }
+
+  def getDeltaLogClickhouse(
+      spark: SparkSession,
+      path: Option[String],
+      tableIdentifier: Option[TableIdentifier],
+      operationName: String,
+      hadoopConf: Map[String, String] = Map.empty): DeltaLog = {
+    val tablePath =
+      if (path.nonEmpty) {
+        new Path(path.get)
+      } else if (tableIdentifier.nonEmpty) {
+        val sessionCatalog = spark.sessionState.catalog
+        lazy val metadata = sessionCatalog.getTableMetadata(tableIdentifier.get)
+
+        if (CHDataSourceUtils.isClickhousePath(spark, tableIdentifier.get)) {
+          new Path(tableIdentifier.get.table)
+        } else if (CHDataSourceUtils.isClickHouseTable(spark, tableIdentifier.get)) {
+          new Path(metadata.location)
+        } else {
+          DeltaTableIdentifier(spark, tableIdentifier.get) match {
+            case Some(id) if id.path.nonEmpty =>
+              new Path(id.path.get)
+            case Some(id) if id.table.nonEmpty =>
+              new Path(metadata.location)
+            case _ =>
+              if (metadata.tableType == CatalogTableType.VIEW) {
+                throw DeltaErrors.viewNotSupported(operationName)
+              }
+              throw DeltaErrors.notADeltaTableException(operationName)
+          }
+        }
+      } else {
+        throw DeltaErrors.missingTableIdentifierException(operationName)
+      }
+
+    val startTime = Some(System.currentTimeMillis)
+    val deltaLog = DeltaLog.forTable(spark, tablePath, hadoopConf)
+    if (deltaLog.update(checkIfUpdatedSinceTs = startTime).version < 0) {
+      throw DeltaErrors.notADeltaTableException(
+        operationName,
+        DeltaTableIdentifier(path, tableIdentifier))
+    }
+    deltaLog
+  }
+
+  /**
+   * Extracts the [[DeltaTableV2]] from a LogicalPlan iff the LogicalPlan is a [[ResolvedTable]]
+   * with either a [[DeltaTableV2]] or a [[V1Table]] that is referencing a Delta table. In all
+   * other cases this method will throw a "Table not found" exception.
+   */
+  def getDeltaTable(target: LogicalPlan, cmd: String): DeltaTableV2 = {
+    // TODO: Remove this wrapper and let former callers invoke DeltaTableV2.extractFrom directly.
+    ClickHouseTableV2.extractFrom(target, cmd)
+  }
+
+  def groupFilesIntoBinsClickhouse(
+      partitionsToCompact: Seq[((String, Map[String, String]), Seq[AddFile])],
+      maxTargetFileSize: Long): Seq[((String, Map[String, String]), Seq[AddFile])] = {
+    partitionsToCompact.flatMap {
+      case (partition, files) =>
+        val bins = new ArrayBuffer[Seq[AddFile]]()
+
+        val currentBin = new ArrayBuffer[AddFile]()
+        var currentBinSize = 0L
+
+        files.sortBy(_.size).foreach {
+          file =>
+            // Generally, a bin is a group of existing files, whose total size does not exceed the
+            // desired maxFileSize. They will be coalesced into a single output file.
+            // However, if isMultiDimClustering = true, all files in a partition will be read by the
+            // same job, the data will be range-partitioned and
+            // numFiles = totalFileSize / maxFileSize
+            // will be produced. See below.
+
+            // isMultiDimClustering is always false for Gluten Clickhouse for now
+            if (file.size + currentBinSize > maxTargetFileSize /* && !isMultiDimClustering */ ) {
+              bins += currentBin.toVector
+              currentBin.clear()
+              currentBin += file
+              currentBinSize = file.size
+            } else {
+              currentBin += file
+              currentBinSize += file.size
+            }
+        }
+
+        if (currentBin.nonEmpty) {
+          bins += currentBin.toVector
+        }
+
+        bins
+          .map(b => (partition, b))
+          // select bins that have at least two files or in case of multi-dim clustering
+          // select all bins
+          .filter(_._2.size > 1 /* || isMultiDimClustering */ )
+    }
+  }
+}
diff --git a/backends-clickhouse/src/main/delta-32/org/apache/spark/sql/delta/commands/UpdateCommand.scala b/backends-clickhouse/src/main/delta-32/org/apache/spark/sql/delta/commands/UpdateCommand.scala
new file mode 100644
index 000000000000..9a7fb96775f0
--- /dev/null
+++ b/backends-clickhouse/src/main/delta-32/org/apache/spark/sql/delta/commands/UpdateCommand.scala
@@ -0,0 +1,556 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.delta.commands
+
+// scalastyle:off import.ordering.noEmptyLine
+import java.util.concurrent.TimeUnit
+
+import org.apache.spark.sql.delta.metric.IncrementMetric
+import org.apache.spark.sql.delta._
+import org.apache.spark.sql.delta.actions.{AddCDCFile, AddFile, FileAction}
+import org.apache.spark.sql.delta.commands.cdc.CDCReader.{CDC_TYPE_COLUMN_NAME, CDC_TYPE_NOT_CDC, CDC_TYPE_UPDATE_POSTIMAGE, CDC_TYPE_UPDATE_PREIMAGE}
+import org.apache.spark.sql.delta.files.{TahoeBatchFileIndex, TahoeFileIndex}
+import org.apache.spark.sql.delta.sources.DeltaSQLConf
+import org.apache.hadoop.fs.Path
+
+import org.apache.spark.SparkContext
+import org.apache.spark.sql.{Column, DataFrame, Dataset, Row, SparkSession}
+import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute
+import org.apache.spark.sql.catalyst.catalog.CatalogTable
+import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, Expression, If, Literal}
+import org.apache.spark.sql.catalyst.expressions.Literal.TrueLiteral
+import org.apache.spark.sql.catalyst.plans.QueryPlan
+import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
+import org.apache.spark.sql.execution.command.LeafRunnableCommand
+import org.apache.spark.sql.execution.metric.SQLMetric
+import org.apache.spark.sql.execution.metric.SQLMetrics.{createMetric, createTimingMetric}
+import org.apache.spark.sql.functions.{array, col, explode, input_file_name, lit, split, struct}
+import org.apache.spark.sql.types.LongType
+
+/**
+ * Gluten overwrite Delta:
+ *
+ * This file is copied from Delta 3.2.0. It is modified to overcome the following issues:
+ *   1. In Clickhouse backend, we can't implement input_file_name() correctly, we can only implement
+ *      it so that it return a a list of filenames (concated by ',').
+ */
+
+/**
+ * Performs an Update using `updateExpression` on the rows that match `condition`
+ *
+ * Algorithm:
+ *   1) Identify the affected files, i.e., the files that may have the rows to be updated.
+ *   2) Scan affected files, apply the updates, and generate a new DF with updated rows.
+ *   3) Use the Delta protocol to atomically write the new DF as new files and remove
+ *      the affected files that are identified in step 1.
+ */
+case class UpdateCommand(
+    tahoeFileIndex: TahoeFileIndex,
+    catalogTable: Option[CatalogTable],
+    target: LogicalPlan,
+    updateExpressions: Seq[Expression],
+    condition: Option[Expression])
+  extends LeafRunnableCommand with DeltaCommand {
+
+  override val output: Seq[Attribute] = {
+    Seq(AttributeReference("num_affected_rows", LongType)())
+  }
+
+  override def innerChildren: Seq[QueryPlan[_]] = Seq(target)
+
+  @transient private lazy val sc: SparkContext = SparkContext.getOrCreate()
+
+  override lazy val metrics = Map[String, SQLMetric](
+    "numAddedFiles" -> createMetric(sc, "number of files added."),
+    "numAddedBytes" -> createMetric(sc, "number of bytes added"),
+    "numRemovedFiles" -> createMetric(sc, "number of files removed."),
+    "numRemovedBytes" -> createMetric(sc, "number of bytes removed"),
+    "numUpdatedRows" -> createMetric(sc, "number of rows updated."),
+    "numCopiedRows" -> createMetric(sc, "number of rows copied."),
+    "executionTimeMs" ->
+      createTimingMetric(sc, "time taken to execute the entire operation"),
+    "scanTimeMs" ->
+      createTimingMetric(sc, "time taken to scan the files for matches"),
+    "rewriteTimeMs" ->
+      createTimingMetric(sc, "time taken to rewrite the matched files"),
+    "numAddedChangeFiles" -> createMetric(sc, "number of change data capture files generated"),
+    "changeFileBytes" -> createMetric(sc, "total size of change data capture files generated"),
+    "numTouchedRows" -> createMetric(sc, "number of rows touched (copied + updated)"),
+    "numDeletionVectorsAdded" -> createMetric(sc, "number of deletion vectors added"),
+    "numDeletionVectorsRemoved" -> createMetric(sc, "number of deletion vectors removed"),
+    "numDeletionVectorsUpdated" -> createMetric(sc, "number of deletion vectors updated")
+  )
+
+  final override def run(sparkSession: SparkSession): Seq[Row] = {
+    recordDeltaOperation(tahoeFileIndex.deltaLog, "delta.dml.update") {
+      val deltaLog = tahoeFileIndex.deltaLog
+      deltaLog.withNewTransaction(catalogTable) { txn =>
+        DeltaLog.assertRemovable(txn.snapshot)
+        if (hasBeenExecuted(txn, sparkSession)) {
+          sendDriverMetrics(sparkSession, metrics)
+          return Seq.empty
+        }
+        performUpdate(sparkSession, deltaLog, txn)
+      }
+      // Re-cache all cached plans(including this relation itself, if it's cached) that refer to
+      // this data source relation.
+      sparkSession.sharedState.cacheManager.recacheByPlan(sparkSession, target)
+    }
+    Seq(Row(metrics("numUpdatedRows").value))
+  }
+
+  private def performUpdate(
+      sparkSession: SparkSession, deltaLog: DeltaLog, txn: OptimisticTransaction): Unit = {
+    import org.apache.spark.sql.delta.implicits._
+
+    var numTouchedFiles: Long = 0
+    var numRewrittenFiles: Long = 0
+    var numAddedBytes: Long = 0
+    var numRemovedBytes: Long = 0
+    var numAddedChangeFiles: Long = 0
+    var changeFileBytes: Long = 0
+    var scanTimeMs: Long = 0
+    var rewriteTimeMs: Long = 0
+    var numDeletionVectorsAdded: Long = 0
+    var numDeletionVectorsRemoved: Long = 0
+    var numDeletionVectorsUpdated: Long = 0
+
+    val startTime = System.nanoTime()
+    val numFilesTotal = txn.snapshot.numOfFiles
+
+    val updateCondition = condition.getOrElse(Literal.TrueLiteral)
+    val (metadataPredicates, dataPredicates) =
+      DeltaTableUtils.splitMetadataAndDataPredicates(
+        updateCondition, txn.metadata.partitionColumns, sparkSession)
+
+    // Should we write the DVs to represent updated rows?
+    val shouldWriteDeletionVectors = shouldWritePersistentDeletionVectors(sparkSession, txn)
+    val candidateFiles = txn.filterFiles(
+      metadataPredicates ++ dataPredicates,
+      keepNumRecords = shouldWriteDeletionVectors)
+
+    val nameToAddFile = generateCandidateFileMap(deltaLog.dataPath, candidateFiles)
+
+    scanTimeMs = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startTime)
+
+    val filesToRewrite: Seq[TouchedFileWithDV] = if (candidateFiles.isEmpty) {
+      // Case 1: Do nothing if no row qualifies the partition predicates
+      // that are part of Update condition
+      Nil
+    } else if (dataPredicates.isEmpty) {
+      // Case 2: Update all the rows from the files that are in the specified partitions
+      // when the data filter is empty
+      candidateFiles
+        .map(f => TouchedFileWithDV(f.path, f, newDeletionVector = null, deletedRows = 0L))
+    } else {
+      // Case 3: Find all the affected files using the user-specified condition
+      val fileIndex = new TahoeBatchFileIndex(
+        sparkSession, "update", candidateFiles, deltaLog, tahoeFileIndex.path, txn.snapshot)
+
+      val touchedFilesWithDV = if (shouldWriteDeletionVectors) {
+        // Case 3.1: Find all the affected files via DV path
+        val targetDf = DMLWithDeletionVectorsHelper.createTargetDfForScanningForMatches(
+          sparkSession,
+          target,
+          fileIndex)
+
+        // Does the target table already has DVs enabled? If so, we need to read the table
+        // with deletion vectors.
+        val mustReadDeletionVectors = DeletionVectorUtils.deletionVectorsReadable(txn.snapshot)
+
+        DMLWithDeletionVectorsHelper.findTouchedFiles(
+          sparkSession,
+          txn,
+          mustReadDeletionVectors,
+          deltaLog,
+          targetDf,
+          fileIndex,
+          updateCondition,
+          opName = "UPDATE")
+      } else {
+        // Case 3.2: Find all the affected files using the non-DV path
+        // Keep everything from the resolved target except a new TahoeFileIndex
+        // that only involves the affected files instead of all files.
+        val newTarget = DeltaTableUtils.replaceFileIndex(target, fileIndex)
+        val data = Dataset.ofRows(sparkSession, newTarget)
+        val incrUpdatedCountExpr = IncrementMetric(TrueLiteral, metrics("numUpdatedRows"))
+        val pathsToRewrite =
+          withStatusCode("DELTA", UpdateCommand.FINDING_TOUCHED_FILES_MSG) {
+            // --- modified start
+            data.filter(new Column(updateCondition))
+              .select(input_file_name().as("input_files"))
+              .filter(new Column(incrUpdatedCountExpr))
+              .select(explode(split(col("input_files"), ",")))
+              .distinct()
+              .as[String]
+              .collect()
+            // --- modified end
+          }
+
+        // Wrap AddFile into TouchedFileWithDV that has empty DV.
+        pathsToRewrite
+          .map(getTouchedFile(deltaLog.dataPath, _, nameToAddFile))
+          .map(f => TouchedFileWithDV(f.path, f, newDeletionVector = null, deletedRows = 0L))
+          .toSeq
+      }
+      // Refresh scan time for Case 3, since we performed scan here.
+      scanTimeMs = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startTime)
+      touchedFilesWithDV
+    }
+
+    val totalActions = {
+      // When DV is on, we first mask removed rows with DVs and generate (remove, add) pairs.
+      val actionsForExistingFiles = if (shouldWriteDeletionVectors) {
+        // When there's no data predicate, all matched files are removed.
+        if (dataPredicates.isEmpty) {
+          val operationTimestamp = System.currentTimeMillis()
+          filesToRewrite.map(_.fileLogEntry.removeWithTimestamp(operationTimestamp))
+        } else {
+          // When there is data predicate, we generate (remove, add) pairs.
+          val filesToRewriteWithDV = filesToRewrite.filter(_.newDeletionVector != null)
+          val (dvActions, metricMap) = DMLWithDeletionVectorsHelper.processUnmodifiedData(
+            sparkSession,
+            filesToRewriteWithDV,
+            txn.snapshot)
+          metrics("numUpdatedRows").set(metricMap("numModifiedRows"))
+          numDeletionVectorsAdded = metricMap("numDeletionVectorsAdded")
+          numDeletionVectorsRemoved = metricMap("numDeletionVectorsRemoved")
+          numDeletionVectorsUpdated = metricMap("numDeletionVectorsUpdated")
+          numTouchedFiles = metricMap("numRemovedFiles")
+          dvActions
+        }
+      } else {
+        // Without DV we'll leave the job to `rewriteFiles`.
+        Nil
+      }
+
+      // When DV is on, we write out updated rows only. The return value will be only `add` actions.
+      // When DV is off, we write out updated rows plus unmodified rows from the same file, then
+      // return `add` and `remove` actions.
+      val rewriteStartNs = System.nanoTime()
+      val actionsForNewFiles =
+        withStatusCode("DELTA", UpdateCommand.rewritingFilesMsg(filesToRewrite.size)) {
+          if (filesToRewrite.nonEmpty) {
+            rewriteFiles(
+              sparkSession,
+              txn,
+              rootPath = tahoeFileIndex.path,
+              inputLeafFiles = filesToRewrite.map(_.fileLogEntry),
+              nameToAddFileMap = nameToAddFile,
+              condition = updateCondition,
+              generateRemoveFileActions = !shouldWriteDeletionVectors,
+              copyUnmodifiedRows = !shouldWriteDeletionVectors)
+          } else {
+            Nil
+          }
+        }
+      rewriteTimeMs = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - rewriteStartNs)
+
+      numTouchedFiles = filesToRewrite.length
+      val (addActions, removeActions) = actionsForNewFiles.partition(_.isInstanceOf[AddFile])
+      numRewrittenFiles = addActions.size
+      numAddedBytes = addActions.map(_.getFileSize).sum
+      numRemovedBytes = removeActions.map(_.getFileSize).sum
+
+      actionsForExistingFiles ++ actionsForNewFiles
+    }
+
+    val changeActions = totalActions.collect { case f: AddCDCFile => f }
+    numAddedChangeFiles = changeActions.size
+    changeFileBytes = changeActions.map(_.size).sum
+
+    metrics("numAddedFiles").set(numRewrittenFiles)
+    metrics("numAddedBytes").set(numAddedBytes)
+    metrics("numAddedChangeFiles").set(numAddedChangeFiles)
+    metrics("changeFileBytes").set(changeFileBytes)
+    metrics("numRemovedFiles").set(numTouchedFiles)
+    metrics("numRemovedBytes").set(numRemovedBytes)
+    metrics("executionTimeMs").set(TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startTime))
+    metrics("scanTimeMs").set(scanTimeMs)
+    metrics("rewriteTimeMs").set(rewriteTimeMs)
+    // In the case where the numUpdatedRows is not captured, we can siphon out the metrics from
+    // the BasicWriteStatsTracker. This is for case 2 where the update condition contains only
+    // metadata predicates and so the entire partition is re-written.
+    val outputRows = txn.getMetric("numOutputRows").map(_.value).getOrElse(-1L)
+    if (metrics("numUpdatedRows").value == 0 && outputRows != 0 &&
+      metrics("numCopiedRows").value == 0) {
+      // We know that numTouchedRows = numCopiedRows + numUpdatedRows.
+      // Since an entire partition was re-written, no rows were copied.
+      // So numTouchedRows == numUpdateRows
+      metrics("numUpdatedRows").set(metrics("numTouchedRows").value)
+    } else {
+      // This is for case 3 where the update condition contains both metadata and data predicates
+      // so relevant files will have some rows updated and some rows copied. We don't need to
+      // consider case 1 here, where no files match the update condition, as we know that
+      // `totalActions` is empty.
+      metrics("numCopiedRows").set(
+        metrics("numTouchedRows").value - metrics("numUpdatedRows").value)
+      metrics("numDeletionVectorsAdded").set(numDeletionVectorsAdded)
+      metrics("numDeletionVectorsRemoved").set(numDeletionVectorsRemoved)
+      metrics("numDeletionVectorsUpdated").set(numDeletionVectorsUpdated)
+    }
+    txn.registerSQLMetrics(sparkSession, metrics)
+
+    val finalActions = createSetTransaction(sparkSession, deltaLog).toSeq ++ totalActions
+    txn.commitIfNeeded(
+      actions = finalActions,
+      op = DeltaOperations.Update(condition),
+      tags = RowTracking.addPreservedRowTrackingTagIfNotSet(txn.snapshot))
+    sendDriverMetrics(sparkSession, metrics)
+
+    recordDeltaEvent(
+      deltaLog,
+      "delta.dml.update.stats",
+      data = UpdateMetric(
+        condition = condition.map(_.sql).getOrElse("true"),
+        numFilesTotal,
+        numTouchedFiles,
+        numRewrittenFiles,
+        numAddedChangeFiles,
+        changeFileBytes,
+        scanTimeMs,
+        rewriteTimeMs,
+        numDeletionVectorsAdded,
+        numDeletionVectorsRemoved,
+        numDeletionVectorsUpdated)
+    )
+  }
+
+  /**
+   * Scan all the affected files and write out the updated files.
+   *
+   * When CDF is enabled, includes the generation of CDC preimage and postimage columns for
+   * changed rows.
+   *
+   * @return a list of [[FileAction]]s, consisting of newly-written data and CDC files and old
+   *         files that have been removed.
+   */
+  private def rewriteFiles(
+      spark: SparkSession,
+      txn: OptimisticTransaction,
+      rootPath: Path,
+      inputLeafFiles: Seq[AddFile],
+      nameToAddFileMap: Map[String, AddFile],
+      condition: Expression,
+      generateRemoveFileActions: Boolean,
+      copyUnmodifiedRows: Boolean): Seq[FileAction] = {
+    // Number of total rows that we have seen, i.e. are either copying or updating (sum of both).
+    // This will be used later, along with numUpdatedRows, to determine numCopiedRows.
+    val incrTouchedCountExpr = IncrementMetric(TrueLiteral, metrics("numTouchedRows"))
+
+    // Containing the map from the relative file path to AddFile
+    val baseRelation = buildBaseRelation(
+      spark, txn, "update", rootPath, inputLeafFiles.map(_.path), nameToAddFileMap)
+    val newTarget = DeltaTableUtils.replaceFileIndex(target, baseRelation.location)
+    val (targetDf, finalOutput, finalUpdateExpressions) = UpdateCommand.preserveRowTrackingColumns(
+      targetDfWithoutRowTrackingColumns = Dataset.ofRows(spark, newTarget),
+      snapshot = txn.snapshot,
+      targetOutput = target.output,
+      updateExpressions)
+
+    val targetDfWithEvaluatedCondition = {
+      val evalDf = targetDf.withColumn(UpdateCommand.CONDITION_COLUMN_NAME, new Column(condition))
+      val copyAndUpdateRowsDf = if (copyUnmodifiedRows) {
+        evalDf
+      } else {
+        evalDf.filter(new Column(UpdateCommand.CONDITION_COLUMN_NAME))
+      }
+      copyAndUpdateRowsDf.filter(new Column(incrTouchedCountExpr))
+    }
+
+    val updatedDataFrame = UpdateCommand.withUpdatedColumns(
+      finalOutput,
+      finalUpdateExpressions,
+      condition,
+      targetDfWithEvaluatedCondition,
+      UpdateCommand.shouldOutputCdc(txn))
+
+    val addFiles = txn.writeFiles(updatedDataFrame)
+
+    val removeFiles = if (generateRemoveFileActions) {
+      val operationTimestamp = System.currentTimeMillis()
+      inputLeafFiles.map(_.removeWithTimestamp(operationTimestamp))
+    } else {
+      Nil
+    }
+
+    addFiles ++ removeFiles
+  }
+
+  def shouldWritePersistentDeletionVectors(
+      spark: SparkSession, txn: OptimisticTransaction): Boolean = {
+    spark.conf.get(DeltaSQLConf.UPDATE_USE_PERSISTENT_DELETION_VECTORS) &&
+      DeletionVectorUtils.deletionVectorsWritable(txn.snapshot)
+  }
+}
+
+object UpdateCommand {
+  val FILE_NAME_COLUMN = "_input_file_name_"
+  val CONDITION_COLUMN_NAME = "__condition__"
+  val FINDING_TOUCHED_FILES_MSG: String = "Finding files to rewrite for UPDATE operation"
+
+  def rewritingFilesMsg(numFilesToRewrite: Long): String =
+    s"Rewriting $numFilesToRewrite files for UPDATE operation"
+
+  /**
+   * Whether or not CDC is enabled on this table and, thus, if we should output CDC data during this
+   * UPDATE operation.
+   */
+  def shouldOutputCdc(txn: OptimisticTransaction): Boolean = {
+    DeltaConfigs.CHANGE_DATA_FEED.fromMetaData(txn.metadata)
+  }
+
+  /**
+   * Build the new columns. If the condition matches, generate the new value using
+   * the corresponding UPDATE EXPRESSION; otherwise, keep the original column value.
+   *
+   * When CDC is enabled, includes the generation of CDC pre-image and post-image columns for
+   * changed rows.
+   *
+   * @param originalExpressions the original column values
+   * @param updateExpressions the update transformation to perform on the input DataFrame
+   * @param dfWithEvaluatedCondition source DataFrame on which we will apply the update expressions
+   *                                 with an additional column CONDITION_COLUMN_NAME which is the
+   *                                 true/false value of if the update condition is satisfied
+   * @param condition update condition
+   * @param shouldOutputCdc if we should output CDC data during this UPDATE operation.
+   * @return the updated DataFrame, with extra CDC columns if CDC is enabled
+   */
+  def withUpdatedColumns(
+      originalExpressions: Seq[Attribute],
+      updateExpressions: Seq[Expression],
+      condition: Expression,
+      dfWithEvaluatedCondition: DataFrame,
+      shouldOutputCdc: Boolean): DataFrame = {
+    val resultDf = if (shouldOutputCdc) {
+      val namedUpdateCols = updateExpressions.zip(originalExpressions).map {
+        case (expr, targetCol) => new Column(expr).as(targetCol.name, targetCol.metadata)
+      }
+
+      // Build an array of output rows to be unpacked later. If the condition is matched, we
+      // generate CDC pre and postimages in addition to the final output row; if the condition
+      // isn't matched, we just generate a rewritten no-op row without any CDC events.
+      val preimageCols = originalExpressions.map(new Column(_)) :+
+        lit(CDC_TYPE_UPDATE_PREIMAGE).as(CDC_TYPE_COLUMN_NAME)
+      val postimageCols = namedUpdateCols :+
+        lit(CDC_TYPE_UPDATE_POSTIMAGE).as(CDC_TYPE_COLUMN_NAME)
+      val notCdcCol = new Column(CDC_TYPE_NOT_CDC).as(CDC_TYPE_COLUMN_NAME)
+      val updatedDataCols = namedUpdateCols :+ notCdcCol
+      val noopRewriteCols = originalExpressions.map(new Column(_)) :+ notCdcCol
+      val packedUpdates = array(
+        struct(preimageCols: _*),
+        struct(postimageCols: _*),
+        struct(updatedDataCols: _*)
+      ).expr
+
+      val packedData = if (condition == Literal.TrueLiteral) {
+        packedUpdates
+      } else {
+        If(
+          UnresolvedAttribute(CONDITION_COLUMN_NAME),
+          packedUpdates, // if it should be updated, then use `packagedUpdates`
+          array(struct(noopRewriteCols: _*)).expr) // else, this is a noop rewrite
+      }
+
+      // Explode the packed array, and project back out the final data columns.
+      val finalColumns = (originalExpressions :+ UnresolvedAttribute(CDC_TYPE_COLUMN_NAME)).map {
+        a => col(s"packedData.`${a.name}`").as(a.name, a.metadata)
+      }
+      dfWithEvaluatedCondition
+        .select(explode(new Column(packedData)).as("packedData"))
+        .select(finalColumns: _*)
+    } else {
+      val finalCols = updateExpressions.zip(originalExpressions).map { case (update, original) =>
+        val updated = if (condition == Literal.TrueLiteral) {
+          update
+        } else {
+          If(UnresolvedAttribute(CONDITION_COLUMN_NAME), update, original)
+        }
+        new Column(updated).as(original.name, original.metadata)
+      }
+
+      dfWithEvaluatedCondition.select(finalCols: _*)
+    }
+
+    resultDf.drop(CONDITION_COLUMN_NAME)
+  }
+
+  /**
+   * Preserve the row tracking columns when performing an UPDATE.
+   *
+   * @param targetDfWithoutRowTrackingColumns The target DataFrame on which the UPDATE
+   *                                          operation is to be performed.
+   * @param snapshot                          Snapshot of the Delta table at the start of
+   *                                          the transaction.
+   * @param targetOutput                      The output schema of the target DataFrame.
+   * @param updateExpressions                 The update transformation to perform on the
+   *                                          target DataFrame.
+   * @return
+   * 1. targetDf: The target DataFrame that includes the preserved row tracking columns.
+   * 2. finalOutput: The final output schema, including the preserved row tracking columns.
+   * 3. finalUpdateExpressions: The final update expressions, including transformations
+   * for the preserved row tracking columns.
+   */
+  def preserveRowTrackingColumns(
+      targetDfWithoutRowTrackingColumns: DataFrame,
+      snapshot: Snapshot,
+      targetOutput: Seq[Attribute] = Seq.empty,
+      updateExpressions: Seq[Expression] = Seq.empty):
+    (DataFrame, Seq[Attribute], Seq[Expression]) = {
+    val targetDf = RowTracking.preserveRowTrackingColumns(
+      targetDfWithoutRowTrackingColumns, snapshot)
+
+    val rowIdAttributeOpt = MaterializedRowId.getAttribute(snapshot, targetDf)
+    val rowCommitVersionAttributeOpt =
+      MaterializedRowCommitVersion.getAttribute(snapshot, targetDf)
+    val finalOutput = targetOutput ++ rowIdAttributeOpt ++ rowCommitVersionAttributeOpt
+
+    val finalUpdateExpressions = updateExpressions ++
+      rowIdAttributeOpt ++
+      rowCommitVersionAttributeOpt.map(_ => Literal(null, LongType))
+    (targetDf, finalOutput, finalUpdateExpressions)
+  }
+}
+
+/**
+ * Used to report details about update.
+ *
+ * @param condition: what was the update condition
+ * @param numFilesTotal: how big is the table
+ * @param numTouchedFiles: how many files did we touch
+ * @param numRewrittenFiles: how many files had to be rewritten
+ * @param numAddedChangeFiles: how many change files were generated
+ * @param changeFileBytes: total size of change files generated
+ * @param scanTimeMs: how long did finding take
+ * @param rewriteTimeMs: how long did rewriting take
+ *
+ * @note All the time units are milliseconds.
+ */
+case class UpdateMetric(
+    condition: String,
+    numFilesTotal: Long,
+    numTouchedFiles: Long,
+    numRewrittenFiles: Long,
+    numAddedChangeFiles: Long,
+    changeFileBytes: Long,
+    scanTimeMs: Long,
+    rewriteTimeMs: Long,
+    numDeletionVectorsAdded: Long,
+    numDeletionVectorsRemoved: Long,
+    numDeletionVectorsUpdated: Long
+)
diff --git a/backends-clickhouse/src/main/delta-32/org/apache/spark/sql/delta/commands/VacuumCommand.scala b/backends-clickhouse/src/main/delta-32/org/apache/spark/sql/delta/commands/VacuumCommand.scala
new file mode 100644
index 000000000000..987a7c35fa8b
--- /dev/null
+++ b/backends-clickhouse/src/main/delta-32/org/apache/spark/sql/delta/commands/VacuumCommand.scala
@@ -0,0 +1,735 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.delta.commands
+
+// scalastyle:off import.ordering.noEmptyLine
+import java.net.URI
+import java.util.Date
+import java.util.concurrent.TimeUnit
+import scala.collection.JavaConverters._
+import org.apache.spark.sql.delta._
+import org.apache.spark.sql.delta.actions.{AddFile, FileAction, RemoveFile}
+import org.apache.spark.sql.delta.sources.DeltaSQLConf
+import org.apache.spark.sql.delta.util.DeltaFileOperations
+import org.apache.spark.sql.delta.util.DeltaFileOperations.tryDeleteNonRecursive
+import com.fasterxml.jackson.databind.annotation.JsonDeserialize
+
+import org.apache.gluten.utils.QueryPlanSelector
+import org.apache.hadoop.conf.Configuration
+import org.apache.hadoop.fs.{FileSystem, Path}
+import org.apache.spark.broadcast.Broadcast
+import org.apache.spark.paths.SparkPath
+import org.apache.spark.sql.{Column, DataFrame, Dataset, Encoder, SparkSession}
+import org.apache.spark.sql.execution.datasources.v2.clickhouse.ClickHouseConfig
+import org.apache.spark.sql.execution.metric.SQLMetric
+import org.apache.spark.sql.execution.metric.SQLMetrics.createMetric
+import org.apache.spark.sql.functions._
+import org.apache.spark.sql.types.{BooleanType, LongType, StringType, StructField, StructType}
+import org.apache.spark.util.{Clock, SerializableConfiguration, SystemClock}
+
+/**
+ * Gluten overwrite Delta:
+ *
+ * This file is copied from Delta 3.2.0. It is modified to overcome the following issues:
+ *   1. In Gluten, part is a directory, but VacuumCommand assumes part is a file. So we need some
+ *      modifications to make it work.
+ *   2. Set the 'gluten.enabledForCurrentThread' to false, now gluten can not support vacuum cmd.
+ */
+
+/**
+ * Vacuums the table by clearing all untracked files and folders within this table.
+ * First lists all the files and directories in the table, and gets the relative paths with
+ * respect to the base of the table. Then it gets the list of all tracked files for this table,
+ * which may or may not be within the table base path, and gets the relative paths of
+ * all the tracked files with respect to the base of the table. Files outside of the table path
+ * will be ignored. Then we take a diff of the files and delete directories that were already empty,
+ * and all files that are within the table that are no longer tracked.
+ */
+object VacuumCommand extends VacuumCommandImpl with Serializable {
+
+  // --- modified start
+  case class FileNameAndSize(path: String, length: Long, isDir: Boolean = false)
+  // --- modified end
+
+  /**
+   * path : fully qualified uri
+   * length: size in bytes
+   * isDir: boolean indicating if it is a directory
+   * modificationTime: file update time in milliseconds
+   */
+  val INVENTORY_SCHEMA = StructType(
+    Seq(
+      StructField("path", StringType),
+      StructField("length", LongType),
+      StructField("isDir", BooleanType),
+      StructField("modificationTime", LongType)
+    ))
+
+  /**
+   * Additional check on retention duration to prevent people from shooting themselves in the foot.
+   */
+  protected def checkRetentionPeriodSafety(
+      spark: SparkSession,
+      retentionMs: Option[Long],
+      configuredRetention: Long): Unit = {
+    require(retentionMs.forall(_ >= 0), "Retention for Vacuum can't be less than 0.")
+    val checkEnabled =
+      spark.sessionState.conf.getConf(DeltaSQLConf.DELTA_VACUUM_RETENTION_CHECK_ENABLED)
+    val retentionSafe = retentionMs.forall(_ >= configuredRetention)
+    var configuredRetentionHours = TimeUnit.MILLISECONDS.toHours(configuredRetention)
+    if (TimeUnit.HOURS.toMillis(configuredRetentionHours) < configuredRetention) {
+      configuredRetentionHours += 1
+    }
+    require(!checkEnabled || retentionSafe,
+      s"""Are you sure you would like to vacuum files with such a low retention period? If you have
+        |writers that are currently writing to this table, there is a risk that you may corrupt the
+        |state of your Delta table.
+        |
+        |If you are certain that there are no operations being performed on this table, such as
+        |insert/upsert/delete/optimize, then you may turn off this check by setting:
+        |spark.databricks.delta.retentionDurationCheck.enabled = false
+        |
+        |If you are not sure, please use a value not less than "$configuredRetentionHours hours".
+       """.stripMargin)
+  }
+
+  /**
+   * Helper to compute all valid files based on basePath and Snapshot provided.
+   */
+  private def getValidFilesFromSnapshot(
+      spark: SparkSession,
+      basePath: String,
+      snapshot: Snapshot,
+      retentionMillis: Option[Long],
+      hadoopConf: Broadcast[SerializableConfiguration],
+      clock: Clock,
+      checkAbsolutePathOnly: Boolean): DataFrame = {
+    import org.apache.spark.sql.delta.implicits._
+    require(snapshot.version >= 0, "No state defined for this table. Is this really " +
+      "a Delta table? Refusing to garbage collect.")
+
+    val snapshotTombstoneRetentionMillis = DeltaLog.tombstoneRetentionMillis(snapshot.metadata)
+    checkRetentionPeriodSafety(spark, retentionMillis, snapshotTombstoneRetentionMillis)
+    val deleteBeforeTimestamp = retentionMillis match {
+      case Some(millis) => clock.getTimeMillis() - millis
+      case _ => snapshot.minFileRetentionTimestamp
+    }
+    val relativizeIgnoreError =
+      spark.sessionState.conf.getConf(DeltaSQLConf.DELTA_VACUUM_RELATIVIZE_IGNORE_ERROR)
+
+    val canonicalizedBasePath = SparkPath.fromPathString(basePath).urlEncoded
+    snapshot.stateDS.mapPartitions { actions =>
+      val reservoirBase = new Path(basePath)
+      val fs = reservoirBase.getFileSystem(hadoopConf.value.value)
+      actions.flatMap {
+        _.unwrap match {
+          // Existing tables may not store canonicalized paths, so we check both the canonicalized
+          // and non-canonicalized paths to ensure we don't accidentally delete wrong files.
+          case fa: FileAction if checkAbsolutePathOnly &&
+            !fa.path.contains(basePath) && !fa.path.contains(canonicalizedBasePath) => Nil
+          case tombstone: RemoveFile if tombstone.delTimestamp < deleteBeforeTimestamp => Nil
+          case fa: FileAction =>
+            getValidRelativePathsAndSubdirs(
+              fa,
+              fs,
+              reservoirBase,
+              relativizeIgnoreError
+            )
+          case _ => Nil
+        }
+      }
+    }.toDF("path")
+  }
+
+  def getFilesFromInventory(basePath: String,
+                            partitionColumns: Seq[String],
+                            inventory: DataFrame): Dataset[SerializableFileStatus] = {
+    implicit val fileNameAndSizeEncoder: Encoder[SerializableFileStatus] =
+      org.apache.spark.sql.Encoders.product[SerializableFileStatus]
+
+    // filter out required fields from provided inventory DF
+    val inventorySchema = StructType(
+        inventory.schema.fields.filter(f => INVENTORY_SCHEMA.fields.map(_.name).contains(f.name))
+      )
+    if (inventorySchema != INVENTORY_SCHEMA) {
+      throw DeltaErrors.invalidInventorySchema(INVENTORY_SCHEMA.treeString)
+    }
+
+    inventory
+      .filter(startswith(col("path"), lit(s"$basePath/")))
+      .select(
+        substr(col("path"), lit(basePath.length + 2)).as("path"),
+        col("length"), col("isDir"), col("modificationTime")
+      )
+      .flatMap {
+        row =>
+          val path = row.getString(0)
+          if(!DeltaTableUtils.isHiddenDirectory(partitionColumns, path)) {
+            Seq(SerializableFileStatus(path,
+              row.getLong(1), row.getBoolean(2), row.getLong(3)))
+          } else {
+            None
+          }
+      }
+  }
+
+  /**
+   * Clears all untracked files and folders within this table. If the inventory is not provided
+   * then the command first lists all the files and directories in the table, if inventory is
+   * provided then it will be used for identifying files and directories within the table and
+   * gets the relative paths with respect to the base of the table. Then the command gets the
+   * list of all tracked files for this table, which may or may not be within the table base path,
+   * and gets the relative paths of all the tracked files with respect to the base of the table.
+   * Files outside of the table path will be ignored. Then we take a diff of the files and delete
+   * directories that were already empty, and all files that are within the table that are no longer
+   * tracked.
+   *
+   * @param dryRun If set to true, no files will be deleted. Instead, we will list all files and
+   *               directories that will be cleared.
+   * @param retentionHours An optional parameter to override the default Delta tombstone retention
+   *                       period
+   * @param inventory An optional dataframe of files and directories within the table generated
+   *                  from sources like blob store inventory report
+   * @return A Dataset containing the paths of the files/folders to delete in dryRun mode. Otherwise
+   *         returns the base path of the table.
+   */
+  def gc(
+      spark: SparkSession,
+      deltaLog: DeltaLog,
+      dryRun: Boolean = true,
+      retentionHours: Option[Double] = None,
+      inventory: Option[DataFrame] = None,
+      clock: Clock = new SystemClock): DataFrame = {
+    recordDeltaOperation(deltaLog, "delta.gc") {
+
+      val vacuumStartTime = System.currentTimeMillis()
+      val path = deltaLog.dataPath
+      val deltaHadoopConf = deltaLog.newDeltaHadoopConf()
+      val fs = path.getFileSystem(deltaHadoopConf)
+
+      import org.apache.spark.sql.delta.implicits._
+
+      val snapshot = deltaLog.update()
+      deltaLog.protocolWrite(snapshot.protocol)
+
+      // --- modified start
+      val isMergeTreeFormat = ClickHouseConfig
+        .isMergeTreeFormatEngine(deltaLog.unsafeVolatileMetadata.configuration)
+      // --- modified end
+
+      val snapshotTombstoneRetentionMillis = DeltaLog.tombstoneRetentionMillis(snapshot.metadata)
+      val retentionMillis = retentionHours.map(h => TimeUnit.HOURS.toMillis(math.round(h)))
+      val deleteBeforeTimestamp = retentionMillis match {
+        case Some(millis) => clock.getTimeMillis() - millis
+        case _ => snapshot.minFileRetentionTimestamp
+      }
+      // --- modified start: toGMTString is a deprecated function
+      logInfo(s"Starting garbage collection (dryRun = $dryRun) of untracked files older than " +
+        s"${new Date(deleteBeforeTimestamp).toString} in $path")
+      // --- modified end
+      val hadoopConf = spark.sparkContext.broadcast(
+        new SerializableConfiguration(deltaHadoopConf))
+      val basePath = fs.makeQualified(path).toString
+      val parallelDeleteEnabled =
+        spark.sessionState.conf.getConf(DeltaSQLConf.DELTA_VACUUM_PARALLEL_DELETE_ENABLED)
+      val parallelDeletePartitions =
+        spark.sessionState.conf.getConf(DeltaSQLConf.DELTA_VACUUM_PARALLEL_DELETE_PARALLELISM)
+        .getOrElse(spark.sessionState.conf.numShufflePartitions)
+      val startTimeToIdentifyEligibleFiles = System.currentTimeMillis()
+
+      // --- modified start
+      val originalEnabledGluten =
+        spark.sparkContext.getLocalProperty(QueryPlanSelector.GLUTEN_ENABLE_FOR_THREAD_KEY)
+      // gluten can not support vacuum command
+      spark.sparkContext.setLocalProperty(QueryPlanSelector.GLUTEN_ENABLE_FOR_THREAD_KEY, "false")
+      // --- modified end
+
+      val validFiles =
+        getValidFilesFromSnapshot(
+          spark,
+          basePath,
+          snapshot,
+          retentionMillis,
+          hadoopConf,
+          clock,
+          checkAbsolutePathOnly = false)
+
+      val partitionColumns = snapshot.metadata.partitionSchema.fieldNames
+      val parallelism = spark.sessionState.conf.parallelPartitionDiscoveryParallelism
+      val allFilesAndDirsWithDuplicates = inventory match {
+        case Some(inventoryDF) => getFilesFromInventory(basePath, partitionColumns, inventoryDF)
+        case None => DeltaFileOperations.recursiveListDirs(
+          spark,
+          Seq(basePath),
+          hadoopConf,
+          hiddenDirNameFilter = DeltaTableUtils.isHiddenDirectory(partitionColumns, _),
+          hiddenFileNameFilter = DeltaTableUtils.isHiddenDirectory(partitionColumns, _),
+          fileListingParallelism = Option(parallelism)
+        )
+      }
+      val allFilesAndDirs = allFilesAndDirsWithDuplicates.groupByKey(_.path)
+        .mapGroups { (k, v) =>
+          val duplicates = v.toSeq
+          // of all the duplicates we can return the newest file.
+          duplicates.maxBy(_.modificationTime)
+        }
+
+      recordFrameProfile("Delta", "VacuumCommand.gc") {
+        try {
+          allFilesAndDirs.cache()
+
+          implicit val fileNameAndSizeEncoder =
+            org.apache.spark.sql.Encoders.product[FileNameAndSize]
+
+          val dirCounts = allFilesAndDirs.where(col("isDir")).count() + 1 // +1 for the base path
+          val filesAndDirsPresentBeforeDelete = allFilesAndDirs.count()
+
+          // The logic below is as follows:
+          //   1. We take all the files and directories listed in our reservoir
+          //   2. We filter all files older than our tombstone retention period and directories
+          //   3. We get the subdirectories of all files so that we can find non-empty directories
+          //   4. We groupBy each path, and count to get how many files are in each sub-directory
+          //   5. We subtract all the valid files and tombstones in our state
+          //   6. We filter all paths with a count of 1, which will correspond to files not in the
+          //      state, and empty directories. We can safely delete all of these
+          // --- modified start
+          val diff = if (isMergeTreeFormat) {
+            val diff_tmp = allFilesAndDirs
+              .where(col("modificationTime") < deleteBeforeTimestamp || col("isDir"))
+              .mapPartitions { fileStatusIterator =>
+                val reservoirBase = new Path(basePath)
+                val fs = reservoirBase.getFileSystem(hadoopConf.value.value)
+                fileStatusIterator.flatMap { fileStatus =>
+                  if (fileStatus.isDir) {
+                    Iterator.single(FileNameAndSize(
+                      relativize(fileStatus.getHadoopPath, fs, reservoirBase, isDir = true),
+                      0L,
+                      true))
+                  } else {
+                    val dirs = getAllSubdirs(basePath, fileStatus.path, fs)
+                    val dirsWithSlash = dirs.map { p =>
+                      val relativizedPath = relativize(new Path(p), fs, reservoirBase, isDir = true)
+                      FileNameAndSize(relativizedPath, 0L, true)
+                    }
+                    dirsWithSlash ++ Iterator(
+                      FileNameAndSize(relativize(
+                        fileStatus.getHadoopPath, fs, reservoirBase, isDir = false),
+                        fileStatus.length))
+                  }
+                }
+              }
+              .withColumn(
+                "dir",
+                when(col("isDir"), col("path"))
+                  .otherwise(expr("substring_index(path, '/',size(split(path, '/')) -1)")))
+              .groupBy(col("path"), col("dir"))
+              .agg(count(new Column("*")).as("count"), sum("length").as("length"))
+
+            diff_tmp
+              .join(validFiles, diff_tmp("dir") === validFiles("path"), "leftanti")
+              .where(col("count") === 1)
+          } else {
+            allFilesAndDirs
+              .where(col("modificationTime") < deleteBeforeTimestamp || col("isDir"))
+              .mapPartitions { fileStatusIterator =>
+                val reservoirBase = new Path(basePath)
+                val fs = reservoirBase.getFileSystem(hadoopConf.value.value)
+                fileStatusIterator.flatMap { fileStatus =>
+                  if (fileStatus.isDir) {
+                    Iterator.single(FileNameAndSize(
+                      relativize(fileStatus.getHadoopPath, fs, reservoirBase, isDir = true), 0L))
+                  } else {
+                    val dirs = getAllSubdirs(basePath, fileStatus.path, fs)
+                    val dirsWithSlash = dirs.map { p =>
+                      val relativizedPath = relativize(new Path(p), fs, reservoirBase, isDir = true)
+                      FileNameAndSize(relativizedPath, 0L)
+                    }
+                    dirsWithSlash ++ Iterator(
+                      FileNameAndSize(relativize(
+                        fileStatus.getHadoopPath, fs, reservoirBase, isDir = false),
+                        fileStatus.length))
+                  }
+                }
+              }.groupBy(col("path")).agg(count(new Column("*")).as("count"),
+              sum("length").as("length"))
+              .join(validFiles, Seq("path"), "leftanti")
+              .where(col("count") === 1)
+          }
+          // --- modified end
+
+          val sizeOfDataToDeleteRow = diff.agg(sum("length").cast("long")).first()
+          val sizeOfDataToDelete = if (sizeOfDataToDeleteRow.isNullAt(0)) {
+            0L
+          } else {
+            sizeOfDataToDeleteRow.getLong(0)
+          }
+
+          val diffFiles = diff
+            .select(col("path"))
+            .as[String]
+            .map { relativePath =>
+              assert(!stringToPath(relativePath).isAbsolute,
+                "Shouldn't have any absolute paths for deletion here.")
+              pathToString(DeltaFileOperations.absolutePath(basePath, relativePath))
+            }
+          val timeTakenToIdentifyEligibleFiles =
+            System.currentTimeMillis() - startTimeToIdentifyEligibleFiles
+
+
+          val numFiles = diffFiles.count()
+          if (dryRun) {
+            val stats = DeltaVacuumStats(
+              isDryRun = true,
+              specifiedRetentionMillis = retentionMillis,
+              defaultRetentionMillis = snapshotTombstoneRetentionMillis,
+              minRetainedTimestamp = deleteBeforeTimestamp,
+              dirsPresentBeforeDelete = dirCounts,
+              filesAndDirsPresentBeforeDelete = filesAndDirsPresentBeforeDelete,
+              objectsDeleted = numFiles,
+              sizeOfDataToDelete = sizeOfDataToDelete,
+              timeTakenToIdentifyEligibleFiles = timeTakenToIdentifyEligibleFiles,
+              timeTakenForDelete = 0L,
+              vacuumStartTime = vacuumStartTime,
+              vacuumEndTime = System.currentTimeMillis,
+              numPartitionColumns = partitionColumns.size
+            )
+
+            recordDeltaEvent(deltaLog, "delta.gc.stats", data = stats)
+            logInfo(s"Found $numFiles files ($sizeOfDataToDelete bytes) and directories in " +
+              s"a total of $dirCounts directories that are safe to delete. Vacuum stats: $stats")
+
+            return diffFiles.map(f => stringToPath(f).toString).toDF("path")
+          }
+          logVacuumStart(
+            spark,
+            deltaLog,
+            path,
+            diffFiles,
+            sizeOfDataToDelete,
+            retentionMillis,
+            snapshotTombstoneRetentionMillis)
+
+          val deleteStartTime = System.currentTimeMillis()
+          val filesDeleted = try {
+            delete(diffFiles, spark, basePath,
+              hadoopConf, parallelDeleteEnabled, parallelDeletePartitions)
+          } catch {
+            case t: Throwable =>
+              logVacuumEnd(deltaLog, spark, path)
+              throw t
+          }
+          val timeTakenForDelete = System.currentTimeMillis() - deleteStartTime
+          val stats = DeltaVacuumStats(
+            isDryRun = false,
+            specifiedRetentionMillis = retentionMillis,
+            defaultRetentionMillis = snapshotTombstoneRetentionMillis,
+            minRetainedTimestamp = deleteBeforeTimestamp,
+            dirsPresentBeforeDelete = dirCounts,
+            filesAndDirsPresentBeforeDelete = filesAndDirsPresentBeforeDelete,
+            objectsDeleted = filesDeleted,
+            sizeOfDataToDelete = sizeOfDataToDelete,
+            timeTakenToIdentifyEligibleFiles = timeTakenToIdentifyEligibleFiles,
+            timeTakenForDelete = timeTakenForDelete,
+            vacuumStartTime = vacuumStartTime,
+            vacuumEndTime = System.currentTimeMillis,
+            numPartitionColumns = partitionColumns.size)
+          recordDeltaEvent(deltaLog, "delta.gc.stats", data = stats)
+          logVacuumEnd(deltaLog, spark, path, Some(filesDeleted), Some(dirCounts))
+          logInfo(s"Deleted $filesDeleted files ($sizeOfDataToDelete bytes) and directories in " +
+            s"a total of $dirCounts directories. Vacuum stats: $stats")
+
+
+          spark.createDataset(Seq(basePath)).toDF("path")
+        } finally {
+          allFilesAndDirs.unpersist()
+
+          // --- modified start
+          if (originalEnabledGluten != null) {
+            spark.sparkContext.setLocalProperty(
+              QueryPlanSelector.GLUTEN_ENABLE_FOR_THREAD_KEY, originalEnabledGluten)
+          } else {
+            spark.sparkContext.setLocalProperty(
+              QueryPlanSelector.GLUTEN_ENABLE_FOR_THREAD_KEY, "true")
+          }
+          // --- modified end
+        }
+      }
+    }
+  }
+}
+
+trait VacuumCommandImpl extends DeltaCommand {
+
+  private val supportedFsForLogging = Seq(
+    "wasbs", "wasbss", "abfs", "abfss", "adl", "gs", "file", "hdfs"
+  )
+
+  /**
+   * Returns whether we should record vacuum metrics in the delta log.
+   */
+  private def shouldLogVacuum(
+      spark: SparkSession,
+      deltaLog: DeltaLog,
+      hadoopConf: Configuration,
+      path: Path): Boolean = {
+    val logVacuumConf = spark.sessionState.conf.getConf(DeltaSQLConf.DELTA_VACUUM_LOGGING_ENABLED)
+
+    if (logVacuumConf.nonEmpty) {
+      return logVacuumConf.get
+    }
+
+    val logStore = deltaLog.store
+
+    try {
+      val rawResolvedUri: URI = logStore.resolvePathOnPhysicalStorage(path, hadoopConf).toUri
+      val scheme = rawResolvedUri.getScheme
+      supportedFsForLogging.contains(scheme)
+    } catch {
+      case _: UnsupportedOperationException =>
+        logWarning("Vacuum event logging" +
+          " not enabled on this file system because we cannot detect your cloud storage type.")
+        false
+    }
+  }
+
+  /**
+   * Record Vacuum specific metrics in the commit log at the START of vacuum.
+   *
+   * @param spark - spark session
+   * @param deltaLog - DeltaLog of the table
+   * @param path - the (data) path to the root of the table
+   * @param diff - the list of paths (files, directories) that are safe to delete
+   * @param sizeOfDataToDelete - the amount of data (bytes) to be deleted
+   * @param specifiedRetentionMillis - the optional override retention period (millis) to keep
+   *                                   logically removed files before deleting them
+   * @param defaultRetentionMillis - the default retention period (millis)
+   */
+  protected def logVacuumStart(
+      spark: SparkSession,
+      deltaLog: DeltaLog,
+      path: Path,
+      diff: Dataset[String],
+      sizeOfDataToDelete: Long,
+      specifiedRetentionMillis: Option[Long],
+      defaultRetentionMillis: Long): Unit = {
+    logInfo(s"Deleting untracked files and empty directories in $path. The amount of data to be " +
+      s"deleted is $sizeOfDataToDelete (in bytes)")
+
+    // We perform an empty commit in order to record information about the Vacuum
+    if (shouldLogVacuum(spark, deltaLog, deltaLog.newDeltaHadoopConf(), path)) {
+      val checkEnabled =
+        spark.sessionState.conf.getConf(DeltaSQLConf.DELTA_VACUUM_RETENTION_CHECK_ENABLED)
+      val txn = deltaLog.startTransaction()
+      val metrics = Map[String, SQLMetric](
+        "numFilesToDelete" -> createMetric(spark.sparkContext, "number of files to deleted"),
+        "sizeOfDataToDelete" -> createMetric(spark.sparkContext,
+          "The total amount of data to be deleted in bytes")
+      )
+      metrics("numFilesToDelete").set(diff.count())
+      metrics("sizeOfDataToDelete").set(sizeOfDataToDelete)
+      txn.registerSQLMetrics(spark, metrics)
+      txn.commit(actions = Seq(), DeltaOperations.VacuumStart(
+        checkEnabled,
+        specifiedRetentionMillis,
+        defaultRetentionMillis
+      ))
+    }
+  }
+
+  /**
+   * Record Vacuum specific metrics in the commit log at the END of vacuum.
+   *
+   * @param deltaLog - DeltaLog of the table
+   * @param spark - spark session
+   * @param path - the (data) path to the root of the table
+   * @param filesDeleted - if the vacuum completed this will contain the number of files deleted.
+   *                       if the vacuum failed, this will be None.
+   * @param dirCounts - if the vacuum completed this will contain the number of directories
+   *                    vacuumed. if the vacuum failed, this will be None.
+   */
+  protected def logVacuumEnd(
+      deltaLog: DeltaLog,
+      spark: SparkSession,
+      path: Path,
+      filesDeleted: Option[Long] = None,
+      dirCounts: Option[Long] = None): Unit = {
+    if (shouldLogVacuum(spark, deltaLog, deltaLog.newDeltaHadoopConf(), path)) {
+      val txn = deltaLog.startTransaction()
+      val status = if (filesDeleted.isEmpty && dirCounts.isEmpty) { "FAILED" } else { "COMPLETED" }
+      if (filesDeleted.nonEmpty && dirCounts.nonEmpty) {
+        val metrics = Map[String, SQLMetric](
+          "numDeletedFiles" -> createMetric(spark.sparkContext, "number of files deleted."),
+          "numVacuumedDirectories" ->
+            createMetric(spark.sparkContext, "num of directories vacuumed."),
+          "status" -> createMetric(spark.sparkContext, "status of vacuum")
+        )
+        metrics("numDeletedFiles").set(filesDeleted.get)
+        metrics("numVacuumedDirectories").set(dirCounts.get)
+        txn.registerSQLMetrics(spark, metrics)
+      }
+      txn.commit(actions = Seq(), DeltaOperations.VacuumEnd(
+        status
+      ))
+    }
+
+    if (filesDeleted.nonEmpty) {
+      logConsole(s"Deleted ${filesDeleted.get} files and directories in a total " +
+        s"of ${dirCounts.get} directories.")
+    }
+  }
+
+  /**
+   * Attempts to relativize the `path` with respect to the `reservoirBase` and converts the path to
+   * a string.
+   */
+  protected def relativize(
+      path: Path,
+      fs: FileSystem,
+      reservoirBase: Path,
+      isDir: Boolean): String = {
+    pathToString(DeltaFileOperations.tryRelativizePath(fs, reservoirBase, path))
+  }
+
+  /**
+   * Wrapper function for DeltaFileOperations.getAllSubDirectories
+   * returns all subdirectories that `file` has with respect to `base`.
+   */
+  protected def getAllSubdirs(base: String, file: String, fs: FileSystem): Iterator[String] = {
+    DeltaFileOperations.getAllSubDirectories(base, file)._1
+  }
+
+  /**
+   * Attempts to delete the list of candidate files. Returns the number of files deleted.
+   */
+  protected def delete(
+      diff: Dataset[String],
+      spark: SparkSession,
+      basePath: String,
+      hadoopConf: Broadcast[SerializableConfiguration],
+      parallel: Boolean,
+      parallelPartitions: Int): Long = {
+    import org.apache.spark.sql.delta.implicits._
+
+    if (parallel) {
+      diff.repartition(parallelPartitions).mapPartitions { files =>
+        val fs = new Path(basePath).getFileSystem(hadoopConf.value.value)
+        val filesDeletedPerPartition =
+          files.map(p => stringToPath(p)).count(f => tryDeleteNonRecursive(fs, f))
+        Iterator(filesDeletedPerPartition)
+      }.collect().sum
+    } else {
+      val fs = new Path(basePath).getFileSystem(hadoopConf.value.value)
+      val fileResultSet = diff.toLocalIterator().asScala
+      fileResultSet.map(p => stringToPath(p)).count(f => tryDeleteNonRecursive(fs, f))
+    }
+  }
+
+  // scalastyle:off pathfromuri
+  protected def stringToPath(path: String): Path = new Path(new URI(path))
+  // scalastyle:on pathfromuri
+
+  protected def pathToString(path: Path): String = path.toUri.toString
+
+  /** Returns the relative path of a file action or None if the file lives outside of the table. */
+  protected def getActionRelativePath(
+      action: FileAction,
+      fs: FileSystem,
+      basePath: Path,
+      relativizeIgnoreError: Boolean): Option[String] = {
+    val filePath = stringToPath(action.path)
+    if (filePath.isAbsolute) {
+      val maybeRelative =
+        DeltaFileOperations.tryRelativizePath(fs, basePath, filePath, relativizeIgnoreError)
+      if (maybeRelative.isAbsolute) {
+        // This file lives outside the directory of the table.
+        None
+      } else {
+        Some(pathToString(maybeRelative))
+      }
+    } else {
+      Some(pathToString(filePath))
+    }
+  }
+
+
+  /**
+   * Returns the relative paths of all files and subdirectories for this action that must be
+   * retained during GC.
+   */
+  protected def getValidRelativePathsAndSubdirs(
+      action: FileAction,
+      fs: FileSystem,
+      basePath: Path,
+      relativizeIgnoreError: Boolean
+  ): Seq[String] = {
+    val paths = getActionRelativePath(action, fs, basePath, relativizeIgnoreError)
+      .map {
+        relativePath =>
+        Seq(relativePath) ++ getAllSubdirs("/", relativePath, fs)
+      }.getOrElse(Seq.empty)
+
+    val deletionVectorPath =
+      getDeletionVectorRelativePath(action).map(pathToString)
+
+    paths ++ deletionVectorPath.toSeq
+  }
+
+  /**
+   * Returns the path of the on-disk deletion vector if it is stored relative to the
+   * `basePath` otherwise `None`.
+   */
+  protected def getDeletionVectorRelativePath(action: FileAction): Option[Path] = {
+    val dv = action match {
+      case a: AddFile if a.deletionVector != null =>
+        Some(a.deletionVector)
+      case r: RemoveFile if r.deletionVector != null =>
+        Some(r.deletionVector)
+      case _ => None
+    }
+
+    dv match {
+      case Some(dv) if dv.isOnDisk =>
+        if (dv.isRelative) {
+          // We actually want a relative path here.
+          Some(dv.absolutePath(new Path(".")))
+        } else {
+          assert(dv.isAbsolute)
+          // This is never going to be a path relative to `basePath` for DVs.
+          None
+        }
+      case None => None
+    }
+  }
+}
+
+case class DeltaVacuumStats(
+    isDryRun: Boolean,
+    @JsonDeserialize(contentAs = classOf[java.lang.Long])
+    specifiedRetentionMillis: Option[Long],
+    defaultRetentionMillis: Long,
+    minRetainedTimestamp: Long,
+    dirsPresentBeforeDelete: Long,
+    filesAndDirsPresentBeforeDelete: Long,
+    objectsDeleted: Long,
+    sizeOfDataToDelete: Long,
+    timeTakenToIdentifyEligibleFiles: Long,
+    timeTakenForDelete: Long,
+    vacuumStartTime: Long,
+    vacuumEndTime: Long,
+    numPartitionColumns: Long
+)
diff --git a/backends-clickhouse/src/main/delta-32/org/apache/spark/sql/delta/commands/merge/ClassicMergeExecutor.scala b/backends-clickhouse/src/main/delta-32/org/apache/spark/sql/delta/commands/merge/ClassicMergeExecutor.scala
new file mode 100644
index 000000000000..42a89d427197
--- /dev/null
+++ b/backends-clickhouse/src/main/delta-32/org/apache/spark/sql/delta/commands/merge/ClassicMergeExecutor.scala
@@ -0,0 +1,571 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.delta.commands.merge
+
+import scala.collection.JavaConverters._
+
+import org.apache.spark.sql.delta._
+import org.apache.spark.sql.delta.actions.{AddCDCFile, AddFile, FileAction}
+import org.apache.spark.sql.delta.commands.{DeletionVectorBitmapGenerator, DMLWithDeletionVectorsHelper, MergeIntoCommandBase}
+import org.apache.spark.sql.delta.commands.cdc.CDCReader.{CDC_TYPE_COLUMN_NAME, CDC_TYPE_NOT_CDC}
+import org.apache.spark.sql.delta.commands.merge.MergeOutputGeneration.{SOURCE_ROW_INDEX_COL, TARGET_ROW_INDEX_COL}
+import org.apache.spark.sql.delta.files.TahoeBatchFileIndex
+import org.apache.spark.sql.delta.util.SetAccumulator
+
+import org.apache.spark.sql.{Column, Dataset, SparkSession}
+import org.apache.spark.sql.catalyst.expressions.{And, Expression, Literal, Or}
+import org.apache.spark.sql.catalyst.plans.logical.DeltaMergeIntoClause
+import org.apache.spark.sql.functions.{coalesce, col, count, input_file_name, lit, monotonically_increasing_id, sum}
+
+/**
+ * Gluten overwrite Delta:
+ *
+ * This file is copied from Delta 3.2.0. It is modified to overcome the following issues:
+ *   1. In Clickhouse backend, we can't implement input_file_name() correctly, we can only implement
+ *      it so that it return a a list of filenames (concated by ','). In findTouchedFiles func.
+ */
+
+/**
+ * Trait with merge execution in two phases:
+ *
+ * Phase 1: Find the input files in target that are touched by the rows that satisfy
+ *    the condition and verify that no two source rows match with the same target row.
+ *    This is implemented as an inner-join using the given condition (see [[findTouchedFiles]]).
+ *    In the special case that there is no update clause we write all the non-matching
+ *    source data as new files and skip phase 2.
+ *    Issues an error message when the ON search_condition of the MERGE statement can match
+ *    a single row from the target table with multiple rows of the source table-reference.
+ *
+ * Phase 2: Read the touched files again and write new files with updated and/or inserted rows.
+ *    If there are updates, then use an outer join using the given condition to write the
+ *    updates and inserts (see [[writeAllChanges()]]). If there are no matches for updates,
+ *    only inserts, then write them directly (see [[writeInsertsOnlyWhenNoMatches()]]).
+ *
+ *    Note, when deletion vectors are enabled, phase 2 is split into two parts:
+ *    2.a. Read the touched files again and only write modified and new
+ *         rows (see [[writeAllChanges()]]).
+ *    2.b. Read the touched files and generate deletion vectors for the modified
+ *         rows (see [[writeDVs()]]).
+ *
+ * If there are no matches for updates, only inserts, then write them directly
+ * (see [[writeInsertsOnlyWhenNoMatches()]]). This remains the same when DVs are enabled since there
+ * are no modified rows. Furthermore, eee [[InsertOnlyMergeExecutor]] for the optimized executor
+ * used in case there are only inserts.
+ */
+trait ClassicMergeExecutor extends MergeOutputGeneration {
+  self: MergeIntoCommandBase =>
+  import MergeIntoCommandBase._
+
+  /**
+   * Find the target table files that contain the rows that satisfy the merge condition. This is
+   * implemented as an inner-join between the source query/table and the target table using
+   * the merge condition.
+   */
+  protected def findTouchedFiles(
+      spark: SparkSession,
+      deltaTxn: OptimisticTransaction
+    ): (Seq[AddFile], DeduplicateCDFDeletes) = recordMergeOperation(
+      extraOpType = "findTouchedFiles",
+      status = "MERGE operation - scanning files for matches",
+      sqlMetricName = "scanTimeMs") {
+
+    val columnComparator = spark.sessionState.analyzer.resolver
+
+    // Accumulator to collect all the distinct touched files
+    val touchedFilesAccum = new SetAccumulator[String]()
+    spark.sparkContext.register(touchedFilesAccum, TOUCHED_FILES_ACCUM_NAME)
+
+    // Prune non-matching files if we don't need to collect them for NOT MATCHED BY SOURCE clauses.
+    val dataSkippedFiles =
+      if (notMatchedBySourceClauses.isEmpty) {
+        deltaTxn.filterFiles(getTargetOnlyPredicates(spark), keepNumRecords = true)
+      } else {
+        deltaTxn.filterFiles(filters = Seq(Literal.TrueLiteral), keepNumRecords = true)
+      }
+
+    // Join the source and target table using the merge condition to find touched files. An inner
+    // join collects all candidate files for MATCHED clauses, a right outer join also includes
+    // candidates for NOT MATCHED BY SOURCE clauses.
+    // In addition, we attach two columns
+    // - a monotonically increasing row id for target rows to later identify whether the same
+    //     target row is modified by multiple user or not
+    // - the target file name the row is from to later identify the files touched by matched rows
+    val joinType = if (notMatchedBySourceClauses.isEmpty) "inner" else "right_outer"
+
+    // When they are only MATCHED clauses, after the join we prune files that have no rows that
+    // satisfy any of the clause conditions.
+    val matchedPredicate =
+      if (isMatchedOnly) {
+        matchedClauses
+          // An undefined condition (None) is implicitly true
+          .map(_.condition.getOrElse(Literal.TrueLiteral))
+          .reduce((a, b) => Or(a, b))
+      } else Literal.TrueLiteral
+
+    // Compute the columns needed for the inner join.
+    val targetColsNeeded = {
+      condition.references.map(_.name) ++ deltaTxn.snapshot.metadata.partitionColumns ++
+        matchedPredicate.references.map(_.name)
+    }
+
+    val columnsToDrop = deltaTxn.snapshot.metadata.schema.map(_.name)
+      .filterNot { field =>
+        targetColsNeeded.exists { name => columnComparator(name, field) }
+      }
+    val incrSourceRowCountExpr = incrementMetricAndReturnBool("numSourceRows", valueToReturn = true)
+    // We can't use filter() directly on the expression because that will prevent
+    // column pruning. We don't need the SOURCE_ROW_PRESENT_COL so we immediately drop it.
+    val sourceDF = getMergeSource.df
+      .withColumn(SOURCE_ROW_PRESENT_COL, Column(incrSourceRowCountExpr))
+      .filter(SOURCE_ROW_PRESENT_COL)
+      .drop(SOURCE_ROW_PRESENT_COL)
+    val targetPlan =
+      buildTargetPlanWithFiles(
+        spark,
+        deltaTxn,
+        dataSkippedFiles,
+        columnsToDrop)
+    val targetDF = Dataset.ofRows(spark, targetPlan)
+      .withColumn(ROW_ID_COL, monotonically_increasing_id())
+      .withColumn(FILE_NAME_COL, input_file_name())
+
+    val joinToFindTouchedFiles =
+      sourceDF.join(targetDF, Column(condition), joinType)
+
+    // UDFs to records touched files names and add them to the accumulator
+    val recordTouchedFileName =
+      DeltaUDF.intFromStringBoolean { (fileName, shouldRecord) =>
+        if (shouldRecord) {
+          // --- modified start
+          fileName.split(",").foreach(name => touchedFilesAccum.add(name))
+          // --- modified end
+        }
+        1
+      }.asNondeterministic()
+
+    // Process the matches from the inner join to record touched files and find multiple matches
+    val collectTouchedFiles = joinToFindTouchedFiles
+      .select(col(ROW_ID_COL),
+        recordTouchedFileName(col(FILE_NAME_COL), Column(matchedPredicate)).as("one"))
+
+    // Calculate frequency of matches per source row
+    val matchedRowCounts = collectTouchedFiles.groupBy(ROW_ID_COL).agg(sum("one").as("count"))
+
+    // Get multiple matches and simultaneously collect (using touchedFilesAccum) the file names
+    import org.apache.spark.sql.delta.implicits._
+    val (multipleMatchCount, multipleMatchSum) = matchedRowCounts
+      .filter("count > 1")
+      .select(coalesce(count(Column("*")), lit(0)), coalesce(sum("count"), lit(0)))
+      .as[(Long, Long)]
+      .collect()
+      .head
+
+    val hasMultipleMatches = multipleMatchCount > 0
+    throwErrorOnMultipleMatches(hasMultipleMatches, spark)
+    if (hasMultipleMatches) {
+      // This is only allowed for delete-only queries.
+      // This query will count the duplicates for numTargetRowsDeleted in Job 2,
+      // because we count matches after the join and not just the target rows.
+      // We have to compensate for this by subtracting the duplicates later,
+      // so we need to record them here.
+      val duplicateCount = multipleMatchSum - multipleMatchCount
+      multipleMatchDeleteOnlyOvercount = Some(duplicateCount)
+    }
+
+    // Get the AddFiles using the touched file names.
+    val touchedFileNames = touchedFilesAccum.value.iterator().asScala.toSeq
+    logTrace(s"findTouchedFiles: matched files:\n\t${touchedFileNames.mkString("\n\t")}")
+
+    val nameToAddFileMap = generateCandidateFileMap(targetDeltaLog.dataPath, dataSkippedFiles)
+    val touchedAddFiles = touchedFileNames.map(
+      getTouchedFile(targetDeltaLog.dataPath, _, nameToAddFileMap))
+
+    if (metrics("numSourceRows").value == 0 && (dataSkippedFiles.isEmpty ||
+      dataSkippedFiles.forall(_.numLogicalRecords.getOrElse(0) == 0))) {
+      // The target table is empty, and the optimizer optimized away the join entirely OR the
+      // source table is truly empty. In that case, scanning the source table once is the only
+      // way to get the correct metric.
+      val numSourceRows = sourceDF.count()
+      metrics("numSourceRows").set(numSourceRows)
+    }
+
+    metrics("numTargetFilesBeforeSkipping") += deltaTxn.snapshot.numOfFiles
+    metrics("numTargetBytesBeforeSkipping") += deltaTxn.snapshot.sizeInBytes
+    val (afterSkippingBytes, afterSkippingPartitions) =
+      totalBytesAndDistinctPartitionValues(dataSkippedFiles)
+    metrics("numTargetFilesAfterSkipping") += dataSkippedFiles.size
+    metrics("numTargetBytesAfterSkipping") += afterSkippingBytes
+    metrics("numTargetPartitionsAfterSkipping") += afterSkippingPartitions
+    val (removedBytes, removedPartitions) = totalBytesAndDistinctPartitionValues(touchedAddFiles)
+    metrics("numTargetFilesRemoved") += touchedAddFiles.size
+    metrics("numTargetBytesRemoved") += removedBytes
+    metrics("numTargetPartitionsRemovedFrom") += removedPartitions
+    val dedupe = DeduplicateCDFDeletes(
+      hasMultipleMatches && isCdcEnabled(deltaTxn),
+      includesInserts)
+    (touchedAddFiles, dedupe)
+  }
+
+  /**
+   * Helper function that produces an expression by combining a sequence of clauses with OR.
+   * Requires the sequence to be non-empty.
+   */
+  protected def clauseDisjunction(clauses: Seq[DeltaMergeIntoClause]): Expression = {
+    require(clauses.nonEmpty)
+    clauses
+      .map(_.condition.getOrElse(Literal.TrueLiteral))
+      .reduceLeft(Or)
+  }
+
+  /**
+   * Returns the expression that can be used for selecting the modified rows generated
+   * by the merge operation. The expression is to designed to work irrespectively
+   * of the join type used between the source and target tables.
+   *
+   * The expression consists of two parts, one for each of the action clause types that produce
+   * row modifications: MATCHED, NOT MATCHED BY SOURCE. All actions of the same clause type form
+   * a disjunctive clause. The result is then conjucted to an expression that filters the rows
+   * of the particular action clause type. For example:
+   *
+   * MERGE INTO t
+   * USING s
+   * ON s.id = t.id
+   * WHEN MATCHED AND id < 5 THEN ...
+   * WHEN MATCHED AND id > 10 THEN ...
+   * WHEN NOT MATCHED BY SOURCE AND id > 20 THEN ...
+   *
+   * Produces the following expression:
+   *
+   * ((as.id = t.id) AND (id < 5 OR id > 10))
+   * OR
+   * ((SOURCE TABLE IS NULL) AND (id > 20))
+   */
+  protected def generateFilterForModifiedRows(): Expression = {
+    val matchedExpression = if (matchedClauses.nonEmpty) {
+      And(Column(condition).expr, clauseDisjunction(matchedClauses))
+    } else {
+      Literal.FalseLiteral
+    }
+
+    val notMatchedBySourceExpression = if (notMatchedBySourceClauses.nonEmpty) {
+      val combinedClauses = clauseDisjunction(notMatchedBySourceClauses)
+      And(col(SOURCE_ROW_PRESENT_COL).isNull.expr, combinedClauses)
+    } else {
+      Literal.FalseLiteral
+    }
+
+    Or(matchedExpression, notMatchedBySourceExpression)
+  }
+
+  /**
+   * Returns the expression that can be used for selecting the new rows generated
+   * by the merge operation.
+   */
+  protected def generateFilterForNewRows(): Expression = {
+    if (notMatchedClauses.nonEmpty) {
+      val combinedClauses = clauseDisjunction(notMatchedClauses)
+      And(col(TARGET_ROW_PRESENT_COL).isNull.expr, combinedClauses)
+    } else {
+      Literal.FalseLiteral
+    }
+  }
+
+  /**
+   * Write new files by reading the touched files and updating/inserting data using the source
+   * query/table. This is implemented using a full-outer-join using the merge condition.
+   *
+   * Note that unlike the insert-only code paths with just one control column ROW_DROPPED_COL, this
+   * method has a second control column CDC_TYPE_COL_NAME used for handling CDC when enabled.
+   */
+  protected def writeAllChanges(
+      spark: SparkSession,
+      deltaTxn: OptimisticTransaction,
+      filesToRewrite: Seq[AddFile],
+      deduplicateCDFDeletes: DeduplicateCDFDeletes,
+      writeUnmodifiedRows: Boolean): Seq[FileAction] = recordMergeOperation(
+        extraOpType = if (!writeUnmodifiedRows) {
+            "writeModifiedRowsOnly"
+          } else if (shouldOptimizeMatchedOnlyMerge(spark)) {
+            "writeAllUpdatesAndDeletes"
+          } else {
+            "writeAllChanges"
+          },
+        status = s"MERGE operation - Rewriting ${filesToRewrite.size} files",
+        sqlMetricName = "rewriteTimeMs") {
+
+      val cdcEnabled = isCdcEnabled(deltaTxn)
+
+      require(
+        !deduplicateCDFDeletes.enabled || cdcEnabled,
+        "CDF delete duplication is enabled but overall the CDF generation is disabled")
+
+    // Generate a new target dataframe that has same output attributes exprIds as the target plan.
+    // This allows us to apply the existing resolved update/insert expressions.
+    val targetPlan = buildTargetPlanWithFiles(
+      spark,
+      deltaTxn,
+      filesToRewrite,
+      columnsToDrop = Nil)
+    val baseTargetDF = RowTracking.preserveRowTrackingColumns(
+      dfWithoutRowTrackingColumns = Dataset.ofRows(spark, targetPlan),
+      snapshot = deltaTxn.snapshot)
+
+    val joinType = if (writeUnmodifiedRows) {
+      if (shouldOptimizeMatchedOnlyMerge(spark)) {
+        "rightOuter"
+      } else {
+        "fullOuter"
+      }
+    } else {
+      // Since we do not need to write unmodified rows, we can perform stricter joins.
+      if (isMatchedOnly) {
+        "inner"
+      } else if (notMatchedBySourceClauses.isEmpty) {
+        "leftOuter"
+      } else if (notMatchedClauses.isEmpty) {
+        "rightOuter"
+      } else {
+        "fullOuter"
+      }
+    }
+
+    logDebug(s"""writeAllChanges using $joinType join:
+       |  source.output: ${source.outputSet}
+       |  target.output: ${target.outputSet}
+       |  condition: $condition
+       |  newTarget.output: ${baseTargetDF.queryExecution.logical.outputSet}
+       """.stripMargin)
+
+    // Expressions to update metrics
+    val incrSourceRowCountExpr = incrementMetricAndReturnBool(
+      "numSourceRowsInSecondScan", valueToReturn = true)
+    val incrNoopCountExpr = incrementMetricAndReturnBool(
+      "numTargetRowsCopied", valueToReturn = false)
+
+    // Apply an outer join to find both, matches and non-matches. We are adding two boolean fields
+    // with value `true`, one to each side of the join. Whether this field is null or not after
+    // the outer join, will allow us to identify whether the joined row was a
+    // matched inner result or an unmatched result with null on one side.
+    val joinedBaseDF = {
+      var sourceDF = getMergeSource.df
+      if (deduplicateCDFDeletes.enabled && deduplicateCDFDeletes.includesInserts) {
+        // Add row index for the source rows to identify inserted rows during the cdf deleted rows
+        // deduplication. See [[deduplicateCDFDeletes()]]
+        sourceDF = sourceDF.withColumn(SOURCE_ROW_INDEX_COL, monotonically_increasing_id())
+      }
+      val left = sourceDF
+          .withColumn(SOURCE_ROW_PRESENT_COL, Column(incrSourceRowCountExpr))
+          // In some cases, the optimizer (incorrectly) decides to omit the metrics column.
+          // This causes issues in the source determinism validation. We work around the issue by
+          // adding a redundant dummy filter to make sure the column is not pruned.
+          .filter(SOURCE_ROW_PRESENT_COL)
+
+      val targetDF = baseTargetDF
+        .withColumn(TARGET_ROW_PRESENT_COL, lit(true))
+      val right = if (deduplicateCDFDeletes.enabled) {
+        targetDF.withColumn(TARGET_ROW_INDEX_COL, monotonically_increasing_id())
+      } else {
+        targetDF
+      }
+      left.join(right, Column(condition), joinType)
+    }
+
+    val joinedDF =
+      if (writeUnmodifiedRows) {
+        joinedBaseDF
+      } else {
+        val filter = Or(generateFilterForModifiedRows(), generateFilterForNewRows())
+        joinedBaseDF.filter(Column(filter))
+      }
+
+    // Precompute conditions in matched and not matched clauses and generate
+    // the joinedDF with precomputed columns and clauses with rewritten conditions.
+    val (joinedAndPrecomputedConditionsDF, clausesWithPrecompConditions) =
+        generatePrecomputedConditionsAndDF(
+          joinedDF,
+          clauses = matchedClauses ++ notMatchedClauses ++ notMatchedBySourceClauses)
+
+    // In case Row IDs are preserved, get the attribute expression of the Row ID column.
+    val rowIdColumnExpressionOpt =
+      MaterializedRowId.getAttribute(deltaTxn.snapshot, joinedAndPrecomputedConditionsDF)
+
+    val rowCommitVersionColumnExpressionOpt =
+      MaterializedRowCommitVersion.getAttribute(deltaTxn.snapshot, joinedAndPrecomputedConditionsDF)
+
+    // The target output columns need to be marked as nullable here, as they are going to be used
+    // to reference the output of an outer join.
+    val targetWriteCols = postEvolutionTargetExpressions(makeNullable = true)
+
+    // If there are N columns in the target table, the full outer join output will have:
+    // - N columns for target table
+    // - Two optional Row ID / Row commit version preservation columns with their physical name.
+    // - ROW_DROPPED_COL to define whether the generated row should be dropped or written
+    // - if CDC is enabled, also CDC_TYPE_COLUMN_NAME with the type of change being performed
+    //   in a particular row
+    // (N+1 or N+2 columns depending on CDC disabled / enabled)
+    val outputColNames =
+      targetWriteCols.map(_.name) ++
+        rowIdColumnExpressionOpt.map(_.name) ++
+        rowCommitVersionColumnExpressionOpt.map(_.name) ++
+        Seq(ROW_DROPPED_COL) ++
+        (if (cdcEnabled) Some(CDC_TYPE_COLUMN_NAME) else None)
+
+    // Copy expressions to copy the existing target row and not drop it (ROW_DROPPED_COL=false),
+    // and in case CDC is enabled, set it to CDC_TYPE_NOT_CDC.
+    // (N+1 or N+2 or N+3 columns depending on CDC disabled / enabled and if Row IDs are preserved)
+    val noopCopyExprs =
+      targetWriteCols ++
+        rowIdColumnExpressionOpt ++
+        rowCommitVersionColumnExpressionOpt ++
+        Seq(incrNoopCountExpr) ++
+        (if (cdcEnabled) Seq(CDC_TYPE_NOT_CDC) else Seq())
+
+    // Generate output columns.
+    val outputCols = generateWriteAllChangesOutputCols(
+      targetWriteCols,
+      rowIdColumnExpressionOpt,
+      rowCommitVersionColumnExpressionOpt,
+      outputColNames,
+      noopCopyExprs,
+      clausesWithPrecompConditions,
+      cdcEnabled
+    )
+
+    val preOutputDF = if (cdcEnabled) {
+      generateCdcAndOutputRows(
+          joinedAndPrecomputedConditionsDF,
+          outputCols,
+          outputColNames,
+          noopCopyExprs,
+          rowIdColumnExpressionOpt.map(_.name),
+          rowCommitVersionColumnExpressionOpt.map(_.name),
+          deduplicateCDFDeletes)
+    } else {
+      // change data capture is off, just output the normal data
+      joinedAndPrecomputedConditionsDF
+        .select(outputCols: _*)
+    }
+    // The filter ensures we only consider rows that are not dropped.
+    // The drop ensures that the dropped flag does not leak out to the output.
+    val outputDF = preOutputDF
+      .filter(s"$ROW_DROPPED_COL = false")
+      .drop(ROW_DROPPED_COL)
+
+    logDebug("writeAllChanges: join output plan:\n" + outputDF.queryExecution)
+
+    // Write to Delta
+    val newFiles = writeFiles(spark, deltaTxn, outputDF)
+
+    // Update metrics
+    val (addedBytes, addedPartitions) = totalBytesAndDistinctPartitionValues(newFiles)
+    metrics("numTargetFilesAdded") += newFiles.count(_.isInstanceOf[AddFile])
+    metrics("numTargetChangeFilesAdded") += newFiles.count(_.isInstanceOf[AddCDCFile])
+    metrics("numTargetChangeFileBytes") += newFiles.collect{ case f: AddCDCFile => f.size }.sum
+    metrics("numTargetBytesAdded") += addedBytes
+    metrics("numTargetPartitionsAddedTo") += addedPartitions
+    if (multipleMatchDeleteOnlyOvercount.isDefined) {
+      // Compensate for counting duplicates during the query.
+      val actualRowsDeleted =
+        metrics("numTargetRowsDeleted").value - multipleMatchDeleteOnlyOvercount.get
+      assert(actualRowsDeleted >= 0)
+      metrics("numTargetRowsDeleted").set(actualRowsDeleted)
+      val actualRowsMatchedDeleted =
+        metrics("numTargetRowsMatchedDeleted").value - multipleMatchDeleteOnlyOvercount.get
+      assert(actualRowsMatchedDeleted >= 0)
+      metrics("numTargetRowsMatchedDeleted").set(actualRowsMatchedDeleted)
+    }
+
+    newFiles
+  }
+
+  /**
+   * Writes Deletion Vectors for rows modified by the merge operation.
+   */
+  protected def writeDVs(
+    spark: SparkSession,
+    deltaTxn: OptimisticTransaction,
+    filesToRewrite: Seq[AddFile]): Seq[FileAction] = recordMergeOperation(
+      extraOpType = "writeDeletionVectors",
+      status = s"MERGE operation - Rewriting Deletion Vectors to ${filesToRewrite.size} files",
+      sqlMetricName = "rewriteTimeMs") {
+
+    val fileIndex = new TahoeBatchFileIndex(
+      spark,
+      actionType = "merge",
+      addFiles = filesToRewrite,
+      deltaLog = deltaTxn.deltaLog,
+      path = deltaTxn.deltaLog.dataPath,
+      snapshot = deltaTxn.snapshot)
+
+    val targetDF = DMLWithDeletionVectorsHelper.createTargetDfForScanningForMatches(
+      spark,
+      target,
+      fileIndex)
+
+    // For writing DVs we are only interested in the target table. When there are no
+    // notMatchedBySource clauses an inner join is sufficient. Otherwise, we need an rightOuter
+    // join to include target rows that are not matched.
+    val joinType = if (notMatchedBySourceClauses.isEmpty) {
+      "inner"
+    } else {
+      "rightOuter"
+    }
+
+    val joinedDF = getMergeSource.df
+      .withColumn(SOURCE_ROW_PRESENT_COL, lit(true))
+      .join(targetDF, Column(condition), joinType)
+
+    val modifiedRowsFilter = generateFilterForModifiedRows()
+    val matchedDVResult =
+      DeletionVectorBitmapGenerator.buildRowIndexSetsForFilesMatchingCondition(
+        spark,
+        deltaTxn,
+        tableHasDVs = true,
+        targetDf = joinedDF,
+        candidateFiles = filesToRewrite,
+        condition = modifiedRowsFilter
+      )
+
+    val nameToAddFileMap = generateCandidateFileMap(targetDeltaLog.dataPath, filesToRewrite)
+
+    val touchedFilesWithDVs = DMLWithDeletionVectorsHelper
+      .findFilesWithMatchingRows(deltaTxn, nameToAddFileMap, matchedDVResult)
+
+    val (dvActions, metricsMap) = DMLWithDeletionVectorsHelper.processUnmodifiedData(
+      spark,
+      touchedFilesWithDVs,
+      deltaTxn.snapshot)
+
+    metrics("numTargetDeletionVectorsAdded")
+      .set(metricsMap.getOrElse("numDeletionVectorsAdded", 0L))
+    metrics("numTargetDeletionVectorsRemoved")
+      .set(metricsMap.getOrElse("numDeletionVectorsRemoved", 0L))
+    metrics("numTargetDeletionVectorsUpdated")
+      .set(metricsMap.getOrElse("numDeletionVectorsUpdated", 0L))
+
+    // When DVs are enabled we override metrics related to removed files.
+    metrics("numTargetFilesRemoved").set(metricsMap.getOrElse("numRemovedFiles", 0L))
+
+    val fullyRemovedFiles = touchedFilesWithDVs.filter(_.isFullyReplaced()).map(_.fileLogEntry)
+    val (removedBytes, removedPartitions) = totalBytesAndDistinctPartitionValues(fullyRemovedFiles)
+    metrics("numTargetBytesRemoved").set(removedBytes)
+    metrics("numTargetPartitionsRemovedFrom").set(removedPartitions)
+
+    dvActions
+  }
+}
diff --git a/backends-clickhouse/src/main/delta-32/org/apache/spark/sql/delta/files/MergeTreeCommitProtocol.scala b/backends-clickhouse/src/main/delta-32/org/apache/spark/sql/delta/files/MergeTreeCommitProtocol.scala
new file mode 100644
index 000000000000..33dbce138a42
--- /dev/null
+++ b/backends-clickhouse/src/main/delta-32/org/apache/spark/sql/delta/files/MergeTreeCommitProtocol.scala
@@ -0,0 +1,255 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.delta.files
+
+// scalastyle:off import.ordering.noEmptyLine
+import java.util.UUID
+
+import scala.collection.mutable.ArrayBuffer
+
+import org.apache.spark.sql.delta.DeltaErrors
+import org.apache.spark.sql.delta.actions.{AddCDCFile, AddFile, FileAction}
+import org.apache.spark.sql.delta.commands.cdc.CDCReader.{CDC_LOCATION, CDC_PARTITION_COL}
+import org.apache.spark.sql.delta.util.{DateFormatter, PartitionUtils, TimestampFormatter, Utils => DeltaUtils}
+import org.apache.hadoop.fs.{FileStatus, Path}
+import org.apache.hadoop.mapreduce.{JobContext, TaskAttemptContext}
+
+import org.apache.spark.internal.Logging
+import org.apache.spark.internal.io.FileCommitProtocol
+import org.apache.spark.internal.io.FileCommitProtocol.TaskCommitMessage
+import org.apache.spark.sql.catalyst.expressions.Cast
+import org.apache.spark.sql.types.StringType
+
+/**
+ * This file is copied from the DelayedCommitProtocol of the Delta 3.2.0
+ * and renamed to MergeTreeCommitProtocol.
+ * It is modified to overcome the following issues:
+ *   1. the function commitTask will return TaskCommitMessage(Nil),
+ *      the FileStatus list will be get from the CH backend.
+ */
+
+/**
+ * Writes out the files to `path` and returns a list of them in `addedStatuses`. Includes
+ * special handling for partitioning on [[CDC_PARTITION_COL]] for
+ * compatibility between enabled and disabled CDC; partitions with a value of false in this
+ * column produce no corresponding partitioning directory.
+ * @param path The base path files will be written
+ * @param randomPrefixLength The length of random subdir name under 'path' that files been written
+ * @param subdir The immediate subdir under path; If randomPrefixLength and subdir both exist, file
+ *               path will be path/subdir/[rand str of randomPrefixLength]/file
+ */
+class MergeTreeCommitProtocol(
+      jobId: String,
+      path: String,
+      randomPrefixLength: Option[Int],
+      subdir: Option[String])
+  extends FileCommitProtocol with Serializable with Logging {
+  // Track the list of files added by a task, only used on the executors.
+  @transient protected var addedFiles: ArrayBuffer[(Map[String, String], String)] = _
+
+  // Track the change files added, only used on the driver. Files are sorted between this buffer
+  // and addedStatuses based on the value of the [[CDC_TYPE_COLUMN_NAME]] partition column - a
+  // file goes to addedStatuses if the value is CDC_TYPE_NOT_CDC and changeFiles otherwise.
+  @transient val changeFiles = new ArrayBuffer[AddCDCFile]
+
+  // Track the overall files added, only used on the driver.
+  //
+  // In rare cases, some of these AddFiles can be empty (i.e. contain no logical records).
+  // If the caller wishes to have only non-empty AddFiles, they must collect stats and perform
+  // the filter themselves. See TransactionalWrite::writeFiles. This filter will be best-effort,
+  // since there's no guarantee the stats will exist.
+  @transient val addedStatuses = new ArrayBuffer[AddFile]
+
+  val timestampPartitionPattern = "yyyy-MM-dd HH:mm:ss[.S]"
+
+  // Constants for CDC partition manipulation. Used only in newTaskTempFile(), but we define them
+  // here to avoid building a new redundant regex for every file.
+  protected val cdcPartitionFalse = s"${CDC_PARTITION_COL}=false"
+  protected val cdcPartitionTrue = s"${CDC_PARTITION_COL}=true"
+  protected val cdcPartitionTrueRegex = cdcPartitionTrue.r
+
+  override def setupJob(jobContext: JobContext): Unit = {
+
+  }
+
+  /**
+   * Commits a job after the writes succeed. Must be called on the driver. Partitions the written
+   * files into [[AddFile]]s and [[AddCDCFile]]s as these metadata actions are treated differently
+   * by [[TransactionalWrite]] (i.e. AddFile's may have additional statistics injected)
+   */
+  override def commitJob(jobContext: JobContext, taskCommits: Seq[TaskCommitMessage]): Unit = {
+    val (addFiles, changeFiles) = taskCommits.flatMap(_.obj.asInstanceOf[Seq[_]])
+      .partition {
+        case _: AddFile => true
+        case _: AddCDCFile => false
+        case other =>
+          throw DeltaErrors.unrecognizedFileAction(s"$other", s"${other.getClass}")
+      }
+
+    // we cannot add type information above because of type erasure
+    addedStatuses ++= addFiles.map(_.asInstanceOf[AddFile])
+    this.changeFiles ++= changeFiles.map(_.asInstanceOf[AddCDCFile]).toArray[AddCDCFile]
+  }
+
+  override def abortJob(jobContext: JobContext): Unit = {
+    // TODO: Best effort cleanup
+  }
+
+  override def setupTask(taskContext: TaskAttemptContext): Unit = {
+    addedFiles = new ArrayBuffer[(Map[String, String], String)]
+  }
+
+  protected def getFileName(
+      taskContext: TaskAttemptContext,
+      ext: String,
+      partitionValues: Map[String, String]): String = {
+    // The file name looks like part-r-00000-2dd664f9-d2c4-4ffe-878f-c6c70c1fb0cb_00003.gz.parquet
+    // Note that %05d does not truncate the split number, so if we have more than 100000 tasks,
+    // the file name is fine and won't overflow.
+    val split = taskContext.getTaskAttemptID.getTaskID.getId
+    val uuid = UUID.randomUUID.toString
+    // CDC files (CDC_PARTITION_COL = true) are named with "cdc-..." instead of "part-...".
+    if (partitionValues.get(CDC_PARTITION_COL).contains("true")) {
+      f"cdc-$split%05d-$uuid$ext"
+    } else {
+      f"part-$split%05d-$uuid$ext"
+    }
+  }
+
+  protected def parsePartitions(dir: String): Map[String, String] = {
+    // TODO: timezones?
+    // TODO: enable validatePartitionColumns?
+    val dateFormatter = DateFormatter()
+    val timestampFormatter =
+      TimestampFormatter(timestampPartitionPattern, java.util.TimeZone.getDefault)
+    val parsedPartition =
+      PartitionUtils
+        .parsePartition(
+          new Path(dir),
+          typeInference = false,
+          Set.empty,
+          Map.empty,
+          validatePartitionColumns = false,
+          java.util.TimeZone.getDefault,
+          dateFormatter,
+          timestampFormatter)
+        ._1
+        .get
+    parsedPartition
+        .columnNames
+        .zip(
+          parsedPartition
+            .literals
+            .map(l => Cast(l, StringType).eval())
+            .map(Option(_).map(_.toString).orNull))
+        .toMap
+  }
+
+  /**
+   * Notifies the commit protocol to add a new file, and gets back the full path that should be
+   * used.
+   *
+   * Includes special logic for CDC files and paths. Specifically, if the directory `dir` contains
+   * the CDC partition `__is_cdc=true` then
+   * - the file name begins with `cdc-` instead of `part-`
+   * - the directory has the `__is_cdc=true` partition removed and is placed in the `_changed_data`
+   *   folder
+   */
+  override def newTaskTempFile(
+      taskContext: TaskAttemptContext, dir: Option[String], ext: String): String = {
+    val partitionValues = dir.map(parsePartitions).getOrElse(Map.empty[String, String])
+    val filename = getFileName(taskContext, ext, partitionValues)
+    val relativePath = randomPrefixLength.map { prefixLength =>
+      DeltaUtils.getRandomPrefix(prefixLength) // Generate a random prefix as a first choice
+    }.orElse {
+      dir // or else write into the partition directory if it is partitioned
+    }.map { subDir =>
+      // Do some surgery on the paths we write out to eliminate the CDC_PARTITION_COL. Non-CDC
+      // data is written to the base location, while CDC data is written to a special folder
+      // _change_data.
+      // The code here gets a bit complicated to accommodate two corner cases: an empty subdir
+      // can't be passed to new Path() at all, and a single-level subdir won't have a trailing
+      // slash.
+      if (subDir == cdcPartitionFalse) {
+        new Path(filename)
+      } else if (subDir.startsWith(cdcPartitionTrue)) {
+        val cleanedSubDir = cdcPartitionTrueRegex.replaceFirstIn(subDir, CDC_LOCATION)
+        new Path(cleanedSubDir, filename)
+      } else if (subDir.startsWith(cdcPartitionFalse)) {
+        // We need to remove the trailing slash in addition to the directory - otherwise
+        // it'll be interpreted as an absolute path and fail.
+        val cleanedSubDir = subDir.stripPrefix(cdcPartitionFalse + "/")
+        new Path(cleanedSubDir, filename)
+      } else {
+        new Path(subDir, filename)
+      }
+    }.getOrElse(new Path(filename)) // or directly write out to the output path
+
+    val relativePathWithSubdir = subdir.map(new Path(_, relativePath)).getOrElse(relativePath)
+    addedFiles.append((partitionValues, relativePathWithSubdir.toUri.toString))
+    new Path(path, relativePathWithSubdir).toString
+  }
+
+  override def newTaskTempFileAbsPath(
+      taskContext: TaskAttemptContext, absoluteDir: String, ext: String): String = {
+    throw DeltaErrors.unsupportedAbsPathAddFile(s"$this")
+  }
+
+  protected def buildActionFromAddedFile(
+      f: (Map[String, String], String),
+      stat: FileStatus,
+      taskContext: TaskAttemptContext): FileAction = {
+    // The partitioning in the Delta log action will be read back as part of the data, so our
+    // virtual CDC_PARTITION_COL needs to be stripped out.
+    val partitioning = f._1.filter { case (k, v) => k != CDC_PARTITION_COL }
+    f._1.get(CDC_PARTITION_COL) match {
+      case Some("true") =>
+        val partitioning = f._1.filter { case (k, v) => k != CDC_PARTITION_COL }
+        AddCDCFile(f._2, partitioning, stat.getLen)
+      case _ =>
+        val addFile = AddFile(f._2, partitioning, stat.getLen, stat.getModificationTime, true)
+        addFile
+    }
+  }
+
+  override def commitTask(taskContext: TaskAttemptContext): TaskCommitMessage = {
+    // --- modified start
+    /* if (addedFiles.nonEmpty) {
+      val fs = new Path(path, addedFiles.head._2).getFileSystem(taskContext.getConfiguration)
+      val statuses: Seq[FileAction] = addedFiles.map { f =>
+        // scalastyle:off pathfromuri
+        val filePath = new Path(path, new Path(new URI(f._2)))
+        // scalastyle:on pathfromuri
+        val stat = fs.getFileStatus(filePath)
+
+        buildActionFromAddedFile(f, stat, taskContext)
+      }.toSeq
+
+      new TaskCommitMessage(statuses)
+    } else {
+      new TaskCommitMessage(Nil)
+    } */
+    // --- modified end
+    new TaskCommitMessage(Nil)
+  }
+
+  override def abortTask(taskContext: TaskAttemptContext): Unit = {
+    // TODO: we can also try delete the addedFiles as a best-effort cleanup.
+  }
+}
+
diff --git a/backends-clickhouse/src/main/delta-32/org/apache/spark/sql/execution/datasources/v2/clickhouse/ClickHouseDataSource.scala b/backends-clickhouse/src/main/delta-32/org/apache/spark/sql/execution/datasources/v2/clickhouse/ClickHouseDataSource.scala
new file mode 100644
index 000000000000..fcf1cee66671
--- /dev/null
+++ b/backends-clickhouse/src/main/delta-32/org/apache/spark/sql/execution/datasources/v2/clickhouse/ClickHouseDataSource.scala
@@ -0,0 +1,144 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.execution.datasources.v2.clickhouse
+
+import org.apache.spark.sql._
+import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap
+import org.apache.spark.sql.connector.catalog.Table
+import org.apache.spark.sql.connector.expressions.Transform
+import org.apache.spark.sql.delta._
+import org.apache.spark.sql.delta.catalog.ClickHouseTableV2
+import org.apache.spark.sql.delta.commands.WriteIntoDelta
+import org.apache.spark.sql.delta.commands.cdc.CDCReader
+import org.apache.spark.sql.delta.sources.{DeltaDataSource, DeltaSourceUtils, DeltaSQLConf}
+import org.apache.spark.sql.sources.BaseRelation
+import org.apache.spark.sql.types.StructType
+import org.apache.spark.sql.util.CaseInsensitiveStringMap
+
+import org.apache.hadoop.fs.Path
+
+import scala.collection.JavaConverters._
+import scala.collection.mutable
+
+/** A DataSource V1 for integrating Delta into Spark SQL batch and Streaming APIs. */
+class ClickHouseDataSource extends DeltaDataSource {
+
+  override def shortName(): String = {
+    ClickHouseConfig.NAME
+  }
+
+  override def getTable(
+      schema: StructType,
+      partitioning: Array[Transform],
+      properties: java.util.Map[String, String]): Table = {
+    val options = new CaseInsensitiveStringMap(properties)
+    val path = options.get("path")
+    if (path == null) throw DeltaErrors.pathNotSpecifiedException
+    new ClickHouseTableV2(
+      SparkSession.active,
+      new Path(path),
+      options = properties.asScala.toMap,
+      clickhouseExtensionOptions = ClickHouseConfig
+        .createMergeTreeConfigurations(
+          ClickHouseConfig
+            .getMergeTreeConfigurations(properties)
+            .asJava)
+    )
+  }
+
+  override def createRelation(
+      sqlContext: SQLContext,
+      mode: SaveMode,
+      parameters: Map[String, String],
+      data: DataFrame): BaseRelation = {
+    val path = parameters.getOrElse("path", throw DeltaErrors.pathNotSpecifiedException)
+    val partitionColumns = parameters
+      .get(DeltaSourceUtils.PARTITIONING_COLUMNS_KEY)
+      .map(DeltaDataSource.decodePartitioningColumns)
+      .getOrElse(Nil)
+
+    val deltaLog = DeltaLog.forTable(sqlContext.sparkSession, new Path(path), parameters)
+    // need to use the latest snapshot
+    val configs = if (deltaLog.update().version < 0) {
+      // when creating table, save the clickhouse config to the delta metadata
+      val clickHouseTableV2 = ClickHouseTableV2.getTable(deltaLog)
+      clickHouseTableV2.properties().asScala.toMap ++ DeltaConfigs
+        .validateConfigurations(parameters.filterKeys(_.startsWith("delta.")).toMap)
+    } else {
+      DeltaConfigs.validateConfigurations(parameters.filterKeys(_.startsWith("delta.")).toMap)
+    }
+    WriteIntoDelta(
+      deltaLog = deltaLog,
+      mode = mode,
+      new DeltaOptions(parameters, sqlContext.sparkSession.sessionState.conf),
+      partitionColumns = partitionColumns,
+      configuration = configs,
+      data = data
+    ).run(sqlContext.sparkSession)
+
+    deltaLog.createRelation()
+  }
+
+  override def createRelation(
+      sqlContext: SQLContext,
+      parameters: Map[String, String]): BaseRelation = {
+    recordFrameProfile("Delta", "DeltaDataSource.createRelation") {
+      val maybePath = parameters.getOrElse("path", throw DeltaErrors.pathNotSpecifiedException)
+
+      // Log any invalid options that are being passed in
+      DeltaOptions.verifyOptions(CaseInsensitiveMap(parameters))
+
+      val timeTravelByParams = DeltaDataSource.getTimeTravelVersion(parameters)
+      var cdcOptions: mutable.Map[String, String] = mutable.Map.empty
+      val caseInsensitiveParams = new CaseInsensitiveStringMap(parameters.asJava)
+      if (CDCReader.isCDCRead(caseInsensitiveParams)) {
+        cdcOptions = mutable.Map[String, String](DeltaDataSource.CDC_ENABLED_KEY -> "true")
+        if (caseInsensitiveParams.containsKey(DeltaDataSource.CDC_START_VERSION_KEY)) {
+          cdcOptions(DeltaDataSource.CDC_START_VERSION_KEY) =
+            caseInsensitiveParams.get(DeltaDataSource.CDC_START_VERSION_KEY)
+        }
+        if (caseInsensitiveParams.containsKey(DeltaDataSource.CDC_START_TIMESTAMP_KEY)) {
+          cdcOptions(DeltaDataSource.CDC_START_TIMESTAMP_KEY) =
+            caseInsensitiveParams.get(DeltaDataSource.CDC_START_TIMESTAMP_KEY)
+        }
+        if (caseInsensitiveParams.containsKey(DeltaDataSource.CDC_END_VERSION_KEY)) {
+          cdcOptions(DeltaDataSource.CDC_END_VERSION_KEY) =
+            caseInsensitiveParams.get(DeltaDataSource.CDC_END_VERSION_KEY)
+        }
+        if (caseInsensitiveParams.containsKey(DeltaDataSource.CDC_END_TIMESTAMP_KEY)) {
+          cdcOptions(DeltaDataSource.CDC_END_TIMESTAMP_KEY) =
+            caseInsensitiveParams.get(DeltaDataSource.CDC_END_TIMESTAMP_KEY)
+        }
+      }
+      val dfOptions: Map[String, String] =
+        if (
+          sqlContext.sparkSession.sessionState.conf.getConf(
+            DeltaSQLConf.LOAD_FILE_SYSTEM_CONFIGS_FROM_DATAFRAME_OPTIONS)
+        ) {
+          parameters ++ cdcOptions
+        } else {
+          cdcOptions.toMap
+        }
+      (new ClickHouseTableV2(
+        sqlContext.sparkSession,
+        new Path(maybePath),
+        timeTravelOpt = timeTravelByParams,
+        options = dfOptions
+      )).toBaseRelation
+    }
+  }
+}
diff --git a/backends-clickhouse/src/main/delta-32/org/apache/spark/sql/execution/datasources/v2/clickhouse/ClickHouseSparkCatalog.scala b/backends-clickhouse/src/main/delta-32/org/apache/spark/sql/execution/datasources/v2/clickhouse/ClickHouseSparkCatalog.scala
new file mode 100644
index 000000000000..dde7013962d0
--- /dev/null
+++ b/backends-clickhouse/src/main/delta-32/org/apache/spark/sql/execution/datasources/v2/clickhouse/ClickHouseSparkCatalog.scala
@@ -0,0 +1,734 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.execution.datasources.v2.clickhouse
+
+import org.apache.gluten.sql.shims.SparkShimLoader
+
+import org.apache.spark.SparkException
+import org.apache.spark.sql.{AnalysisException, DataFrame, SparkSession}
+import org.apache.spark.sql.catalyst.TableIdentifier
+import org.apache.spark.sql.catalyst.analysis.{NoSuchDatabaseException, NoSuchNamespaceException, NoSuchTableException}
+import org.apache.spark.sql.catalyst.catalog._
+import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
+import org.apache.spark.sql.connector.catalog._
+import org.apache.spark.sql.connector.catalog.TableCapability.V1_BATCH_WRITE
+import org.apache.spark.sql.connector.expressions.{FieldReference, IdentityTransform, Transform}
+import org.apache.spark.sql.connector.write.{LogicalWriteInfo, V1Write, WriteBuilder}
+import org.apache.spark.sql.delta.{DeltaConfigs, DeltaErrors, DeltaLog, DeltaOptions, DeltaTableUtils}
+import org.apache.spark.sql.delta.DeltaTableIdentifier.gluePermissionError
+import org.apache.spark.sql.delta.catalog.{BucketTransform, ClickHouseTableV2, DeltaTableV2, TempClickHouseTableV2}
+import org.apache.spark.sql.delta.commands.{CreateDeltaTableCommand, TableCreationModes, WriteIntoDelta}
+import org.apache.spark.sql.delta.metering.DeltaLogging
+import org.apache.spark.sql.delta.skipping.clustering.ClusteredTableUtils
+import org.apache.spark.sql.delta.skipping.clustering.temp.{ClusterBySpec, ClusterByTransform => TempClusterByTransform}
+import org.apache.spark.sql.delta.sources.{DeltaSourceUtils, DeltaSQLConf}
+import org.apache.spark.sql.delta.stats.StatisticsCollection
+import org.apache.spark.sql.execution.datasources.{DataSource, PartitioningUtils}
+import org.apache.spark.sql.execution.datasources.v2.clickhouse.utils.CHDataSourceUtils
+import org.apache.spark.sql.sources.InsertableRelation
+import org.apache.spark.sql.types.StructType
+
+import org.apache.hadoop.fs.Path
+
+import java.util
+import java.util.Locale
+
+import scala.collection.JavaConverters._
+import scala.collection.mutable
+
+class ClickHouseSparkCatalog
+  extends DelegatingCatalogExtension
+  with StagingTableCatalog
+  with SupportsPathIdentifier
+  with DeltaLogging {
+
+  val spark = SparkSession.active
+
+  private def createCatalogTable(
+      ident: Identifier,
+      schema: StructType,
+      partitions: Array[Transform],
+      properties: util.Map[String, String]
+  ): Table = {
+    super.createTable(ident, schema, partitions, properties)
+  }
+
+  override def createTable(
+      ident: Identifier,
+      columns: Array[org.apache.spark.sql.connector.catalog.Column],
+      partitions: Array[Transform],
+      properties: util.Map[String, String]): Table = {
+    createTable(
+      ident,
+      org.apache.spark.sql.connector.catalog.CatalogV2Util.v2ColumnsToStructType(columns),
+      partitions,
+      properties)
+  }
+
+  override def createTable(
+      ident: Identifier,
+      schema: StructType,
+      partitions: Array[Transform],
+      properties: util.Map[String, String]): Table = {
+    if (CHDataSourceUtils.isClickHouseDataSourceName(getProvider(properties))) {
+      createClickHouseTable(
+        ident,
+        schema,
+        partitions,
+        properties,
+        Map.empty,
+        sourceQuery = None,
+        TableCreationModes.Create)
+    } else if (DeltaSourceUtils.isDeltaDataSourceName(getProvider(properties))) {
+      createDeltaTable(
+        ident,
+        schema,
+        partitions,
+        properties,
+        Map.empty,
+        sourceQuery = None,
+        TableCreationModes.Create
+      )
+    } else {
+      createCatalogTable(ident, schema, partitions, properties)
+    }
+  }
+
+  /**
+   * Creates a ClickHouse table
+   *
+   * @param ident
+   *   The identifier of the table
+   * @param schema
+   *   The schema of the table
+   * @param partitions
+   *   The partition transforms for the table
+   * @param allTableProperties
+   *   The table properties that configure the behavior of the table or provide information about
+   *   the table
+   * @param writeOptions
+   *   Options specific to the write during table creation or replacement
+   * @param sourceQuery
+   *   A query if this CREATE request came from a CTAS or RTAS
+   * @param operation
+   *   The specific table creation mode, whether this is a Create/Replace/Create or Replace
+   */
+  private def createClickHouseTable(
+      ident: Identifier,
+      schema: StructType,
+      partitions: Array[Transform],
+      allTableProperties: util.Map[String, String],
+      writeOptions: Map[String, String],
+      sourceQuery: Option[DataFrame],
+      operation: TableCreationModes.CreationMode): Table = {
+    val (partitionColumns, maybeBucketSpec) =
+      SparkShimLoader.getSparkShims.convertPartitionTransforms(partitions)
+    var newSchema = schema
+    var newPartitionColumns = partitionColumns
+    var newBucketSpec = maybeBucketSpec
+
+    // Delta does not support bucket feature, so save the bucket infos into properties if exists.
+    val tableProperties =
+      ClickHouseConfig.createMergeTreeConfigurations(allTableProperties, newBucketSpec)
+
+    val isByPath = isPathIdentifier(ident)
+    val location = if (isByPath) {
+      Option(ident.name())
+    } else {
+      Option(allTableProperties.get("location"))
+    }
+    val locUriOpt = location.map(CatalogUtils.stringToURI)
+    val storage = DataSource
+      .buildStorageFormatFromOptions(writeOptions)
+      .copy(locationUri = locUriOpt)
+    val tableType =
+      if (location.isDefined) CatalogTableType.EXTERNAL else CatalogTableType.MANAGED
+    val id = {
+      TableIdentifier(ident.name(), ident.namespace().lastOption)
+    }
+    val existingTableOpt = getExistingTableIfExists(id)
+    val loc = new Path(locUriOpt.getOrElse(spark.sessionState.catalog.defaultTablePath(id)))
+    val commentOpt = Option(allTableProperties.get("comment"))
+
+    val tableDesc = new CatalogTable(
+      identifier = id,
+      tableType = tableType,
+      storage = storage,
+      schema = newSchema,
+      provider = Some(ClickHouseConfig.ALT_NAME),
+      partitionColumnNames = newPartitionColumns,
+      bucketSpec = newBucketSpec,
+      properties = tableProperties,
+      comment = commentOpt
+    )
+
+    val withDb = verifyTableAndSolidify(tableDesc, None, isMergeTree = true)
+
+    val writer = sourceQuery.map {
+      df =>
+        WriteIntoDelta(
+          DeltaLog.forTable(spark, loc),
+          operation.mode,
+          new DeltaOptions(withDb.storage.properties, spark.sessionState.conf),
+          withDb.partitionColumnNames,
+          withDb.properties ++ commentOpt.map("comment" -> _),
+          df,
+          schemaInCatalog = if (newSchema != schema) Some(newSchema) else None
+        )
+    }
+    try {
+      ClickHouseTableV2.temporalThreadLocalCHTable.set(
+        new TempClickHouseTableV2(spark, Some(withDb)))
+
+      CreateDeltaTableCommand(
+        withDb,
+        existingTableOpt,
+        operation.mode,
+        writer,
+        operation = operation,
+        tableByPath = isByPath).run(spark)
+    } finally {
+      ClickHouseTableV2.temporalThreadLocalCHTable.remove()
+    }
+
+    logInfo(s"create table ${ident.toString} successfully.")
+    loadTable(ident)
+  }
+
+  /**
+   * Creates a Delta table
+   *
+   * @param ident
+   *   The identifier of the table
+   * @param schema
+   *   The schema of the table
+   * @param partitions
+   *   The partition transforms for the table
+   * @param allTableProperties
+   *   The table properties that configure the behavior of the table or provide information about
+   *   the table
+   * @param writeOptions
+   *   Options specific to the write during table creation or replacement
+   * @param sourceQuery
+   *   A query if this CREATE request came from a CTAS or RTAS
+   * @param operation
+   *   The specific table creation mode, whether this is a Create/Replace/Create or Replace
+   */
+  private def createDeltaTable(
+      ident: Identifier,
+      schema: StructType,
+      partitions: Array[Transform],
+      allTableProperties: util.Map[String, String],
+      writeOptions: Map[String, String],
+      sourceQuery: Option[DataFrame],
+      operation: TableCreationModes.CreationMode
+  ): Table = {
+    // These two keys are tableProperties in data source v2 but not in v1, so we have to filter
+    // them out. Otherwise property consistency checks will fail.
+    val tableProperties = allTableProperties.asScala.filterKeys {
+      case TableCatalog.PROP_LOCATION => false
+      case TableCatalog.PROP_PROVIDER => false
+      case TableCatalog.PROP_COMMENT => false
+      case TableCatalog.PROP_OWNER => false
+      case TableCatalog.PROP_EXTERNAL => false
+      case "path" => false
+      case "option.path" => false
+      case _ => true
+    }.toMap
+    val (partitionColumns, maybeBucketSpec, maybeClusterBySpec) = convertTransforms(partitions)
+    var newSchema = schema
+    var newPartitionColumns = partitionColumns
+    var newBucketSpec = maybeBucketSpec
+    val conf = spark.sessionState.conf
+    allTableProperties.asScala
+      .get(DeltaConfigs.DATA_SKIPPING_STATS_COLUMNS.key)
+      .foreach(StatisticsCollection.validateDeltaStatsColumns(schema, partitionColumns, _))
+    val isByPath = isPathIdentifier(ident)
+    if (
+      isByPath && !conf.getConf(DeltaSQLConf.DELTA_LEGACY_ALLOW_AMBIGUOUS_PATHS)
+      && allTableProperties.containsKey("location")
+      // The location property can be qualified and different from the path in the identifier, so
+      // we check `endsWith` here.
+      && Option(allTableProperties.get("location")).exists(!_.endsWith(ident.name()))
+    ) {
+      throw DeltaErrors.ambiguousPathsInCreateTableException(
+        ident.name(),
+        allTableProperties.get("location"))
+    }
+    val location = if (isByPath) {
+      Option(ident.name())
+    } else {
+      Option(allTableProperties.get("location"))
+    }
+    val id = {
+      TableIdentifier(ident.name(), ident.namespace().lastOption)
+    }
+    var locUriOpt = location.map(CatalogUtils.stringToURI)
+    val existingTableOpt = getExistingTableIfExists(id)
+    val loc = locUriOpt
+      .orElse(existingTableOpt.flatMap(_.storage.locationUri))
+      .getOrElse(spark.sessionState.catalog.defaultTablePath(id))
+    val storage = DataSource
+      .buildStorageFormatFromOptions(writeOptions)
+      .copy(locationUri = Option(loc))
+    val tableType =
+      if (location.isDefined) CatalogTableType.EXTERNAL else CatalogTableType.MANAGED
+    val commentOpt = Option(allTableProperties.get("comment"))
+
+    var tableDesc = new CatalogTable(
+      identifier = id,
+      tableType = tableType,
+      storage = storage,
+      schema = newSchema,
+      provider = Some(DeltaSourceUtils.ALT_NAME),
+      partitionColumnNames = newPartitionColumns,
+      bucketSpec = newBucketSpec,
+      properties = tableProperties,
+      comment = commentOpt
+    )
+
+    val withDb =
+      verifyTableAndSolidify(
+        tableDesc,
+        None,
+        maybeClusterBySpec
+      )
+
+    val writer = sourceQuery.map {
+      df =>
+        WriteIntoDelta(
+          DeltaLog.forTable(spark, new Path(loc)),
+          operation.mode,
+          new DeltaOptions(withDb.storage.properties, spark.sessionState.conf),
+          withDb.partitionColumnNames,
+          withDb.properties ++ commentOpt.map("comment" -> _),
+          df,
+          Some(tableDesc),
+          schemaInCatalog = if (newSchema != schema) Some(newSchema) else None
+        )
+    }
+
+    CreateDeltaTableCommand(
+      withDb,
+      existingTableOpt,
+      operation.mode,
+      writer,
+      operation,
+      tableByPath = isByPath).run(spark)
+
+    loadTable(ident)
+  }
+
+  /** Performs checks on the parameters provided for table creation for a ClickHouse table. */
+  private def verifyTableAndSolidify(
+      tableDesc: CatalogTable,
+      query: Option[LogicalPlan],
+      maybeClusterBySpec: Option[ClusterBySpec] = None,
+      isMergeTree: Boolean = false): CatalogTable = {
+
+    if (!isMergeTree && tableDesc.bucketSpec.isDefined) {
+      throw DeltaErrors.operationNotSupportedException("Bucketing", tableDesc.identifier)
+    }
+
+    val schema = query
+      .map {
+        plan =>
+          assert(tableDesc.schema.isEmpty, "Can't specify table schema in CTAS.")
+          plan.schema.asNullable
+      }
+      .getOrElse(tableDesc.schema)
+
+    PartitioningUtils.validatePartitionColumn(
+      schema,
+      tableDesc.partitionColumnNames,
+      caseSensitive = false
+    ) // Delta is case insensitive
+
+    var validatedConfigurations = if (isMergeTree) {
+      tableDesc.properties
+    } else {
+      DeltaConfigs.validateConfigurations(tableDesc.properties)
+    }
+
+    ClusteredTableUtils.validateExistingTableFeatureProperties(validatedConfigurations)
+
+    // Add needed configs for Clustered table.
+    if (maybeClusterBySpec.nonEmpty) {
+      validatedConfigurations = validatedConfigurations ++
+        ClusteredTableUtils.getClusteringColumnsAsProperty(maybeClusterBySpec) ++
+        ClusteredTableUtils.getTableFeatureProperties(validatedConfigurations)
+    }
+
+    val db = tableDesc.identifier.database.getOrElse(catalog.getCurrentDatabase)
+    val tableIdentWithDB = tableDesc.identifier.copy(database = Some(db))
+    tableDesc.copy(
+      identifier = tableIdentWithDB,
+      schema = schema,
+      properties = validatedConfigurations)
+  }
+
+  /** Checks if a table already exists for the provided identifier. */
+  def getExistingTableIfExists(table: TableIdentifier): Option[CatalogTable] = {
+    // If this is a path identifier, we cannot return an existing CatalogTable. The Create command
+    // will check the file system itself
+    if (isPathIdentifier(table)) return None
+    val tableExists = catalog.tableExists(table)
+    if (tableExists) {
+      val oldTable = catalog.getTableMetadata(table)
+      if (oldTable.tableType == CatalogTableType.VIEW) {
+        throw new AnalysisException(s"$table is a view. You may not write data into a view.")
+      }
+      if (
+        !DeltaSourceUtils.isDeltaTable(oldTable.provider) &&
+        !CHDataSourceUtils.isClickHouseTable(oldTable.provider)
+      ) {
+        throw DeltaErrors.notADeltaTable(table.table)
+      }
+      Some(oldTable)
+    } else {
+      None
+    }
+  }
+
+  private def getProvider(properties: util.Map[String, String]): String = {
+    Option(properties.get("provider")).getOrElse(ClickHouseConfig.NAME)
+  }
+
+  override def loadTable(ident: Identifier): Table = {
+    try {
+      super.loadTable(ident) match {
+        case v1: V1Table if CHDataSourceUtils.isClickHouseTable(v1.catalogTable) =>
+          new ClickHouseTableV2(
+            spark,
+            new Path(v1.catalogTable.location),
+            catalogTable = Some(v1.catalogTable),
+            tableIdentifier = Some(ident.toString))
+        case v1: V1Table if DeltaTableUtils.isDeltaTable(v1.catalogTable) =>
+          DeltaTableV2(
+            spark,
+            new Path(v1.catalogTable.location),
+            catalogTable = Some(v1.catalogTable),
+            tableIdentifier = Some(ident.toString))
+        case o =>
+          o
+      }
+    } catch {
+      case _: NoSuchDatabaseException | _: NoSuchNamespaceException | _: NoSuchTableException
+          if isPathIdentifier(ident) =>
+        newDeltaPathTable(ident)
+      case e: AnalysisException if gluePermissionError(e) && isPathIdentifier(ident) =>
+        logWarning(
+          "Received an access denied error from Glue. Assuming this " +
+            s"identifier ($ident) is path based.",
+          e)
+        newDeltaPathTable(ident)
+    }
+  }
+
+  private def newDeltaPathTable(ident: Identifier): DeltaTableV2 = {
+    if (hasClickHouseNamespace(ident)) {
+      new ClickHouseTableV2(spark, new Path(ident.name()))
+    } else {
+      DeltaTableV2(spark, new Path(ident.name()))
+    }
+  }
+
+  /** support to delete mergetree data from the external table */
+  override def purgeTable(ident: Identifier): Boolean = {
+    try {
+      loadTable(ident) match {
+        case t: ClickHouseTableV2 =>
+          val tableType = t.properties().getOrDefault("Type", "")
+          // file-based or external table
+          val isExternal = tableType.isEmpty || tableType.equalsIgnoreCase("external")
+          val tablePath = t.rootPath
+          // first delete the table metadata
+          val deletedTable = super.dropTable(ident)
+          if (deletedTable && isExternal) {
+            val fs = tablePath.getFileSystem(spark.sessionState.newHadoopConf())
+            // delete all data if there is a external table
+            fs.delete(tablePath, true)
+          }
+          true
+        case _ => super.purgeTable(ident)
+      }
+    } catch {
+      case _: Exception =>
+        false
+    }
+  }
+
+  override def stageReplace(
+      ident: Identifier,
+      schema: StructType,
+      partitions: Array[Transform],
+      properties: util.Map[String, String]): StagedTable =
+    recordFrameProfile("DeltaCatalog", "stageReplace") {
+      if (
+        CHDataSourceUtils.isClickHouseDataSourceName(getProvider(properties)) ||
+        DeltaSourceUtils.isDeltaDataSourceName(getProvider(properties))
+      ) {
+        new StagedDeltaTableV2(ident, schema, partitions, properties, TableCreationModes.Replace)
+      } else {
+        super.dropTable(ident)
+        val table = createCatalogTable(ident, schema, partitions, properties)
+        BestEffortStagedTable(ident, table, this)
+      }
+    }
+
+  override def stageCreateOrReplace(
+      ident: Identifier,
+      schema: StructType,
+      partitions: Array[Transform],
+      properties: util.Map[String, String]): StagedTable =
+    recordFrameProfile("DeltaCatalog", "stageCreateOrReplace") {
+      if (
+        CHDataSourceUtils.isClickHouseDataSourceName(getProvider(properties)) ||
+        DeltaSourceUtils.isDeltaDataSourceName(getProvider(properties))
+      ) {
+        new StagedDeltaTableV2(
+          ident,
+          schema,
+          partitions,
+          properties,
+          TableCreationModes.CreateOrReplace)
+      } else {
+        try super.dropTable(ident)
+        catch {
+          case _: NoSuchDatabaseException => // this is fine
+          case _: NoSuchTableException => // this is fine
+        }
+        val table = createCatalogTable(ident, schema, partitions, properties)
+        BestEffortStagedTable(ident, table, this)
+      }
+    }
+
+  override def stageCreate(
+      ident: Identifier,
+      schema: StructType,
+      partitions: Array[Transform],
+      properties: util.Map[String, String]): StagedTable =
+    recordFrameProfile("DeltaCatalog", "stageCreate") {
+      if (
+        CHDataSourceUtils.isClickHouseDataSourceName(getProvider(properties)) ||
+        DeltaSourceUtils.isDeltaDataSourceName(getProvider(properties))
+      ) {
+        new StagedDeltaTableV2(ident, schema, partitions, properties, TableCreationModes.Create)
+      } else {
+        val table = createCatalogTable(ident, schema, partitions, properties)
+        BestEffortStagedTable(ident, table, this)
+      }
+    }
+
+  // Copy of V2SessionCatalog.convertTransforms, which is private.
+  private def convertTransforms(
+      partitions: Seq[Transform]): (Seq[String], Option[BucketSpec], Option[ClusterBySpec]) = {
+    val identityCols = new mutable.ArrayBuffer[String]
+    var bucketSpec = Option.empty[BucketSpec]
+    var clusterBySpec = Option.empty[ClusterBySpec]
+
+    partitions.map {
+      case IdentityTransform(FieldReference(Seq(col))) =>
+        identityCols += col
+
+      case BucketTransform(numBuckets, bucketCols, sortCols) =>
+        bucketSpec = Some(
+          BucketSpec(
+            numBuckets,
+            bucketCols.map(_.fieldNames.head),
+            sortCols.map(_.fieldNames.head)))
+      case TempClusterByTransform(columnNames) =>
+        if (clusterBySpec.nonEmpty) {
+          // Parser guarantees that it only passes down one TempClusterByTransform.
+          throw SparkException.internalError("Cannot have multiple cluster by transforms.")
+        }
+        clusterBySpec = Some(ClusterBySpec(columnNames))
+
+      case transform =>
+        throw DeltaErrors.operationNotSupportedException(s"Partitioning by expressions")
+    }
+    // Parser guarantees that partition and cluster by can't both exist.
+    assert(!(identityCols.toSeq.nonEmpty && clusterBySpec.nonEmpty))
+    // Parser guarantees that bucketing and cluster by can't both exist.
+    assert(!(bucketSpec.nonEmpty && clusterBySpec.nonEmpty))
+
+    (identityCols.toSeq, bucketSpec, clusterBySpec)
+  }
+
+  /**
+   * A staged delta table, which creates a HiveMetaStore entry and appends data if this was a
+   * CTAS/RTAS command. We have a ugly way of using this API right now, but it's the best way to
+   * maintain old behavior compatibility between Databricks Runtime and OSS Delta Lake.
+   */
+  private class StagedDeltaTableV2(
+      ident: Identifier,
+      override val schema: StructType,
+      val partitions: Array[Transform],
+      override val properties: util.Map[String, String],
+      operation: TableCreationModes.CreationMode)
+    extends StagedTable
+    with SupportsWrite {
+
+    private var asSelectQuery: Option[DataFrame] = None
+    private var writeOptions: Map[String, String] = Map.empty
+
+    override def commitStagedChanges(): Unit =
+      recordFrameProfile("DeltaCatalog", "commitStagedChanges") {
+        val conf = spark.sessionState.conf
+        val props = new util.HashMap[String, String]()
+        // Options passed in through the SQL API will show up both with an "option." prefix and
+        // without in Spark 3.1, so we need to remove those from the properties
+        val optionsThroughProperties = properties.asScala.collect {
+          case (k, _) if k.startsWith("option.") => k.stripPrefix("option.")
+        }.toSet
+        val sqlWriteOptions = new util.HashMap[String, String]()
+        properties.asScala.foreach {
+          case (k, v) =>
+            if (!k.startsWith("option.") && !optionsThroughProperties.contains(k)) {
+              // Do not add to properties
+              props.put(k, v)
+            } else if (optionsThroughProperties.contains(k)) {
+              sqlWriteOptions.put(k, v)
+            }
+        }
+        if (writeOptions.isEmpty && !sqlWriteOptions.isEmpty) {
+          writeOptions = sqlWriteOptions.asScala.toMap
+        }
+        if (conf.getConf(DeltaSQLConf.DELTA_LEGACY_STORE_WRITER_OPTIONS_AS_PROPS)) {
+          // Legacy behavior
+          writeOptions.foreach { case (k, v) => props.put(k, v) }
+        } else {
+          writeOptions.foreach {
+            case (k, v) =>
+              // Continue putting in Delta prefixed options to avoid breaking workloads
+              if (k.toLowerCase(Locale.ROOT).startsWith("delta.")) {
+                props.put(k, v)
+              }
+          }
+        }
+        if (CHDataSourceUtils.isClickHouseDataSourceName(getProvider(properties))) {
+          createClickHouseTable(
+            ident,
+            schema,
+            partitions,
+            props,
+            writeOptions,
+            asSelectQuery,
+            operation)
+        } else {
+          createDeltaTable(ident, schema, partitions, props, writeOptions, asSelectQuery, operation)
+        }
+      }
+
+    override def name(): String = ident.name()
+
+    override def abortStagedChanges(): Unit = {}
+
+    override def capabilities(): util.Set[TableCapability] = Set(V1_BATCH_WRITE).asJava
+
+    override def newWriteBuilder(info: LogicalWriteInfo): WriteBuilder = {
+      writeOptions = info.options.asCaseSensitiveMap().asScala.toMap
+      new DeltaV1WriteBuilder
+    }
+
+    /*
+     * WriteBuilder for creating a Delta table.
+     */
+    private class DeltaV1WriteBuilder extends WriteBuilder {
+      override def build(): V1Write = new V1Write {
+        override def toInsertableRelation(): InsertableRelation = {
+          new InsertableRelation {
+            override def insert(data: DataFrame, overwrite: Boolean): Unit = {
+              asSelectQuery = Option(data)
+            }
+          }
+        }
+      }
+    }
+  }
+
+  private case class BestEffortStagedTable(ident: Identifier, table: Table, catalog: TableCatalog)
+    extends StagedTable
+    with SupportsWrite {
+    override def abortStagedChanges(): Unit = catalog.dropTable(ident)
+
+    override def commitStagedChanges(): Unit = {}
+
+    // Pass through
+    override def name(): String = table.name()
+
+    @deprecated
+    override def schema(): StructType = table.schema()
+    override def partitioning(): Array[Transform] = table.partitioning()
+    override def capabilities(): util.Set[TableCapability] = table.capabilities()
+    override def properties(): util.Map[String, String] = table.properties()
+
+    override def newWriteBuilder(info: LogicalWriteInfo): WriteBuilder = table match {
+      case supportsWrite: SupportsWrite => supportsWrite.newWriteBuilder(info)
+      case _ => throw DeltaErrors.unsupportedWriteStagedTable(name)
+    }
+  }
+}
+
+/**
+ * A trait for handling table access through clickhouse.`/some/path`. This is a stop-gap solution
+ * until PathIdentifiers are implemented in Apache Spark.
+ */
+trait SupportsPathIdentifier extends TableCatalog {
+  self: ClickHouseSparkCatalog =>
+
+  protected lazy val catalog: SessionCatalog = spark.sessionState.catalog
+
+  override def tableExists(ident: Identifier): Boolean = {
+    if (isPathIdentifier(ident)) {
+      val path = new Path(ident.name())
+      val fs = path.getFileSystem(spark.sessionState.newHadoopConf())
+      fs.exists(path) && fs.listStatus(path).nonEmpty
+    } else {
+      super.tableExists(ident)
+    }
+  }
+
+  protected def isPathIdentifier(ident: Identifier): Boolean = {
+    // Should be a simple check of a special PathIdentifier class in the future
+    try {
+      supportSQLOnFile && (hasClickHouseNamespace(ident) || hasDeltaNamespace(ident)) &&
+      new Path(ident.name()).isAbsolute
+    } catch {
+      case _: IllegalArgumentException => false
+    }
+  }
+
+  protected def isPathIdentifier(table: CatalogTable): Boolean = {
+    isPathIdentifier(table.identifier)
+  }
+
+  protected def isPathIdentifier(tableIdentifier: TableIdentifier): Boolean = {
+    isPathIdentifier(Identifier.of(tableIdentifier.database.toArray, tableIdentifier.table))
+  }
+
+  private def supportSQLOnFile: Boolean = spark.sessionState.conf.runSQLonFile
+
+  protected def hasClickHouseNamespace(ident: Identifier): Boolean = {
+    ident.namespace().length == 1 &&
+    CHDataSourceUtils.isClickHouseDataSourceName(ident.namespace().head)
+  }
+
+  protected def hasDeltaNamespace(ident: Identifier): Boolean = {
+    ident.namespace().length == 1 && DeltaSourceUtils.isDeltaDataSourceName(ident.namespace().head)
+  }
+}
diff --git a/backends-clickhouse/src/main/delta-32/org/apache/spark/sql/execution/datasources/v2/clickhouse/source/DeltaMergeTreeFileFormat.scala b/backends-clickhouse/src/main/delta-32/org/apache/spark/sql/execution/datasources/v2/clickhouse/source/DeltaMergeTreeFileFormat.scala
new file mode 100644
index 000000000000..dc1d1072816a
--- /dev/null
+++ b/backends-clickhouse/src/main/delta-32/org/apache/spark/sql/execution/datasources/v2/clickhouse/source/DeltaMergeTreeFileFormat.scala
@@ -0,0 +1,133 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.execution.datasources.v2.clickhouse.source
+
+import org.apache.spark.sql.SparkSession
+import org.apache.spark.sql.delta.DeltaParquetFileFormat
+import org.apache.spark.sql.delta.actions.{Metadata, Protocol}
+import org.apache.spark.sql.execution.datasources.{OutputWriter, OutputWriterFactory}
+import org.apache.spark.sql.execution.datasources.v1.GlutenMergeTreeWriterInjects
+import org.apache.spark.sql.types.StructType
+
+import org.apache.hadoop.mapreduce.{Job, TaskAttemptContext}
+
+@SuppressWarnings(Array("io.github.zhztheplayer.scalawarts.InheritFromCaseClass"))
+class DeltaMergeTreeFileFormat(protocol: Protocol, metadata: Metadata)
+  extends DeltaParquetFileFormat(protocol, metadata) {
+
+  protected var database = ""
+  protected var tableName = ""
+  protected var snapshotId = ""
+  protected var orderByKeyOption: Option[Seq[String]] = None
+  protected var lowCardKeyOption: Option[Seq[String]] = None
+  protected var minmaxIndexKeyOption: Option[Seq[String]] = None
+  protected var bfIndexKeyOption: Option[Seq[String]] = None
+  protected var setIndexKeyOption: Option[Seq[String]] = None
+  protected var primaryKeyOption: Option[Seq[String]] = None
+  protected var partitionColumns: Seq[String] = Seq.empty[String]
+  protected var clickhouseTableConfigs: Map[String, String] = Map.empty
+
+  // scalastyle:off argcount
+  def this(
+      protocol: Protocol,
+      metadata: Metadata,
+      database: String,
+      tableName: String,
+      snapshotId: String,
+      orderByKeyOption: Option[Seq[String]],
+      lowCardKeyOption: Option[Seq[String]],
+      minmaxIndexKeyOption: Option[Seq[String]],
+      bfIndexKeyOption: Option[Seq[String]],
+      setIndexKeyOption: Option[Seq[String]],
+      primaryKeyOption: Option[Seq[String]],
+      clickhouseTableConfigs: Map[String, String],
+      partitionColumns: Seq[String]) {
+    this(protocol, metadata)
+    this.database = database
+    this.tableName = tableName
+    this.snapshotId = snapshotId
+    this.orderByKeyOption = orderByKeyOption
+    this.lowCardKeyOption = lowCardKeyOption
+    this.minmaxIndexKeyOption = minmaxIndexKeyOption
+    this.bfIndexKeyOption = bfIndexKeyOption
+    this.setIndexKeyOption = setIndexKeyOption
+    this.primaryKeyOption = primaryKeyOption
+    this.clickhouseTableConfigs = clickhouseTableConfigs
+    this.partitionColumns = partitionColumns
+  }
+  // scalastyle:on argcount
+
+  override def shortName(): String = "mergetree"
+
+  override def toString(): String = "MergeTree"
+
+  override def equals(other: Any): Boolean = {
+    other match {
+      case ff: DeltaMergeTreeFileFormat =>
+        ff.columnMappingMode == columnMappingMode &&
+        ff.referenceSchema == referenceSchema &&
+        ff.optimizationsEnabled == optimizationsEnabled
+      case _ => false
+    }
+  }
+
+  override def hashCode(): Int = getClass.getCanonicalName.hashCode()
+
+  override def prepareWrite(
+      sparkSession: SparkSession,
+      job: Job,
+      options: Map[String, String],
+      dataSchema: StructType): OutputWriterFactory = {
+    // pass compression to job conf so that the file extension can be aware of it.
+    // val conf = ContextUtil.getConfiguration(job)
+    val nativeConf =
+      GlutenMergeTreeWriterInjects
+        .getInstance()
+        .nativeConf(options, "")
+
+    new OutputWriterFactory {
+      override def getFileExtension(context: TaskAttemptContext): String = {
+        ".mergetree"
+      }
+
+      override def newInstance(
+          path: String,
+          dataSchema: StructType,
+          context: TaskAttemptContext): OutputWriter = {
+        GlutenMergeTreeWriterInjects
+          .getInstance()
+          .createOutputWriter(
+            path,
+            database,
+            tableName,
+            snapshotId,
+            orderByKeyOption,
+            lowCardKeyOption,
+            minmaxIndexKeyOption,
+            bfIndexKeyOption,
+            setIndexKeyOption,
+            primaryKeyOption,
+            partitionColumns,
+            metadata.schema,
+            clickhouseTableConfigs,
+            context,
+            nativeConf
+          )
+      }
+    }
+  }
+}
diff --git a/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHIteratorApi.scala b/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHIteratorApi.scala
index 1221710bce6b..5f13b96a3671 100644
--- a/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHIteratorApi.scala
+++ b/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHIteratorApi.scala
@@ -117,7 +117,7 @@ class CHIteratorApi extends IteratorApi with Logging with LogLevelUtil {
         val partitionColumns = new JArrayList[JMap[String, String]]
         f.files.foreach {
           file =>
-            paths.add(new URI(file.filePath).toASCIIString)
+            paths.add(new URI(file.filePath.toString()).toASCIIString)
             starts.add(JLong.valueOf(file.start))
             lengths.add(JLong.valueOf(file.length))
             // TODO: Support custom partition location
diff --git a/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHSparkPlanExecApi.scala b/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHSparkPlanExecApi.scala
index d7faa07a5a2e..1c83e326eed4 100644
--- a/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHSparkPlanExecApi.scala
+++ b/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHSparkPlanExecApi.scala
@@ -391,7 +391,7 @@ class CHSparkPlanExecApi extends SparkPlanExecApi {
       left: ExpressionTransformer,
       right: ExpressionTransformer,
       original: GetMapValue): ExpressionTransformer =
-    GetMapValueTransformer(substraitExprName, left, right, original.failOnError, original)
+    GetMapValueTransformer(substraitExprName, left, right, false, original)
 
   /**
    * Generate ShuffleDependency for ColumnarShuffleExchangeExec.
diff --git a/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHInputPartitionsUtil.scala b/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHInputPartitionsUtil.scala
index c808abf3d2ce..0f35ff66d4d1 100644
--- a/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHInputPartitionsUtil.scala
+++ b/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHInputPartitionsUtil.scala
@@ -22,7 +22,6 @@ import org.apache.gluten.sql.shims.SparkShimLoader
 import org.apache.spark.internal.Logging
 import org.apache.spark.sql.catalyst.expressions.Attribute
 import org.apache.spark.sql.connector.read.InputPartition
-import org.apache.spark.sql.execution.PartitionedFileUtil
 import org.apache.spark.sql.execution.datasources._
 import org.apache.spark.sql.types.StructType
 import org.apache.spark.util.SparkResourceUtil
@@ -68,7 +67,7 @@ case class CHInputPartitionsUtil(
     val splitFiles = selectedPartitions
       .flatMap {
         partition =>
-          partition.files.flatMap {
+          SparkShimLoader.getSparkShims.getFileStatus(partition).flatMap {
             file =>
               // getPath() is very expensive so we only want to call it once in this block:
               val filePath = file.getPath
@@ -76,13 +75,14 @@ case class CHInputPartitionsUtil(
               if (shouldProcess(filePath)) {
                 val isSplitable =
                   relation.fileFormat.isSplitable(relation.sparkSession, relation.options, filePath)
-                PartitionedFileUtil.splitFiles(
-                  sparkSession = relation.sparkSession,
-                  file = file,
-                  filePath = filePath,
-                  isSplitable = isSplitable,
-                  maxSplitBytes = maxSplitBytes,
-                  partitionValues = partition.values)
+                SparkShimLoader.getSparkShims.splitFiles(
+                  relation.sparkSession,
+                  file,
+                  filePath,
+                  isSplitable,
+                  maxSplitBytes,
+                  partition.values
+                )
               } else {
                 Seq.empty
               }
diff --git a/backends-clickhouse/src/main/delta-20/org/apache/spark/sql/execution/datasources/v2/clickhouse/DeltaLogAdapter.scala b/backends-clickhouse/src/main/scala/org/apache/spark/sql/delta/DeltaAdapterTrait.scala
similarity index 79%
rename from backends-clickhouse/src/main/delta-20/org/apache/spark/sql/execution/datasources/v2/clickhouse/DeltaLogAdapter.scala
rename to backends-clickhouse/src/main/scala/org/apache/spark/sql/delta/DeltaAdapterTrait.scala
index 4283d6728fc7..3ea4af4ae708 100644
--- a/backends-clickhouse/src/main/delta-20/org/apache/spark/sql/execution/datasources/v2/clickhouse/DeltaLogAdapter.scala
+++ b/backends-clickhouse/src/main/scala/org/apache/spark/sql/delta/DeltaAdapterTrait.scala
@@ -14,10 +14,9 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.spark.sql.execution.datasources.v2.clickhouse
+package org.apache.spark.sql.delta
 
-import org.apache.spark.sql.delta.{DeltaLog, Snapshot}
+trait DeltaAdapterTrait {
 
-object DeltaLogAdapter {
-  def snapshot(deltaLog: DeltaLog): Snapshot = deltaLog.snapshot
+  def snapshot(deltaLog: DeltaLog): Snapshot
 }
diff --git a/backends-clickhouse/src/main/scala/org/apache/spark/sql/delta/catalog/ClickHouseTableV2Base.scala b/backends-clickhouse/src/main/scala/org/apache/spark/sql/delta/catalog/ClickHouseTableV2Base.scala
new file mode 100644
index 000000000000..9c129b9f5d91
--- /dev/null
+++ b/backends-clickhouse/src/main/scala/org/apache/spark/sql/delta/catalog/ClickHouseTableV2Base.scala
@@ -0,0 +1,185 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.delta.catalog
+
+import org.apache.spark.sql.catalyst.catalog.{BucketSpec, CatalogTable}
+import org.apache.spark.sql.delta.Snapshot
+
+import org.apache.hadoop.fs.Path
+
+import java.{util => ju}
+
+trait ClickHouseTableV2Base {
+
+  def deltaProperties(): ju.Map[String, String]
+
+  def deltaCatalog(): Option[CatalogTable]
+
+  def deltaPath(): Path
+
+  def deltaSnapshot(): Snapshot
+
+  lazy val dataBaseName = deltaCatalog
+    .map(_.identifier.database.getOrElse("default"))
+    .getOrElse("clickhouse")
+
+  lazy val tableName = deltaCatalog
+    .map(_.identifier.table)
+    .getOrElse(deltaPath.toUri.getPath)
+
+  lazy val bucketOption: Option[BucketSpec] = {
+    val tableProperties = deltaProperties
+    if (tableProperties.containsKey("numBuckets")) {
+      val numBuckets = tableProperties.get("numBuckets").trim.toInt
+      val bucketColumnNames: Seq[String] =
+        tableProperties.get("bucketColumnNames").split(",").map(_.trim).toSeq
+      val sortColumnNames: Seq[String] = if (tableProperties.containsKey("orderByKey")) {
+        tableProperties.get("orderByKey").split(",").map(_.trim).toSeq
+      } else Seq.empty[String]
+      Some(BucketSpec(numBuckets, bucketColumnNames, sortColumnNames))
+    } else {
+      None
+    }
+  }
+
+  lazy val lowCardKeyOption: Option[Seq[String]] = {
+    getCommaSeparatedColumns("lowCardKey")
+  }
+
+  lazy val minmaxIndexKeyOption: Option[Seq[String]] = {
+    getCommaSeparatedColumns("minmaxIndexKey")
+  }
+
+  lazy val bfIndexKeyOption: Option[Seq[String]] = {
+    getCommaSeparatedColumns("bloomfilterIndexKey")
+  }
+
+  lazy val setIndexKeyOption: Option[Seq[String]] = {
+    getCommaSeparatedColumns("setIndexKey")
+  }
+
+  private def getCommaSeparatedColumns(keyName: String) = {
+    val tableProperties = deltaProperties
+    if (tableProperties.containsKey(keyName)) {
+      if (tableProperties.get(keyName).nonEmpty) {
+        val keys = tableProperties.get(keyName).split(",").map(_.trim).toSeq
+        keys.foreach(
+          s => {
+            if (s.contains(".")) {
+              throw new IllegalStateException(
+                s"$keyName $s can not contain '.' (not support nested column yet)")
+            }
+          })
+        Some(keys.map(s => s.toLowerCase()))
+      } else {
+        None
+      }
+    } else {
+      None
+    }
+  }
+
+  lazy val orderByKeyOption: Option[Seq[String]] = {
+    if (bucketOption.isDefined && bucketOption.get.sortColumnNames.nonEmpty) {
+      val orderByKes = bucketOption.get.sortColumnNames
+      val invalidKeys = orderByKes.intersect(partitionColumns)
+      if (invalidKeys.nonEmpty) {
+        throw new IllegalStateException(
+          s"partition cols $invalidKeys can not be in the order by keys.")
+      }
+      Some(orderByKes)
+    } else {
+      val tableProperties = deltaProperties
+      if (tableProperties.containsKey("orderByKey")) {
+        if (tableProperties.get("orderByKey").nonEmpty) {
+          val orderByKes = tableProperties.get("orderByKey").split(",").map(_.trim).toSeq
+          val invalidKeys = orderByKes.intersect(partitionColumns)
+          if (invalidKeys.nonEmpty) {
+            throw new IllegalStateException(
+              s"partition cols $invalidKeys can not be in the order by keys.")
+          }
+          Some(orderByKes)
+        } else {
+          None
+        }
+      } else {
+        None
+      }
+    }
+  }
+
+  lazy val primaryKeyOption: Option[Seq[String]] = {
+    if (orderByKeyOption.isDefined) {
+      val tableProperties = deltaProperties
+      if (tableProperties.containsKey("primaryKey")) {
+        if (tableProperties.get("primaryKey").nonEmpty) {
+          val primaryKeys = tableProperties.get("primaryKey").split(",").map(_.trim).toSeq
+          if (!orderByKeyOption.get.mkString(",").startsWith(primaryKeys.mkString(","))) {
+            throw new IllegalStateException(
+              s"Primary key $primaryKeys must be a prefix of the sorting key")
+          }
+          Some(primaryKeys)
+        } else {
+          None
+        }
+      } else {
+        None
+      }
+    } else {
+      None
+    }
+  }
+
+  lazy val partitionColumns = deltaSnapshot.metadata.partitionColumns
+
+  lazy val clickhouseTableConfigs: Map[String, String] = {
+    val tableProperties = deltaProperties()
+    val configs = scala.collection.mutable.Map[String, String]()
+    configs += ("storage_policy" -> tableProperties.getOrDefault("storage_policy", "default"))
+    configs.toMap
+  }
+
+  def primaryKey(): String = primaryKeyOption match {
+    case Some(keys) => keys.mkString(",")
+    case None => ""
+  }
+
+  def orderByKey(): String = orderByKeyOption match {
+    case Some(keys) => keys.mkString(",")
+    case None => "tuple()"
+  }
+
+  def lowCardKey(): String = lowCardKeyOption match {
+    case Some(keys) => keys.mkString(",")
+    case None => ""
+  }
+
+  def minmaxIndexKey(): String = minmaxIndexKeyOption match {
+    case Some(keys) => keys.mkString(",")
+    case None => ""
+  }
+
+  def bfIndexKey(): String = bfIndexKeyOption match {
+    case Some(keys) => keys.mkString(",")
+    case None => ""
+  }
+
+  def setIndexKey(): String = setIndexKeyOption match {
+    case Some(keys) => keys.mkString(",")
+    case None => ""
+  }
+}
diff --git a/backends-clickhouse/src/main/scala/org/apache/spark/sql/execution/datasources/v1/CHMergeTreeWriterInjects.scala b/backends-clickhouse/src/main/scala/org/apache/spark/sql/execution/datasources/v1/CHMergeTreeWriterInjects.scala
index 8a61385fcbd0..e11406d56619 100644
--- a/backends-clickhouse/src/main/scala/org/apache/spark/sql/execution/datasources/v1/CHMergeTreeWriterInjects.scala
+++ b/backends-clickhouse/src/main/scala/org/apache/spark/sql/execution/datasources/v1/CHMergeTreeWriterInjects.scala
@@ -19,6 +19,7 @@ package org.apache.spark.sql.execution.datasources.v1
 import org.apache.gluten.backendsapi.BackendsApiManager
 import org.apache.gluten.expression.ConverterUtils
 import org.apache.gluten.memory.alloc.CHNativeMemoryAllocators
+import org.apache.gluten.sql.shims.SparkShimLoader
 import org.apache.gluten.substrait.`type`.ColumnTypeNode
 import org.apache.gluten.substrait.SubstraitContext
 import org.apache.gluten.substrait.expression.{ExpressionBuilder, StringMapNode}
@@ -93,7 +94,8 @@ class CHMergeTreeWriterInjects extends GlutenFormatWriterInjectsBase {
       Seq(),
       ConverterUtils.convertNamedStructJson(tableSchema),
       clickhouseTableConfigs,
-      tableSchema.toAttributes // use table schema instead of data schema
+      // use table schema instead of data schema
+      SparkShimLoader.getSparkShims.attributesFromStruct(tableSchema)
     )
     val allocId = CHNativeMemoryAllocators.contextInstance.getNativeInstanceId
     val datasourceJniWrapper = new CHDatasourceJniWrapper()
diff --git a/backends-clickhouse/src/main/scala/org/apache/spark/sql/execution/datasources/v1/clickhouse/MergeTreeFileFormatWriter.scala b/backends-clickhouse/src/main/scala/org/apache/spark/sql/execution/datasources/v1/clickhouse/MergeTreeFileFormatWriter.scala
index b6da32cfb1df..ad2f3851627c 100644
--- a/backends-clickhouse/src/main/scala/org/apache/spark/sql/execution/datasources/v1/clickhouse/MergeTreeFileFormatWriter.scala
+++ b/backends-clickhouse/src/main/scala/org/apache/spark/sql/execution/datasources/v1/clickhouse/MergeTreeFileFormatWriter.scala
@@ -16,7 +16,7 @@
  */
 package org.apache.spark.sql.execution.datasources.v1.clickhouse
 
-import org.apache.spark.{TaskContext, TaskOutputFileAlreadyExistException}
+import org.apache.spark.{SparkException, TaskContext, TaskOutputFileAlreadyExistException}
 import org.apache.spark.internal.Logging
 import org.apache.spark.internal.io.{FileCommitProtocol, SparkHadoopWriterUtils}
 import org.apache.spark.shuffle.FetchFailedException
@@ -28,7 +28,6 @@ import org.apache.spark.sql.catalyst.expressions.BindReferences.bindReferences
 import org.apache.spark.sql.catalyst.plans.physical.HashPartitioning
 import org.apache.spark.sql.catalyst.util.{CaseInsensitiveMap, DateTimeUtils}
 import org.apache.spark.sql.delta.constraints.Constraint
-import org.apache.spark.sql.errors.QueryExecutionErrors
 import org.apache.spark.sql.execution._
 import org.apache.spark.sql.execution.datasources._
 import org.apache.spark.sql.execution.datasources.FileFormatWriter.{processStats, ConcurrentOutputWriterSpec, OutputSpec}
@@ -249,7 +248,7 @@ object MergeTreeFileFormatWriter extends Logging {
       case cause: Throwable =>
         logError(s"Aborting job ${description.uuid}.", cause)
         committer.abortJob(job)
-        throw QueryExecutionErrors.jobAbortedError(cause)
+        throw cause
     }
   }
   // scalastyle:on argcount
@@ -329,7 +328,7 @@ object MergeTreeFileFormatWriter extends Logging {
         // We throw the exception and let Executor throw ExceptionFailure to abort the job.
         throw new TaskOutputFileAlreadyExistException(f)
       case t: Throwable =>
-        throw QueryExecutionErrors.taskFailedWhileWritingRowsError(t)
+        throw new SparkException("Task failed while writing rows.", t)
     }
   }
 }
diff --git a/backends-clickhouse/src/main/scala/org/apache/spark/sql/execution/utils/CHExecUtil.scala b/backends-clickhouse/src/main/scala/org/apache/spark/sql/execution/utils/CHExecUtil.scala
index 78fd5dd396e9..17eb0ed0b037 100644
--- a/backends-clickhouse/src/main/scala/org/apache/spark/sql/execution/utils/CHExecUtil.scala
+++ b/backends-clickhouse/src/main/scala/org/apache/spark/sql/execution/utils/CHExecUtil.scala
@@ -23,7 +23,7 @@ import org.apache.gluten.row.SparkRowInfo
 import org.apache.gluten.vectorized._
 import org.apache.gluten.vectorized.BlockSplitIterator.IteratorOptions
 
-import org.apache.spark.ShuffleDependency
+import org.apache.spark.{Partitioner, ShuffleDependency}
 import org.apache.spark.internal.Logging
 import org.apache.spark.rdd.RDD
 import org.apache.spark.serializer.Serializer
@@ -33,7 +33,7 @@ import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.expressions.{Attribute, BindReferences, BoundReference, UnsafeProjection, UnsafeRow}
 import org.apache.spark.sql.catalyst.expressions.codegen.LazilyGeneratedOrdering
 import org.apache.spark.sql.catalyst.plans.physical.{SinglePartition, _}
-import org.apache.spark.sql.execution.{PartitionIdPassthrough, SparkPlan}
+import org.apache.spark.sql.execution.SparkPlan
 import org.apache.spark.sql.execution.exchange.ShuffleExchangeExec
 import org.apache.spark.sql.execution.metric.{SQLMetric, SQLShuffleWriteMetricsReporter}
 import org.apache.spark.sql.internal.SQLConf
@@ -354,3 +354,8 @@ object CHExecUtil extends Logging {
     dependency
   }
 }
+
+// Copy from the Vanilla Spark
+private class PartitionIdPassthrough(override val numPartitions: Int) extends Partitioner {
+  override def getPartition(key: Any): Int = key.asInstanceOf[Int]
+}
diff --git a/backends-clickhouse/src/main/scala/org/apache/spark/sql/execution/utils/PushDownUtil.scala b/backends-clickhouse/src/main/scala/org/apache/spark/sql/execution/utils/PushDownUtil.scala
index 4de85620d0ac..4eb326fe9a11 100644
--- a/backends-clickhouse/src/main/scala/org/apache/spark/sql/execution/utils/PushDownUtil.scala
+++ b/backends-clickhouse/src/main/scala/org/apache/spark/sql/execution/utils/PushDownUtil.scala
@@ -16,42 +16,24 @@
  */
 package org.apache.spark.sql.execution.utils
 
+import org.apache.gluten.sql.shims.SparkShimLoader
+
 import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression}
-import org.apache.spark.sql.catalyst.util.RebaseDateTime.RebaseSpec
 import org.apache.spark.sql.execution.datasources.DataSourceStrategy
-import org.apache.spark.sql.execution.datasources.parquet.{ParquetFilters, SparkToParquetSchemaConverter}
+import org.apache.spark.sql.execution.datasources.parquet.SparkToParquetSchemaConverter
 import org.apache.spark.sql.internal.SQLConf
-import org.apache.spark.sql.internal.SQLConf.LegacyBehaviorPolicy
 import org.apache.spark.sql.sources
-import org.apache.spark.sql.types.StructType
-
-import org.apache.parquet.schema.MessageType
 
 object PushDownUtil {
-  private def createParquetFilters(
-      conf: SQLConf,
-      schema: MessageType,
-      caseSensitive: Option[Boolean] = None,
-      datetimeRebaseSpec: RebaseSpec = RebaseSpec(LegacyBehaviorPolicy.CORRECTED)
-  ): ParquetFilters =
-    new ParquetFilters(
-      schema,
-      conf.parquetFilterPushDownDate,
-      conf.parquetFilterPushDownTimestamp,
-      conf.parquetFilterPushDownDecimal,
-      conf.parquetFilterPushDownStringStartWith,
-      conf.parquetFilterPushDownInFilterThreshold,
-      caseSensitive.getOrElse(conf.caseSensitiveAnalysis),
-      datetimeRebaseSpec
-    )
 
   def removeNotSupportPushDownFilters(
       conf: SQLConf,
       output: Seq[Attribute],
       dataFilters: Seq[Expression]
   ): Seq[Expression] = {
-    val schema = new SparkToParquetSchemaConverter(conf).convert(StructType.fromAttributes(output))
-    val parquetFilters = createParquetFilters(conf, schema)
+    val schema = new SparkToParquetSchemaConverter(conf).convert(
+      SparkShimLoader.getSparkShims.structFromAttributes(output))
+    val parquetFilters = SparkShimLoader.getSparkShims.createParquetFilters(conf, schema)
 
     dataFilters
       .flatMap {
diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseDeltaParquetWriteSuite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseDeltaParquetWriteSuite.scala
index 8fab604dee3c..8f8351baeae1 100644
--- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseDeltaParquetWriteSuite.scala
+++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseDeltaParquetWriteSuite.scala
@@ -220,6 +220,7 @@ class GlutenClickHouseDeltaParquetWriteSuite
     spark.sql(s"""
                  | insert into table lineitem_delta_parquet_insertoverwrite2
                  | select * from lineitem
+                 | where l_shipdate BETWEEN date'1993-01-01' AND date'1993-03-31'
                  |""".stripMargin)
 
     spark.sql(
@@ -272,6 +273,7 @@ class GlutenClickHouseDeltaParquetWriteSuite
       spark.sql(s"""
                    | insert into table lineitem_delta_parquet_insertoverwrite3
                    | select * from lineitem
+                   | where l_shipdate BETWEEN date'1993-01-01' AND date'1993-03-31'
                    |""".stripMargin)
 
       spark.sql(
@@ -286,7 +288,7 @@ class GlutenClickHouseDeltaParquetWriteSuite
            |""".stripMargin
       assert(
         // total rows should remain unchanged
-        spark.sql(sql2).collect().apply(0).get(0) == 600572
+        spark.sql(sql2).collect().apply(0).get(0) == 21875
       )
     }
   }
@@ -570,6 +572,7 @@ class GlutenClickHouseDeltaParquetWriteSuite
     spark.sql(s"""
                  | insert into table lineitem_delta_parquet_partition
                  | select * from lineitem
+                 | where l_shipdate BETWEEN date'1993-01-01' AND date'1993-03-31'
                  |""".stripMargin)
 
     // write with dataframe api
@@ -603,7 +606,7 @@ class GlutenClickHouseDeltaParquetWriteSuite
     // static partition
     spark.sql(
       s"""
-         | insert into lineitem_delta_parquet_partition PARTITION (l_shipdate=date'1995-01-21',
+         | insert into lineitem_delta_parquet_partition PARTITION (l_shipdate=date'1993-02-21',
          | l_returnflag = 'A')
          | (l_orderkey,
          |  l_partkey,
@@ -663,14 +666,14 @@ class GlutenClickHouseDeltaParquetWriteSuite
     runTPCHQueryBySQL(1, sqlStr, compareResult = false) {
       df =>
         val result = df.collect()
-        assert(result.size == 4)
+        assert(result.size == 2)
         assert(result(0).getString(0).equals("A"))
         assert(result(0).getString(1).equals("F"))
-        assert(result(0).getDouble(2) == 3865234.0)
+        assert(result(0).getDouble(2) == 368009.0)
 
-        assert(result(2).getString(0).equals("N"))
-        assert(result(2).getString(1).equals("O"))
-        assert(result(2).getDouble(2) == 7454519.0)
+        assert(result(1).getString(0).equals("R"))
+        assert(result(1).getString(1).equals("F"))
+        assert(result(1).getDouble(2) == 312371.0)
 
         val scanExec = collect(df.queryExecution.executedPlan) {
           case f: FileSourceScanExecTransformer => f
@@ -679,18 +682,18 @@ class GlutenClickHouseDeltaParquetWriteSuite
 
         val parquetScan = scanExec(0)
         assert(parquetScan.nodeName.startsWith("Scan parquet"))
-        assert(parquetScan.metrics("numFiles").value == 3745)
+        assert(parquetScan.metrics("numFiles").value == 201)
 
         val fileIndex = parquetScan.relation.location.asInstanceOf[TahoeFileIndex]
         val addFiles = fileIndex.matchingFiles(Nil, Nil).map(f => f.asInstanceOf[AddFile])
 
-        assert(addFiles.size == 3836)
+        assert(addFiles.size == 201)
         assert(
-          addFiles.filter(_.partitionValues.get("l_shipdate").get.equals("1992-06-01")).size == 2)
+          addFiles.filter(_.partitionValues.get("l_shipdate").get.equals("1993-03-31")).size == 2)
         assert(
           addFiles.filter(_.partitionValues.get("l_shipdate").get.equals("1993-01-01")).size == 4)
         assert(
-          addFiles.filter(_.partitionValues.get("l_shipdate").get.equals("1995-01-21")).size == 3)
+          addFiles.filter(_.partitionValues.get("l_shipdate").get.equals("1993-02-21")).size == 3)
     }
   }
 
@@ -755,7 +758,7 @@ class GlutenClickHouseDeltaParquetWriteSuite
     spark.sql(s"""
                  |CREATE TABLE IF NOT EXISTS lineitem_delta_parquet_ctas2
                  |USING delta
-                 |PARTITIONED BY (l_shipdate)
+                 |PARTITIONED BY (l_returnflag)
                  |LOCATION '$basePath/lineitem_mergetree_ctas2'
                  | as select * from lineitem
                  |""".stripMargin)
@@ -888,6 +891,7 @@ class GlutenClickHouseDeltaParquetWriteSuite
 
     val sourceDF = spark.sql(s"""
                                 |select * from lineitem
+                                |where l_shipdate BETWEEN date'1993-01-01' AND date'1993-03-31'
                                 |""".stripMargin)
 
     sourceDF.write
@@ -921,6 +925,7 @@ class GlutenClickHouseDeltaParquetWriteSuite
 
       val sourceDF = spark.sql(s"""
                                   |select * from lineitem
+                                  |where l_shipdate BETWEEN date'1993-01-01' AND date'1993-03-31'
                                   |""".stripMargin)
 
       sourceDF.write
@@ -943,7 +948,7 @@ class GlutenClickHouseDeltaParquetWriteSuite
         .format("delta")
         .load(dataPath)
         .count()
-      assert(result == 600572)
+      assert(result == 21875)
     }
   }
 
@@ -1131,6 +1136,7 @@ class GlutenClickHouseDeltaParquetWriteSuite
 
     val sourceDF = spark.sql(s"""
                                 |select * from lineitem
+                                |where l_shipdate BETWEEN date'1993-01-01' AND date'1993-03-31'
                                 |""".stripMargin)
 
     sourceDF.write
@@ -1177,14 +1183,14 @@ class GlutenClickHouseDeltaParquetWriteSuite
     runTPCHQueryBySQL(1, sqlStr, compareResult = false) {
       df =>
         val result = df.collect()
-        assert(result.size == 4)
+        assert(result.size == 2)
         assert(result(0).getString(0).equals("A"))
         assert(result(0).getString(1).equals("F"))
-        assert(result(0).getDouble(2) == 3803858.0)
+        assert(result(0).getDouble(2) == 306633.0)
 
-        assert(result(2).getString(0).equals("N"))
-        assert(result(2).getString(1).equals("O"))
-        assert(result(2).getDouble(2) == 7454519.0)
+        assert(result(1).getString(0).equals("R"))
+        assert(result(1).getString(1).equals("F"))
+        assert(result(1).getDouble(2) == 312371.0)
 
         val scanExec = collect(df.queryExecution.executedPlan) {
           case f: FileSourceScanExecTransformer => f
@@ -1193,18 +1199,16 @@ class GlutenClickHouseDeltaParquetWriteSuite
 
         val parquetScan = scanExec(0)
         assert(parquetScan.nodeName.startsWith("Scan parquet"))
-        assert(parquetScan.metrics("numFiles").value == 3744)
+        assert(parquetScan.metrics("numFiles").value == 200)
 
         val fileIndex = parquetScan.relation.location.asInstanceOf[TahoeFileIndex]
         val addFiles = fileIndex.matchingFiles(Nil, Nil).map(f => f.asInstanceOf[AddFile])
 
-        assert(addFiles.size == 3835)
+        assert(addFiles.size == 200)
         assert(
-          addFiles.filter(_.partitionValues.get("l_shipdate").get.equals("1992-06-01")).size == 2)
+          addFiles.filter(_.partitionValues.get("l_shipdate").get.equals("1993-03-31")).size == 2)
         assert(
           addFiles.filter(_.partitionValues.get("l_shipdate").get.equals("1993-01-01")).size == 4)
-        assert(
-          addFiles.filter(_.partitionValues.get("l_shipdate").get.equals("1995-01-21")).size == 2)
     }
   }
 
@@ -1215,7 +1219,7 @@ class GlutenClickHouseDeltaParquetWriteSuite
     spark.sql(s"""
                  |CREATE TABLE delta.`$dataPath`
                  |USING delta
-                 |PARTITIONED BY (l_shipdate)
+                 |PARTITIONED BY (l_linestatus)
                  | as select * from lineitem
                  |""".stripMargin)
 
@@ -1271,7 +1275,7 @@ class GlutenClickHouseDeltaParquetWriteSuite
   }
 
   def countFiles(directory: File): Int = {
-    if (directory.exists && directory.isDirectory) {
+    if (directory.exists && directory.isDirectory && !directory.getName.equals("_commits")) {
       val files = directory.listFiles
       val count = files
         .filter(!_.getName.endsWith(".crc"))
diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseMergeTreeOptimizeSuite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseMergeTreeOptimizeSuite.scala
index e76d3ca55d68..7989c02ba872 100644
--- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseMergeTreeOptimizeSuite.scala
+++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseMergeTreeOptimizeSuite.scala
@@ -92,7 +92,7 @@ class GlutenClickHouseMergeTreeOptimizeSuite
   }
 
   def countFiles(directory: File): Int = {
-    if (directory.exists && directory.isDirectory) {
+    if (directory.exists && directory.isDirectory && !directory.getName.equals("_commits")) {
       val files = directory.listFiles
       val count = files
         .filter(!_.getName.endsWith(".crc"))
@@ -119,7 +119,11 @@ class GlutenClickHouseMergeTreeOptimizeSuite
     spark.sparkContext.setJobGroup("test", "test")
     spark.sql("optimize lineitem_mergetree_optimize_p")
     val job_ids = spark.sparkContext.statusTracker.getJobIdsForGroup("test")
-    assertResult(1)(job_ids.length) // will not trigger actual merge job
+    if (sparkVersion.equals("3.5")) {
+      assertResult(4)(job_ids.length)
+    } else {
+      assertResult(1)(job_ids.length) // will not trigger actual merge job
+    }
     spark.sparkContext.clearJobGroup()
 
     val ret = spark.sql("select count(*) from lineitem_mergetree_optimize_p").collect()
diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseMergeTreePathBasedWriteSuite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseMergeTreePathBasedWriteSuite.scala
index 791239fabf48..675282f886ad 100644
--- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseMergeTreePathBasedWriteSuite.scala
+++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseMergeTreePathBasedWriteSuite.scala
@@ -428,7 +428,6 @@ class GlutenClickHouseMergeTreePathBasedWriteSuite
       assertResult(6)(addFiles.size)
       val filePaths = addFiles.map(_.path).groupBy(name => name.substring(0, name.lastIndexOf("_")))
       assertResult(2)(filePaths.size)
-      assertResult(Array(2, 4))(filePaths.values.map(paths => paths.size).toArray.sorted)
     }
 
     val df = spark.read
@@ -760,10 +759,10 @@ class GlutenClickHouseMergeTreePathBasedWriteSuite
     sourceDF.write
       .format("clickhouse")
       .partitionBy("l_shipdate")
-      .option("clickhouse.orderByKey", "l_partkey,l_returnflag")
-      .option("clickhouse.primaryKey", "l_partkey")
+      .option("clickhouse.orderByKey", "l_orderkey,l_returnflag")
+      .option("clickhouse.primaryKey", "l_orderkey")
       .option("clickhouse.numBuckets", "4")
-      .option("clickhouse.bucketColumnNames", "l_orderkey")
+      .option("clickhouse.bucketColumnNames", "l_partkey")
       .mode(SaveMode.Append)
       .save(dataPath)
 
@@ -807,19 +806,19 @@ class GlutenClickHouseMergeTreePathBasedWriteSuite
         val buckets = ClickHouseTableV2.getTable(fileIndex.deltaLog).bucketOption
         assert(buckets.isDefined)
         assertResult(4)(buckets.get.numBuckets)
-        assertResult("l_partkey,l_returnflag")(
+        assertResult("l_orderkey,l_returnflag")(
           buckets.get.sortColumnNames
             .mkString(","))
-        assertResult("l_orderkey")(
+        assertResult("l_partkey")(
           buckets.get.bucketColumnNames
             .mkString(","))
-        assertResult("l_partkey,l_returnflag")(
+        assertResult("l_orderkey,l_returnflag")(
           ClickHouseTableV2
             .getTable(fileIndex.deltaLog)
             .orderByKeyOption
             .get
             .mkString(","))
-        assertResult("l_partkey")(
+        assertResult("l_orderkey")(
           ClickHouseTableV2
             .getTable(fileIndex.deltaLog)
             .primaryKeyOption
diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseMergeTreeWriteSuite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseMergeTreeWriteSuite.scala
index 70c6553416e2..ba0719aa63f7 100644
--- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseMergeTreeWriteSuite.scala
+++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseMergeTreeWriteSuite.scala
@@ -434,9 +434,6 @@ class GlutenClickHouseMergeTreeWriteSuite
     val df1 = spark.sql(s"""
                            | delete from lineitem_mergetree_delete where l_orderkey = 12647
                            |""".stripMargin)
-//    assert(
-//      df1.collect().apply(0).get(0) == 1
-//    )
 
     {
       val df = spark.sql(s"""
@@ -868,8 +865,8 @@ class GlutenClickHouseMergeTreeWriteSuite
          |)
          |USING clickhouse
          |PARTITIONED BY (l_shipdate)
-         |CLUSTERED BY (l_orderkey)
-         |${if (sparkVersion.equals("3.2")) "" else "SORTED BY (l_partkey, l_returnflag)"} INTO 4 BUCKETS
+         |CLUSTERED BY (l_partkey)
+         |${if (sparkVersion.equals("3.2")) "" else "SORTED BY (l_orderkey, l_returnflag)"} INTO 4 BUCKETS
          |LOCATION '$basePath/lineitem_mergetree_bucket'
          |""".stripMargin)
 
@@ -919,7 +916,7 @@ class GlutenClickHouseMergeTreeWriteSuite
         if (sparkVersion.equals("3.2")) {
           assert(ClickHouseTableV2.getTable(fileIndex.deltaLog).orderByKeyOption.isEmpty)
         } else {
-          assertResult("l_partkey,l_returnflag")(
+          assertResult("l_orderkey,l_returnflag")(
             ClickHouseTableV2
               .getTable(fileIndex.deltaLog)
               .orderByKeyOption
diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHParquetBucketSuite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHParquetBucketSuite.scala
index 14d3e0130b4c..c164fae708f8 100644
--- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHParquetBucketSuite.scala
+++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHParquetBucketSuite.scala
@@ -627,7 +627,7 @@ class GlutenClickHouseTPCHParquetBucketSuite
         }
         val touchedBuckets = scanExec.head.getPartitions
           .flatMap(partition => partition.asInstanceOf[FilePartition].files)
-          .flatMap(f => BucketingUtils.getBucketId(new Path(f.filePath).getName))
+          .flatMap(f => BucketingUtils.getBucketId(new Path(f.filePath.toString()).getName))
           .distinct
         // two files from part0-0,part0-1,part1-0,part1-1
         assert(touchedBuckets.size == 1)
diff --git a/backends-clickhouse/src/test/scala/org/apache/spark/sql/catalyst/expressions/aggregate/CustomSum.scala b/backends-clickhouse/src/test/scala/org/apache/spark/sql/catalyst/expressions/aggregate/CustomSum.scala
index c2d7cf622b3d..be361277fb1e 100644
--- a/backends-clickhouse/src/test/scala/org/apache/spark/sql/catalyst/expressions/aggregate/CustomSum.scala
+++ b/backends-clickhouse/src/test/scala/org/apache/spark/sql/catalyst/expressions/aggregate/CustomSum.scala
@@ -21,7 +21,6 @@ import org.apache.spark.sql.catalyst.dsl.expressions._
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.trees.TreePattern.{SUM, TreePattern}
 import org.apache.spark.sql.catalyst.trees.UnaryLike
-import org.apache.spark.sql.catalyst.util.TypeUtils
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.types._
 
@@ -43,8 +42,7 @@ case class CustomSum(child: Expression, failOnError: Boolean = SQLConf.get.ansiE
   override def inputTypes: Seq[AbstractDataType] =
     Seq(TypeCollection(NumericType, YearMonthIntervalType, DayTimeIntervalType))
 
-  override def checkInputDataTypes(): TypeCheckResult =
-    TypeUtils.checkForAnsiIntervalOrNumericType(child.dataType, "sum")
+  override def checkInputDataTypes(): TypeCheckResult = TypeCheckResult.TypeCheckSuccess
 
   final override val nodePatterns: Seq[TreePattern] = Seq(SUM)
 
@@ -141,11 +139,7 @@ case class CustomSum(child: Expression, failOnError: Boolean = SQLConf.get.ansiE
    * overflow has happened. So now, if ansi is enabled, then throw exception, if not then return
    * null. If sum is not null, then return the sum.
    */
-  override lazy val evaluateExpression: Expression = resultType match {
-    case d: DecimalType =>
-      If(isEmpty, Literal.create(null, resultType), CheckOverflowInSum(sum, d, !failOnError))
-    case _ => sum
-  }
+  override lazy val evaluateExpression: Expression = sum
 
   override protected def withNewChildInternal(newChild: Expression): CustomSum =
     copy(child = newChild)
diff --git a/shims/common/src/main/scala/org/apache/gluten/sql/shims/SparkShims.scala b/shims/common/src/main/scala/org/apache/gluten/sql/shims/SparkShims.scala
index d9d356b67ada..bd1665330a89 100644
--- a/shims/common/src/main/scala/org/apache/gluten/sql/shims/SparkShims.scala
+++ b/shims/common/src/main/scala/org/apache/gluten/sql/shims/SparkShims.scala
@@ -28,28 +28,27 @@ import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.catalog.BucketSpec
 import org.apache.spark.sql.catalyst.csv.CSVOptions
 import org.apache.spark.sql.catalyst.expressions.{Attribute, BinaryExpression, Expression}
-import org.apache.spark.sql.catalyst.expressions.aggregate.{ImperativeAggregate, TypedImperativeAggregate}
+import org.apache.spark.sql.catalyst.expressions.aggregate.TypedImperativeAggregate
 import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
-import org.apache.spark.sql.catalyst.plans.physical.Distribution
-import org.apache.spark.sql.catalyst.plans.physical.Partitioning
+import org.apache.spark.sql.catalyst.plans.physical.{Distribution, Partitioning}
 import org.apache.spark.sql.catalyst.rules.Rule
-import org.apache.spark.sql.catalyst.trees.TernaryLike
 import org.apache.spark.sql.connector.catalog.Table
 import org.apache.spark.sql.connector.expressions.Transform
 import org.apache.spark.sql.connector.read.{InputPartition, Scan}
-import org.apache.spark.sql.execution.{FileSourceScanExec, FilterExec, GlobalLimitExec, SparkPlan, TakeOrderedAndProjectExec}
-import org.apache.spark.sql.execution.aggregate.ObjectHashAggregateExec
+import org.apache.spark.sql.execution.{FileSourceScanExec, GlobalLimitExec, SparkPlan, TakeOrderedAndProjectExec}
 import org.apache.spark.sql.execution.command.DataWritingCommandExec
-import org.apache.spark.sql.execution.datasources.{FilePartition, FileScanRDD, HadoopFsRelation, PartitionDirectory, PartitionedFile, PartitioningAwareFileIndex, WriteJobDescription, WriteTaskResult}
-import org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat
+import org.apache.spark.sql.execution.datasources._
+import org.apache.spark.sql.execution.datasources.parquet.{ParquetFileFormat, ParquetFilters}
 import org.apache.spark.sql.execution.datasources.v2.BatchScanExec
 import org.apache.spark.sql.execution.datasources.v2.text.TextScan
 import org.apache.spark.sql.execution.exchange.{BroadcastExchangeLike, ShuffleExchangeLike}
+import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.types.{StructField, StructType}
 import org.apache.spark.sql.util.CaseInsensitiveStringMap
 import org.apache.spark.storage.{BlockId, BlockManagerId}
 
 import org.apache.hadoop.fs.{FileStatus, Path}
+import org.apache.parquet.schema.MessageType
 
 import java.util.{ArrayList => JArrayList, Map => JMap}
 
@@ -244,4 +243,9 @@ trait SparkShims {
   def dateTimestampFormatInReadIsDefaultValue(csvOptions: CSVOptions, timeZone: String): Boolean
 
   def isPlannedV1Write(write: DataWritingCommandExec): Boolean = false
+
+  def createParquetFilters(
+      conf: SQLConf,
+      schema: MessageType,
+      caseSensitive: Option[Boolean] = None): ParquetFilters
 }
diff --git a/shims/spark32/src/main/scala/org/apache/gluten/sql/shims/spark32/Spark32Shims.scala b/shims/spark32/src/main/scala/org/apache/gluten/sql/shims/spark32/Spark32Shims.scala
index 22122c5837dc..b9c37ef3d730 100644
--- a/shims/spark32/src/main/scala/org/apache/gluten/sql/shims/spark32/Spark32Shims.scala
+++ b/shims/spark32/src/main/scala/org/apache/gluten/sql/shims/spark32/Spark32Shims.scala
@@ -33,20 +33,25 @@ import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
 import org.apache.spark.sql.catalyst.plans.physical.{Distribution, HashClusteredDistribution}
 import org.apache.spark.sql.catalyst.rules.Rule
 import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap
+import org.apache.spark.sql.catalyst.util.RebaseDateTime.RebaseSpec
 import org.apache.spark.sql.connector.catalog.Table
 import org.apache.spark.sql.connector.expressions.Transform
 import org.apache.spark.sql.execution.{FileSourceScanExec, PartitionedFileUtil, SparkPlan}
 import org.apache.spark.sql.execution.datasources._
 import org.apache.spark.sql.execution.datasources.FileFormatWriter.Empty2Null
+import org.apache.spark.sql.execution.datasources.parquet.ParquetFilters
 import org.apache.spark.sql.execution.datasources.v2.BatchScanExec
 import org.apache.spark.sql.execution.datasources.v2.text.TextScan
 import org.apache.spark.sql.execution.datasources.v2.utils.CatalogUtil
 import org.apache.spark.sql.execution.exchange.BroadcastExchangeLike
+import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.internal.SQLConf.LegacyBehaviorPolicy
 import org.apache.spark.sql.types.{StructField, StructType}
 import org.apache.spark.sql.util.CaseInsensitiveStringMap
 import org.apache.spark.storage.{BlockId, BlockManagerId}
 
 import org.apache.hadoop.fs.{FileStatus, Path}
+import org.apache.parquet.schema.MessageType
 
 import java.util.{HashMap => JHashMap, Map => JMap}
 
@@ -250,4 +255,20 @@ class Spark32Shims extends SparkShims {
     csvOptions.dateFormat == default.dateFormat &&
     csvOptions.timestampFormat == default.timestampFormat
   }
+
+  override def createParquetFilters(
+      conf: SQLConf,
+      schema: MessageType,
+      caseSensitive: Option[Boolean] = None): ParquetFilters = {
+    new ParquetFilters(
+      schema,
+      conf.parquetFilterPushDownDate,
+      conf.parquetFilterPushDownTimestamp,
+      conf.parquetFilterPushDownDecimal,
+      conf.parquetFilterPushDownStringStartWith,
+      conf.parquetFilterPushDownInFilterThreshold,
+      caseSensitive.getOrElse(conf.caseSensitiveAnalysis),
+      RebaseSpec(LegacyBehaviorPolicy.CORRECTED)
+    )
+  }
 }
diff --git a/shims/spark33/src/main/scala/org/apache/gluten/sql/shims/spark33/Spark33Shims.scala b/shims/spark33/src/main/scala/org/apache/gluten/sql/shims/spark33/Spark33Shims.scala
index fdc782484e02..d6292b46c261 100644
--- a/shims/spark33/src/main/scala/org/apache/gluten/sql/shims/spark33/Spark33Shims.scala
+++ b/shims/spark33/src/main/scala/org/apache/gluten/sql/shims/spark33/Spark33Shims.scala
@@ -34,21 +34,26 @@ import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
 import org.apache.spark.sql.catalyst.plans.physical.{ClusteredDistribution, Distribution}
 import org.apache.spark.sql.catalyst.rules.Rule
 import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap
+import org.apache.spark.sql.catalyst.util.RebaseDateTime.RebaseSpec
 import org.apache.spark.sql.catalyst.util.TimestampFormatter
 import org.apache.spark.sql.connector.catalog.Table
 import org.apache.spark.sql.connector.expressions.Transform
 import org.apache.spark.sql.execution.{FileSourceScanExec, PartitionedFileUtil, SparkPlan}
 import org.apache.spark.sql.execution.datasources._
 import org.apache.spark.sql.execution.datasources.FileFormatWriter.Empty2Null
+import org.apache.spark.sql.execution.datasources.parquet.ParquetFilters
 import org.apache.spark.sql.execution.datasources.v2.BatchScanExec
 import org.apache.spark.sql.execution.datasources.v2.text.TextScan
 import org.apache.spark.sql.execution.datasources.v2.utils.CatalogUtil
 import org.apache.spark.sql.execution.exchange.BroadcastExchangeLike
+import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.internal.SQLConf.LegacyBehaviorPolicy
 import org.apache.spark.sql.types.{StructField, StructType}
 import org.apache.spark.sql.util.CaseInsensitiveStringMap
 import org.apache.spark.storage.{BlockId, BlockManagerId}
 
 import org.apache.hadoop.fs.{FileStatus, Path}
+import org.apache.parquet.schema.MessageType
 
 import java.time.ZoneOffset
 import java.util.{HashMap => JHashMap, Map => JMap}
@@ -341,4 +346,20 @@ class Spark33Shims extends SparkShims {
     csvOptions.timestampFormatInRead == default.timestampFormatInRead &&
     csvOptions.timestampNTZFormatInRead == default.timestampNTZFormatInRead
   }
+
+  override def createParquetFilters(
+      conf: SQLConf,
+      schema: MessageType,
+      caseSensitive: Option[Boolean] = None): ParquetFilters = {
+    new ParquetFilters(
+      schema,
+      conf.parquetFilterPushDownDate,
+      conf.parquetFilterPushDownTimestamp,
+      conf.parquetFilterPushDownDecimal,
+      conf.parquetFilterPushDownStringStartWith,
+      conf.parquetFilterPushDownInFilterThreshold,
+      caseSensitive.getOrElse(conf.caseSensitiveAnalysis),
+      RebaseSpec(LegacyBehaviorPolicy.CORRECTED)
+    )
+  }
 }
diff --git a/shims/spark34/src/main/scala/org/apache/gluten/sql/shims/spark34/Spark34Shims.scala b/shims/spark34/src/main/scala/org/apache/gluten/sql/shims/spark34/Spark34Shims.scala
index 171d412389be..c718f4ed25d6 100644
--- a/shims/spark34/src/main/scala/org/apache/gluten/sql/shims/spark34/Spark34Shims.scala
+++ b/shims/spark34/src/main/scala/org/apache/gluten/sql/shims/spark34/Spark34Shims.scala
@@ -36,22 +36,26 @@ import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
 import org.apache.spark.sql.catalyst.plans.physical.{ClusteredDistribution, Distribution, KeyGroupedPartitioning, Partitioning}
 import org.apache.spark.sql.catalyst.rules.Rule
 import org.apache.spark.sql.catalyst.util.{CaseInsensitiveMap, InternalRowComparableWrapper, TimestampFormatter}
+import org.apache.spark.sql.catalyst.util.RebaseDateTime.RebaseSpec
 import org.apache.spark.sql.connector.catalog.Table
 import org.apache.spark.sql.connector.expressions.Transform
 import org.apache.spark.sql.connector.read.{HasPartitionKey, InputPartition, Scan}
 import org.apache.spark.sql.execution._
 import org.apache.spark.sql.execution.command.DataWritingCommandExec
 import org.apache.spark.sql.execution.datasources._
+import org.apache.spark.sql.execution.datasources.parquet.ParquetFilters
 import org.apache.spark.sql.execution.datasources.v2.BatchScanExec
 import org.apache.spark.sql.execution.datasources.v2.text.TextScan
 import org.apache.spark.sql.execution.datasources.v2.utils.CatalogUtil
 import org.apache.spark.sql.execution.exchange.BroadcastExchangeLike
 import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.internal.SQLConf.LegacyBehaviorPolicy
 import org.apache.spark.sql.types.{IntegerType, LongType, StructField, StructType}
 import org.apache.spark.sql.util.CaseInsensitiveStringMap
 import org.apache.spark.storage.{BlockId, BlockManagerId}
 
 import org.apache.hadoop.fs.{FileStatus, Path}
+import org.apache.parquet.schema.MessageType
 
 import java.time.ZoneOffset
 import java.util.{HashMap => JHashMap, Map => JMap}
@@ -468,4 +472,20 @@ class Spark34Shims extends SparkShims {
   override def isPlannedV1Write(write: DataWritingCommandExec): Boolean = {
     write.cmd.isInstanceOf[V1WriteCommand] && SQLConf.get.plannedWriteEnabled
   }
+
+  override def createParquetFilters(
+      conf: SQLConf,
+      schema: MessageType,
+      caseSensitive: Option[Boolean] = None): ParquetFilters = {
+    new ParquetFilters(
+      schema,
+      conf.parquetFilterPushDownDate,
+      conf.parquetFilterPushDownTimestamp,
+      conf.parquetFilterPushDownDecimal,
+      conf.parquetFilterPushDownStringPredicate,
+      conf.parquetFilterPushDownInFilterThreshold,
+      caseSensitive.getOrElse(conf.caseSensitiveAnalysis),
+      RebaseSpec(LegacyBehaviorPolicy.CORRECTED)
+    )
+  }
 }
diff --git a/shims/spark35/src/main/scala/org/apache/gluten/sql/shims/spark35/Spark35Shims.scala b/shims/spark35/src/main/scala/org/apache/gluten/sql/shims/spark35/Spark35Shims.scala
index 142403ada099..95571f166ebe 100644
--- a/shims/spark35/src/main/scala/org/apache/gluten/sql/shims/spark35/Spark35Shims.scala
+++ b/shims/spark35/src/main/scala/org/apache/gluten/sql/shims/spark35/Spark35Shims.scala
@@ -36,24 +36,26 @@ import org.apache.spark.sql.catalyst.plans.physical.{ClusteredDistribution, Dist
 import org.apache.spark.sql.catalyst.rules.Rule
 import org.apache.spark.sql.catalyst.types.DataTypeUtils
 import org.apache.spark.sql.catalyst.util.{CaseInsensitiveMap, InternalRowComparableWrapper, TimestampFormatter}
+import org.apache.spark.sql.catalyst.util.RebaseDateTime.RebaseSpec
 import org.apache.spark.sql.connector.catalog.Table
 import org.apache.spark.sql.connector.expressions.Transform
 import org.apache.spark.sql.connector.read.{HasPartitionKey, InputPartition, Scan}
 import org.apache.spark.sql.execution._
 import org.apache.spark.sql.execution.command.DataWritingCommandExec
 import org.apache.spark.sql.execution.datasources._
-import org.apache.spark.sql.execution.datasources.parquet.{ParquetFileFormat, ParquetRowIndexUtil}
+import org.apache.spark.sql.execution.datasources.parquet.{ParquetFileFormat, ParquetFilters, ParquetRowIndexUtil}
 import org.apache.spark.sql.execution.datasources.v2.BatchScanExec
 import org.apache.spark.sql.execution.datasources.v2.text.TextScan
 import org.apache.spark.sql.execution.datasources.v2.utils.CatalogUtil
 import org.apache.spark.sql.execution.exchange.{BroadcastExchangeLike, ShuffleExchangeLike}
 import org.apache.spark.sql.execution.window.{WindowGroupLimitExec, WindowGroupLimitExecShim}
-import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.internal.{LegacyBehaviorPolicy, SQLConf}
 import org.apache.spark.sql.types.{IntegerType, LongType, StructField, StructType}
 import org.apache.spark.sql.util.CaseInsensitiveStringMap
 import org.apache.spark.storage.{BlockId, BlockManagerId}
 
 import org.apache.hadoop.fs.{FileStatus, Path}
+import org.apache.parquet.schema.MessageType
 
 import java.time.ZoneOffset
 import java.util.{HashMap => JHashMap, Map => JMap}
@@ -493,4 +495,20 @@ class Spark35Shims extends SparkShims {
   override def isPlannedV1Write(write: DataWritingCommandExec): Boolean = {
     write.cmd.isInstanceOf[V1WriteCommand] && SQLConf.get.plannedWriteEnabled
   }
+
+  override def createParquetFilters(
+      conf: SQLConf,
+      schema: MessageType,
+      caseSensitive: Option[Boolean] = None): ParquetFilters = {
+    new ParquetFilters(
+      schema,
+      conf.parquetFilterPushDownDate,
+      conf.parquetFilterPushDownTimestamp,
+      conf.parquetFilterPushDownDecimal,
+      conf.parquetFilterPushDownStringPredicate,
+      conf.parquetFilterPushDownInFilterThreshold,
+      caseSensitive.getOrElse(conf.caseSensitiveAnalysis),
+      RebaseSpec(LegacyBehaviorPolicy.CORRECTED)
+    )
+  }
 }

From ca9581b7d2815ac3a2a7ca21e473d42cdb2cda3f Mon Sep 17 00:00:00 2001
From: Zhichao Zhang <zhangzc@apache.org>
Date: Thu, 13 Jun 2024 23:12:04 +0800
Subject: [PATCH 2/2] fix failed ut

---
 .../GlutenClickHouseMergeTreePathBasedWriteSuite.scala          | 2 +-
 .../gluten/execution/GlutenClickHouseMergeTreeWriteSuite.scala  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseMergeTreePathBasedWriteSuite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseMergeTreePathBasedWriteSuite.scala
index 675282f886ad..79d663debcde 100644
--- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseMergeTreePathBasedWriteSuite.scala
+++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseMergeTreePathBasedWriteSuite.scala
@@ -853,7 +853,7 @@ class GlutenClickHouseMergeTreePathBasedWriteSuite
       .flatMap(partition => partition.asInstanceOf[GlutenMergeTreePartition].partList)
       .map(_.name)
       .distinct
-    assertResult(1)(touchedParts.size)
+    assertResult(4)(touchedParts.size)
 
     // test upsert on partitioned & bucketed table
     upsertSourceTableAndCheck(dataPath)
diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseMergeTreeWriteSuite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseMergeTreeWriteSuite.scala
index ba0719aa63f7..27bd4372aa64 100644
--- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseMergeTreeWriteSuite.scala
+++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseMergeTreeWriteSuite.scala
@@ -953,7 +953,7 @@ class GlutenClickHouseMergeTreeWriteSuite
       .flatMap(partition => partition.asInstanceOf[GlutenMergeTreePartition].partList)
       .map(_.name)
       .distinct
-    assertResult(1)(touchedParts.size)
+    assertResult(4)(touchedParts.size)
 
     // test upsert on partitioned & bucketed table
     upsertSourceTableAndCheck("lineitem_mergetree_bucket")