From 221f0f849c1e463dcecae7c515e23281d263f623 Mon Sep 17 00:00:00 2001
From: loudongfeng
Date: Tue, 20 Aug 2024 10:41:38 +0800
Subject: [PATCH 1/4] [CH]duplicate column name case support in broadcast join
 #6926 (#6927)

What changes were proposed in this pull request?

Fixes: #6926

How was this patch tested?

By unit test.
---
 .../GlutenClickhouseFunctionSuite.scala       | 20 ++++++++++++++++
 .../Join/BroadCastJoinBuilder.cpp             | 23 +++++++++++++++----
 2 files changed, 38 insertions(+), 5 deletions(-)

diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickhouseFunctionSuite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickhouseFunctionSuite.scala
index ac18f256e8074..4130ea348fb45 100644
--- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickhouseFunctionSuite.scala
+++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickhouseFunctionSuite.scala
@@ -198,4 +198,24 @@ class GlutenClickhouseFunctionSuite extends GlutenClickHouseTPCHAbstractSuite {
     }
   }
 
+  test("duplicate column name issue") {
+    withTable("left_table", "right_table") {
+      sql("create table left_table(id int, name string) using orc")
+      sql("create table right_table(id int, book string) using orc")
+      sql("insert into left_table values (1,'a'),(2,'b'),(3,'c'),(4,'d')")
+      sql("insert into right_table values (1,'a'),(1,'b'),(2,'c'),(2,'d')")
+      compareResultsAgainstVanillaSpark(
+        """
+          |select p1.id, p1.name, p2.book
+          |  from left_table p1 left join
+          |  (select id, id, book
+          |    from right_table where id <= 2) p2
+          |  on p1.id=p2.id
+          |""".stripMargin,
+        true,
+        { _ => }
+      )
+    }
+  }
+
 }
diff --git a/cpp-ch/local-engine/Join/BroadCastJoinBuilder.cpp b/cpp-ch/local-engine/Join/BroadCastJoinBuilder.cpp
index f47f423df89b3..da301dcb89f87 100644
--- a/cpp-ch/local-engine/Join/BroadCastJoinBuilder.cpp
+++ b/cpp-ch/local-engine/Join/BroadCastJoinBuilder.cpp
@@ -57,13 +57,26 @@ jlong callJavaGet(const std::string & id)
 
 DB::Block resetBuildTableBlockName(Block & block, bool only_one = false)
 {
     DB::ColumnsWithTypeAndName new_cols;
+    std::set<std::string> names;
+    int32_t seq = 0;
     for (const auto & col : block)
     {
-        // Add a prefix to avoid column name conflicts with left table.
-        new_cols.emplace_back(col.column, col.type, BlockUtil::RIHGT_COLUMN_PREFIX + col.name);
-
-        if (only_one)
-            break;
+        // Add a prefix to avoid column name conflicts with left table.
+        std::stringstream new_name;
+        // add a sequence number to avoid duplicate names in some rare cases
+        if (names.find(col.name) == names.end())
+        {
+            new_name << BlockUtil::RIHGT_COLUMN_PREFIX << col.name;
+            names.insert(col.name);
+        }
+        else
+        {
+            new_name << BlockUtil::RIHGT_COLUMN_PREFIX << (seq++) << "_" << col.name;
+        }
+        new_cols.emplace_back(col.column, col.type, new_name.str());
+
+        if (only_one)
+            break;
     }
     return DB::Block(new_cols);
 }

From af2d4295a6519d78ae365defcfb505ab76b2a894 Mon Sep 17 00:00:00 2001
From: Chang chen
Date: Tue, 20 Aug 2024 11:24:49 +0800
Subject: [PATCH 2/4] [GLUTEN-3582][CH] Fix bug for decimal and float type
 (#6925)

* [GLUTEN-3582][CH] Fix bug for decimal and float type

* fix style

* fix velox compile issue by introducing def enableSuite(suiteName: String)
  to avoid explicitly referring to test suites

* Spark 35 CI Pipeline is not ready, let's ignore it first
---
 cpp-ch/local-engine/Common/GlutenConfig.h     |   7 +-
 cpp-ch/local-engine/Parser/JoinRelParser.cpp  |   2 +-
 .../Storages/Parquet/ParquetConverter.h       |  96 ++++++++---
 .../tests/decmial_filter_push_down/18_2.json  | 160 ++++++++++++++++++
 .../18_2_flba.snappy.parquet                  | Bin 0 -> 488 bytes
 .../tests/gtest_parquet_columnindex_bug.cpp   |  55 ++++++
 .../tests/json/gtest_local_engine_config.json |   8 +
 .../gluten/utils/BackendTestSettings.scala    |   5 +-
 gluten-ut/spark33/pom.xml                     |  22 +++
 .../parquet/GlutenParquetV1FilterSuite2.scala |  27 +++
 .../clickhouse/ClickHouseTestSettings.scala   |  14 ++
 .../GlutenColumnarWriteTestSupport.scala      |   5 +-
 .../parquet/GlutenParquetV1FilterSuite2.scala |  30 ++++
 .../clickhouse/ClickHouseTestSettings.scala   |   2 +
 14 files changed, 403 insertions(+), 30 deletions(-)
 create mode 100644 cpp-ch/local-engine/tests/decmial_filter_push_down/18_2.json
 create mode 100644 cpp-ch/local-engine/tests/decmial_filter_push_down/18_2_flba.snappy.parquet
 create mode 100644 cpp-ch/local-engine/tests/gtest_parquet_columnindex_bug.cpp
 create mode 100644 gluten-ut/spark33/src/test/backends-clickhouse/org/apache/gluten/execution/parquet/GlutenParquetV1FilterSuite2.scala
 create mode 100644 gluten-ut/spark35/src/test/backends-clickhouse/org/apache/gluten/execution/parquet/GlutenParquetV1FilterSuite2.scala

diff --git a/cpp-ch/local-engine/Common/GlutenConfig.h b/cpp-ch/local-engine/Common/GlutenConfig.h
index ac82b0fff03af..02bb8a9f4c04d 100644
--- a/cpp-ch/local-engine/Common/GlutenConfig.h
+++ b/cpp-ch/local-engine/Common/GlutenConfig.h
@@ -17,9 +17,9 @@
 
 #pragma once
 
-#include
-#include
 #include
+#include
+#include
 
 namespace local_engine
 {
@@ -104,7 +104,7 @@ struct JoinConfig
     bool prefer_multi_join_on_clauses = true;
     size_t multi_join_on_clauses_build_side_rows_limit = 10000000;
 
-    static JoinConfig loadFromContext(DB::ContextPtr context)
+    static JoinConfig loadFromContext(const DB::ContextPtr & context)
     {
         JoinConfig config;
         config.prefer_multi_join_on_clauses = context->getConfigRef().getBool(PREFER_MULTI_JOIN_ON_CLAUSES, true);
@@ -198,4 +198,3 @@ struct GlutenJobSchedulerConfig
     }
 };
 }
-
diff --git a/cpp-ch/local-engine/Parser/JoinRelParser.cpp b/cpp-ch/local-engine/Parser/JoinRelParser.cpp
index ef19e007d4398..30651aff1b843 100644
--- a/cpp-ch/local-engine/Parser/JoinRelParser.cpp
+++ b/cpp-ch/local-engine/Parser/JoinRelParser.cpp
@@ -209,7 +209,7 @@ DB::QueryPlanPtr JoinRelParser::parseJoin(const substrait::JoinRel & join, DB::Q
     google::protobuf::StringValue optimization_info;
     optimization_info.ParseFromString(join.advanced_extension().optimization().value());
     auto join_opt_info =
JoinOptimizationInfo::parse(optimization_info.value()); - LOG_ERROR(getLogger("JoinRelParser"), "optimizaiton info:{}", optimization_info.value()); + LOG_DEBUG(getLogger("JoinRelParser"), "optimization info:{}", optimization_info.value()); auto storage_join = join_opt_info.is_broadcast ? BroadCastJoinBuilder::getJoin(join_opt_info.storage_join_key) : nullptr; if (storage_join) { diff --git a/cpp-ch/local-engine/Storages/Parquet/ParquetConverter.h b/cpp-ch/local-engine/Storages/Parquet/ParquetConverter.h index 312cea7efc0af..0ac16c11104d8 100644 --- a/cpp-ch/local-engine/Storages/Parquet/ParquetConverter.h +++ b/cpp-ch/local-engine/Storages/Parquet/ParquetConverter.h @@ -35,12 +35,23 @@ template struct ToParquet { using T = typename PhysicalType::c_type; - T as(const DB::Field & value, const parquet::ColumnDescriptor &) + T as(const DB::Field & value, const parquet::ColumnDescriptor & s) { - if constexpr (std::is_same_v) - return static_cast(value.safeGet()); + if (s.logical_type()->is_decimal()) + { + if constexpr (std::is_same_v) + { + const auto v = value.safeGet>(); + return v.getValue().value; + } + if constexpr (std::is_same_v) + { + const auto v = value.safeGet>(); + return v.getValue().value; + } + } // parquet::BooleanType, parquet::Int64Type, parquet::FloatType, parquet::DoubleType - return value.safeGet(); // FLOAT, DOUBLE, INT64 + return value.safeGet(); // FLOAT, DOUBLE, INT64, Int32 } }; @@ -57,28 +68,44 @@ struct ToParquet } }; +template +parquet::FixedLenByteArray convertField(const DB::Field & value, uint8_t * buf, size_t type_length) +{ + assert(sizeof(T) >= type_length); + + T val = value.safeGet>>().getValue().value; + std::reverse(reinterpret_cast(&val), reinterpret_cast(&val) + sizeof(T)); + const int offset = sizeof(T) - type_length; + + memcpy(buf, reinterpret_cast(&val) + offset, type_length); + return parquet::FixedLenByteArray(buf); +} + template <> struct ToParquet { - uint8_t buf[256]; + uint8_t buf[16]; using T = parquet::FixedLenByteArray; T as(const DB::Field & value, const parquet::ColumnDescriptor & descriptor) { - if (value.getType() != DB::Field::Types::Decimal128) - throw DB::Exception( - DB::ErrorCodes::LOGICAL_ERROR, "Field type '{}' for FIXED_LEN_BYTE_ARRAY is not supported", value.getTypeName()); - static_assert(sizeof(Int128) <= sizeof(buf)); - if (descriptor.type_length() > sizeof(Int128)) + if (value.getType() == DB::Field::Types::Decimal256) + throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Field type '{}' is not supported", value.getTypeName()); + + static_assert(sizeof(Int128) == sizeof(buf)); + + if (descriptor.type_length() > sizeof(buf)) throw DB::Exception( DB::ErrorCodes::LOGICAL_ERROR, - "descriptor.type_length() = {} , which is > {}, e.g. sizeof(Int128)", + "descriptor.type_length() = {} , which is > {}, e.g. 
sizeof(buf)", descriptor.type_length(), - sizeof(Int128)); - Int128 val = value.safeGet>().getValue(); - std::reverse(reinterpret_cast(&val), reinterpret_cast(&val) + sizeof(val)); - const int offset = sizeof(Int128) - descriptor.type_length(); - memcpy(buf, reinterpret_cast(&val) + offset, descriptor.type_length()); - return parquet::FixedLenByteArray(buf); + sizeof(buf)); + + if (value.getType() == DB::Field::Types::Decimal32) + return convertField(value, buf, descriptor.type_length()); + if (value.getType() == DB::Field::Types::Decimal64) + return convertField(value, buf, descriptor.type_length()); + + return convertField(value, buf, descriptor.type_length()); } }; @@ -86,7 +113,7 @@ struct ToParquet template struct ConverterNumeric { - using From = typename Col::Container::value_type; + using From = typename Col::ValueType; using To = typename DType::c_type; const Col & column; @@ -119,6 +146,7 @@ using ConverterInt64 = ConverterNumeric>; using ConverterDouble = ConverterNumeric>; +using ConverterFloat = ConverterNumeric>; struct ConverterString { @@ -141,7 +169,7 @@ struct ConverterString /// Like ConverterNumberAsFixedString, but converts to big-endian. Because that's the byte order /// Parquet uses for decimal types and literally nothing else, for some reason. -template +template struct ConverterDecimal { const parquet::ColumnDescriptor & descriptor; @@ -165,7 +193,7 @@ struct ConverterDecimal data_buf.resize(count * sizeof(T)); ptr_buf.resize(count); memcpy(data_buf.data(), reinterpret_cast(column.getData().data() + offset), count * sizeof(T)); - const size_t offset_in_buf = sizeof(Int128) - descriptor.type_length(); + const size_t offset_in_buf = sizeof(T) - descriptor.type_length(); ; for (size_t i = 0; i < count; ++i) { @@ -176,6 +204,13 @@ struct ConverterDecimal } }; +using Decimal128ToFLB = ConverterDecimal; +using Decimal64ToFLB = ConverterDecimal; +using Decimal32ToFLB = ConverterDecimal; + +using ConverterDecimal32 = ConverterNumeric>; +using ConverterDecimal64 = ConverterNumeric>; + class BaseConverter { public: @@ -239,6 +274,8 @@ std::shared_ptr> ParquetConverter::Make(const DB: case TypeIndex::UInt32: result = std::make_shared>(ConverterInt32_u(c)); break; + case TypeIndex::Decimal32: + result = std::make_shared>(ConverterDecimal32(c)); default: break; } @@ -251,6 +288,8 @@ std::shared_ptr> ParquetConverter::Make(const DB: case TypeIndex::UInt64: result = std::make_shared>(ConverterInt64_u(c)); break; + case TypeIndex::Decimal64: + result = std::make_shared>(ConverterDecimal64(c)); default: break; } @@ -258,6 +297,14 @@ std::shared_ptr> ParquetConverter::Make(const DB: case parquet::Type::INT96: break; case parquet::Type::FLOAT: + switch (c->getDataType()) + { + case TypeIndex::Float32: + result = std::make_shared>(ConverterFloat(c)); + break; + default: + break; + } break; case parquet::Type::DOUBLE: switch (c->getDataType()) @@ -283,8 +330,13 @@ std::shared_ptr> ParquetConverter::Make(const DB: switch (c->getDataType()) { case TypeIndex::Decimal128: - result = std::make_shared>>( - ConverterDecimal(c, desc)); + result = std::make_shared>(Decimal128ToFLB(c, desc)); + break; + case TypeIndex::Decimal64: + result = std::make_shared>(Decimal64ToFLB(c, desc)); + break; + case TypeIndex::Decimal32: + result = std::make_shared>(Decimal32ToFLB(c, desc)); break; default: break; diff --git a/cpp-ch/local-engine/tests/decmial_filter_push_down/18_2.json b/cpp-ch/local-engine/tests/decmial_filter_push_down/18_2.json new file mode 100644 index 0000000000000..5ad0a62325def --- 
/dev/null +++ b/cpp-ch/local-engine/tests/decmial_filter_push_down/18_2.json @@ -0,0 +1,160 @@ +{ + "relations": [ + { + "root": { + "input": { + "filter": { + "common": { + "direct": {} + }, + "input": { + "read": { + "common": { + "direct": {} + }, + "baseSchema": { + "names": [ + "a" + ], + "struct": { + "types": [ + { + "decimal": { + "scale": 2, + "precision": 18, + "nullability": "NULLABILITY_NULLABLE" + } + } + ] + }, + "columnTypes": [ + "NORMAL_COL" + ] + }, + "filter": { + "singularOrList": { + "value": { + "selection": { + "directReference": { + "structField": {} + } + } + }, + "options": [ + { + "literal": { + "decimal": { + "value": "yAAAAAAAAAAAAAAAAAAAAA==", + "precision": 18, + "scale": 2 + } + } + }, + { + "literal": { + "decimal": { + "value": "LAEAAAAAAAAAAAAAAAAAAA==", + "precision": 18, + "scale": 2 + } + } + }, + { + "literal": { + "decimal": { + "value": "kAEAAAAAAAAAAAAAAAAAAA==", + "precision": 18, + "scale": 2 + } + } + }, + { + "literal": { + "decimal": { + "value": "9AEAAAAAAAAAAAAAAAAAAA==", + "precision": 18, + "scale": 2 + } + } + } + ] + } + }, + "advancedExtension": { + "optimization": { + "@type": "type.googleapis.com/google.protobuf.StringValue", + "value": "isMergeTree=0\n" + } + } + } + }, + "condition": { + "singularOrList": { + "value": { + "selection": { + "directReference": { + "structField": {} + } + } + }, + "options": [ + { + "literal": { + "decimal": { + "value": "yAAAAAAAAAAAAAAAAAAAAA==", + "precision": 18, + "scale": 2 + } + } + }, + { + "literal": { + "decimal": { + "value": "LAEAAAAAAAAAAAAAAAAAAA==", + "precision": 18, + "scale": 2 + } + } + }, + { + "literal": { + "decimal": { + "value": "kAEAAAAAAAAAAAAAAAAAAA==", + "precision": 18, + "scale": 2 + } + } + }, + { + "literal": { + "decimal": { + "value": "9AEAAAAAAAAAAAAAAAAAAA==", + "precision": 18, + "scale": 2 + } + } + } + ] + } + } + } + }, + "names": [ + "a#4772" + ], + "outputSchema": { + "types": [ + { + "decimal": { + "scale": 2, + "precision": 18, + "nullability": "NULLABILITY_NULLABLE" + } + } + ], + "nullability": "NULLABILITY_REQUIRED" + } + } + } + ] +} \ No newline at end of file diff --git a/cpp-ch/local-engine/tests/decmial_filter_push_down/18_2_flba.snappy.parquet b/cpp-ch/local-engine/tests/decmial_filter_push_down/18_2_flba.snappy.parquet new file mode 100644 index 0000000000000000000000000000000000000000..ac0b015900dfcdcfd1a11089216a2d5848d12d5b GIT binary patch literal 488 zcmZWm%SyvQ6uphL1ks|<88VPX8ML$zhm2{p77^Tut0Jy^R3@2Gi%DCPRFTr1pW+7y z{zN~*4{_SpcxKglQi7=YSd8 zu$Y7U@6dncj982UFqhh-hXr8o9j|61mEK=x(Va;Yv4E7xIWgnH;?bm8e>iZK1JM!^oW?&y_7Dqn{V+DUNQX}%F}czzS59xj^XHM_9iHXs zILf)lZ~7+I8Sh+7_&_D|mBQ%)rA^dNR9u&BZ>NFd!8ndp7$XYl#^b*i^Uh>Cy_$k^ zvXI?Q((1xzRSFphS*?fTeypoHY|3^Z{e9gE_kyO0TKj>o+k1Ybm65yBH2t>Lp=vPK N#bzW_t}Z9o73 literal 0 HcmV?d00001 diff --git a/cpp-ch/local-engine/tests/gtest_parquet_columnindex_bug.cpp b/cpp-ch/local-engine/tests/gtest_parquet_columnindex_bug.cpp new file mode 100644 index 0000000000000..ee6e70305b27b --- /dev/null +++ b/cpp-ch/local-engine/tests/gtest_parquet_columnindex_bug.cpp @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include +#include +#include +#include +#include +#include +#include +#include + + +using namespace local_engine; + +using namespace DB; + +INCBIN(resource_embedded_pr_18_2_json, SOURCE_DIR "/utils/extern-local-engine/tests/decmial_filter_push_down/18_2.json"); +TEST(ColumnIndex, Deciaml182) +{ + // [precision,scale] = [18,2] + const auto context1 = DB::Context::createCopy(SerializedPlanParser::global_context); + + auto config = ExecutorConfig::loadFromContext(context1); + EXPECT_TRUE(config.use_local_format) << "gtest need set use_local_format to true"; + + const std::string split_template + = R"({"items":[{"uriFile":"{replace_local_files}","partitionIndex":"0","length":"488","parquet":{},"schema":{},"metadataColumns":[{}]}]})"; + const std::string split = replaceLocalFilesWildcards( + split_template, GLUTEN_DATA_DIR("/utils/extern-local-engine/tests/decmial_filter_push_down/18_2_flba.snappy.parquet")); + + SerializedPlanParser parser(context1); + parser.addSplitInfo(local_engine::JsonStringToBinary(split)); + + const auto plan = local_engine::JsonStringToMessage( + {reinterpret_cast(gresource_embedded_pr_18_2_jsonData), gresource_embedded_pr_18_2_jsonSize}); + + auto local_executor = parser.createExecutor(plan); + EXPECT_TRUE(local_executor->hasNext()); + const Block & x = *local_executor->nextColumnar(); + debug::headBlock(x); +} \ No newline at end of file diff --git a/cpp-ch/local-engine/tests/json/gtest_local_engine_config.json b/cpp-ch/local-engine/tests/json/gtest_local_engine_config.json index 10f0ea3dfdad9..8ada07819bb6c 100644 --- a/cpp-ch/local-engine/tests/json/gtest_local_engine_config.json +++ b/cpp-ch/local-engine/tests/json/gtest_local_engine_config.json @@ -260,6 +260,14 @@ "value": { "string": "false" } + }, + { + "key": { + "string": "spark.gluten.sql.columnar.backend.ch.runtime_config.use_local_format" + }, + "value": { + "string": "true" + } } ] } diff --git a/gluten-ut/common/src/test/scala/org/apache/gluten/utils/BackendTestSettings.scala b/gluten-ut/common/src/test/scala/org/apache/gluten/utils/BackendTestSettings.scala index 987635d067be6..dce8ac83710cb 100644 --- a/gluten-ut/common/src/test/scala/org/apache/gluten/utils/BackendTestSettings.scala +++ b/gluten-ut/common/src/test/scala/org/apache/gluten/utils/BackendTestSettings.scala @@ -30,7 +30,10 @@ abstract class BackendTestSettings { private val enabledSuites: java.util.Map[String, SuiteSettings] = new util.HashMap() protected def enableSuite[T: ClassTag]: SuiteSettings = { - val suiteName = implicitly[ClassTag[T]].runtimeClass.getCanonicalName + enableSuite(implicitly[ClassTag[T]].runtimeClass.getCanonicalName) + } + + protected def enableSuite(suiteName: String): SuiteSettings = { if (enabledSuites.containsKey(suiteName)) { throw new IllegalArgumentException("Duplicated suite name: " + suiteName) } diff --git a/gluten-ut/spark33/pom.xml b/gluten-ut/spark33/pom.xml index 9251ebc8ab78b..539f60a63f1be 100644 --- a/gluten-ut/spark33/pom.xml +++ b/gluten-ut/spark33/pom.xml @@ -51,6 +51,28 @@ test + + + + org.codehaus.mojo + build-helper-maven-plugin + + + add-sources + generate-sources + + 
add-test-source + + + + src/test/backends-clickhouse + + + + + + + backends-velox diff --git a/gluten-ut/spark33/src/test/backends-clickhouse/org/apache/gluten/execution/parquet/GlutenParquetV1FilterSuite2.scala b/gluten-ut/spark33/src/test/backends-clickhouse/org/apache/gluten/execution/parquet/GlutenParquetV1FilterSuite2.scala new file mode 100644 index 0000000000000..32c7784cff927 --- /dev/null +++ b/gluten-ut/spark33/src/test/backends-clickhouse/org/apache/gluten/execution/parquet/GlutenParquetV1FilterSuite2.scala @@ -0,0 +1,27 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.gluten.execution.parquet + +import org.apache.spark.SparkConf +import org.apache.spark.sql.execution.datasources.parquet.GlutenParquetV1FilterSuite + +/** testing use_local_format parquet reader. */ +class GlutenParquetV1FilterSuite2 extends GlutenParquetV1FilterSuite { + override def sparkConf: SparkConf = + super.sparkConf + .set("spark.gluten.sql.columnar.backend.ch.runtime_config.use_local_format", "true") +} diff --git a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala index c8e162e61d666..660d693cce3ff 100644 --- a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala +++ b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala @@ -1600,6 +1600,20 @@ class ClickHouseTestSettings extends BackendTestSettings { .exclude("SPARK-38825: in and notIn filters") .exclude("SPARK-36866: filter pushdown - year-month interval") .excludeGlutenTest("SPARK-25207: exception when duplicate fields in case-insensitive mode") + enableSuite("org.apache.gluten.execution.parquet.GlutenParquetV1FilterSuite2") + .exclude("filter pushdown - date") + .exclude("filter pushdown - timestamp") + .exclude("Filters should be pushed down for vectorized Parquet reader at row group level") + .exclude("SPARK-31026: Parquet predicate pushdown for fields having dots in the names") + .exclude("Filters should be pushed down for Parquet readers at row group level") + .exclude("filter pushdown - StringStartsWith") + .exclude("SPARK-17091: Convert IN predicate to Parquet filter push-down") + .exclude("SPARK-25207: exception when duplicate fields in case-insensitive mode") + .exclude("Support Parquet column index") + .exclude("SPARK-34562: Bloom filter push down") + .exclude("SPARK-38825: in and notIn filters") + .exclude("SPARK-36866: filter pushdown - year-month interval") + .excludeGlutenTest("SPARK-25207: exception when duplicate fields in case-insensitive mode") enableSuite[GlutenParquetV1PartitionDiscoverySuite] .exclude("SPARK-7847: Dynamic 
partition directory path escaping and unescaping")
     .exclude("Various partition value types")
diff --git a/gluten-ut/spark35/src/test/backends-clickhouse/org/apache/gluten/GlutenColumnarWriteTestSupport.scala b/gluten-ut/spark35/src/test/backends-clickhouse/org/apache/gluten/GlutenColumnarWriteTestSupport.scala
index 43b83afe9af37..4258cd891a5a6 100644
--- a/gluten-ut/spark35/src/test/backends-clickhouse/org/apache/gluten/GlutenColumnarWriteTestSupport.scala
+++ b/gluten-ut/spark35/src/test/backends-clickhouse/org/apache/gluten/GlutenColumnarWriteTestSupport.scala
@@ -16,11 +16,12 @@
  */
 package org.apache.gluten
 
-import org.apache.spark.sql.execution.SparkPlan
+import org.apache.spark.sql.execution.{ColumnarWriteFilesExec, SparkPlan}
 
 trait GlutenColumnarWriteTestSupport {
 
   def checkWriteFilesAndGetChild(sparkPlan: SparkPlan): SparkPlan = {
-    throw new UnsupportedOperationException("Clickhouse Backend does not support write files")
+    assert(sparkPlan.isInstanceOf[ColumnarWriteFilesExec])
+    sparkPlan.asInstanceOf[ColumnarWriteFilesExec].child
   }
 }
diff --git a/gluten-ut/spark35/src/test/backends-clickhouse/org/apache/gluten/execution/parquet/GlutenParquetV1FilterSuite2.scala b/gluten-ut/spark35/src/test/backends-clickhouse/org/apache/gluten/execution/parquet/GlutenParquetV1FilterSuite2.scala
new file mode 100644
index 0000000000000..d20a419597d10
--- /dev/null
+++ b/gluten-ut/spark35/src/test/backends-clickhouse/org/apache/gluten/execution/parquet/GlutenParquetV1FilterSuite2.scala
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.gluten.execution.parquet
+
+import org.apache.spark.SparkConf
+import org.apache.spark.sql.execution.datasources.parquet.GlutenParquetV1FilterSuite
+
+
+/** testing use_local_format parquet reader.
+ * FIXME: Run this suite in Spark 35 CI Pipeline
+ */
+class GlutenParquetV1FilterSuite2 extends GlutenParquetV1FilterSuite {
+  override def sparkConf: SparkConf =
+    super.sparkConf
+      .set("spark.gluten.sql.columnar.backend.ch.runtime_config.use_local_format", "true")
+}
diff --git a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
index 77c12621efebc..bf971aba7282a 100644
--- a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
+++ b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
@@ -1438,6 +1438,8 @@ class ClickHouseTestSettings extends BackendTestSettings {
     .exclude("SPARK-34562: Bloom filter push down")
     .exclude("SPARK-38825: in and notIn filters")
     .exclude("SPARK-36866: filter pushdown - year-month interval")
+    .exclude("filter pushdown - StringContains")
+    .exclude("filter pushdown - StringPredicate")
     .excludeGlutenTest("SPARK-25207: exception when duplicate fields in case-insensitive mode")
   enableSuite[GlutenParquetV1PartitionDiscoverySuite]
     .exclude("SPARK-7847: Dynamic partition directory path escaping and unescaping")

From a71f35ddacec8fb8354019a24c696cf9c15c519d Mon Sep 17 00:00:00 2001
From: Wenzheng Liu
Date: Tue, 20 Aug 2024 13:24:12 +0800
Subject: [PATCH 3/4] [GLUTEN-6915][CH] Follow VL, fix github issue comment
 (#6922)

---
 .github/workflows/clickhouse_be_trigger.yml | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/.github/workflows/clickhouse_be_trigger.yml b/.github/workflows/clickhouse_be_trigger.yml
index f19a328adcb34..19f9b55a03031 100644
--- a/.github/workflows/clickhouse_be_trigger.yml
+++ b/.github/workflows/clickhouse_be_trigger.yml
@@ -34,17 +34,21 @@ on:
 jobs:
   add-comment:
     runs-on: ubuntu-latest
+    permissions: write-all
     steps:
       - name: Checkout code
         uses: actions/checkout@v4
       - name: Sleep for Dev PR workflow done
         run: |
           sleep 15
-
       - name: Add comment to PR
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-        run: |
-          COMMENT="Run Gluten Clickhouse CI"
-          URL=$(jq -r .pull_request.comments_url "$GITHUB_EVENT_PATH")
-          curl -H "Authorization: token ${GITHUB_TOKEN}" -X POST -d "{\"body\":\"$COMMENT\"}" "${URL}"
+        uses: actions/github-script@v7
+        with:
+          github-token: ${{ secrets.GITHUB_TOKEN }}
+          script: |
+            await github.rest.issues.createComment({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              issue_number: context.payload.number,
+              body: "Run Gluten Clickhouse CI"
+            });

From 4c52bddf9f152a4a95d81cdb4570cbfb4752e5b0 Mon Sep 17 00:00:00 2001
From: Kyligence Git
Date: Tue, 20 Aug 2024 01:06:31 -0500
Subject: [PATCH 4/4] [GLUTEN-1632][CH]Daily Update Clickhouse Version
 (20240820) (#6929)

Co-authored-by: kyligence-git
---
 cpp-ch/clickhouse.version | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/cpp-ch/clickhouse.version b/cpp-ch/clickhouse.version
index d41875c54d7dc..b88675c4a96d6 100644
--- a/cpp-ch/clickhouse.version
+++ b/cpp-ch/clickhouse.version
@@ -1,3 +1,3 @@
 CH_ORG=Kyligence
-CH_BRANCH=rebase_ch/20240817
-CH_COMMIT=ed191291681
+CH_BRANCH=rebase_ch/20240820
+CH_COMMIT=b5b8245b022