[GLUTEN-1632][CH]Daily Update Clickhouse Version (20240608) (#6023)

* [GLUTEN-1632][CH]Daily Update Clickhouse Version (20240608)
* Fix Build due to ClickHouse/ClickHouse#64798
* Fix UT

---------

Co-authored-by: kyligence-git <[email protected]>
Co-authored-by: Chang Chen <[email protected]>
apache · Jun 8, 2024 · 31c384f · 31c384f
1 parent 85e8619
commit 31c384f
Show file tree

Hide file tree

Showing 4 changed files with 91 additions and 61 deletions.
diff --git a/cpp-ch/clickhouse.version b/cpp-ch/clickhouse.version
@@ -1,3 +1,3 @@
 CH_ORG=Kyligence
-CH_BRANCH=rebase_ch/20240606
-CH_COMMIT=fed1c01e169
+CH_BRANCH=rebase_ch/20240608
+CH_COMMIT=b5050282335
diff --git a/cpp-ch/local-engine/Functions/SparkFunctionFloor.h b/cpp-ch/local-engine/Functions/SparkFunctionFloor.h
@@ -14,13 +14,14 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#include <Functions/FunctionsRound.h>
-#include <Functions/FunctionFactory.h>
+#pragma once
+
 #include <Columns/ColumnNullable.h>
 #include <Columns/ColumnVector.h>
-#include <DataTypes/IDataType.h>
 #include <DataTypes/DataTypesNumber.h>
-#include <bit>
+#include <DataTypes/IDataType.h>
+#include <Functions/FunctionFactory.h>
+#include <Functions/FunctionsRound.h>
 
 using namespace DB;
 
@@ -130,20 +131,29 @@ struct SparkFloatFloorImpl
 {
 private:
     static_assert(!is_decimal<T>);
-    using Op = FloatRoundingComputation<T, RoundingMode::Floor, scale_mode>;
-    using Data = std::array<T, Op::data_count>;
+    template <
+        Vectorize vectorize =
+#ifdef __SSE4_1__
+            Vectorize::Yes
+#else
+            Vectorize::No
+#endif
+        >
+    using Op = FloatRoundingComputation<T, RoundingMode::Floor, scale_mode, vectorize>;
+    using Data = std::array<T, Op<>::data_count>;
+
 public:
     static void apply(const PaddedPODArray<T> & in, size_t scale, PaddedPODArray<T> & out, PaddedPODArray<UInt8> & null_map)
     {
-        auto mm_scale = Op::prepare(scale);
+        auto mm_scale = Op<>::prepare(scale);
         const size_t data_count = std::tuple_size<Data>();
-        const T* end_in = in.data() + in.size();
-        const T* limit = in.data() + in.size() / data_count * data_count;
-        const T* __restrict p_in = in.data();
-        T* __restrict p_out = out.data();
+        const T * end_in = in.data() + in.size();
+        const T * limit = in.data() + in.size() / data_count * data_count;
+        const T * __restrict p_in = in.data();
+        T * __restrict p_out = out.data();
         while (p_in < limit)
         {
-            Op::compute(p_in, mm_scale, p_out);
+            Op<>::compute(p_in, mm_scale, p_out);
             p_in += data_count;
             p_out += data_count;
         }
@@ -154,7 +164,7 @@ struct SparkFloatFloorImpl
             Data tmp_dst;
             size_t tail_size_bytes = (end_in - p_in) * sizeof(*p_in);
             memcpy(&tmp_src, p_in, tail_size_bytes);
-            Op::compute(reinterpret_cast<T *>(&tmp_src), mm_scale, reinterpret_cast<T *>(&tmp_dst));
+            Op<>::compute(reinterpret_cast<T *>(&tmp_src), mm_scale, reinterpret_cast<T *>(&tmp_dst));
             memcpy(p_out, &tmp_dst, tail_size_bytes);
         }
 
@@ -171,29 +181,52 @@ struct SparkFloatFloorImpl
                 checkAndSetNullable(out[i], null_map[i]);
         }
     }
-
 };
 
 class SparkFunctionFloor : public DB::FunctionFloor
 {
+    static Scale getScaleArg(const ColumnsWithTypeAndName & arguments)
+    {
+        if (arguments.size() == 2)
+        {
+            const IColumn & scale_column = *arguments[1].column;
+            if (!isColumnConst(scale_column))
+                throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Scale argument for rounding functions must be constant");
+
+            Field scale_field = assert_cast<const ColumnConst &>(scale_column).getField();
+            if (scale_field.getType() != Field::Types::UInt64 && scale_field.getType() != Field::Types::Int64)
+                throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Scale argument for rounding functions must have integer type");
+
+            Int64 scale64 = scale_field.get<Int64>();
+            if (scale64 > std::numeric_limits<Scale>::max() || scale64 < std::numeric_limits<Scale>::min())
+                throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Scale argument for rounding function is too large");
+
+            return scale64;
+        }
+        return 0;
+    }
+
 public:
     static constexpr auto name = "sparkFloor";
     static DB::FunctionPtr create(DB::ContextPtr) { return std::make_shared<SparkFunctionFloor>(); }
     SparkFunctionFloor() = default;
     ~SparkFunctionFloor() override = default;
     String getName() const override { return name; }
 
-    DB::DataTypePtr getReturnTypeImpl(const DB::DataTypes & arguments) const override
+    ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; }
+
+    DB::DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
     {
         auto result_type = DB::FunctionFloor::getReturnTypeImpl(arguments);
         return makeNullable(result_type);
     }
 
-    DB::ColumnPtr executeImpl(const DB::ColumnsWithTypeAndName & arguments, const DB::DataTypePtr & result_type, size_t input_rows) const override
+    DB::ColumnPtr
+    executeImpl(const DB::ColumnsWithTypeAndName & arguments, const DB::DataTypePtr & result_type, size_t input_rows) const override
     {
         const ColumnWithTypeAndName & first_arg = arguments[0];
         Scale scale_arg = getScaleArg(arguments);
-        switch(first_arg.type->getTypeId())
+        switch (first_arg.type->getTypeId())
         {
             case TypeIndex::Float32:
                 return executeInternal<Float32>(first_arg.column, scale_arg);
@@ -206,7 +239,7 @@ class SparkFunctionFloor : public DB::FunctionFloor
         }
     }
 
-    template<typename T>
+    template <typename T>
     static ColumnPtr executeInternal(const ColumnPtr & col_arg, const Scale & scale_arg)
     {
         const auto * col = checkAndGetColumn<ColumnVector<T>>(col_arg.get());

diff --git a/cpp-ch/local-engine/Functions/SparkFunctionRoundHalfUp.h b/cpp-ch/local-engine/Functions/SparkFunctionRoundHalfUp.h
@@ -18,6 +18,11 @@
 
 #include <Functions/FunctionsRound.h>
 
+namespace DB::ErrorCodes
+{
+extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
+}
+
 namespace local_engine
 {
 using namespace DB;
@@ -35,10 +40,11 @@ class BaseFloatRoundingHalfUpComputation<Float32>
 
     static VectorType load(const ScalarType * in) { return _mm_loadu_ps(in); }
     static VectorType load1(const ScalarType in) { return _mm_load1_ps(&in); }
-    static void store(ScalarType * out, VectorType val) { _mm_storeu_ps(out, val);}
+    static void store(ScalarType * out, VectorType val) { _mm_storeu_ps(out, val); }
     static VectorType multiply(VectorType val, VectorType scale) { return _mm_mul_ps(val, scale); }
     static VectorType divide(VectorType val, VectorType scale) { return _mm_div_ps(val, scale); }
-    template <RoundingMode mode> static VectorType apply(VectorType val)
+    template <RoundingMode mode>
+    static VectorType apply(VectorType val)
     {
         ScalarType tempFloatsIn[data_count];
         ScalarType tempFloatsOut[data_count];
@@ -49,10 +55,7 @@ class BaseFloatRoundingHalfUpComputation<Float32>
         return load(tempFloatsOut);
     }
 
-    static VectorType prepare(size_t scale)
-    {
-        return load1(scale);
-    }
+    static VectorType prepare(size_t scale) { return load1(scale); }
 };
 
 template <>
@@ -65,10 +68,11 @@ class BaseFloatRoundingHalfUpComputation<Float64>
 
     static VectorType load(const ScalarType * in) { return _mm_loadu_pd(in); }
     static VectorType load1(const ScalarType in) { return _mm_load1_pd(&in); }
-    static void store(ScalarType * out, VectorType val) { _mm_storeu_pd(out, val);}
+    static void store(ScalarType * out, VectorType val) { _mm_storeu_pd(out, val); }
     static VectorType multiply(VectorType val, VectorType scale) { return _mm_mul_pd(val, scale); }
     static VectorType divide(VectorType val, VectorType scale) { return _mm_div_pd(val, scale); }
-    template <RoundingMode mode> static VectorType apply(VectorType val)
+    template <RoundingMode mode>
+    static VectorType apply(VectorType val)
     {
         ScalarType tempFloatsIn[data_count];
         ScalarType tempFloatsOut[data_count];
@@ -79,10 +83,7 @@ class BaseFloatRoundingHalfUpComputation<Float64>
         return load(tempFloatsOut);
     }
 
-    static VectorType prepare(size_t scale)
-    {
-        return load1(scale);
-    }
+    static VectorType prepare(size_t scale) { return load1(scale); }
 };
 
 
@@ -135,11 +136,11 @@ struct FloatRoundingHalfUpImpl
 
         const size_t data_count = std::tuple_size<Data>();
 
-        const T* end_in = in.data() + in.size();
-        const T* limit = in.data() + in.size() / data_count * data_count;
+        const T * end_in = in.data() + in.size();
+        const T * limit = in.data() + in.size() / data_count * data_count;
 
-        const T* __restrict p_in = in.data();
-        T* __restrict p_out = out.data();
+        const T * __restrict p_in = in.data();
+        T * __restrict p_out = out.data();
 
         while (p_in < limit)
         {
@@ -169,9 +170,10 @@ template <typename T, RoundingMode rounding_mode, TieBreakingMode tie_breaking_m
 struct DispatcherRoundingHalfUp
 {
     template <ScaleMode scale_mode>
-    using FunctionRoundingImpl = std::conditional_t<std::is_floating_point_v<T>,
-                                                    FloatRoundingHalfUpImpl<T, rounding_mode, scale_mode>,
-                                                    IntegerRoundingImpl<T, rounding_mode, scale_mode, tie_breaking_mode>>;
+    using FunctionRoundingImpl = std::conditional_t<
+        std::is_floating_point_v<T>,
+        FloatRoundingHalfUpImpl<T, rounding_mode, scale_mode>,
+        IntegerRoundingImpl<T, rounding_mode, scale_mode, tie_breaking_mode>>;
 
     static ColumnPtr apply(const IColumn * col_general, Scale scale_arg)
     {
@@ -233,10 +235,7 @@ class FunctionRoundingHalfUp : public IFunction
     static constexpr auto name = "roundHalfUp";
     static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionRoundingHalfUp>(); }
 
-    String getName() const override
-    {
-        return name;
-    }
+    String getName() const override { return name; }
 
     bool isVariadic() const override { return true; }
     size_t getNumberOfArguments() const override { return 0; }
@@ -246,14 +245,16 @@ class FunctionRoundingHalfUp : public IFunction
     DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
     {
         if ((arguments.empty()) || (arguments.size() > 2))
-            throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
-                            "Number of arguments for function {} doesn't match: passed {}, should be 1 or 2.",
-                            getName(), arguments.size());
+            throw Exception(
+                ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
+                "Number of arguments for function {} doesn't match: passed {}, should be 1 or 2.",
+                getName(),
+                arguments.size());
 
         for (const auto & type : arguments)
             if (!isNumber(type))
-                throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}",
-                                arguments[0]->getName(), getName());
+                throw Exception(
+                    ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}", arguments[0]->getName(), getName());
 
         return arguments[0];
     }
@@ -267,13 +268,11 @@ class FunctionRoundingHalfUp : public IFunction
                 throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Scale argument for rounding functions must be constant");
 
             Field scale_field = assert_cast<const ColumnConst &>(scale_column).getField();
-            if (scale_field.getType() != Field::Types::UInt64
-                && scale_field.getType() != Field::Types::Int64)
+            if (scale_field.getType() != Field::Types::UInt64 && scale_field.getType() != Field::Types::Int64)
                 throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Scale argument for rounding functions must have integer type");
 
             Int64 scale64 = scale_field.get<Int64>();
-            if (scale64 > std::numeric_limits<Scale>::max()
-                || scale64 < std::numeric_limits<Scale>::min())
+            if (scale64 > std::numeric_limits<Scale>::max() || scale64 < std::numeric_limits<Scale>::min())
                 throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Scale argument for rounding function is too large");
 
             return scale64;
@@ -305,26 +304,24 @@ class FunctionRoundingHalfUp : public IFunction
         };
 
         if (!callOnIndexAndDataType<void>(column.type->getTypeId(), call))
-        {
             throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}", column.name, getName());
-        }
 
         return res;
     }
 
-    bool hasInformationAboutMonotonicity() const override
-    {
-        return true;
-    }
+    bool hasInformationAboutMonotonicity() const override { return true; }
 
     Monotonicity getMonotonicityForRange(const IDataType &, const Field &, const Field &) const override
     {
-        return { .is_monotonic = true, .is_always_monotonic = true };
+        return {.is_monotonic = true, .is_always_monotonic = true};
     }
 };
 
 
-struct NameRoundHalfUp { static constexpr auto name = "roundHalfUp"; };
+struct NameRoundHalfUp
+{
+    static constexpr auto name = "roundHalfUp";
+};
 
 using FunctionRoundHalfUp = FunctionRoundingHalfUp<NameRoundHalfUp, RoundingMode::Round, TieBreakingMode::Auto>;
 

diff --git a/cpp-ch/local-engine/tests/gtest_parquet_columnindex.cpp b/cpp-ch/local-engine/tests/gtest_parquet_columnindex.cpp
@@ -604,7 +604,7 @@ TEST(ColumnIndex, DecimalField)
     ASSERT_EQ(actual, expected);
 
 
-    /// Eexception test, only in relase release node
+    /// Exception test, only in release mode
 #ifdef NDEBUG
     Field unsupport = DecimalField<Decimal256>(Int256(300000000), 4);
     EXPECT_THROW(to_parquet.as(unsupport, desc), DB::Exception);