Skip to content

Commit

Permalink
[GLUTEN-1632][CH]Daily Update Clickhouse Version (20240608) (#6023)
Browse files Browse the repository at this point in the history
* [GLUTEN-1632][CH]Daily Update Clickhouse Version (20240608)

* Fix Build due to ClickHouse/ClickHouse#64798

* Fix UT

---------

Co-authored-by: kyligence-git <[email protected]>
Co-authored-by: Chang Chen <[email protected]>
  • Loading branch information
3 people authored Jun 8, 2024
1 parent 85e8619 commit 31c384f
Show file tree
Hide file tree
Showing 4 changed files with 91 additions and 61 deletions.
4 changes: 2 additions & 2 deletions cpp-ch/clickhouse.version
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
CH_ORG=Kyligence
CH_BRANCH=rebase_ch/20240606
CH_COMMIT=fed1c01e169
CH_BRANCH=rebase_ch/20240608
CH_COMMIT=b5050282335
69 changes: 51 additions & 18 deletions cpp-ch/local-engine/Functions/SparkFunctionFloor.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,14 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <Functions/FunctionsRound.h>
#include <Functions/FunctionFactory.h>
#pragma once

#include <Columns/ColumnNullable.h>
#include <Columns/ColumnVector.h>
#include <DataTypes/IDataType.h>
#include <DataTypes/DataTypesNumber.h>
#include <bit>
#include <DataTypes/IDataType.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionsRound.h>

using namespace DB;

Expand Down Expand Up @@ -130,20 +131,29 @@ struct SparkFloatFloorImpl
{
private:
static_assert(!is_decimal<T>);
using Op = FloatRoundingComputation<T, RoundingMode::Floor, scale_mode>;
using Data = std::array<T, Op::data_count>;
template <
Vectorize vectorize =
#ifdef __SSE4_1__
Vectorize::Yes
#else
Vectorize::No
#endif
>
using Op = FloatRoundingComputation<T, RoundingMode::Floor, scale_mode, vectorize>;
using Data = std::array<T, Op<>::data_count>;

public:
static void apply(const PaddedPODArray<T> & in, size_t scale, PaddedPODArray<T> & out, PaddedPODArray<UInt8> & null_map)
{
auto mm_scale = Op::prepare(scale);
auto mm_scale = Op<>::prepare(scale);
const size_t data_count = std::tuple_size<Data>();
const T* end_in = in.data() + in.size();
const T* limit = in.data() + in.size() / data_count * data_count;
const T* __restrict p_in = in.data();
T* __restrict p_out = out.data();
const T * end_in = in.data() + in.size();
const T * limit = in.data() + in.size() / data_count * data_count;
const T * __restrict p_in = in.data();
T * __restrict p_out = out.data();
while (p_in < limit)
{
Op::compute(p_in, mm_scale, p_out);
Op<>::compute(p_in, mm_scale, p_out);
p_in += data_count;
p_out += data_count;
}
Expand All @@ -154,7 +164,7 @@ struct SparkFloatFloorImpl
Data tmp_dst;
size_t tail_size_bytes = (end_in - p_in) * sizeof(*p_in);
memcpy(&tmp_src, p_in, tail_size_bytes);
Op::compute(reinterpret_cast<T *>(&tmp_src), mm_scale, reinterpret_cast<T *>(&tmp_dst));
Op<>::compute(reinterpret_cast<T *>(&tmp_src), mm_scale, reinterpret_cast<T *>(&tmp_dst));
memcpy(p_out, &tmp_dst, tail_size_bytes);
}

Expand All @@ -171,29 +181,52 @@ struct SparkFloatFloorImpl
checkAndSetNullable(out[i], null_map[i]);
}
}

};

class SparkFunctionFloor : public DB::FunctionFloor
{
/// Reads the optional scale (second argument) of the floor function.
///
/// Returns 0 unless exactly two arguments are passed. Otherwise the second
/// argument must be a constant column holding a UInt64 or Int64 field whose
/// value lies within the range of `Scale`.
///
/// Throws ILLEGAL_COLUMN if the scale column is not constant or its field is
/// not an integer, and ARGUMENT_OUT_OF_BOUND if the value does not fit `Scale`.
static Scale getScaleArg(const ColumnsWithTypeAndName & arguments)
{
    /// No explicit scale argument: the scale is zero.
    if (arguments.size() != 2)
        return 0;

    const IColumn & column = *arguments[1].column;
    if (!isColumnConst(column))
        throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Scale argument for rounding functions must be constant");

    Field value = assert_cast<const ColumnConst &>(column).getField();
    const auto value_type = value.getType();
    const bool is_integer = value_type == Field::Types::UInt64 || value_type == Field::Types::Int64;
    if (!is_integer)
        throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Scale argument for rounding functions must have integer type");

    /// The value is read as Int64 for both accepted field types before the range check.
    Int64 wide_scale = value.get<Int64>();
    const bool above_max = wide_scale > std::numeric_limits<Scale>::max();
    const bool below_min = wide_scale < std::numeric_limits<Scale>::min();
    if (above_max || below_min)
        throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Scale argument for rounding function is too large");

    return wide_scale;
}

public:
static constexpr auto name = "sparkFloor";
static DB::FunctionPtr create(DB::ContextPtr) { return std::make_shared<SparkFunctionFloor>(); }
SparkFunctionFloor() = default;
~SparkFunctionFloor() override = default;
String getName() const override { return name; }

DB::DataTypePtr getReturnTypeImpl(const DB::DataTypes & arguments) const override
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; }

DB::DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{
auto result_type = DB::FunctionFloor::getReturnTypeImpl(arguments);
return makeNullable(result_type);
}

DB::ColumnPtr executeImpl(const DB::ColumnsWithTypeAndName & arguments, const DB::DataTypePtr & result_type, size_t input_rows) const override
DB::ColumnPtr
executeImpl(const DB::ColumnsWithTypeAndName & arguments, const DB::DataTypePtr & result_type, size_t input_rows) const override
{
const ColumnWithTypeAndName & first_arg = arguments[0];
Scale scale_arg = getScaleArg(arguments);
switch(first_arg.type->getTypeId())
switch (first_arg.type->getTypeId())
{
case TypeIndex::Float32:
return executeInternal<Float32>(first_arg.column, scale_arg);
Expand All @@ -206,7 +239,7 @@ class SparkFunctionFloor : public DB::FunctionFloor
}
}

template<typename T>
template <typename T>
static ColumnPtr executeInternal(const ColumnPtr & col_arg, const Scale & scale_arg)
{
const auto * col = checkAndGetColumn<ColumnVector<T>>(col_arg.get());
Expand Down
77 changes: 37 additions & 40 deletions cpp-ch/local-engine/Functions/SparkFunctionRoundHalfUp.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,11 @@

#include <Functions/FunctionsRound.h>

/// Declares an error code constant that is defined in another translation unit.
/// FunctionRoundingHalfUp::getReturnTypeImpl throws it when the function is
/// called with zero or more than two arguments.
namespace DB::ErrorCodes
{
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
}

namespace local_engine
{
using namespace DB;
Expand All @@ -35,10 +40,11 @@ class BaseFloatRoundingHalfUpComputation<Float32>

static VectorType load(const ScalarType * in) { return _mm_loadu_ps(in); }
static VectorType load1(const ScalarType in) { return _mm_load1_ps(&in); }
static void store(ScalarType * out, VectorType val) { _mm_storeu_ps(out, val);}
static void store(ScalarType * out, VectorType val) { _mm_storeu_ps(out, val); }
static VectorType multiply(VectorType val, VectorType scale) { return _mm_mul_ps(val, scale); }
static VectorType divide(VectorType val, VectorType scale) { return _mm_div_ps(val, scale); }
template <RoundingMode mode> static VectorType apply(VectorType val)
template <RoundingMode mode>
static VectorType apply(VectorType val)
{
ScalarType tempFloatsIn[data_count];
ScalarType tempFloatsOut[data_count];
Expand All @@ -49,10 +55,7 @@ class BaseFloatRoundingHalfUpComputation<Float32>
return load(tempFloatsOut);
}

static VectorType prepare(size_t scale)
{
return load1(scale);
}
static VectorType prepare(size_t scale) { return load1(scale); }
};

template <>
Expand All @@ -65,10 +68,11 @@ class BaseFloatRoundingHalfUpComputation<Float64>

static VectorType load(const ScalarType * in) { return _mm_loadu_pd(in); }
static VectorType load1(const ScalarType in) { return _mm_load1_pd(&in); }
static void store(ScalarType * out, VectorType val) { _mm_storeu_pd(out, val);}
static void store(ScalarType * out, VectorType val) { _mm_storeu_pd(out, val); }
static VectorType multiply(VectorType val, VectorType scale) { return _mm_mul_pd(val, scale); }
static VectorType divide(VectorType val, VectorType scale) { return _mm_div_pd(val, scale); }
template <RoundingMode mode> static VectorType apply(VectorType val)
template <RoundingMode mode>
static VectorType apply(VectorType val)
{
ScalarType tempFloatsIn[data_count];
ScalarType tempFloatsOut[data_count];
Expand All @@ -79,10 +83,7 @@ class BaseFloatRoundingHalfUpComputation<Float64>
return load(tempFloatsOut);
}

static VectorType prepare(size_t scale)
{
return load1(scale);
}
static VectorType prepare(size_t scale) { return load1(scale); }
};


Expand Down Expand Up @@ -135,11 +136,11 @@ struct FloatRoundingHalfUpImpl

const size_t data_count = std::tuple_size<Data>();

const T* end_in = in.data() + in.size();
const T* limit = in.data() + in.size() / data_count * data_count;
const T * end_in = in.data() + in.size();
const T * limit = in.data() + in.size() / data_count * data_count;

const T* __restrict p_in = in.data();
T* __restrict p_out = out.data();
const T * __restrict p_in = in.data();
T * __restrict p_out = out.data();

while (p_in < limit)
{
Expand Down Expand Up @@ -169,9 +170,10 @@ template <typename T, RoundingMode rounding_mode, TieBreakingMode tie_breaking_m
struct DispatcherRoundingHalfUp
{
template <ScaleMode scale_mode>
using FunctionRoundingImpl = std::conditional_t<std::is_floating_point_v<T>,
FloatRoundingHalfUpImpl<T, rounding_mode, scale_mode>,
IntegerRoundingImpl<T, rounding_mode, scale_mode, tie_breaking_mode>>;
using FunctionRoundingImpl = std::conditional_t<
std::is_floating_point_v<T>,
FloatRoundingHalfUpImpl<T, rounding_mode, scale_mode>,
IntegerRoundingImpl<T, rounding_mode, scale_mode, tie_breaking_mode>>;

static ColumnPtr apply(const IColumn * col_general, Scale scale_arg)
{
Expand Down Expand Up @@ -233,10 +235,7 @@ class FunctionRoundingHalfUp : public IFunction
static constexpr auto name = "roundHalfUp";
static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionRoundingHalfUp>(); }

String getName() const override
{
return name;
}
String getName() const override { return name; }

bool isVariadic() const override { return true; }
size_t getNumberOfArguments() const override { return 0; }
Expand All @@ -246,14 +245,16 @@ class FunctionRoundingHalfUp : public IFunction
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
if ((arguments.empty()) || (arguments.size() > 2))
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
"Number of arguments for function {} doesn't match: passed {}, should be 1 or 2.",
getName(), arguments.size());
throw Exception(
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
"Number of arguments for function {} doesn't match: passed {}, should be 1 or 2.",
getName(),
arguments.size());

for (const auto & type : arguments)
if (!isNumber(type))
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}",
arguments[0]->getName(), getName());
throw Exception(
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}", arguments[0]->getName(), getName());

return arguments[0];
}
Expand All @@ -267,13 +268,11 @@ class FunctionRoundingHalfUp : public IFunction
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Scale argument for rounding functions must be constant");

Field scale_field = assert_cast<const ColumnConst &>(scale_column).getField();
if (scale_field.getType() != Field::Types::UInt64
&& scale_field.getType() != Field::Types::Int64)
if (scale_field.getType() != Field::Types::UInt64 && scale_field.getType() != Field::Types::Int64)
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Scale argument for rounding functions must have integer type");

Int64 scale64 = scale_field.get<Int64>();
if (scale64 > std::numeric_limits<Scale>::max()
|| scale64 < std::numeric_limits<Scale>::min())
if (scale64 > std::numeric_limits<Scale>::max() || scale64 < std::numeric_limits<Scale>::min())
throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Scale argument for rounding function is too large");

return scale64;
Expand Down Expand Up @@ -305,26 +304,24 @@ class FunctionRoundingHalfUp : public IFunction
};

if (!callOnIndexAndDataType<void>(column.type->getTypeId(), call))
{
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}", column.name, getName());
}

return res;
}

bool hasInformationAboutMonotonicity() const override
{
return true;
}
bool hasInformationAboutMonotonicity() const override { return true; }

Monotonicity getMonotonicityForRange(const IDataType &, const Field &, const Field &) const override
{
return { .is_monotonic = true, .is_always_monotonic = true };
return {.is_monotonic = true, .is_always_monotonic = true};
}
};


struct NameRoundHalfUp { static constexpr auto name = "roundHalfUp"; };
struct NameRoundHalfUp
{
static constexpr auto name = "roundHalfUp";
};

using FunctionRoundHalfUp = FunctionRoundingHalfUp<NameRoundHalfUp, RoundingMode::Round, TieBreakingMode::Auto>;

Expand Down
2 changes: 1 addition & 1 deletion cpp-ch/local-engine/tests/gtest_parquet_columnindex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -604,7 +604,7 @@ TEST(ColumnIndex, DecimalField)
ASSERT_EQ(actual, expected);


/// Eexception test, only in relase release node
/// Exception test, only in release mode
#ifdef NDEBUG
Field unsupport = DecimalField<Decimal256>(Int256(300000000), 4);
EXPECT_THROW(to_parquet.as(unsupport, desc), DB::Exception);
Expand Down

0 comments on commit 31c384f

Please sign in to comment.