From d448e59e0430e76166bd630d533274108d5f1f1b Mon Sep 17 00:00:00 2001 From: Wenzheng Liu Date: Thu, 4 Jan 2024 20:01:18 +0800 Subject: [PATCH] [GLUTEN-1632][CH]Daily Update Clickhouse Version (20240104) (#4272) * [GLUTEN-1632][CH]Daily Update Clickhouse Version (20240104) * fix build due to https://github.com/ClickHouse/ClickHouse/pull/57053 * fix test due to https://github.com/oap-project/gluten/pull/4231 * fix ut due to https://github.com/ClickHouse/ClickHouse/pull/58440 don't know the reason, let's relax double equality check --------- Co-authored-by: kyligence-git Co-authored-by: Chang Chen --- .../GlutenClickHouseTPCDSParquetAQESuite.scala | 2 +- ...seTPCDSParquetColumnarShuffleAQESuite.scala | 2 +- ...HouseTPCDSParquetColumnarShuffleSuite.scala | 2 +- .../GlutenClickHouseTPCDSParquetSuite.scala | 2 +- ...nClickHouseWholeStageTransformerSuite.scala | 18 +++++++++++++++++- cpp-ch/clickhouse.version | 4 ++-- .../OptimizedParquetBlockInputFormat.cpp | 4 ++-- .../OptimizedParquetBlockInputFormat.h | 2 +- cpp-ch/local-engine/tests/gtest_ch_join.cpp | 1 + 9 files changed, 27 insertions(+), 10 deletions(-) diff --git a/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseTPCDSParquetAQESuite.scala b/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseTPCDSParquetAQESuite.scala index 6fd8a4613088..4881dd15cef2 100644 --- a/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseTPCDSParquetAQESuite.scala +++ b/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseTPCDSParquetAQESuite.scala @@ -70,7 +70,7 @@ class GlutenClickHouseTPCDSParquetAQESuite | where ss_quantity between 1 and 20 | and ss_sold_date_sk = 2452635 |""".stripMargin) { _ => } - assert(result(0).getDouble(0) == 379.21313271604936) + AlmostEqualsIsRel(379.21313271604936, result.head.getDouble(0), DBL_RELAX_EPSILON) } test("test select avg(int), avg(long)") { diff --git 
a/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseTPCDSParquetColumnarShuffleAQESuite.scala b/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseTPCDSParquetColumnarShuffleAQESuite.scala index 5c33edf8f2fb..f1801f25cdc6 100644 --- a/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseTPCDSParquetColumnarShuffleAQESuite.scala +++ b/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseTPCDSParquetColumnarShuffleAQESuite.scala @@ -63,7 +63,7 @@ class GlutenClickHouseTPCDSParquetColumnarShuffleAQESuite | where ss_quantity between 1 and 20 | and ss_sold_date_sk = 2452635 |""".stripMargin) { _ => } - assert(result(0).getDouble(0) == 379.21313271604936) + AlmostEqualsIsRel(379.21313271604936, result.head.getDouble(0), DBL_RELAX_EPSILON) } test("test select avg(int), avg(long)") { diff --git a/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseTPCDSParquetColumnarShuffleSuite.scala b/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseTPCDSParquetColumnarShuffleSuite.scala index 0aeef7ee7c69..11c51aaaa8e5 100644 --- a/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseTPCDSParquetColumnarShuffleSuite.scala +++ b/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseTPCDSParquetColumnarShuffleSuite.scala @@ -60,7 +60,7 @@ class GlutenClickHouseTPCDSParquetColumnarShuffleSuite extends GlutenClickHouseT | where ss_quantity between 1 and 20 | and ss_sold_date_sk = 2452635 |""".stripMargin) { _ => } - assert(result(0).getDouble(0) == 379.21313271604936) + AlmostEqualsIsRel(379.21313271604936, result.head.getDouble(0), DBL_RELAX_EPSILON) } test("test select avg(int), avg(long)") { diff --git a/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseTPCDSParquetSuite.scala 
b/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseTPCDSParquetSuite.scala index 062d518f3d81..7f52a5ccb357 100644 --- a/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseTPCDSParquetSuite.scala +++ b/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseTPCDSParquetSuite.scala @@ -76,7 +76,7 @@ class GlutenClickHouseTPCDSParquetSuite extends GlutenClickHouseTPCDSAbstractSui | where ss_quantity between 1 and 20 | and ss_sold_date_sk = 2452635 |""".stripMargin) { _ => } - assert(result(0).getDouble(0) == 379.21313271604936) + AlmostEqualsIsRel(379.21313271604936, result.head.getDouble(0), DBL_RELAX_EPSILON) } test("test select avg(int), avg(long)") { diff --git a/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseWholeStageTransformerSuite.scala b/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseWholeStageTransformerSuite.scala index 39df9da0f9fa..89bde0dff6fe 100644 --- a/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseWholeStageTransformerSuite.scala +++ b/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseWholeStageTransformerSuite.scala @@ -16,4 +16,20 @@ */ package io.glutenproject.execution -abstract class GlutenClickHouseWholeStageTransformerSuite extends WholeStageTransformerSuite {} +abstract class GlutenClickHouseWholeStageTransformerSuite extends WholeStageTransformerSuite { + + val DBL_EPSILON = 2.2204460492503131e-16 + val DBL_RELAX_EPSILON: Double = Math.pow(10, -11) + val FLT_EPSILON = 1.19209290e-07f + def AlmostEqualsIsRel(expected: Double, actual: Double, EPSILON: Double = DBL_EPSILON): Unit = { + val diff = Math.abs(expected - actual) + val epsilon = EPSILON * Math.max(Math.abs(expected), Math.abs(actual)) + if (diff > epsilon) { + fail(s""" + |expected: $expected + |actual: $actual + | abs(expected-actual) ~ epsilon = $diff ~ $epsilon + |""".stripMargin) 
+ } + } +} diff --git a/cpp-ch/clickhouse.version b/cpp-ch/clickhouse.version index 11ec0bc80d03..9ce2a9917de8 100644 --- a/cpp-ch/clickhouse.version +++ b/cpp-ch/clickhouse.version @@ -1,3 +1,3 @@ CH_ORG=Kyligence -CH_BRANCH=rebase_ch/20240103 -CH_COMMIT=9c039962813 +CH_BRANCH=rebase_ch/20240104 +CH_COMMIT=7ed09d2c7ac diff --git a/cpp-ch/local-engine/Storages/ch_parquet/OptimizedParquetBlockInputFormat.cpp b/cpp-ch/local-engine/Storages/ch_parquet/OptimizedParquetBlockInputFormat.cpp index e5780e7e0d44..f47ffad286b1 100644 --- a/cpp-ch/local-engine/Storages/ch_parquet/OptimizedParquetBlockInputFormat.cpp +++ b/cpp-ch/local-engine/Storages/ch_parquet/OptimizedParquetBlockInputFormat.cpp @@ -45,7 +45,7 @@ OptimizedParquetBlockInputFormat::OptimizedParquetBlockInputFormat(ReadBuffer & { } -Chunk OptimizedParquetBlockInputFormat::generate() +Chunk OptimizedParquetBlockInputFormat::read() { Chunk res; block_missing_values.clear(); @@ -62,7 +62,7 @@ Chunk OptimizedParquetBlockInputFormat::generate() std::shared_ptr table; arrow::Status read_status = file_reader->ReadRowGroup(row_group_current, column_indices, &table); if (!read_status.ok()) - throw ParsingException(ErrorCodes::CANNOT_READ_ALL_DATA, "Error while reading Parquet data: {}", read_status.ToString()); + throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "Error while reading Parquet data: {}", read_status.ToString()); if (format_settings.use_lowercase_column_name) table = *table->RenameColumns(column_names); diff --git a/cpp-ch/local-engine/Storages/ch_parquet/OptimizedParquetBlockInputFormat.h b/cpp-ch/local-engine/Storages/ch_parquet/OptimizedParquetBlockInputFormat.h index 2e1ff8578f0d..6b55cc24f03f 100644 --- a/cpp-ch/local-engine/Storages/ch_parquet/OptimizedParquetBlockInputFormat.h +++ b/cpp-ch/local-engine/Storages/ch_parquet/OptimizedParquetBlockInputFormat.h @@ -48,7 +48,7 @@ class OptimizedParquetBlockInputFormat : public IInputFormat const BlockMissingValues & getMissingValues() const override; 
private: - Chunk generate() override; + Chunk read() override; protected: void prepareReader(); diff --git a/cpp-ch/local-engine/tests/gtest_ch_join.cpp b/cpp-ch/local-engine/tests/gtest_ch_join.cpp index 9dbd0202aa07..3e8e599c9eb3 100644 --- a/cpp-ch/local-engine/tests/gtest_ch_join.cpp +++ b/cpp-ch/local-engine/tests/gtest_ch_join.cpp @@ -199,6 +199,7 @@ TEST(TestJoin, StorageJoinFromReadBufferTest) auto table_join = std::make_shared(SizeLimits(), false, JoinKind::Left, JoinStrictness::All, cols); auto join_storage = std::shared_ptr(new StorageJoinFromReadBuffer( // NOLINT *in, + 2048, // Even if you don't know the number of rows, passing an arbitrary value is fine cols, false, table_join,