From 0cb2db3b7d9357a7581298274408b7604bd42ffc Mon Sep 17 00:00:00 2001 From: Kyligence Git Date: Sat, 6 Jul 2024 04:25:00 -0500 Subject: [PATCH] [GLUTEN-1632][CH]Daily Update Clickhouse Version (20240706) (#6359) * [GLUTEN-1632][CH]Daily Update Clickhouse Version (20240706) * Fix build due to https://github.com/ClickHouse/ClickHouse/pull/63636 * Revert "[GLUTEN-1632][CH]Daily Update Clickhouse Version (20240705) (#6338)" This reverts commit 4a674e5e8ab757b7699f8bc75377e67fe793ed17. * exclude "shift left", "shift right", "shift right unsigned" due to https://github.com/ClickHouse/ClickHouse/pull/65838 --------- Co-authored-by: kyligence-git Co-authored-by: Chang Chen --- cpp-ch/clickhouse.version | 4 ++-- .../Operator/DefaultHashAggregateResult.cpp | 6 +++--- .../Mergetree/SparkMergeTreeWriter.cpp | 7 ++++--- .../Storages/Mergetree/SparkMergeTreeWriter.h | 2 +- .../Storages/SourceFromJavaIter.cpp | 4 ++-- .../SubstraitSource/ReadBufferBuilder.cpp | 19 +++++++++++++++++-- cpp-ch/local-engine/tests/gtest_parser.cpp | 3 +-- .../clickhouse/ClickHouseTestSettings.scala | 3 +++ .../clickhouse/ClickHouseTestSettings.scala | 3 +++ .../clickhouse/ClickHouseTestSettings.scala | 3 +++ .../clickhouse/ClickHouseTestSettings.scala | 3 +++ 11 files changed, 42 insertions(+), 15 deletions(-) diff --git a/cpp-ch/clickhouse.version b/cpp-ch/clickhouse.version index 92bf886e9b85..6bba1b705e70 100644 --- a/cpp-ch/clickhouse.version +++ b/cpp-ch/clickhouse.version @@ -1,4 +1,4 @@ CH_ORG=Kyligence -CH_BRANCH=rebase_ch/20240705 -CH_COMMIT=531a87ed802 +CH_BRANCH=rebase_ch/20240706 +CH_COMMIT=25bf31bfbdf diff --git a/cpp-ch/local-engine/Operator/DefaultHashAggregateResult.cpp b/cpp-ch/local-engine/Operator/DefaultHashAggregateResult.cpp index fbad02fda592..35f891581595 100644 --- a/cpp-ch/local-engine/Operator/DefaultHashAggregateResult.cpp +++ b/cpp-ch/local-engine/Operator/DefaultHashAggregateResult.cpp @@ -116,7 +116,7 @@ class DefaultHashAggrgateResultTransform : public DB::IProcessor 
has_input = true; output_chunk = DB::Chunk(result_cols, 1); auto info = std::make_shared(); - output_chunk.getChunkInfos().add(std::move(info)); + output_chunk.setChunkInfo(info); return Status::Ready; } @@ -124,10 +124,10 @@ class DefaultHashAggrgateResultTransform : public DB::IProcessor if (input.hasData()) { output_chunk = input.pull(true); - if (output_chunk.getChunkInfos().empty()) + if (!output_chunk.hasChunkInfo()) { auto info = std::make_shared(); - output_chunk.getChunkInfos().add(std::move(info)); + output_chunk.setChunkInfo(info); } has_input = true; return Status::Ready; diff --git a/cpp-ch/local-engine/Storages/Mergetree/SparkMergeTreeWriter.cpp b/cpp-ch/local-engine/Storages/Mergetree/SparkMergeTreeWriter.cpp index 2f673fc386e8..406f2aaa23df 100644 --- a/cpp-ch/local-engine/Storages/Mergetree/SparkMergeTreeWriter.cpp +++ b/cpp-ch/local-engine/Storages/Mergetree/SparkMergeTreeWriter.cpp @@ -121,11 +121,12 @@ void SparkMergeTreeWriter::write(const DB::Block & block) checkAndMerge(); } -bool SparkMergeTreeWriter::chunkToPart(Chunk && plan_chunk) +bool SparkMergeTreeWriter::chunkToPart(Chunk && chunk) { - if (Chunk result_chunk = DB::Squashing::squash(std::move(plan_chunk))) + if (chunk.hasChunkInfo()) { - auto result = squashing->getHeader().cloneWithColumns(result_chunk.detachColumns()); + Chunk squash_chunk = DB::Squashing::squash(std::move(chunk)); + Block result = header.cloneWithColumns(squash_chunk.getColumns()); return blockToPart(result); } return false; diff --git a/cpp-ch/local-engine/Storages/Mergetree/SparkMergeTreeWriter.h b/cpp-ch/local-engine/Storages/Mergetree/SparkMergeTreeWriter.h index 269b0352c056..13ac22394477 100644 --- a/cpp-ch/local-engine/Storages/Mergetree/SparkMergeTreeWriter.h +++ b/cpp-ch/local-engine/Storages/Mergetree/SparkMergeTreeWriter.h @@ -77,7 +77,7 @@ class SparkMergeTreeWriter void saveMetadata(); void commitPartToRemoteStorageIfNeeded(); void finalizeMerge(); - bool chunkToPart(Chunk && plan_chunk); + bool 
chunkToPart(Chunk && chunk); bool blockToPart(Block & block); bool useLocalStorage() const; diff --git a/cpp-ch/local-engine/Storages/SourceFromJavaIter.cpp b/cpp-ch/local-engine/Storages/SourceFromJavaIter.cpp index 1c5902c8ca67..37501e98504a 100644 --- a/cpp-ch/local-engine/Storages/SourceFromJavaIter.cpp +++ b/cpp-ch/local-engine/Storages/SourceFromJavaIter.cpp @@ -109,13 +109,13 @@ DB::Chunk SourceFromJavaIter::generate() auto info = std::make_shared(); info->is_overflows = data->info.is_overflows; info->bucket_num = data->info.bucket_num; - result.getChunkInfos().add(std::move(info)); + result.setChunkInfo(info); } else { result = BlockUtil::buildRowCountChunk(rows); auto info = std::make_shared(); - result.getChunkInfos().add(std::move(info)); + result.setChunkInfo(info); } } return result; diff --git a/cpp-ch/local-engine/Storages/SubstraitSource/ReadBufferBuilder.cpp b/cpp-ch/local-engine/Storages/SubstraitSource/ReadBufferBuilder.cpp index ec967a869600..7cafee8fe526 100644 --- a/cpp-ch/local-engine/Storages/SubstraitSource/ReadBufferBuilder.cpp +++ b/cpp-ch/local-engine/Storages/SubstraitSource/ReadBufferBuilder.cpp @@ -23,7 +23,6 @@ #include #include #include -#include #include #include #include @@ -52,6 +51,10 @@ #include #include +#if USE_AZURE_BLOB_STORAGE +#include +#endif + #if USE_AWS_S3 #include #include @@ -687,7 +690,19 @@ class AzureBlobReadBuffer : public ReadBufferBuilder { if (shared_client) return shared_client; - shared_client = DB::getAzureBlobContainerClient(context->getConfigRef(), "blob"); + + const std::string config_prefix = "blob"; + const Poco::Util::AbstractConfiguration & config = context->getConfigRef(); + bool is_client_for_disk = false; + auto new_settings = DB::AzureBlobStorage::getRequestSettings(config, config_prefix, context); + DB::AzureBlobStorage::ConnectionParams params + { + .endpoint = DB::AzureBlobStorage::processEndpoint(config, config_prefix), + .auth_method = DB::AzureBlobStorage::getAuthMethod(config, 
config_prefix), + .client_options = DB::AzureBlobStorage::getClientOptions(*new_settings, is_client_for_disk), + }; + + shared_client = DB::AzureBlobStorage::getContainerClient(params, true); return shared_client; } }; diff --git a/cpp-ch/local-engine/tests/gtest_parser.cpp b/cpp-ch/local-engine/tests/gtest_parser.cpp index 34b3a8875f1a..24c796358f45 100644 --- a/cpp-ch/local-engine/tests/gtest_parser.cpp +++ b/cpp-ch/local-engine/tests/gtest_parser.cpp @@ -101,8 +101,7 @@ TEST(LocalExecutor, StorageObjectStorageSink) /// 2. Create Chunk /// 3. comsume - Chunk data = testChunk(); - sink.consume(data); + sink.consume(testChunk()); sink.onFinish(); } diff --git a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala index 60df3ee37f66..a7ffbc9fa7f6 100644 --- a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala +++ b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala @@ -850,6 +850,9 @@ class ClickHouseTestSettings extends BackendTestSettings { .exclude("atan2") .exclude("round/bround") .exclude("SPARK-37388: width_bucket") + .exclude("shift left") + .exclude("shift right") + .exclude("shift right unsigned") enableSuite[GlutenMiscExpressionsSuite] enableSuite[GlutenNondeterministicSuite] .exclude("MonotonicallyIncreasingID") diff --git a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala index df9f49bfc72e..ceb0d8a87e21 100644 --- a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala +++ b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala @@ -809,6 +809,9 @@ class ClickHouseTestSettings extends 
BackendTestSettings { .exclude("SPARK-35926: Support YearMonthIntervalType in width-bucket function") .exclude("SPARK-35925: Support DayTimeIntervalType in width-bucket function") .exclude("SPARK-37388: width_bucket") + .exclude("shift left") + .exclude("shift right") + .exclude("shift right unsigned") enableSuite[GlutenMiscExpressionsSuite] enableSuite[GlutenNondeterministicSuite] .exclude("MonotonicallyIncreasingID") diff --git a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala index 0dc2cdd89f93..66007a36770f 100644 --- a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala +++ b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala @@ -711,6 +711,9 @@ class ClickHouseTestSettings extends BackendTestSettings { .exclude("SPARK-35926: Support YearMonthIntervalType in width-bucket function") .exclude("SPARK-35925: Support DayTimeIntervalType in width-bucket function") .exclude("SPARK-37388: width_bucket") + .exclude("shift left") + .exclude("shift right") + .exclude("shift right unsigned") enableSuite[GlutenMiscExpressionsSuite] enableSuite[GlutenNondeterministicSuite] .exclude("MonotonicallyIncreasingID") diff --git a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala index 0dc2cdd89f93..66007a36770f 100644 --- a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala +++ b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala @@ -711,6 +711,9 @@ class ClickHouseTestSettings extends BackendTestSettings { .exclude("SPARK-35926: Support YearMonthIntervalType in width-bucket function") .exclude("SPARK-35925: 
Support DayTimeIntervalType in width-bucket function") .exclude("SPARK-37388: width_bucket") + .exclude("shift left") + .exclude("shift right") + .exclude("shift right unsigned") enableSuite[GlutenMiscExpressionsSuite] enableSuite[GlutenNondeterministicSuite] .exclude("MonotonicallyIncreasingID")