Skip to content

Commit

Permalink
[GLUTEN-1632][CH] Daily Update ClickHouse Version (20241204) (#8135)
Browse files Browse the repository at this point in the history
* [GLUTEN-1632][CH] Daily Update ClickHouse Version (20241204)

* Fix Build due to ClickHouse/ClickHouse#72715

* Fix Build due to ClickHouse/ClickHouse#65691

* Fix Build due to ClickHouse/ClickHouse#72722

* Fix gtest due to #8052

* Fix benchmark due to ClickHouse/ClickHouse#72460

* Add SPARK_DIR_NAME to the test path to fix an unstable unit test (UT)

---------

Co-authored-by: kyligence-git <[email protected]>
Co-authored-by: Chang Chen <[email protected]>
  • Loading branch information
3 people authored Dec 4, 2024
1 parent 6ffab3a commit 6dd65c0
Show file tree
Hide file tree
Showing 11 changed files with 22 additions and 21 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ class GlutenClickHouseHDFSSuite

test("GLUTEN-7542: Fix cache refresh") {
withSQLConf("spark.sql.hive.manageFilesourcePartitions" -> "false") {
val filePath = s"$tablesPath/issue_7542/"
val filePath = s"$tablesPath/$SPARK_DIR_NAME/issue_7542/"
val targetDirs = new Path(filePath)
val fs = targetDirs.getFileSystem(spark.sessionState.newHadoopConf())
fs.mkdirs(targetDirs)
Expand Down
4 changes: 2 additions & 2 deletions cpp-ch/clickhouse.version
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
CH_ORG=Kyligence
CH_BRANCH=rebase_ch/20241203
CH_COMMIT=473be0b9c50
CH_BRANCH=rebase_ch/20241204
CH_COMMIT=7b72d0dc991
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,6 @@
*/
#include <vector>
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/FactoryHelpers.h>
#include <AggregateFunctions/Helpers.h>
#include <AggregateFunctions/IAggregateFunction.h>
#include <AggregateFunctions/IAggregateFunction_fwd.h>
#include <Columns/ColumnArray.h>
Expand All @@ -28,17 +26,14 @@
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/IDataType.h>
#include <DataTypes/Serializations/ISerialization.h>
#include <IO/VarInt.h>
#include <Interpreters/Context.h>
#include <Parsers/ASTExpressionList.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTOrderByElement.h>
#include <Parsers/ExpressionElementParsers.h>
#include <Parsers/ExpressionListParsers.h>
#include <Parsers/parseQuery.h>
#include <Parsers/queryToString.h>
#include <Common/Exception.h>

#include <Poco/Logger.h>
#include <Common/logger_useful.h>

namespace DB::ErrorCodes
Expand Down
4 changes: 4 additions & 0 deletions cpp-ch/local-engine/Functions/SparkFunctionFloor.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionsRound.h>

#if USE_MULTITARGET_CODE
#include <immintrin.h>
#endif

using namespace DB;

namespace local_engine
Expand Down
1 change: 1 addition & 0 deletions cpp-ch/local-engine/Shuffle/PartitionWriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include <format>
#include <memory>
#include <vector>
#include <Columns/ColumnsNumber.h>
#include <IO/ReadBufferFromFile.h>
#include <IO/WriteBufferFromFile.h>
#include <IO/WriteBufferFromString.h>
Expand Down
1 change: 1 addition & 0 deletions cpp-ch/local-engine/Shuffle/ShuffleReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include <jni.h>
#include <Formats/NativeReader.h>
#include <IO/BufferWithOwnMemory.h>
#include <IO/ReadBuffer.h>
#include <Storages/IO/NativeReader.h>
#include <Common/BlockIterator.h>

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include "SparkStorageMergeTree.h"

#include <Disks/ObjectStorages/CompactObjectStorageDiskTransaction.h>
#include <Disks/SingleDiskVolume.h>
#include <Interpreters/MergeTreeTransaction.h>
#include <Storages/MergeTree/DataPartStorageOnDiskFull.h>
#include <Storages/MergeTree/MergeTreeSettings.h>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -616,21 +616,20 @@ ConcurrentLRU<std::string, std::shared_ptr<DB::S3::Client>> S3FileReadBufferBuil
class AzureBlobReadBuffer : public ReadBufferBuilder
{
public:
explicit AzureBlobReadBuffer(DB::ContextPtr context_) : ReadBufferBuilder(context_) { }
explicit AzureBlobReadBuffer(const DB::ContextPtr & context_) : ReadBufferBuilder(context_) { }
~AzureBlobReadBuffer() override = default;

std::unique_ptr<DB::ReadBuffer> build(const substrait::ReadRel::LocalFiles::FileOrFiles & file_info) override
{
Poco::URI file_uri(file_info.uri_file());
std::unique_ptr<DB::ReadBuffer> read_buffer;
read_buffer = std::make_unique<DB::ReadBufferFromAzureBlobStorage>(getClient(), file_uri.getPath(), DB::ReadSettings(), 5, 5);
return read_buffer;
return std::make_unique<DB::ReadBufferFromAzureBlobStorage>(getClient(), file_uri.getPath(), DB::ReadSettings(), 5, 5);
}

private:
std::shared_ptr<Azure::Storage::Blobs::BlobContainerClient> shared_client;

std::shared_ptr<Azure::Storage::Blobs::BlobContainerClient> getClient()
std::shared_ptr<DB::AzureBlobStorage::ContainerClient> shared_client;

std::shared_ptr<DB::AzureBlobStorage::ContainerClient> getClient()
{
if (shared_client)
return shared_client;
Expand Down Expand Up @@ -687,7 +686,7 @@ DB::ReadSettings ReadBufferBuilder::getReadSettings() const
return read_settings;
}

ReadBufferBuilder::ReadBufferBuilder(DB::ContextPtr context_) : context(context_)
ReadBufferBuilder::ReadBufferBuilder(const DB::ContextPtr & context_) : context(context_)
{
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ namespace local_engine
class ReadBufferBuilder
{
public:
explicit ReadBufferBuilder(DB::ContextPtr context_);
explicit ReadBufferBuilder(const DB::ContextPtr & context_);

virtual ~ReadBufferBuilder() = default;

Expand Down
2 changes: 1 addition & 1 deletion cpp-ch/local-engine/tests/benchmark_local_engine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -846,7 +846,7 @@ QueryPlanPtr joinPlan(QueryPlanPtr left, QueryPlanPtr right, String left_key, St
auto hash_join = std::make_shared<HashJoin>(join, right->getCurrentHeader());

QueryPlanStepPtr join_step
= std::make_unique<JoinStep>(left->getCurrentHeader(), right->getCurrentHeader(), hash_join, block_size, 0, 1, false);
= std::make_unique<JoinStep>(left->getCurrentHeader(), right->getCurrentHeader(), hash_join, block_size, 8192, 1, NameSet{}, false, false);

std::vector<QueryPlanPtr> plans;
plans.emplace_back(std::move(left));
Expand Down
6 changes: 3 additions & 3 deletions cpp-ch/local-engine/tests/gtest_write_pipeline.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ TEST(WritePipeline, SubstraitFileSink)
const auto context = DB::Context::createCopy(QueryContext::globalContext());
GlutenWriteSettings settings{
.task_write_tmp_dir = "file:///tmp/test_table/test",
.task_write_filename = "data.parquet",
.task_write_filename_pattern = "data.parquet",
};
settings.set(context);

Expand Down Expand Up @@ -155,7 +155,7 @@ TEST(WritePipeline, SubstraitFileSink)
std::cerr << debug::verticalShowString(x, 10, 50) << std::endl;
EXPECT_EQ(1, x.rows());
const auto & col_a = *(x.getColumns()[0]);
EXPECT_EQ(settings.task_write_filename, col_a.getDataAt(0));
EXPECT_EQ(settings.task_write_filename_pattern, col_a.getDataAt(0));
const auto & col_b = *(x.getColumns()[1]);
EXPECT_EQ(SubstraitFileSink::NO_PARTITION_ID, col_b.getDataAt(0));
const auto & col_c = *(x.getColumns()[2]);
Expand All @@ -169,7 +169,7 @@ TEST(WritePipeline, SubstraitPartitionedFileSink)
const auto context = DB::Context::createCopy(QueryContext::globalContext());
GlutenWriteSettings settings{
.task_write_tmp_dir = "file:///tmp/test_table/test_partition",
.task_write_filename = "data.parquet",
.task_write_filename_pattern = "data.parquet",
};
settings.set(context);

Expand Down

0 comments on commit 6dd65c0

Please sign in to comment.