Skip to content

Commit

Permalink
[GLUTEN-1632][CH]Daily Update Clickhouse Version (20240920) (apache#7299)
Browse files Browse the repository at this point in the history

* [GLUTEN-1632][CH]Daily Update Clickhouse Version (20240920)

* Fix build due to ClickHouse/ClickHouse#69213
---------

Co-authored-by: kyligence-git <[email protected]>
Co-authored-by: Chang Chen <[email protected]>
  • Loading branch information
3 people authored Sep 20, 2024
1 parent aeed56a commit c5af284
Show file tree
Hide file tree
Showing 29 changed files with 304 additions and 148 deletions.
4 changes: 2 additions & 2 deletions cpp-ch/clickhouse.version
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
CH_ORG=Kyligence
CH_BRANCH=rebase_ch/20240918
CH_COMMIT=cc6de0f1995
CH_BRANCH=rebase_ch/20240920
CH_COMMIT=14c2da664d7
13 changes: 9 additions & 4 deletions cpp-ch/local-engine/Common/CHUtil.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,11 @@

namespace DB
{
namespace Setting
{
extern const SettingsUInt64 prefer_external_sort_block_bytes;
extern const SettingsUInt64 max_bytes_before_external_sort;
}
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
Expand Down Expand Up @@ -701,12 +706,12 @@ void BackendInitializerUtil::initEnvs(DB::Context::ConfigurationPtr config)
spark_user = spark_user_c_str;
}

DB::Field BackendInitializerUtil::toField(const String key, const String value)
DB::Field BackendInitializerUtil::toField(const String & key, const String & value)
{
if (BOOL_VALUE_SETTINGS.contains(key))
return DB::Field(value == "true" || value == "1");
else if (LONG_VALUE_SETTINGS.contains(key))
return DB::Field(std::strtoll(value.c_str(), NULL, 10));
return DB::Field(std::strtoll(value.c_str(), nullptr, 10));
else
return DB::Field(value);
}
Expand Down Expand Up @@ -797,13 +802,13 @@ void BackendInitializerUtil::initSettings(std::map<std::string, std::string> & b
auto task_memory = std::stoull(backend_conf_map.at(GLUTEN_TASK_OFFHEAP));
if (!backend_conf_map.contains(CH_RUNTIME_SETTINGS_PREFIX + "max_bytes_before_external_sort"))
{
settings.max_bytes_before_external_sort = static_cast<size_t>(0.8 * task_memory);
settings[Setting::max_bytes_before_external_sort] = static_cast<size_t>(0.8 * task_memory);
}
if (!backend_conf_map.contains(CH_RUNTIME_SETTINGS_PREFIX + "prefer_external_sort_block_bytes"))
{
auto mem_gb = task_memory / static_cast<double>(1_GiB);
// Heuristic for the external sort block size: (2.8 * mem_gb + 5) MiB, clamped to the range [8, 16] MiB.
settings.prefer_external_sort_block_bytes = std::max(std::min(static_cast<size_t>(2.8 * mem_gb + 5), 16ul), 8ul) * 1024 * 1024;
settings[Setting::prefer_external_sort_block_bytes] = std::max(std::min(static_cast<size_t>(2.8 * mem_gb + 5), 16ul), 8ul) * 1024 * 1024;
}
}
}
Expand Down
7 changes: 1 addition & 6 deletions cpp-ch/local-engine/Common/CHUtil.h
Original file line number Diff line number Diff line change
Expand Up @@ -154,19 +154,14 @@ class JNIUtils;
class BackendInitializerUtil
{
public:
static DB::Field toField(const String key, const String value);
static DB::Field toField(const String & key, const String & value);

/// Initialize two kinds of resources
/// 1. global level resources like global_context/shared_context, notice that they can only be initialized once in process lifetime
/// 2. session level resources like settings/configs, they can be initialized multiple times following the lifetime of executor/driver
static void init(const std::string_view plan);
static void updateConfig(const DB::ContextMutablePtr &, std::string_view);

// use excel text parser
inline static const std::string USE_EXCEL_PARSER = "use_excel_serialization";
inline static const std::string EXCEL_EMPTY_AS_NULL = "use_excel_serialization.empty_as_null";
inline static const std::string EXCEL_NUMBER_FORCE = "use_excel_serialization.number_force";
inline static const std::string EXCEL_QUOTE_STRICT = "use_excel_serialization.quote_strict";
inline static const String CH_BACKEND_PREFIX = "spark.gluten.sql.columnar.backend.ch";

inline static const String CH_RUNTIME_CONFIG = "runtime_config";
Expand Down
1 change: 1 addition & 0 deletions cpp-ch/local-engine/Common/GlutenConfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include <Interpreters/Context.h>
#include <base/types.h>
#include <base/unit.h>
#include <Poco/Util/AbstractConfiguration.h>
#include <Common/logger_useful.h>

namespace local_engine
Expand Down
41 changes: 41 additions & 0 deletions cpp-ch/local-engine/Common/GlutenSettings.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include "GlutenSettings.h"
#include <Core/Settings.h>

using namespace DB;
namespace local_engine
{

/// Looks up the setting called `name` in `settings`.
/// When the lookup succeeds, the setting's value is read as a String, stored into `value`,
/// and true is returned. When the lookup fails, `value` is not assigned and false is returned.
bool tryGetString(const DB::Settings & settings, std::string_view name, std::string & value)
{
    DB::Field found;
    if (!settings.tryGet(name, found))
        return false;

    value = found.safeGet<String>();
    return true;
}
/// Returns true only when `settings` contains a setting called `name` whose value,
/// read as a String, compares equal to `value`. A failed lookup yields false.
bool settingsEqual(const DB::Settings & settings, std::string_view name, const std::string & value)
{
    DB::Field found;
    const bool exists = settings.tryGet(name, found);
    // Short-circuit keeps the original order: the field is only read after a successful lookup.
    return exists && found.safeGet<String>() == value;
}
}
11 changes: 10 additions & 1 deletion cpp-ch/local-engine/Common/GlutenSettings.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,13 @@
* limitations under the License.
*/
#pragma once

#include <Interpreters/Context_fwd.h>

namespace DB
{
struct Settings;
}
namespace local_engine
{

Expand Down Expand Up @@ -59,5 +64,9 @@ namespace local_engine
LIST_OF_SETTINGS_MACRO(IMPLEMENT_GLUTEN_SET_, SKIP_ALIAS, _) \
}

// workaround for tryGetString

bool tryGetString(const DB::Settings & settings, std::string_view name, std::string & value);
bool settingsEqual(const DB::Settings & settings, std::string_view name, const std::string & value);

}
} // namespace local_engine
2 changes: 2 additions & 0 deletions cpp-ch/local-engine/Common/GlutenSignalHandler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@
#include <base/phdr_cache.h>
#include <base/sleep.h>
#include <Poco/Exception.h>
#include <Poco/Runnable.h>
#include <Poco/Thread.h>
#include <Common/CurrentThread.h>
#include <Common/GlutenSignalHandler.h>
#include <Common/MemoryTracker.h>
Expand Down
5 changes: 3 additions & 2 deletions cpp-ch/local-engine/Disks/ObjectStorages/GlutenDiskHDFS.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,11 @@

#include <config.h>

#include <Common/Throttler.h>
#include <Disks/ObjectStorages/DiskObjectStorage.h>
#include <Disks/ObjectStorages/Cached/CachedObjectStorage.h>
#include <Disks/ObjectStorages/DiskObjectStorage.h>
#include <Interpreters/Cache/FileCacheFactory.h>
#include <Common/Throttler.h>
#include <Common/typeid_cast.h>
#if USE_HDFS
#include <Disks/ObjectStorages/GlutenHDFSObjectStorage.h>
#endif
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,11 @@

namespace DB
{
namespace Setting
{
extern const SettingsUInt64 hdfs_replication;
}

namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
Expand Down Expand Up @@ -120,7 +125,7 @@ void registerGlutenHDFSObjectStorage(ObjectStorageFactory & factory)

std::unique_ptr<HDFSObjectStorageSettings> settings = std::make_unique<HDFSObjectStorageSettings>(
config.getUInt64(config_prefix + ".min_bytes_for_seek", 1024 * 1024),
context->getSettingsRef().hdfs_replication
context->getSettingsRef()[Setting::hdfs_replication]
);
return std::make_shared<GlutenHDFSObjectStorage>(uri, std::move(settings), config);
});
Expand Down
16 changes: 10 additions & 6 deletions cpp-ch/local-engine/Functions/SparkFunctionGetJsonObject.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,19 +18,17 @@
#include <memory>
#include <string_view>
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnTuple.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypeTuple.h>
#include <DataTypes/IDataType.h>
#include <Functions/FunctionSQLJSON.h>
#include <Functions/IFunction.h>
#include <Functions/JSONPath/ASTs/ASTJSONPath.h>
#include <Functions/JSONPath/Generator/GeneratorJSONPath.h>
#include <Functions/JSONPath/Parsers/ParserJSONPath.h>
#include <Interpreters/Context.h>
#include <Parsers/IAST.h>
#include <Parsers/IParser.h>
#include <Parsers/Lexer.h>
#include <Parsers/TokenIterator.h>
#include <base/find_symbols.h>
#include <base/range.h>
Expand All @@ -44,6 +42,12 @@

namespace DB
{
namespace Setting
{
extern const SettingsBool allow_simdjson;
extern const SettingsUInt64 max_parser_depth;
extern const SettingsUInt64 max_parser_backtracks;
}
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
Expand Down Expand Up @@ -526,7 +530,7 @@ class FlattenJSONStringOnRequiredFunction : public DB::IFunction
const DB::ColumnsWithTypeAndName & arguments, const DB::DataTypePtr & /*result_type*/, size_t /*input_rows_count*/) const override
{
#if USE_SIMDJSON
if (context->getSettingsRef().allow_simdjson)
if (context->getSettingsRef()[DB::Setting::allow_simdjson])
{
return innerExecuteImpl<
DB::SimdJSONParser,
Expand Down Expand Up @@ -600,8 +604,8 @@ class FlattenJSONStringOnRequiredFunction : public DB::IFunction
const char * query_begin = reinterpret_cast<const char *>(required_fields.back().c_str());
const char * query_end = required_fields.back().c_str() + required_fields.back().size();
DB::Tokens tokens(query_begin, query_end);
UInt32 max_parser_depth = static_cast<UInt32>(context->getSettingsRef().max_parser_depth);
UInt32 max_parser_backtracks = static_cast<UInt32>(context->getSettingsRef().max_parser_backtracks);
UInt32 max_parser_depth = static_cast<UInt32>(context->getSettingsRef()[DB::Setting::max_parser_depth]);
UInt32 max_parser_backtracks = static_cast<UInt32>(context->getSettingsRef()[DB::Setting::max_parser_backtracks]);
DB::IParser::Pos token_iterator(tokens, max_parser_depth, max_parser_backtracks);
DB::ASTPtr json_path_ast;
DB::ParserJSONPath path_parser;
Expand Down
6 changes: 5 additions & 1 deletion cpp-ch/local-engine/Parser/LocalExecutor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@
#include <QueryPipeline/printPipeline.h>
#include <Common/QueryContext.h>

namespace DB::Setting
{
extern const SettingsMaxThreads max_threads;
}
using namespace DB;
namespace local_engine
{
Expand Down Expand Up @@ -123,7 +127,7 @@ void LocalExecutor::execute()
{
chassert(query_pipeline_builder);
push_executor = query_pipeline_builder->execute();
push_executor->execute(local_engine::QueryContext::instance().currentQueryContext()->getSettingsRef().max_threads, false);
push_executor->execute(QueryContext::instance().currentQueryContext()->getSettingsRef()[Setting::max_threads], false);
}

Block LocalExecutor::getHeader()
Expand Down
Loading

0 comments on commit c5af284

Please sign in to comment.