From 8c73b403b5f8d2712923e39f53bd0dd5340ab936 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Sat, 17 Jul 2021 08:44:52 +0300 Subject: [PATCH 001/472] Auto version update to [21.9.2.1] [54454] --- cmake/autogenerated_versions.txt | 8 ++++---- debian/changelog | 4 ++-- docker/client/Dockerfile | 2 +- docker/server/Dockerfile | 2 +- docker/test/Dockerfile | 2 +- .../System/StorageSystemContributors.generated.cpp | 14 ++++++++++++++ 6 files changed, 23 insertions(+), 9 deletions(-) diff --git a/cmake/autogenerated_versions.txt b/cmake/autogenerated_versions.txt index 18072566d04e..5719ae3a002f 100644 --- a/cmake/autogenerated_versions.txt +++ b/cmake/autogenerated_versions.txt @@ -5,8 +5,8 @@ SET(VERSION_REVISION 54454) SET(VERSION_MAJOR 21) SET(VERSION_MINOR 9) -SET(VERSION_PATCH 1) -SET(VERSION_GITHASH f48c5af90c2ad51955d1ee3b6b05d006b03e4238) -SET(VERSION_DESCRIBE v21.9.1.1-prestable) -SET(VERSION_STRING 21.9.1.1) +SET(VERSION_PATCH 2) +SET(VERSION_GITHASH a091f2e36054061ceebb6826a590f8fb86f01196) +SET(VERSION_DESCRIBE v21.9.2.1-prestable) +SET(VERSION_STRING 21.9.2.1) # end of autochange diff --git a/debian/changelog b/debian/changelog index 38f740ae062e..6b5fb76ba862 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,5 +1,5 @@ -clickhouse (21.9.1.1) unstable; urgency=low +clickhouse (21.9.2.1) unstable; urgency=low * Modified source code - -- clickhouse-release Sat, 10 Jul 2021 08:22:49 +0300 + -- clickhouse-release Sat, 17 Jul 2021 08:44:49 +0300 diff --git a/docker/client/Dockerfile b/docker/client/Dockerfile index f17fa8ade166..d484dade0aae 100644 --- a/docker/client/Dockerfile +++ b/docker/client/Dockerfile @@ -1,7 +1,7 @@ FROM ubuntu:18.04 ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/" -ARG version=21.9.1.* +ARG version=21.9.2.* RUN apt-get update \ && apt-get install --yes --no-install-recommends \ diff --git a/docker/server/Dockerfile b/docker/server/Dockerfile index 5da9e703f4d2..3f9ce5e929be 100644 --- a/docker/server/Dockerfile +++ b/docker/server/Dockerfile @@ -1,7 +1,7 @@ FROM ubuntu:20.04 ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/" -ARG version=21.9.1.* +ARG version=21.9.2.* ARG gosu_ver=1.10 # set non-empty deb_location_url url to create a docker image diff --git a/docker/test/Dockerfile b/docker/test/Dockerfile index 5768753cd7cb..4b8385ed639b 100644 --- a/docker/test/Dockerfile +++ b/docker/test/Dockerfile @@ -1,7 +1,7 @@ FROM ubuntu:18.04 ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/" -ARG version=21.9.1.* +ARG version=21.9.2.* RUN apt-get update && \ apt-get install -y apt-transport-https dirmngr && \ diff --git a/src/Storages/System/StorageSystemContributors.generated.cpp b/src/Storages/System/StorageSystemContributors.generated.cpp index bed8eadc19c3..fe14170165c0 100644 --- a/src/Storages/System/StorageSystemContributors.generated.cpp +++ b/src/Storages/System/StorageSystemContributors.generated.cpp @@ -54,6 +54,7 @@ const char * auto_contributors[] { "Alexander Sapin", "Alexander Tokmakov", "Alexander Tretiakov", + "Alexandra", "Alexandra Latysheva", "Alexandre Snarskii", "Alexandr Kondratev", @@ -95,6 +96,7 @@ const char * auto_contributors[] { "Anatoly Pugachev", "ana-uvarova", "AnaUvarova", + "Andr0901", "Andreas Hunkeler", "AndreevDm", "Andrei Bodrov", @@ -140,6 +142,7 @@ const char * auto_contributors[] { "aprudaev", "Ariel Robaldo", "Arsen Hakobyan", + "Arslan G", "ArtCorp", "Artem Andreenko", "Artemeey", @@ -335,6 +338,7 @@ const char * auto_contributors[] { "fessmage", "FgoDt", 
"fibersel", + "Filatenkov Artur", "filimonov", "filipe", "Filipe Caixeta", @@ -389,6 +393,7 @@ const char * auto_contributors[] { "hexiaoting", "Hiroaki Nakamura", "hotid", + "huangzhaowei", "HuFuwang", "Hui Wang", "hustnn", @@ -404,6 +409,7 @@ const char * auto_contributors[] { "Igr", "Igr Mineev", "ikarishinjieva", + "Ikko Ashimine", "ikopylov", "Ildar Musin", "Ildus Kurbangaliev", @@ -442,6 +448,7 @@ const char * auto_contributors[] { "Jacob Hayes", "jakalletti", "JaosnHsieh", + "jasine", "Jason", "javartisan", "javi", @@ -449,6 +456,7 @@ const char * auto_contributors[] { "Javi Santana", "Javi santana bot", "Jean Baptiste Favre", + "Jeffrey Dang", "jennyma", "jetgm", "Jiading Guo", @@ -502,6 +510,7 @@ const char * auto_contributors[] { "Leopold Schabel", "leozhang", "Lev Borodin", + "levie", "levushkin aleksej", "levysh", "Lewinma", @@ -634,7 +643,9 @@ const char * auto_contributors[] { "nauta", "nautaa", "Neeke Gao", + "neng.liu", "Neng Liu", + "NengLiu", "never lee", "NeZeD [Mac Pro]", "nicelulu", @@ -839,6 +850,7 @@ const char * auto_contributors[] { "TCeason", "Tema Novikov", "templarzq", + "terrylin", "The-Alchemist", "Tiaonmmn", "tiger.yan", @@ -877,6 +889,7 @@ const char * auto_contributors[] { "Veloman Yunkan", "Veniamin Gvozdikov", "Veselkov Konstantin", + "vgocoder", "vic", "vicdashkov", "Victor", @@ -925,6 +938,7 @@ const char * auto_contributors[] { "wzl", "Xianda Ke", "Xiang Zhou", + "xiedeyantu", "xPoSx", "Yağızcan Değirmenci", "yang", From 2dde99fd73fd5547aa80ffe28a4a60889118793f Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Thu, 19 Aug 2021 14:27:12 +0300 Subject: [PATCH 002/472] dear GitHub please let me open this PR From 3084282417d5ac6dd4f87613591f6bae34c31bad Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Thu, 19 Aug 2021 19:24:51 +0300 Subject: [PATCH 003/472] boop the CI From 4a43f3e1d4bd1fe925c2f60cf2a376beeb9311c5 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 20 Aug 2021 00:55:12 +0300 Subject: [PATCH 004/472] Backport #27815 to 21.9: Allow symlinks to library dictionary path --- src/Common/filesystemHelpers.cpp | 25 +++++++++++ src/Common/filesystemHelpers.h | 2 + src/Dictionaries/LibraryDictionarySource.cpp | 9 ++-- tests/integration/test_library_bridge/test.py | 44 +++++++++++++++++++ 4 files changed, 77 insertions(+), 3 deletions(-) diff --git a/src/Common/filesystemHelpers.cpp b/src/Common/filesystemHelpers.cpp index 4855500b7766..9c3db0f3e306 100644 --- a/src/Common/filesystemHelpers.cpp +++ b/src/Common/filesystemHelpers.cpp @@ -122,6 +122,24 @@ bool pathStartsWith(const std::filesystem::path & path, const std::filesystem::p return path_starts_with_prefix_path; } +bool symlinkStartsWith(const std::filesystem::path & path, const std::filesystem::path & prefix_path) +{ + /// Differs from pathStartsWith in how `path` is normalized before comparison. + /// Make `path` absolute if it was relative and put it into normalized form: remove + /// `.` and `..` and extra `/`. Path is not canonized because otherwise path will + /// not be a path of a symlink itself. + + auto absolute_path = std::filesystem::absolute(path); + absolute_path = absolute_path.lexically_normal(); /// Normalize path. + auto absolute_prefix_path = std::filesystem::absolute(prefix_path); + absolute_prefix_path = absolute_prefix_path.lexically_normal(); /// Normalize path. 
+ + auto [_, prefix_path_mismatch_it] = std::mismatch(absolute_path.begin(), absolute_path.end(), absolute_prefix_path.begin(), absolute_prefix_path.end()); + + bool path_starts_with_prefix_path = (prefix_path_mismatch_it == absolute_prefix_path.end()); + return path_starts_with_prefix_path; +} + bool pathStartsWith(const String & path, const String & prefix_path) { auto filesystem_path = std::filesystem::path(path); @@ -130,6 +148,13 @@ bool pathStartsWith(const String & path, const String & prefix_path) return pathStartsWith(filesystem_path, filesystem_prefix_path); } +bool symlinkStartsWith(const String & path, const String & prefix_path) +{ + auto filesystem_path = std::filesystem::path(path); + auto filesystem_prefix_path = std::filesystem::path(prefix_path); + + return symlinkStartsWith(filesystem_path, filesystem_prefix_path); +} } diff --git a/src/Common/filesystemHelpers.h b/src/Common/filesystemHelpers.h index b7525a64fae7..71ef7844ef7c 100644 --- a/src/Common/filesystemHelpers.h +++ b/src/Common/filesystemHelpers.h @@ -35,6 +35,8 @@ bool pathStartsWith(const std::filesystem::path & path, const std::filesystem::p /// Returns true if path starts with prefix path bool pathStartsWith(const String & path, const String & prefix_path); +bool symlinkStartsWith(const String & path, const String & prefix_path); + } namespace FS diff --git a/src/Dictionaries/LibraryDictionarySource.cpp b/src/Dictionaries/LibraryDictionarySource.cpp index 288abde8788d..f2c5cefa5436 100644 --- a/src/Dictionaries/LibraryDictionarySource.cpp +++ b/src/Dictionaries/LibraryDictionarySource.cpp @@ -41,10 +41,13 @@ LibraryDictionarySource::LibraryDictionarySource( , sample_block{sample_block_} , context(Context::createCopy(context_)) { - if (fs::path(path).is_relative()) - path = fs::canonical(path); + bool path_checked = false; + if (fs::is_symlink(path)) + path_checked = symlinkStartsWith(path, context->getDictionariesLibPath()); + else + path_checked = pathStartsWith(path, context->getDictionariesLibPath()); - if (created_from_ddl && !pathStartsWith(path, context->getDictionariesLibPath())) + if (created_from_ddl && !path_checked) throw Exception(ErrorCodes::PATH_ACCESS_DENIED, "File path {} is not inside {}", path, context->getDictionariesLibPath()); if (!fs::exists(path)) diff --git a/tests/integration/test_library_bridge/test.py b/tests/integration/test_library_bridge/test.py index 97b2ccfbdbe8..12a967ebaa47 100644 --- a/tests/integration/test_library_bridge/test.py +++ b/tests/integration/test_library_bridge/test.py @@ -44,6 +44,11 @@ def ch_cluster(): '/usr/bin/g++ -shared -o /etc/clickhouse-server/config.d/dictionaries_lib/dict_lib.so -fPIC /etc/clickhouse-server/config.d/dictionaries_lib/dict_lib.cpp'], user='root') + instance.exec_in_container( + ['bash', '-c', + '/usr/bin/g++ -shared -o /dict_lib_copy.so -fPIC /etc/clickhouse-server/config.d/dictionaries_lib/dict_lib.cpp'], user='root') + instance.exec_in_container(['bash', '-c', 'ln -s /dict_lib_copy.so /etc/clickhouse-server/config.d/dictionaries_lib/dict_lib_symlink.so']) + yield cluster finally: @@ -59,6 +64,7 @@ def test_load_all(ch_cluster): if instance.is_built_with_memory_sanitizer(): pytest.skip("Memory Sanitizer cannot work with third-party shared libraries") + instance.query('DROP DICTIONARY IF EXISTS lib_dict') instance.query(''' CREATE DICTIONARY lib_dict (key UInt64, value1 UInt64, value2 UInt64, value3 UInt64) PRIMARY KEY key @@ -128,6 +134,7 @@ def test_load_keys(ch_cluster): if instance.is_built_with_memory_sanitizer(): 
pytest.skip("Memory Sanitizer cannot work with third-party shared libraries") + instance.query('DROP DICTIONARY IF EXISTS lib_dict_ckc') instance.query(''' CREATE DICTIONARY lib_dict_ckc (key UInt64, value1 UInt64, value2 UInt64, value3 UInt64) PRIMARY KEY key @@ -148,6 +155,7 @@ def test_load_all_many_rows(ch_cluster): pytest.skip("Memory Sanitizer cannot work with third-party shared libraries") num_rows = [1000, 10000, 100000, 1000000] + instance.query('DROP DICTIONARY IF EXISTS lib_dict') for num in num_rows: instance.query(''' CREATE DICTIONARY lib_dict (key UInt64, value1 UInt64, value2 UInt64, value3 UInt64) @@ -267,6 +275,42 @@ def test_bridge_dies_with_parent(ch_cluster): instance.query('DROP DICTIONARY lib_dict_c') +def test_path_validation(ch_cluster): + if instance.is_built_with_memory_sanitizer(): + pytest.skip("Memory Sanitizer cannot work with third-party shared libraries") + + instance.query('DROP DICTIONARY IF EXISTS lib_dict_c') + instance.query(''' + CREATE DICTIONARY lib_dict_c (key UInt64, value1 UInt64, value2 UInt64, value3 UInt64) + PRIMARY KEY key SOURCE(library(PATH '/etc/clickhouse-server/config.d/dictionaries_lib/dict_lib_symlink.so')) + LAYOUT(CACHE( + SIZE_IN_CELLS 10000000 + BLOCK_SIZE 4096 + FILE_SIZE 16777216 + READ_BUFFER_SIZE 1048576 + MAX_STORED_KEYS 1048576)) + LIFETIME(2) ; + ''') + + result = instance.query('''select dictGet(lib_dict_c, 'value1', toUInt64(1));''') + assert(result.strip() == '101') + + instance.query('DROP DICTIONARY IF EXISTS lib_dict_c') + instance.query(''' + CREATE DICTIONARY lib_dict_c (key UInt64, value1 UInt64, value2 UInt64, value3 UInt64) + PRIMARY KEY key SOURCE(library(PATH '/etc/clickhouse-server/config.d/dictionaries_lib/../../../../dict_lib_copy.so')) + LAYOUT(CACHE( + SIZE_IN_CELLS 10000000 + BLOCK_SIZE 4096 + FILE_SIZE 16777216 + READ_BUFFER_SIZE 1048576 + MAX_STORED_KEYS 1048576)) + LIFETIME(2) ; + ''') + result = instance.query_and_get_error('''select dictGet(lib_dict_c, 'value1', toUInt64(1));''') + assert('DB::Exception: File path /etc/clickhouse-server/config.d/dictionaries_lib/../../../../dict_lib_copy.so is not inside /etc/clickhouse-server/config.d/dictionaries_lib' in result) + + if __name__ == '__main__': cluster.start() input("Cluster created, press any key to destroy...") From a45fd24ca2375d2f627c8caf93e2b6762071f861 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 20 Aug 2021 14:59:06 +0300 Subject: [PATCH 005/472] Backport #27875 to 21.9: Fix shutdown of NamedSessionStorage. --- programs/server/Server.cpp | 2 -- src/Interpreters/Context.cpp | 3 +++ src/Interpreters/Session.cpp | 51 ++++++++++++++++++++++-------------- src/Interpreters/Session.h | 7 ++--- 4 files changed, 37 insertions(+), 26 deletions(-) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index c30ef52f46ac..4d68a8be4e4a 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -53,7 +53,6 @@ #include #include #include -#include #include #include #include @@ -1429,7 +1428,6 @@ if (ThreadFuzzer::instance().isEffective()) /// Must be done after initialization of `servers`, because async_metrics will access `servers` variable from its thread. 
async_metrics.start(); - Session::startupNamedSessions(); { String level_str = config().getString("text_log.level", ""); diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index a634c19dcd61..348ca84ee6fd 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -59,6 +59,7 @@ #include #include #include +#include #include #include #include @@ -273,6 +274,8 @@ struct ContextSharedPart return; shutdown_called = true; + Session::shutdownNamedSessions(); + /** After system_logs have been shut down it is guaranteed that no system table gets created or written to. * Note that part changes at shutdown won't be logged to part log. */ diff --git a/src/Interpreters/Session.cpp b/src/Interpreters/Session.cpp index 7334f2e76409..c0e08395effa 100644 --- a/src/Interpreters/Session.cpp +++ b/src/Interpreters/Session.cpp @@ -54,17 +54,17 @@ class NamedSessionsStorage public: using Key = NamedSessionKey; + static NamedSessionsStorage & instance() + { + static NamedSessionsStorage the_instance; + return the_instance; + } + ~NamedSessionsStorage() { try { - { - std::lock_guard lock{mutex}; - quit = true; - } - - cond.notify_one(); - thread.join(); + shutdown(); } catch (...) { @@ -72,6 +72,20 @@ class NamedSessionsStorage } } + void shutdown() + { + { + std::lock_guard lock{mutex}; + sessions.clear(); + if (!thread.joinable()) + return; + quit = true; + } + + cond.notify_one(); + thread.join(); + } + /// Find existing session or create a new. std::pair, bool> acquireSession( const ContextPtr & global_context, @@ -94,6 +108,10 @@ class NamedSessionsStorage auto context = Context::createCopy(global_context); it = sessions.insert(std::make_pair(key, std::make_shared(key, context, timeout, *this))).first; const auto & session = it->second; + + if (!thread.joinable()) + thread = ThreadFromGlobalPool{&NamedSessionsStorage::cleanThread, this}; + return {session, true}; } else @@ -156,11 +174,9 @@ class NamedSessionsStorage { setThreadName("SessionCleaner"); std::unique_lock lock{mutex}; - - while (true) + while (!quit) { auto interval = closeSessions(lock); - if (cond.wait_for(lock, interval, [this]() -> bool { return quit; })) break; } @@ -208,8 +224,8 @@ class NamedSessionsStorage std::mutex mutex; std::condition_variable cond; - std::atomic quit{false}; - ThreadFromGlobalPool thread{&NamedSessionsStorage::cleanThread, this}; + ThreadFromGlobalPool thread; + bool quit = false; }; @@ -218,13 +234,12 @@ void NamedSessionData::release() parent.releaseSession(*this); } -std::optional Session::named_sessions = std::nullopt; - -void Session::startupNamedSessions() +void Session::shutdownNamedSessions() { - named_sessions.emplace(); + NamedSessionsStorage::instance().shutdown(); } + Session::Session(const ContextPtr & global_context_, ClientInfo::Interface interface_) : global_context(global_context_) { @@ -317,15 +332,13 @@ ContextMutablePtr Session::makeSessionContext(const String & session_id_, std::c throw Exception("Session context already exists", ErrorCodes::LOGICAL_ERROR); if (query_context_created) throw Exception("Session context must be created before any query context", ErrorCodes::LOGICAL_ERROR); - if (!named_sessions) - throw Exception("Support for named sessions is not enabled", ErrorCodes::LOGICAL_ERROR); /// Make a new session context OR /// if the `session_id` and `user_id` were used before then just get a previously created session context. 
std::shared_ptr new_named_session; bool new_named_session_created = false; std::tie(new_named_session, new_named_session_created) - = named_sessions->acquireSession(global_context, user_id.value_or(UUID{}), session_id_, timeout_, session_check_); + = NamedSessionsStorage::instance().acquireSession(global_context, user_id.value_or(UUID{}), session_id_, timeout_, session_check_); auto new_session_context = new_named_session->context; new_session_context->makeSessionContext(); diff --git a/src/Interpreters/Session.h b/src/Interpreters/Session.h index 58370aad2d07..d104e2500996 100644 --- a/src/Interpreters/Session.h +++ b/src/Interpreters/Session.h @@ -28,9 +28,8 @@ using UserPtr = std::shared_ptr; class Session { public: - /// Allow to use named sessions. The thread will be run to cleanup sessions after timeout has expired. - /// The method must be called at the server startup. - static void startupNamedSessions(); + /// Stops using named sessions. The method must be called at the server shutdown. + static void shutdownNamedSessions(); Session(const ContextPtr & global_context_, ClientInfo::Interface interface_); Session(Session &&); @@ -83,8 +82,6 @@ class Session String session_id; std::shared_ptr named_session; bool named_session_created = false; - - static std::optional named_sessions; }; } From 3e21452ed8ffea070aa3deff24d6b7083f0029e0 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 20 Aug 2021 15:00:16 +0300 Subject: [PATCH 006/472] Backport #27876 to 21.9: Fix postgres-like cast with negative numbers --- src/Parsers/ExpressionElementParsers.cpp | 30 ++++++++++++++----- src/Parsers/ExpressionListParsers.cpp | 6 ++-- .../01852_cast_operator_3.reference | 10 +++++++ .../0_stateless/01852_cast_operator_3.sql | 14 +++++++++ .../01852_cast_operator_bad_cases.reference | 8 +++++ .../01852_cast_operator_bad_cases.sh | 10 +++++++ 6 files changed, 69 insertions(+), 9 deletions(-) create mode 100644 tests/queries/0_stateless/01852_cast_operator_3.reference create mode 100644 tests/queries/0_stateless/01852_cast_operator_3.sql diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index 16f2b720b4a3..a79b3e51e16f 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -850,15 +850,24 @@ static bool isOneOf(TokenType token) return ((token == tokens) || ...); } - bool ParserCastOperator::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - /// Parse numbers (including decimals), strings and arrays of them. + /// Parse numbers (including decimals), strings, arrays and tuples of them. 
const char * data_begin = pos->begin; const char * data_end = pos->end; bool is_string_literal = pos->type == TokenType::StringLiteral; - if (pos->type == TokenType::Number || is_string_literal) + + if (pos->type == TokenType::Minus) + { + ++pos; + if (pos->type != TokenType::Number) + return false; + + data_end = pos->end; + ++pos; + } + else if (pos->type == TokenType::Number || is_string_literal) { ++pos; } @@ -876,7 +885,7 @@ bool ParserCastOperator::parseImpl(Pos & pos, ASTPtr & node, Expected & expected } else if (pos->type == TokenType::ClosingSquareBracket) { - if (isOneOf(last_token)) + if (isOneOf(last_token)) return false; if (stack.empty() || stack.back() != TokenType::OpeningSquareBracket) return false; @@ -884,7 +893,7 @@ bool ParserCastOperator::parseImpl(Pos & pos, ASTPtr & node, Expected & expected } else if (pos->type == TokenType::ClosingRoundBracket) { - if (isOneOf(last_token)) + if (isOneOf(last_token)) return false; if (stack.empty() || stack.back() != TokenType::OpeningRoundBracket) return false; @@ -892,10 +901,15 @@ bool ParserCastOperator::parseImpl(Pos & pos, ASTPtr & node, Expected & expected } else if (pos->type == TokenType::Comma) { - if (isOneOf(last_token)) + if (isOneOf(last_token)) return false; } - else if (isOneOf(pos->type)) + else if (pos->type == TokenType::Number) + { + if (!isOneOf(last_token)) + return false; + } + else if (isOneOf(pos->type)) { if (!isOneOf(last_token)) return false; @@ -915,6 +929,8 @@ bool ParserCastOperator::parseImpl(Pos & pos, ASTPtr & node, Expected & expected if (!stack.empty()) return false; } + else + return false; ASTPtr type_ast; if (ParserToken(TokenType::DoubleColon).ignore(pos, expected) diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index 58f5e7669050..3aa5c82884b6 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -664,10 +664,12 @@ bool ParserUnaryExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expec if (pos->type == TokenType::Minus) { - ParserLiteral lit_p; Pos begin = pos; + if (ParserCastOperator().parse(pos, node, expected)) + return true; - if (lit_p.parse(pos, node, expected)) + pos = begin; + if (ParserLiteral().parse(pos, node, expected)) return true; pos = begin; diff --git a/tests/queries/0_stateless/01852_cast_operator_3.reference b/tests/queries/0_stateless/01852_cast_operator_3.reference new file mode 100644 index 000000000000..a1e54797d608 --- /dev/null +++ b/tests/queries/0_stateless/01852_cast_operator_3.reference @@ -0,0 +1,10 @@ +-1 +SELECT CAST(\'-1\', \'Int32\') +-0.1 +SELECT CAST(\'-0.1\', \'Decimal(38, 38)\') +-0.111 +SELECT CAST(\'-0.111\', \'Float64\') +[-1,2,-3] +SELECT CAST(\'[-1, 2, -3]\', \'Array(Int32)\') +[-1.1,2,-3] +SELECT CAST(\'[-1.1, 2, -3]\', \'Array(Float64)\') diff --git a/tests/queries/0_stateless/01852_cast_operator_3.sql b/tests/queries/0_stateless/01852_cast_operator_3.sql new file mode 100644 index 000000000000..1ad015a8dc49 --- /dev/null +++ b/tests/queries/0_stateless/01852_cast_operator_3.sql @@ -0,0 +1,14 @@ +SELECT -1::Int32; +EXPLAIN SYNTAX SELECT -1::Int32; + +SELECT -0.1::Decimal(38, 38); +EXPLAIN SYNTAX SELECT -0.1::Decimal(38, 38); + +SELECT -0.111::Float64; +EXPLAIN SYNTAX SELECT -0.111::Float64; + +SELECT [-1, 2, -3]::Array(Int32); +EXPLAIN SYNTAX SELECT [-1, 2, -3]::Array(Int32); + +SELECT [-1.1, 2, -3]::Array(Float64); +EXPLAIN SYNTAX SELECT [-1.1, 2, -3]::Array(Float64); diff --git a/tests/queries/0_stateless/01852_cast_operator_bad_cases.reference 
b/tests/queries/0_stateless/01852_cast_operator_bad_cases.reference index 2c4517e0eda5..b179e5e927ad 100644 --- a/tests/queries/0_stateless/01852_cast_operator_bad_cases.reference +++ b/tests/queries/0_stateless/01852_cast_operator_bad_cases.reference @@ -8,3 +8,11 @@ Syntax error Syntax error Syntax error Code: 6 +Syntax error +Syntax error +Syntax error +Syntax error +Syntax error +Syntax error +Syntax error +Syntax error diff --git a/tests/queries/0_stateless/01852_cast_operator_bad_cases.sh b/tests/queries/0_stateless/01852_cast_operator_bad_cases.sh index f2f566b78c44..6c578a0996c1 100755 --- a/tests/queries/0_stateless/01852_cast_operator_bad_cases.sh +++ b/tests/queries/0_stateless/01852_cast_operator_bad_cases.sh @@ -15,3 +15,13 @@ $CLICKHOUSE_CLIENT --query="SELECT [1 2]::Array(UInt8)" 2>&1 | grep -o -m1 'Syn $CLICKHOUSE_CLIENT --query="SELECT 1 4::UInt32" 2>&1 | grep -o 'Syntax error' $CLICKHOUSE_CLIENT --query="SELECT '1' '4'::UInt32" 2>&1 | grep -o -m1 'Syntax error' $CLICKHOUSE_CLIENT --query="SELECT '1''4'::UInt32" 2>&1 | grep -o -m1 'Code: 6' + +$CLICKHOUSE_CLIENT --query="SELECT ::UInt32" 2>&1 | grep -o 'Syntax error' +$CLICKHOUSE_CLIENT --query="SELECT ::String" 2>&1 | grep -o 'Syntax error' +$CLICKHOUSE_CLIENT --query="SELECT -::Int32" 2>&1 | grep -o 'Syntax error' + +$CLICKHOUSE_CLIENT --query="SELECT [1, -]::Array(Int32)" 2>&1 | grep -o 'Syntax error' +$CLICKHOUSE_CLIENT --query="SELECT [1, 3-]::Array(Int32)" 2>&1 | grep -o 'Syntax error' +$CLICKHOUSE_CLIENT --query="SELECT [-, 2]::Array(Int32)" 2>&1 | grep -o 'Syntax error' +$CLICKHOUSE_CLIENT --query="SELECT [--, 2]::Array(Int32)" 2>&1 | grep -o 'Syntax error' +$CLICKHOUSE_CLIENT --query="SELECT [1, 2]-::Array(Int32)" 2>&1 | grep -o 'Syntax error' From 8f73038a7ffaa9d7f2f39186a766f201005bda46 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Sat, 21 Aug 2021 15:03:33 +0300 Subject: [PATCH 007/472] =?UTF-8?q?Backport=20#27927=20to=2021.9:=20=D0=A1?= =?UTF-8?q?heck=20cluster=20name=20before=20creating=20Distributed?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/Interpreters/Context.cpp | 4 ++-- src/Storages/StorageDistributed.cpp | 20 ++++++++++--------- src/Storages/StorageDistributed.h | 3 ++- .../00987_distributed_stack_overflow.sql | 4 +--- .../01763_max_distributed_depth.sql | 4 +++- 5 files changed, 19 insertions(+), 16 deletions(-) diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 348ca84ee6fd..b8537dce8226 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -1824,8 +1824,8 @@ std::shared_ptr Context::getCluster(const std::string & cluster_name) c auto res = getClusters()->getCluster(cluster_name); if (res) return res; - - res = tryGetReplicatedDatabaseCluster(cluster_name); + if (!cluster_name.empty()) + res = tryGetReplicatedDatabaseCluster(cluster_name); if (res) return res; diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index fcd0e255e5c9..df7d568deb95 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -327,11 +327,13 @@ StorageDistributed::StorageDistributed( const String & relative_data_path_, const DistributedSettings & distributed_settings_, bool attach_, - ClusterPtr owned_cluster_) + ClusterPtr owned_cluster_, + ASTPtr remote_table_function_ptr_) : IStorage(id_) , WithContext(context_->getGlobalContext()) , remote_database(remote_database_) , remote_table(remote_table_) + , 
remote_table_function_ptr(remote_table_function_ptr_) , log(&Poco::Logger::get("StorageDistributed (" + id_.table_name + ")")) , owned_cluster(std::move(owned_cluster_)) , cluster_name(getContext()->getMacros()->expand(cluster_name_)) @@ -363,10 +365,13 @@ StorageDistributed::StorageDistributed( } /// Sanity check. Skip check if the table is already created to allow the server to start. - if (!attach_ && !cluster_name.empty()) + if (!attach_) { - size_t num_local_shards = getContext()->getCluster(cluster_name)->getLocalShardCount(); - if (num_local_shards && remote_database == id_.database_name && remote_table == id_.table_name) + if (remote_database.empty() && !remote_table_function_ptr && !getCluster()->maybeCrossReplication()) + LOG_WARNING(log, "Name of remote database is empty. Default database will be used implicitly."); + + size_t num_local_shards = getCluster()->getLocalShardCount(); + if (num_local_shards && (remote_database.empty() || remote_database == id_.database_name) && remote_table == id_.table_name) throw Exception("Distributed table " + id_.table_name + " looks at itself", ErrorCodes::INFINITE_LOOP); } } @@ -399,9 +404,9 @@ StorageDistributed::StorageDistributed( relative_data_path_, distributed_settings_, attach, - std::move(owned_cluster_)) + std::move(owned_cluster_), + remote_table_function_ptr_) { - remote_table_function_ptr = std::move(remote_table_function_ptr_); } QueryProcessingStage::Enum StorageDistributed::getQueryProcessingStage( @@ -810,9 +815,6 @@ void StorageDistributed::alter(const AlterCommands & params, ContextPtr local_co void StorageDistributed::startup() { - if (remote_database.empty() && !remote_table_function_ptr && !getCluster()->maybeCrossReplication()) - LOG_WARNING(log, "Name of remote database is empty. 
Default database will be used implicitly."); - if (!storage_policy) return; diff --git a/src/Storages/StorageDistributed.h b/src/Storages/StorageDistributed.h index b6a26467a3fd..b003f8c64867 100644 --- a/src/Storages/StorageDistributed.h +++ b/src/Storages/StorageDistributed.h @@ -136,7 +136,8 @@ class StorageDistributed final : public shared_ptr_helper, p const String & relative_data_path_, const DistributedSettings & distributed_settings_, bool attach_, - ClusterPtr owned_cluster_ = {}); + ClusterPtr owned_cluster_ = {}, + ASTPtr remote_table_function_ptr_ = {}); StorageDistributed( const StorageID & id_, diff --git a/tests/queries/0_stateless/00987_distributed_stack_overflow.sql b/tests/queries/0_stateless/00987_distributed_stack_overflow.sql index d2e2b8f37ef6..1ef7c5432526 100644 --- a/tests/queries/0_stateless/00987_distributed_stack_overflow.sql +++ b/tests/queries/0_stateless/00987_distributed_stack_overflow.sql @@ -4,8 +4,7 @@ DROP TABLE IF EXISTS distr2; CREATE TABLE distr (x UInt8) ENGINE = Distributed(test_shard_localhost, currentDatabase(), distr); -- { serverError 269 } -CREATE TABLE distr0 (x UInt8) ENGINE = Distributed(test_shard_localhost, '', distr0); -SELECT * FROM distr0; -- { serverError 581 } +CREATE TABLE distr0 (x UInt8) ENGINE = Distributed(test_shard_localhost, '', distr0); -- { serverError 269 } CREATE TABLE distr1 (x UInt8) ENGINE = Distributed(test_shard_localhost, currentDatabase(), distr2); CREATE TABLE distr2 (x UInt8) ENGINE = Distributed(test_shard_localhost, currentDatabase(), distr1); @@ -13,6 +12,5 @@ CREATE TABLE distr2 (x UInt8) ENGINE = Distributed(test_shard_localhost, current SELECT * FROM distr1; -- { serverError 581 } SELECT * FROM distr2; -- { serverError 581 } -DROP TABLE distr0; DROP TABLE distr1; DROP TABLE distr2; diff --git a/tests/queries/0_stateless/01763_max_distributed_depth.sql b/tests/queries/0_stateless/01763_max_distributed_depth.sql index d1bb9e4be90d..89909a3bd8dd 100644 --- a/tests/queries/0_stateless/01763_max_distributed_depth.sql +++ b/tests/queries/0_stateless/01763_max_distributed_depth.sql @@ -9,7 +9,9 @@ CREATE TABLE tt6 `status` String ) -ENGINE = Distributed('test_shard_localhost', '', 'tt6', rand()); +ENGINE = Distributed('test_shard_localhost', '', 'tt7', rand()); + +CREATE TABLE tt7 as tt6 ENGINE = Distributed('test_shard_localhost', '', 'tt6', rand()); INSERT INTO tt6 VALUES (1, 1, 1, 1, 'ok'); -- { serverError 581 } From 3f8b69d3a9785876a9b6145d5c11add354a35bc6 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Sat, 21 Aug 2021 15:04:49 +0300 Subject: [PATCH 008/472] Backport #27918 to 21.9: Fix selecting with extremes from LowCardinality(UUID) --- src/Columns/ColumnUnique.h | 2 +- ...02012_low_cardinality_uuid_with_extremes.reference | 4 ++++ .../02012_low_cardinality_uuid_with_extremes.sql | 11 +++++++++++ 3 files changed, 16 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/02012_low_cardinality_uuid_with_extremes.reference create mode 100644 tests/queries/0_stateless/02012_low_cardinality_uuid_with_extremes.sql diff --git a/src/Columns/ColumnUnique.h b/src/Columns/ColumnUnique.h index 8ca4baff7c74..bfa80b5e3b21 100644 --- a/src/Columns/ColumnUnique.h +++ b/src/Columns/ColumnUnique.h @@ -304,7 +304,7 @@ size_t ColumnUnique::uniqueInsert(const Field & x) if (x.getType() == Field::Types::Null) return getNullValueIndex(); - if (isNumeric()) + if (valuesHaveFixedSize()) return uniqueInsertData(&x.reinterpret(), size_of_value_if_fixed); auto & val = x.get(); diff --git 
a/tests/queries/0_stateless/02012_low_cardinality_uuid_with_extremes.reference b/tests/queries/0_stateless/02012_low_cardinality_uuid_with_extremes.reference new file mode 100644 index 000000000000..af2447df8072 --- /dev/null +++ b/tests/queries/0_stateless/02012_low_cardinality_uuid_with_extremes.reference @@ -0,0 +1,4 @@ +0562380c-d1f3-4091-83d5-8c972f534317 + +0562380c-d1f3-4091-83d5-8c972f534317 +0562380c-d1f3-4091-83d5-8c972f534317 diff --git a/tests/queries/0_stateless/02012_low_cardinality_uuid_with_extremes.sql b/tests/queries/0_stateless/02012_low_cardinality_uuid_with_extremes.sql new file mode 100644 index 000000000000..191383cc9787 --- /dev/null +++ b/tests/queries/0_stateless/02012_low_cardinality_uuid_with_extremes.sql @@ -0,0 +1,11 @@ +DROP TABLE IF EXISTS tbl; + +SET allow_suspicious_low_cardinality_types = 1; +CREATE TABLE tbl (`lc` LowCardinality(UUID)) ENGINE = Memory; + +INSERT INTO tbl VALUES ('0562380c-d1f3-4091-83d5-8c972f534317'); + +SET extremes = 1; +SELECT * FROM tbl; + +DROP TABLE tbl; From 80f7bfd69db67cac65356f34b6e3933f7b599eab Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Sun, 22 Aug 2021 03:03:24 +0300 Subject: [PATCH 009/472] Backport #27939 to 21.9: Fix JSONExtract String with null value. --- src/Functions/FunctionsJSON.h | 2 ++ .../queries/0_stateless/01915_json_extract_raw_string.reference | 1 + tests/queries/0_stateless/01915_json_extract_raw_string.sql | 2 ++ 3 files changed, 5 insertions(+) diff --git a/src/Functions/FunctionsJSON.h b/src/Functions/FunctionsJSON.h index 4097e341fbbe..df17a39812bc 100644 --- a/src/Functions/FunctionsJSON.h +++ b/src/Functions/FunctionsJSON.h @@ -696,6 +696,8 @@ struct JSONExtractTree { if (element.isString()) return JSONExtractStringImpl::insertResultToColumn(dest, element, {}); + else if (element.isNull()) + return false; else return JSONExtractRawImpl::insertResultToColumn(dest, element, {}); } diff --git a/tests/queries/0_stateless/01915_json_extract_raw_string.reference b/tests/queries/0_stateless/01915_json_extract_raw_string.reference index 839cb33f5f22..3a41f35710c5 100644 --- a/tests/queries/0_stateless/01915_json_extract_raw_string.reference +++ b/tests/queries/0_stateless/01915_json_extract_raw_string.reference @@ -1 +1,2 @@ ('123','456','[7,8,9]') +\N diff --git a/tests/queries/0_stateless/01915_json_extract_raw_string.sql b/tests/queries/0_stateless/01915_json_extract_raw_string.sql index 6ba94ac6dfd9..4c5be79f6ef5 100644 --- a/tests/queries/0_stateless/01915_json_extract_raw_string.sql +++ b/tests/queries/0_stateless/01915_json_extract_raw_string.sql @@ -1 +1,3 @@ select JSONExtract('{"a": "123", "b": 456, "c": [7, 8, 9]}', 'Tuple(a String, b String, c String)'); + +with '{"string_value":null}' as json select JSONExtract(json, 'string_value', 'Nullable(String)'); From 0fdeb8b3c02477e2df3a9abc04405a93e3337ce2 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Sun, 22 Aug 2021 05:06:07 +0300 Subject: [PATCH 010/472] Backport #27563 to 21.9: Bugfix for windowFunnel's \"strict\" mode. 
--- .../parametric-functions.md | 2 +- .../AggregateFunctionWindowFunnel.h | 24 ++++++++++--------- .../00632_aggregation_window_funnel.reference | 1 + .../00632_aggregation_window_funnel.sql | 11 +++++++-- 4 files changed, 24 insertions(+), 14 deletions(-) diff --git a/docs/en/sql-reference/aggregate-functions/parametric-functions.md b/docs/en/sql-reference/aggregate-functions/parametric-functions.md index bdf115acb34d..6c24ffdba57b 100644 --- a/docs/en/sql-reference/aggregate-functions/parametric-functions.md +++ b/docs/en/sql-reference/aggregate-functions/parametric-functions.md @@ -255,7 +255,7 @@ windowFunnel(window, [mode, [mode, ... ]])(timestamp, cond1, cond2, ..., condN) - `window` — Length of the sliding window, it is the time interval between the first and the last condition. The unit of `window` depends on the `timestamp` itself and varies. Determined using the expression `timestamp of cond1 <= timestamp of cond2 <= ... <= timestamp of condN <= timestamp of cond1 + window`. - `mode` — It is an optional argument. One or more modes can be set. - - `'strict'` — If same condition holds for sequence of events then such non-unique events would be skipped. + - `'strict_deduplication'` — If the same condition holds for the sequence of events, then such repeating event interrupts further processing. - `'strict_order'` — Don't allow interventions of other events. E.g. in the case of `A->B->D->C`, it stops finding `A->B->C` at the `D` and the max event level is 2. - `'strict_increase'` — Apply conditions only to events with strictly increasing timestamps. diff --git a/src/AggregateFunctions/AggregateFunctionWindowFunnel.h b/src/AggregateFunctions/AggregateFunctionWindowFunnel.h index 591596461f89..982cd1b4e325 100644 --- a/src/AggregateFunctions/AggregateFunctionWindowFunnel.h +++ b/src/AggregateFunctions/AggregateFunctionWindowFunnel.h @@ -137,8 +137,8 @@ class AggregateFunctionWindowFunnel final private: UInt64 window; UInt8 events_size; - /// When the 'strict' is set, it applies conditions only for the not repeating values. - bool strict; + /// When the 'strict_deduplication' is set, it applies conditions only for the not repeating values. + bool strict_deduplication; /// When the 'strict_order' is set, it doesn't allow interventions of other events. /// In the case of 'A->B->D->C', it stops finding 'A->B->C' at the 'D' and the max event level is 2. @@ -150,7 +150,7 @@ class AggregateFunctionWindowFunnel final /// Loop through the entire events_list, update the event timestamp value /// The level path must be 1---2---3---...---check_events_size, find the max event level that satisfied the path in the sliding window. /// If found, returns the max event level, else return 0. - /// The Algorithm complexity is O(n). + /// The algorithm works in O(n) time, but the overall function works in O(n * log(n)) due to sorting. 
UInt8 getEventLevel(Data & data) const { if (data.size() == 0) @@ -163,10 +163,10 @@ class AggregateFunctionWindowFunnel final /// events_timestamp stores the timestamp of the first and previous i-th level event happen within time window std::vector>> events_timestamp(events_size); bool first_event = false; - for (const auto & pair : data.events_list) + for (size_t i = 0; i < data.events_list.size(); ++i) { - const T & timestamp = pair.first; - const auto & event_idx = pair.second - 1; + const T & timestamp = data.events_list[i].first; + const auto & event_idx = data.events_list[i].second - 1; if (strict_order && event_idx == -1) { if (first_event) @@ -179,9 +179,9 @@ class AggregateFunctionWindowFunnel final events_timestamp[0] = std::make_pair(timestamp, timestamp); first_event = true; } - else if (strict && events_timestamp[event_idx].has_value()) + else if (strict_deduplication && events_timestamp[event_idx].has_value()) { - return event_idx + 1; + return data.events_list[i - 1].second; } else if (strict_order && first_event && !events_timestamp[event_idx - 1].has_value()) { @@ -226,18 +226,20 @@ class AggregateFunctionWindowFunnel final events_size = arguments.size() - 1; window = params.at(0).safeGet(); - strict = false; + strict_deduplication = false; strict_order = false; strict_increase = false; for (size_t i = 1; i < params.size(); ++i) { String option = params.at(i).safeGet(); - if (option == "strict") - strict = true; + if (option == "strict_deduplication") + strict_deduplication = true; else if (option == "strict_order") strict_order = true; else if (option == "strict_increase") strict_increase = true; + else if (option == "strict") + throw Exception{"strict is replaced with strict_deduplication in Aggregate function " + getName(), ErrorCodes::BAD_ARGUMENTS}; else throw Exception{"Aggregate function " + getName() + " doesn't support a parameter: " + option, ErrorCodes::BAD_ARGUMENTS}; } diff --git a/tests/queries/0_stateless/00632_aggregation_window_funnel.reference b/tests/queries/0_stateless/00632_aggregation_window_funnel.reference index 2c68f277bfa0..d586e5a4b679 100644 --- a/tests/queries/0_stateless/00632_aggregation_window_funnel.reference +++ b/tests/queries/0_stateless/00632_aggregation_window_funnel.reference @@ -37,6 +37,7 @@ [5, 2] [6, 1] [7, 1] +[1] [1, 2] [2, 2] [3, 0] diff --git a/tests/queries/0_stateless/00632_aggregation_window_funnel.sql b/tests/queries/0_stateless/00632_aggregation_window_funnel.sql index aa0dc8042389..e548aa4d81d1 100644 --- a/tests/queries/0_stateless/00632_aggregation_window_funnel.sql +++ b/tests/queries/0_stateless/00632_aggregation_window_funnel.sql @@ -43,7 +43,7 @@ drop table if exists funnel_test_strict; create table funnel_test_strict (timestamp UInt32, event UInt32) engine=Memory; insert into funnel_test_strict values (00,1000),(10,1001),(20,1002),(30,1003),(40,1004),(50,1005),(51,1005),(60,1006),(70,1007),(80,1008); -select 6 = windowFunnel(10000, 'strict')(timestamp, event = 1000, event = 1001, event = 1002, event = 1003, event = 1004, event = 1005, event = 1006) from funnel_test_strict; +select 6 = windowFunnel(10000, 'strict_deduplication')(timestamp, event = 1000, event = 1001, event = 1002, event = 1003, event = 1004, event = 1005, event = 1006) from funnel_test_strict; select 7 = windowFunnel(10000)(timestamp, event = 1000, event = 1001, event = 1002, event = 1003, event = 1004, event = 1005, event = 1006) from funnel_test_strict; @@ -62,11 +62,18 @@ insert into funnel_test_strict_order values (1, 5, 'a') (2, 5, 'a') (3, 
5, 'b') insert into funnel_test_strict_order values (1, 6, 'c') (2, 6, 'c') (3, 6, 'b') (4, 6, 'b') (5, 6, 'a') (6, 6, 'a'); select user, windowFunnel(86400)(dt, event='a', event='b', event='c') as s from funnel_test_strict_order group by user order by user format JSONCompactEachRow; select user, windowFunnel(86400, 'strict_order')(dt, event='a', event='b', event='c') as s from funnel_test_strict_order group by user order by user format JSONCompactEachRow; -select user, windowFunnel(86400, 'strict', 'strict_order')(dt, event='a', event='b', event='c') as s from funnel_test_strict_order group by user order by user format JSONCompactEachRow; +select user, windowFunnel(86400, 'strict_deduplication', 'strict_order')(dt, event='a', event='b', event='c') as s from funnel_test_strict_order group by user order by user format JSONCompactEachRow; insert into funnel_test_strict_order values (1, 7, 'a') (2, 7, 'c') (3, 7, 'b'); select user, windowFunnel(10, 'strict_order')(dt, event = 'a', event = 'b', event = 'c') as s from funnel_test_strict_order where user = 7 group by user format JSONCompactEachRow; drop table funnel_test_strict_order; +--https://github.com/ClickHouse/ClickHouse/issues/27469 +drop table if exists strict_BiteTheDDDD; +create table strict_BiteTheDDDD (ts UInt64, event String) engine = Log(); +insert into strict_BiteTheDDDD values (1,'a') (2,'b') (3,'c') (4,'b') (5,'d'); +select 3 = windowFunnel(86400, 'strict_deduplication')(ts, event='a', event='b', event='c', event='d') from strict_BiteTheDDDD format JSONCompactEachRow; +drop table strict_BiteTheDDDD; + drop table if exists funnel_test_non_null; create table funnel_test_non_null (`dt` DateTime, `u` int, `a` Nullable(String), `b` Nullable(String)) engine = MergeTree() partition by dt order by u; insert into funnel_test_non_null values (1, 1, 'a1', 'b1') (2, 1, 'a2', 'b2'); From aa4d69e1edc7232211fcb5541500cd8bf41701da Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Mon, 23 Aug 2021 01:09:04 +0300 Subject: [PATCH 011/472] Backport #27983 to 21.9: Fix checking access rights when executing GRANT WITH REPLACE OPTION on cluster --- src/Interpreters/InterpreterGrantQuery.cpp | 129 +++++++++++---------- 1 file changed, 70 insertions(+), 59 deletions(-) diff --git a/src/Interpreters/InterpreterGrantQuery.cpp b/src/Interpreters/InterpreterGrantQuery.cpp index 2e7b3a58012d..506ab8a3387a 100644 --- a/src/Interpreters/InterpreterGrantQuery.cpp +++ b/src/Interpreters/InterpreterGrantQuery.cpp @@ -170,15 +170,18 @@ namespace auto entity = access_control.tryRead(id); if (auto role = typeid_cast(entity)) { - checkGranteeIsAllowed(current_user_access, id, *role); + if (need_check_grantees_are_allowed) + checkGranteeIsAllowed(current_user_access, id, *role); all_granted_access.makeUnion(role->access); } else if (auto user = typeid_cast(entity)) { - checkGranteeIsAllowed(current_user_access, id, *user); + if (need_check_grantees_are_allowed) + checkGranteeIsAllowed(current_user_access, id, *user); all_granted_access.makeUnion(user->access); } } + need_check_grantees_are_allowed = false; /// already checked if (!elements_to_revoke.empty() && elements_to_revoke[0].is_partial_revoke) @@ -200,28 +203,6 @@ namespace current_user_access.checkGrantOption(elements_to_revoke); } - /// Checks if the current user has enough access rights granted with grant option to grant or revoke specified access rights. - /// Also checks if grantees are allowed for the current user. 
- void checkGrantOptionAndGrantees( - const AccessControlManager & access_control, - const ContextAccess & current_user_access, - const std::vector & grantees_from_query, - const AccessRightsElements & elements_to_grant, - AccessRightsElements & elements_to_revoke) - { - bool need_check_grantees_are_allowed = true; - checkGrantOption( - access_control, - current_user_access, - grantees_from_query, - need_check_grantees_are_allowed, - elements_to_grant, - elements_to_revoke); - - if (need_check_grantees_are_allowed) - checkGranteesAreAllowed(access_control, current_user_access, grantees_from_query); - } - /// Checks if the current user has enough roles granted with admin option to grant or revoke specified roles. void checkAdminOption( const AccessControlManager & access_control, @@ -262,18 +243,21 @@ namespace auto entity = access_control.tryRead(id); if (auto role = typeid_cast(entity)) { - checkGranteeIsAllowed(current_user_access, id, *role); + if (need_check_grantees_are_allowed) + checkGranteeIsAllowed(current_user_access, id, *role); all_granted_roles.makeUnion(role->granted_roles); } else if (auto user = typeid_cast(entity)) { - checkGranteeIsAllowed(current_user_access, id, *user); + if (need_check_grantees_are_allowed) + checkGranteeIsAllowed(current_user_access, id, *user); all_granted_roles.makeUnion(user->granted_roles); } } - const auto & all_granted_roles_set = admin_option ? all_granted_roles.getGrantedWithAdminOption() : all_granted_roles.getGranted(); + need_check_grantees_are_allowed = false; /// already checked + const auto & all_granted_roles_set = admin_option ? all_granted_roles.getGrantedWithAdminOption() : all_granted_roles.getGranted(); if (roles_to_revoke.all) boost::range::set_difference(all_granted_roles_set, roles_to_revoke.except_ids, std::back_inserter(roles_to_revoke_ids)); else @@ -283,28 +267,45 @@ namespace current_user_access.checkAdminOption(roles_to_revoke_ids); } - /// Checks if the current user has enough roles granted with admin option to grant or revoke specified roles. - /// Also checks if grantees are allowed for the current user. - void checkAdminOptionAndGrantees( - const AccessControlManager & access_control, - const ContextAccess & current_user_access, - const std::vector & grantees_from_query, - const std::vector & roles_to_grant, - RolesOrUsersSet & roles_to_revoke, - bool admin_option) + /// Returns access rights which should be checked for executing GRANT/REVOKE on cluster. + /// This function is less accurate than checkGrantOption() because it cannot use any information about + /// access rights the grantees currently have (due to those grantees are located on multiple nodes, + /// we just don't have the full information about them). 
+ AccessRightsElements getRequiredAccessForExecutingOnCluster(const AccessRightsElements & elements_to_grant, const AccessRightsElements & elements_to_revoke) { - bool need_check_grantees_are_allowed = true; - checkAdminOption( - access_control, - current_user_access, - grantees_from_query, - need_check_grantees_are_allowed, - roles_to_grant, - roles_to_revoke, - admin_option); - - if (need_check_grantees_are_allowed) - checkGranteesAreAllowed(access_control, current_user_access, grantees_from_query); + auto required_access = elements_to_grant; + required_access.insert(required_access.end(), elements_to_revoke.begin(), elements_to_revoke.end()); + std::for_each(required_access.begin(), required_access.end(), [&](AccessRightsElement & element) { element.grant_option = true; }); + return required_access; + } + + /// Checks if the current user has enough roles granted with admin option to grant or revoke specified roles on cluster. + /// This function is less accurate than checkAdminOption() because it cannot use any information about + /// granted roles the grantees currently have (due to those grantees are located on multiple nodes, + /// we just don't have the full information about them). + void checkAdminOptionForExecutingOnCluster(const ContextAccess & current_user_access, + const std::vector roles_to_grant, + const RolesOrUsersSet & roles_to_revoke) + { + if (roles_to_revoke.all) + { + /// Revoking all the roles on cluster always requires ROLE_ADMIN privilege + /// because when we send the query REVOKE ALL to each shard we don't know at this point + /// which roles exactly this is going to revoke on each shard. + /// However ROLE_ADMIN just allows to revoke every role, that's why we check it here. + current_user_access.checkAccess(AccessType::ROLE_ADMIN); + return; + } + + if (current_user_access.isGranted(AccessType::ROLE_ADMIN)) + return; + + for (const auto & role_id : roles_to_grant) + current_user_access.checkAdminOption(role_id); + + + for (const auto & role_id : roles_to_revoke.getMatchingIDs()) + current_user_access.checkAdminOption(role_id); } template @@ -382,29 +383,39 @@ BlockIO InterpreterGrantQuery::execute() throw Exception("A partial revoke should be revoked, not granted", ErrorCodes::LOGICAL_ERROR); auto & access_control = getContext()->getAccessControlManager(); + auto current_user_access = getContext()->getAccess(); + std::vector grantees = RolesOrUsersSet{*query.grantees, access_control, getContext()->getUserID()}.getMatchingIDs(access_control); - /// Check if the current user has corresponding roles granted with admin option. + /// Collect access rights and roles we're going to grant or revoke. + AccessRightsElements elements_to_grant, elements_to_revoke; + collectAccessRightsElementsToGrantOrRevoke(query, elements_to_grant, elements_to_revoke); + std::vector roles_to_grant; RolesOrUsersSet roles_to_revoke; collectRolesToGrantOrRevoke(access_control, query, roles_to_grant, roles_to_revoke); - checkAdminOptionAndGrantees(access_control, *getContext()->getAccess(), grantees, roles_to_grant, roles_to_revoke, query.admin_option); + /// Executing on cluster. if (!query.cluster.empty()) { - /// To execute the command GRANT the current user needs to have the access granted with GRANT OPTION. 
- auto required_access = query.access_rights_elements; - std::for_each(required_access.begin(), required_access.end(), [&](AccessRightsElement & element) { element.grant_option = true; }); - checkGranteesAreAllowed(access_control, *getContext()->getAccess(), grantees); + auto required_access = getRequiredAccessForExecutingOnCluster(elements_to_grant, elements_to_revoke); + checkAdminOptionForExecutingOnCluster(*current_user_access, roles_to_grant, roles_to_revoke); + checkGranteesAreAllowed(access_control, *current_user_access, grantees); return executeDDLQueryOnCluster(query_ptr, getContext(), std::move(required_access)); } - query.replaceEmptyDatabase(getContext()->getCurrentDatabase()); + /// Check if the current user has corresponding access rights granted with grant option. + String current_database = getContext()->getCurrentDatabase(); + elements_to_grant.replaceEmptyDatabase(current_database); + elements_to_revoke.replaceEmptyDatabase(current_database); + bool need_check_grantees_are_allowed = true; + checkGrantOption(access_control, *current_user_access, grantees, need_check_grantees_are_allowed, elements_to_grant, elements_to_revoke); - /// Check if the current user has corresponding access rights with grant option. - AccessRightsElements elements_to_grant, elements_to_revoke; - collectAccessRightsElementsToGrantOrRevoke(query, elements_to_grant, elements_to_revoke); - checkGrantOptionAndGrantees(access_control, *getContext()->getAccess(), grantees, elements_to_grant, elements_to_revoke); + /// Check if the current user has corresponding roles granted with admin option. + checkAdminOption(access_control, *current_user_access, grantees, need_check_grantees_are_allowed, roles_to_grant, roles_to_revoke, query.admin_option); + + if (need_check_grantees_are_allowed) + checkGranteesAreAllowed(access_control, *current_user_access, grantees); /// Update roles and users listed in `grantees`. 
auto update_func = [&](const AccessEntityPtr & entity) -> AccessEntityPtr From bcbe93af0d3804b696977af6a6f3a2538c0736a4 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Mon, 23 Aug 2021 17:17:21 +0300 Subject: [PATCH 012/472] Backport #28016 to 21.9: Use ru.archive.ubuntu.com as default one is not responding from CI --- docker/builder/Dockerfile | 2 ++ docker/client/Dockerfile | 2 ++ docker/packager/binary/Dockerfile | 2 ++ docker/packager/deb/Dockerfile | 2 ++ docker/packager/unbundled/Dockerfile | 2 ++ docker/server/Dockerfile | 2 ++ docker/test/base/Dockerfile | 2 ++ docker/test/codebrowser/Dockerfile | 2 ++ docker/test/fasttest/Dockerfile | 2 ++ docker/test/fuzzer/Dockerfile | 2 ++ docker/test/performance-comparison/Dockerfile | 2 ++ docker/test/sqlancer/Dockerfile | 2 ++ docker/test/style/Dockerfile | 2 ++ docker/test/testflows/runner/Dockerfile | 2 ++ 14 files changed, 28 insertions(+) diff --git a/docker/builder/Dockerfile b/docker/builder/Dockerfile index 199b5217d795..abe102e9c809 100644 --- a/docker/builder/Dockerfile +++ b/docker/builder/Dockerfile @@ -2,6 +2,8 @@ FROM ubuntu:20.04 ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=11 +RUN sed -i 's|http://archive|http://ru.archive|g' /etc/apt/sources.list + RUN apt-get update \ && apt-get install ca-certificates lsb-release wget gnupg apt-transport-https \ --yes --no-install-recommends --verbose-versions \ diff --git a/docker/client/Dockerfile b/docker/client/Dockerfile index f17fa8ade166..2391256ec6ac 100644 --- a/docker/client/Dockerfile +++ b/docker/client/Dockerfile @@ -3,6 +3,8 @@ FROM ubuntu:18.04 ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/" ARG version=21.9.1.* +RUN sed -i 's|http://archive|http://ru.archive|g' /etc/apt/sources.list + RUN apt-get update \ && apt-get install --yes --no-install-recommends \ apt-transport-https \ diff --git a/docker/packager/binary/Dockerfile b/docker/packager/binary/Dockerfile index 29225bbfeb8b..0393669df48d 100644 --- a/docker/packager/binary/Dockerfile +++ b/docker/packager/binary/Dockerfile @@ -3,6 +3,8 @@ FROM ubuntu:20.04 ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=11 +RUN sed -i 's|http://archive|http://ru.archive|g' /etc/apt/sources.list + RUN apt-get update \ && apt-get install \ apt-transport-https \ diff --git a/docker/packager/deb/Dockerfile b/docker/packager/deb/Dockerfile index 241b691cd235..294c86454554 100644 --- a/docker/packager/deb/Dockerfile +++ b/docker/packager/deb/Dockerfile @@ -3,6 +3,8 @@ FROM ubuntu:20.04 ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=11 +RUN sed -i 's|http://archive|http://ru.archive|g' /etc/apt/sources.list + RUN apt-get update \ && apt-get install ca-certificates lsb-release wget gnupg apt-transport-https \ --yes --no-install-recommends --verbose-versions \ diff --git a/docker/packager/unbundled/Dockerfile b/docker/packager/unbundled/Dockerfile index 07031aa2d1bd..b2d9f555f193 100644 --- a/docker/packager/unbundled/Dockerfile +++ b/docker/packager/unbundled/Dockerfile @@ -5,6 +5,8 @@ RUN export CODENAME="$(lsb_release --codename --short | tr 'A-Z' 'a-z')" \ && wget -nv -O /tmp/arrow-keyring.deb "https://apache.jfrog.io/artifactory/arrow/ubuntu/apache-arrow-apt-source-latest-${CODENAME}.deb" \ && dpkg -i /tmp/arrow-keyring.deb +RUN sed -i 's|http://archive|http://ru.archive|g' /etc/apt/sources.list + # Libraries from OS are only needed to test the "unbundled" build (that is not used in production). 
RUN apt-get update \ && apt-get install \ diff --git a/docker/server/Dockerfile b/docker/server/Dockerfile index 5da9e703f4d2..474ebaaee88d 100644 --- a/docker/server/Dockerfile +++ b/docker/server/Dockerfile @@ -26,6 +26,8 @@ ARG DEBIAN_FRONTEND=noninteractive # installed to prevent picking those uid / gid by some unrelated software. # The same uid / gid (101) is used both for alpine and ubuntu. +RUN sed -i 's|http://archive|http://ru.archive|g' /etc/apt/sources.list + RUN groupadd -r clickhouse --gid=101 \ && useradd -r -g clickhouse --uid=101 --home-dir=/var/lib/clickhouse --shell=/bin/bash clickhouse \ && apt-get update \ diff --git a/docker/test/base/Dockerfile b/docker/test/base/Dockerfile index 29ac7a925b87..611ef6b7702f 100644 --- a/docker/test/base/Dockerfile +++ b/docker/test/base/Dockerfile @@ -3,6 +3,8 @@ FROM ubuntu:20.04 ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=11 +RUN sed -i 's|http://archive|http://ru.archive|g' /etc/apt/sources.list + RUN apt-get update \ && apt-get install ca-certificates lsb-release wget gnupg apt-transport-https \ --yes --no-install-recommends --verbose-versions \ diff --git a/docker/test/codebrowser/Dockerfile b/docker/test/codebrowser/Dockerfile index bb35258bed89..33173ab90f9d 100644 --- a/docker/test/codebrowser/Dockerfile +++ b/docker/test/codebrowser/Dockerfile @@ -2,6 +2,8 @@ # docker run --volume=path_to_repo:/repo_folder --volume=path_to_result:/test_output yandex/clickhouse-codebrowser FROM yandex/clickhouse-binary-builder +RUN sed -i 's|http://archive|http://ru.archive|g' /etc/apt/sources.list + RUN apt-get update && apt-get --yes --allow-unauthenticated install clang-9 libllvm9 libclang-9-dev # repo versions doesn't work correctly with C++17 diff --git a/docker/test/fasttest/Dockerfile b/docker/test/fasttest/Dockerfile index 916c94e73119..2e0bbcd350f4 100644 --- a/docker/test/fasttest/Dockerfile +++ b/docker/test/fasttest/Dockerfile @@ -3,6 +3,8 @@ FROM ubuntu:20.04 ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=11 +RUN sed -i 's|http://archive|http://ru.archive|g' /etc/apt/sources.list + RUN apt-get update \ && apt-get install ca-certificates lsb-release wget gnupg apt-transport-https \ --yes --no-install-recommends --verbose-versions \ diff --git a/docker/test/fuzzer/Dockerfile b/docker/test/fuzzer/Dockerfile index 57daba9cfd60..18684145636b 100644 --- a/docker/test/fuzzer/Dockerfile +++ b/docker/test/fuzzer/Dockerfile @@ -5,6 +5,8 @@ ENV LANG=C.UTF-8 ENV TZ=Europe/Moscow RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone +RUN sed -i 's|http://archive|http://ru.archive|g' /etc/apt/sources.list + RUN apt-get update \ && DEBIAN_FRONTEND=noninteractive apt-get install --yes --no-install-recommends \ ca-certificates \ diff --git a/docker/test/performance-comparison/Dockerfile b/docker/test/performance-comparison/Dockerfile index 5ec048de6571..1a61c4b274ae 100644 --- a/docker/test/performance-comparison/Dockerfile +++ b/docker/test/performance-comparison/Dockerfile @@ -5,6 +5,8 @@ ENV LANG=C.UTF-8 ENV TZ=Europe/Moscow RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone +RUN sed -i 's|http://archive|http://ru.archive|g' /etc/apt/sources.list + RUN apt-get update \ && DEBIAN_FRONTEND=noninteractive apt-get install --yes --no-install-recommends \ bash \ diff --git a/docker/test/sqlancer/Dockerfile b/docker/test/sqlancer/Dockerfile index 253ca1b729ad..672364023525 100644 --- a/docker/test/sqlancer/Dockerfile +++ b/docker/test/sqlancer/Dockerfile @@ -1,6 +1,8 @@ # docker build -t 
yandex/clickhouse-sqlancer-test . FROM ubuntu:20.04 +RUN sed -i 's|http://archive|http://ru.archive|g' /etc/apt/sources.list + RUN apt-get update --yes && env DEBIAN_FRONTEND=noninteractive apt-get install wget unzip git openjdk-14-jdk maven python3 --yes --no-install-recommends RUN wget https://github.com/sqlancer/sqlancer/archive/master.zip -O /sqlancer.zip RUN mkdir /sqlancer && \ diff --git a/docker/test/style/Dockerfile b/docker/test/style/Dockerfile index 86595a77a54f..c0b3b0102cfe 100644 --- a/docker/test/style/Dockerfile +++ b/docker/test/style/Dockerfile @@ -1,6 +1,8 @@ # docker build -t yandex/clickhouse-style-test . FROM ubuntu:20.04 +RUN sed -i 's|http://archive|http://ru.archive|g' /etc/apt/sources.list + RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \ shellcheck \ libxml2-utils \ diff --git a/docker/test/testflows/runner/Dockerfile b/docker/test/testflows/runner/Dockerfile index 264b98c669de..f170adf10471 100644 --- a/docker/test/testflows/runner/Dockerfile +++ b/docker/test/testflows/runner/Dockerfile @@ -1,6 +1,8 @@ # docker build -t yandex/clickhouse-testflows-runner . FROM ubuntu:20.04 +RUN sed -i 's|http://archive|http://ru.archive|g' /etc/apt/sources.list + RUN apt-get update \ && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \ ca-certificates \ From 60c8d4b30cb002013fd87e74adbf701731d74846 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 24 Aug 2021 01:17:50 +0300 Subject: [PATCH 013/472] Backport #28036 to 21.9: Follow-up to #28016 --- docker/test/integration/runner/Dockerfile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docker/test/integration/runner/Dockerfile b/docker/test/integration/runner/Dockerfile index 6bde4ef60db1..4130fc101786 100644 --- a/docker/test/integration/runner/Dockerfile +++ b/docker/test/integration/runner/Dockerfile @@ -1,6 +1,8 @@ # docker build -t yandex/clickhouse-integration-tests-runner . FROM ubuntu:20.04 +RUN sed -i 's|http://archive|http://ru.archive|g' /etc/apt/sources.list + RUN apt-get update \ && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \ ca-certificates \ From 02cf32ef29e64ec2fec30b82714eca3454cf61e1 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 24 Aug 2021 13:19:33 +0300 Subject: [PATCH 014/472] Backport #28027 to 21.9: Fix throw without exception in MySQL source. --- src/Formats/MySQLSource.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/Formats/MySQLSource.cpp b/src/Formats/MySQLSource.cpp index 2d305a29df6e..e6cb9f5ff11e 100644 --- a/src/Formats/MySQLSource.cpp +++ b/src/Formats/MySQLSource.cpp @@ -100,12 +100,12 @@ void MySQLWithFailoverSource::onStart() catch (const mysqlxx::ConnectionLost & ecl) /// There are two retriable failures: CR_SERVER_GONE_ERROR, CR_SERVER_LOST { LOG_WARNING(log, "Failed connection ({}/{}). Trying to reconnect... (Info: {})", count_connect_attempts, settings->default_num_tries_on_connection_loss, ecl.displayText()); - } - if (++count_connect_attempts > settings->default_num_tries_on_connection_loss) - { - LOG_ERROR(log, "Failed to create connection to MySQL. ({}/{})", count_connect_attempts, settings->default_num_tries_on_connection_loss); - throw; + if (++count_connect_attempts > settings->default_num_tries_on_connection_loss) + { + LOG_ERROR(log, "Failed to create connection to MySQL. 
({}/{})", count_connect_attempts, settings->default_num_tries_on_connection_loss); + throw; + } } } From 93ba4424b75ac57011d95b07fe611d4dce4f0c2e Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 24 Aug 2021 21:22:42 +0300 Subject: [PATCH 015/472] Backport #28035 to 21.9: Fix race between REPLACE PARTITION and MOVE PARTITION --- src/Storages/StorageReplicatedMergeTree.cpp | 74 ++++++++++++--------- 1 file changed, 41 insertions(+), 33 deletions(-) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 98ce2ac73e13..67a1ba7fc786 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -6370,6 +6370,7 @@ void StorageReplicatedMergeTree::replacePartitionFrom( MergeTreeData & src_data = checkStructureAndGetMergeTreeData(source_table, source_metadata_snapshot, metadata_snapshot); String partition_id = getPartitionIDFromQuery(partition, query_context); + /// NOTE: Some covered parts may be missing in src_all_parts if corresponding log entries are not executed yet. DataPartsVector src_all_parts = src_data.getDataPartsVectorInPartition(MergeTreeDataPartState::Committed, partition_id); DataPartsVector src_parts; MutableDataPartsVector dst_parts; @@ -6497,8 +6498,7 @@ void StorageReplicatedMergeTree::replacePartitionFrom( delimiting_block_lock->getUnlockOps(ops); /// Check and update version to avoid race with DROP_RANGE - ops.emplace_back(zkutil::makeCheckRequest(alter_partition_version_path, alter_partition_version_stat.version)); - ops.emplace_back(zkutil::makeSetRequest(alter_partition_version_path, "", -1)); + ops.emplace_back(zkutil::makeSetRequest(alter_partition_version_path, "", alter_partition_version_stat.version)); /// Just update version, because merges assignment relies on it ops.emplace_back(zkutil::makeSetRequest(fs::path(zookeeper_path) / "log", "", -1)); ops.emplace_back(zkutil::makeCreateRequest(fs::path(zookeeper_path) / "log/log-", entry.toString(), zkutil::CreateMode::PersistentSequential)); @@ -6580,7 +6580,39 @@ void StorageReplicatedMergeTree::movePartitionToTable(const StoragePtr & dest_ta auto src_data_id = src_data.getStorageID(); String partition_id = getPartitionIDFromQuery(partition, query_context); - DataPartsVector src_all_parts = src_data.getDataPartsVectorInPartition(MergeTreeDataPartState::Committed, partition_id); + /// A range for log entry to remove parts from the source table (myself). + auto zookeeper = getZooKeeper(); + String alter_partition_version_path = zookeeper_path + "/alter_partition_version"; + Coordination::Stat alter_partition_version_stat; + zookeeper->get(alter_partition_version_path, &alter_partition_version_stat); + + MergeTreePartInfo drop_range; + std::optional delimiting_block_lock; + getFakePartCoveringAllPartsInPartition(partition_id, drop_range, delimiting_block_lock, true); + String drop_range_fake_part_name = getPartNamePossiblyFake(format_version, drop_range); + + DataPartPtr covering_part; + DataPartsVector src_all_parts; + { + /// NOTE: Some covered parts may be missing in src_all_parts if corresponding log entries are not executed yet. 
+ auto parts_lock = src_data.lockParts(); + src_all_parts = src_data.getActivePartsToReplace(drop_range, drop_range_fake_part_name, covering_part, parts_lock); + } + + if (covering_part) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Got part {} covering drop range {}, it's a bug", + covering_part->name, drop_range_fake_part_name); + + /// After allocating block number for drop_range we must ensure that it does not intersect block numbers + /// allocated by concurrent REPLACE query. + /// We could check it in multi-request atomically with creation of DROP_RANGE entry in source table log, + /// but it's better to check it here and fail as early as possible (before we have done something to destination table). + Coordination::Error version_check_code = zookeeper->trySet(alter_partition_version_path, "", alter_partition_version_stat.version); + if (version_check_code != Coordination::Error::ZOK) + throw Exception(ErrorCodes::CANNOT_ASSIGN_ALTER, "Cannot DROP PARTITION in {} after copying partition to {}, " + "because another ALTER PARTITION query was concurrently executed", + getStorageID().getFullTableName(), dest_table_storage->getStorageID().getFullTableName()); + DataPartsVector src_parts; MutableDataPartsVector dst_parts; Strings block_id_paths; @@ -6590,21 +6622,11 @@ void StorageReplicatedMergeTree::movePartitionToTable(const StoragePtr & dest_ta LOG_DEBUG(log, "Cloning {} parts", src_all_parts.size()); static const String TMP_PREFIX = "tmp_move_from_"; - auto zookeeper = getZooKeeper(); - - /// A range for log entry to remove parts from the source table (myself). - - MergeTreePartInfo drop_range; - std::optional delimiting_block_lock; - getFakePartCoveringAllPartsInPartition(partition_id, drop_range, delimiting_block_lock, true); - String drop_range_fake_part_name = getPartNamePossiblyFake(format_version, drop_range); /// Clone parts into destination table. 
- - String alter_partition_version_path = dest_table_storage->zookeeper_path + "/alter_partition_version"; - Coordination::Stat alter_partition_version_stat; - zookeeper->get(alter_partition_version_path, &alter_partition_version_stat); - + String dest_alter_partition_version_path = dest_table_storage->zookeeper_path + "/alter_partition_version"; + Coordination::Stat dest_alter_partition_version_stat; + zookeeper->get(dest_alter_partition_version_path, &dest_alter_partition_version_stat); for (const auto & src_part : src_all_parts) { if (!dest_table_storage->canReplacePartition(src_part)) @@ -6685,8 +6707,7 @@ void StorageReplicatedMergeTree::movePartitionToTable(const StoragePtr & dest_ta } /// Check and update version to avoid race with DROP_RANGE - ops.emplace_back(zkutil::makeCheckRequest(alter_partition_version_path, alter_partition_version_stat.version)); - ops.emplace_back(zkutil::makeSetRequest(alter_partition_version_path, "", -1)); + ops.emplace_back(zkutil::makeSetRequest(dest_alter_partition_version_path, "", dest_alter_partition_version_stat.version)); /// Just update version, because merges assignment relies on it ops.emplace_back(zkutil::makeSetRequest(fs::path(dest_table_storage->zookeeper_path) / "log", "", -1)); ops.emplace_back(zkutil::makeCreateRequest(fs::path(dest_table_storage->zookeeper_path) / "log/log-", @@ -6740,26 +6761,14 @@ void StorageReplicatedMergeTree::movePartitionToTable(const StoragePtr & dest_ta } /// Create DROP_RANGE for the source table - alter_partition_version_path = zookeeper_path + "/alter_partition_version"; - zookeeper->get(alter_partition_version_path, &alter_partition_version_stat); - Coordination::Requests ops_src; ops_src.emplace_back(zkutil::makeCreateRequest( fs::path(zookeeper_path) / "log/log-", entry_delete.toString(), zkutil::CreateMode::PersistentSequential)); - /// Check and update version to avoid race with REPLACE_RANGE - ops_src.emplace_back(zkutil::makeCheckRequest(alter_partition_version_path, alter_partition_version_stat.version)); - ops_src.emplace_back(zkutil::makeSetRequest(alter_partition_version_path, "", -1)); /// Just update version, because merges assignment relies on it ops_src.emplace_back(zkutil::makeSetRequest(fs::path(zookeeper_path) / "log", "", -1)); delimiting_block_lock->getUnlockOps(ops_src); - Coordination::Error code = zookeeper->tryMulti(ops_src, op_results); - if (code == Coordination::Error::ZBADVERSION) - throw Exception(ErrorCodes::CANNOT_ASSIGN_ALTER, "Cannot DROP PARTITION in {} after copying partition to {}, " - "because another ALTER PARTITION query was concurrently executed", - getStorageID().getFullTableName(), dest_table_storage->getStorageID().getFullTableName()); - else - zkutil::KeeperMultiException::check(code, ops_src, op_results); + op_results = zookeeper->multi(ops_src); log_znode_path = dynamic_cast(*op_results.front()).path_created; entry_delete.znode_name = log_znode_path.substr(log_znode_path.find_last_of('/') + 1); @@ -7131,8 +7140,7 @@ bool StorageReplicatedMergeTree::dropAllPartsInPartition( /// Check and update version to avoid race with REPLACE_RANGE. /// Otherwise new parts covered by drop_range_info may appear after execution of current DROP_RANGE entry /// as a result of execution of concurrently created REPLACE_RANGE entry. 
- ops.emplace_back(zkutil::makeCheckRequest(alter_partition_version_path, alter_partition_version_stat.version)); - ops.emplace_back(zkutil::makeSetRequest(alter_partition_version_path, "", -1)); + ops.emplace_back(zkutil::makeSetRequest(alter_partition_version_path, "", alter_partition_version_stat.version)); /// Just update version, because merges assignment relies on it ops.emplace_back(zkutil::makeSetRequest(fs::path(zookeeper_path) / "log", "", -1)); From d01111d7ee74f174dad294efaaf0efd9b4e84081 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 25 Aug 2021 07:24:32 +0300 Subject: [PATCH 016/472] Backport #28088 to 21.9: Destroy `main_config_reloader` before shared context. --- programs/server/Server.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 4d68a8be4e4a..5ae0e905b53c 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -1076,6 +1076,9 @@ if (ThreadFuzzer::instance().isEffective()) /// Wait server pool to avoid use-after-free of destroyed context in the handlers server_pool.joinAll(); + // Uses a raw pointer to global context for getting ZooKeeper. + main_config_reloader.reset(); + /** Explicitly destroy Context. It is more convenient than in destructor of Server, because logger is still available. * At this moment, no one could own shared part of Context. */ @@ -1507,7 +1510,6 @@ if (ThreadFuzzer::instance().isEffective()) LOG_INFO(log, "Closed connections."); dns_cache_updater.reset(); - main_config_reloader.reset(); if (current_connections) { From a048c09ad47e873e49e68b84111787aede7dec80 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 25 Aug 2021 21:35:37 +0300 Subject: [PATCH 017/472] Backport #27858 to 21.9: Use Multipart copy upload for large S3 objects --- base/common/unit.h | 10 +++ src/Disks/S3/DiskS3.cpp | 149 ++++++++++++++++++++++++++++++++++------ src/Disks/S3/DiskS3.h | 11 ++- 3 files changed, 149 insertions(+), 21 deletions(-) create mode 100644 base/common/unit.h diff --git a/base/common/unit.h b/base/common/unit.h new file mode 100644 index 000000000000..d5c8d5c90271 --- /dev/null +++ b/base/common/unit.h @@ -0,0 +1,10 @@ +#pragma once +#include + +constexpr size_t KiB = 1024; +constexpr size_t MiB = 1024 * KiB; +constexpr size_t GiB = 1024 * MiB; + +constexpr size_t operator"" _KiB(unsigned long long val) { return val * KiB; } +constexpr size_t operator"" _MiB(unsigned long long val) { return val * MiB; } +constexpr size_t operator"" _GiB(unsigned long long val) { return val * GiB; } diff --git a/src/Disks/S3/DiskS3.cpp b/src/Disks/S3/DiskS3.cpp index 6dd29165566e..7378a08d1ea9 100644 --- a/src/Disks/S3/DiskS3.cpp +++ b/src/Disks/S3/DiskS3.cpp @@ -6,25 +6,37 @@ #include #include #include -#include -#include -#include + +#include + +#include + +#include +#include +#include +#include + #include #include + +#include + +#include +#include #include #include #include #include -#include -#include -#include -#include -#include + #include // Y_IGNORE #include // Y_IGNORE #include // Y_IGNORE #include // Y_IGNORE #include // Y_IGNORE +#include // Y_IGNORE +#include // Y_IGNORE +#include // Y_IGNORE +#include // Y_IGNORE namespace DB @@ -388,16 +400,7 @@ void DiskS3::saveSchemaVersion(const int & version) void DiskS3::updateObjectMetadata(const String & key, const ObjectMetadata & metadata) { - auto settings = current_settings.get(); - Aws::S3::Model::CopyObjectRequest request; - request.SetCopySource(bucket + "/" + key); - 
request.SetBucket(bucket); - request.SetKey(key); - request.SetMetadata(metadata); - request.SetMetadataDirective(Aws::S3::Model::MetadataDirective::REPLACE); - - auto outcome = settings->client->CopyObject(request); - throwIfError(outcome); + copyObjectImpl(bucket, key, bucket, key, std::nullopt, metadata); } void DiskS3::migrateFileToRestorableSchema(const String & path) @@ -553,18 +556,124 @@ void DiskS3::listObjects(const String & source_bucket, const String & source_pat } while (outcome.GetResult().GetIsTruncated()); } -void DiskS3::copyObject(const String & src_bucket, const String & src_key, const String & dst_bucket, const String & dst_key) const +void DiskS3::copyObject(const String & src_bucket, const String & src_key, const String & dst_bucket, const String & dst_key, + std::optional head) const +{ + if (head && (head->GetContentLength() >= static_cast(5_GiB))) + copyObjectMultipartImpl(src_bucket, src_key, dst_bucket, dst_key, head); + else + copyObjectImpl(src_bucket, src_key, dst_bucket, dst_key); +} + +void DiskS3::copyObjectImpl(const String & src_bucket, const String & src_key, const String & dst_bucket, const String & dst_key, + std::optional head, + std::optional> metadata) const { auto settings = current_settings.get(); Aws::S3::Model::CopyObjectRequest request; request.SetCopySource(src_bucket + "/" + src_key); request.SetBucket(dst_bucket); request.SetKey(dst_key); + if (metadata) + { + request.SetMetadata(*metadata); + request.SetMetadataDirective(Aws::S3::Model::MetadataDirective::REPLACE); + } auto outcome = settings->client->CopyObject(request); + + if (!outcome.IsSuccess() && outcome.GetError().GetExceptionName() == "EntityTooLarge") + { // Can't come here with MinIO, MinIO allows single part upload for large objects. + copyObjectMultipartImpl(src_bucket, src_key, dst_bucket, dst_key, head, metadata); + return; + } + throwIfError(outcome); } +void DiskS3::copyObjectMultipartImpl(const String & src_bucket, const String & src_key, const String & dst_bucket, const String & dst_key, + std::optional head, + std::optional> metadata) const +{ + LOG_DEBUG(log, "Multipart copy upload has created. Src Bucket: {}, Src Key: {}, Dst Bucket: {}, Dst Key: {}, Metadata: {}", + src_bucket, src_key, dst_bucket, dst_key, metadata ? 
"REPLACE" : "NOT_SET"); + + auto settings = current_settings.get(); + + if (!head) + head = headObject(src_bucket, src_key); + + size_t size = head->GetContentLength(); + + String multipart_upload_id; + + { + Aws::S3::Model::CreateMultipartUploadRequest request; + request.SetBucket(dst_bucket); + request.SetKey(dst_key); + if (metadata) + request.SetMetadata(*metadata); + + auto outcome = settings->client->CreateMultipartUpload(request); + + throwIfError(outcome); + + multipart_upload_id = outcome.GetResult().GetUploadId(); + } + + std::vector part_tags; + + size_t upload_part_size = settings->s3_min_upload_part_size; + for (size_t position = 0, part_number = 1; position < size; ++part_number, position += upload_part_size) + { + Aws::S3::Model::UploadPartCopyRequest part_request; + part_request.SetCopySource(src_bucket + "/" + src_key); + part_request.SetBucket(dst_bucket); + part_request.SetKey(dst_key); + part_request.SetUploadId(multipart_upload_id); + part_request.SetPartNumber(part_number); + part_request.SetCopySourceRange(fmt::format("bytes={}-{}", position, std::min(size, position + upload_part_size) - 1)); + + auto outcome = settings->client->UploadPartCopy(part_request); + if (!outcome.IsSuccess()) + { + Aws::S3::Model::AbortMultipartUploadRequest abort_request; + abort_request.SetBucket(dst_bucket); + abort_request.SetKey(dst_key); + abort_request.SetUploadId(multipart_upload_id); + settings->client->AbortMultipartUpload(abort_request); + // In error case we throw exception later with first error from UploadPartCopy + } + throwIfError(outcome); + + auto etag = outcome.GetResult().GetCopyPartResult().GetETag(); + part_tags.push_back(etag); + } + + { + Aws::S3::Model::CompleteMultipartUploadRequest req; + req.SetBucket(dst_bucket); + req.SetKey(dst_key); + req.SetUploadId(multipart_upload_id); + + Aws::S3::Model::CompletedMultipartUpload multipart_upload; + for (size_t i = 0; i < part_tags.size(); ++i) + { + Aws::S3::Model::CompletedPart part; + multipart_upload.AddParts(part.WithETag(part_tags[i]).WithPartNumber(i + 1)); + } + + req.SetMultipartUpload(multipart_upload); + + auto outcome = settings->client->CompleteMultipartUpload(req); + + throwIfError(outcome); + + LOG_DEBUG(log, "Multipart copy upload has completed. Src Bucket: {}, Src Key: {}, Dst Bucket: {}, Dst Key: {}, " + "Upload_id: {}, Parts: {}", src_bucket, src_key, dst_bucket, dst_key, multipart_upload_id, part_tags.size()); + } +} + struct DiskS3::RestoreInformation { UInt64 revision = LATEST_REVISION; @@ -757,7 +866,7 @@ void DiskS3::processRestoreFiles(const String & source_bucket, const String & so /// Copy object if we restore to different bucket / path. 
if (bucket != source_bucket || remote_fs_root_path != source_path) - copyObject(source_bucket, key, bucket, remote_fs_root_path + relative_key); + copyObject(source_bucket, key, bucket, remote_fs_root_path + relative_key, head_result); metadata.addObject(relative_key, head_result.GetContentLength()); metadata.save(); diff --git a/src/Disks/S3/DiskS3.h b/src/Disks/S3/DiskS3.h index 133488ad31fe..d881cee8f542 100644 --- a/src/Disks/S3/DiskS3.h +++ b/src/Disks/S3/DiskS3.h @@ -7,6 +7,7 @@ #if USE_AWS_S3 #include +#include #include #include "Disks/DiskFactory.h" #include "Disks/Executor.h" @@ -131,7 +132,15 @@ class DiskS3 final : public IDiskRemote Aws::S3::Model::HeadObjectResult headObject(const String & source_bucket, const String & key) const; void listObjects(const String & source_bucket, const String & source_path, std::function callback) const; - void copyObject(const String & src_bucket, const String & src_key, const String & dst_bucket, const String & dst_key) const; + void copyObject(const String & src_bucket, const String & src_key, const String & dst_bucket, const String & dst_key, + std::optional head = std::nullopt) const; + + void copyObjectImpl(const String & src_bucket, const String & src_key, const String & dst_bucket, const String & dst_key, + std::optional head = std::nullopt, + std::optional> metadata = std::nullopt) const; + void copyObjectMultipartImpl(const String & src_bucket, const String & src_key, const String & dst_bucket, const String & dst_key, + std::optional head = std::nullopt, + std::optional> metadata = std::nullopt) const; /// Restore S3 metadata files on file system. void restore(); From 3c28e88a2c27e70f69a3c650c76d2ebb3d06b056 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 26 Aug 2021 13:40:04 +0300 Subject: [PATCH 018/472] Backport #28139 to 21.9: Fix bug in MergeTreeWhereOptimizer --- src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp b/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp index 2da20073427d..806c861cf007 100644 --- a/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp +++ b/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp @@ -47,8 +47,12 @@ MergeTreeWhereOptimizer::MergeTreeWhereOptimizer( if (!primary_key.column_names.empty()) first_primary_key_column = primary_key.column_names[0]; - for (const auto & [_, size] : column_sizes) - total_size_of_queried_columns += size; + for (const auto & name : queried_columns) + { + auto it = column_sizes.find(name); + if (it != column_sizes.end()) + total_size_of_queried_columns += it->second; + } determineArrayJoinedNames(query_info.query->as()); optimize(query_info.query->as()); From 8c87c55ab5c20d9514f25f121f63bca870f380d6 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 26 Aug 2021 21:42:36 +0300 Subject: [PATCH 019/472] Backport #28150 to 21.9: Fix `Attempt to read after eof with enabled data compression` --- docker/test/fasttest/run.sh | 6 ++ docker/test/stateless/Dockerfile | 2 + src/IO/ZlibInflatingReadBuffer.cpp | 75 +++++++++++------- .../0_stateless/02013_zlib_read_after_eof.go | 61 ++++++++++++++ .../02013_zlib_read_after_eof.reference | 1 + .../0_stateless/02013_zlib_read_after_eof.sh | 18 +++++ .../data_zlib/02013_zlib_read_after_eof_data | Bin 0 -> 348894 bytes 7 files changed, 135 insertions(+), 28 deletions(-) create mode 100644 tests/queries/0_stateless/02013_zlib_read_after_eof.go create mode 100644 
tests/queries/0_stateless/02013_zlib_read_after_eof.reference create mode 100755 tests/queries/0_stateless/02013_zlib_read_after_eof.sh create mode 100644 tests/queries/0_stateless/data_zlib/02013_zlib_read_after_eof_data diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index d0184bb1a64b..a6340f7385ec 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -392,6 +392,12 @@ function run_tests 01853_s2_cells_intersect 01854_s2_cap_contains 01854_s2_cap_union + + # needs s3 + 01944_insert_partition_by + + # depends on Go + 02013_zlib_read_after_eof ) time clickhouse-test --hung-check -j 8 --order=random --use-skip-list \ diff --git a/docker/test/stateless/Dockerfile b/docker/test/stateless/Dockerfile index f5fa86a6f339..39c8a2e53580 100644 --- a/docker/test/stateless/Dockerfile +++ b/docker/test/stateless/Dockerfile @@ -24,6 +24,8 @@ RUN apt-get update -y \ python3-pip \ qemu-user-static \ sudo \ + # golang version 1.13 on Ubuntu 20 is enough for tests + golang \ telnet \ tree \ unixodbc \ diff --git a/src/IO/ZlibInflatingReadBuffer.cpp b/src/IO/ZlibInflatingReadBuffer.cpp index bea83c74e219..472399dea3d7 100644 --- a/src/IO/ZlibInflatingReadBuffer.cpp +++ b/src/IO/ZlibInflatingReadBuffer.cpp @@ -38,7 +38,7 @@ ZlibInflatingReadBuffer::ZlibInflatingReadBuffer( #pragma GCC diagnostic pop if (rc != Z_OK) - throw Exception(std::string("inflateInit2 failed: ") + zError(rc) + "; zlib version: " + ZLIB_VERSION, ErrorCodes::ZLIB_INFLATE_FAILED); + throw Exception(ErrorCodes::ZLIB_INFLATE_FAILED, "inflateInit2 failed: {}; zlib version: {}.", zError(rc), ZLIB_VERSION); } ZlibInflatingReadBuffer::~ZlibInflatingReadBuffer() @@ -48,41 +48,60 @@ ZlibInflatingReadBuffer::~ZlibInflatingReadBuffer() bool ZlibInflatingReadBuffer::nextImpl() { - if (eof) - return false; - - if (!zstr.avail_in) + /// Need do-while loop to prevent situation, when + /// eof was not reached, but working buffer became empty (when nothing was decompressed in current iteration) + /// (this happens with compression algorithms, same idea is implemented in ZstdInflatingReadBuffer) + do { - in->nextIfAtEnd(); - zstr.next_in = reinterpret_cast(in->position()); - zstr.avail_in = in->buffer().end() - in->position(); - } - zstr.next_out = reinterpret_cast(internal_buffer.begin()); - zstr.avail_out = internal_buffer.size(); - - int rc = inflate(&zstr, Z_NO_FLUSH); + /// if we already found eof, we shouldn't do anything + if (eof) + return false; - in->position() = in->buffer().end() - zstr.avail_in; - working_buffer.resize(internal_buffer.size() - zstr.avail_out); - - if (rc == Z_STREAM_END) - { - if (in->eof()) + /// if there is no available bytes in zstr, move ptr to next available data + if (!zstr.avail_in) { - eof = true; - return !working_buffer.empty(); + in->nextIfAtEnd(); + zstr.next_in = reinterpret_cast(in->position()); + zstr.avail_in = in->buffer().end() - in->position(); } - else + /// init output bytes (place, where decompressed data will be) + zstr.next_out = reinterpret_cast(internal_buffer.begin()); + zstr.avail_out = internal_buffer.size(); + + int rc = inflate(&zstr, Z_NO_FLUSH); + + /// move in stream on place, where reading stopped + in->position() = in->buffer().end() - zstr.avail_in; + /// change size of working buffer (it's size equal to internal_buffer size without unused uncompressed values) + working_buffer.resize(internal_buffer.size() - zstr.avail_out); + + /// If end was reached, it can be end of file or end of part (for example, chunk) + if (rc == Z_STREAM_END) { - rc 
= inflateReset(&zstr); - if (rc != Z_OK) - throw Exception(std::string("inflateReset failed: ") + zError(rc), ErrorCodes::ZLIB_INFLATE_FAILED); - return true; + /// if it is end of file, remember this and return + /// * true if we can work with working buffer (we still have something to read, so next must return true) + /// * false if there is no data in working buffer + if (in->eof()) + { + eof = true; + return !working_buffer.empty(); + } + /// If it is not end of file, we need to reset zstr and return true, because we still have some data to read + else + { + rc = inflateReset(&zstr); + if (rc != Z_OK) + throw Exception(ErrorCodes::ZLIB_INFLATE_FAILED, "inflateReset failed: {}", zError(rc)); + return true; + } } + /// If it is not end and not OK, something went wrong, throw exception + if (rc != Z_OK) + throw Exception(ErrorCodes::ZLIB_INFLATE_FAILED, "inflateReset failed: {}", zError(rc)); } - if (rc != Z_OK) - throw Exception(std::string("inflate failed: ") + zError(rc), ErrorCodes::ZLIB_INFLATE_FAILED); + while (working_buffer.empty()); + /// if code reach this section, working buffer is not empty, so we have some data to process return true; } diff --git a/tests/queries/0_stateless/02013_zlib_read_after_eof.go b/tests/queries/0_stateless/02013_zlib_read_after_eof.go new file mode 100644 index 000000000000..a97a1438bdff --- /dev/null +++ b/tests/queries/0_stateless/02013_zlib_read_after_eof.go @@ -0,0 +1,61 @@ +package main + +import ( + "compress/gzip" + "fmt" + "io" + "io/ioutil" + "net/http" + "net/url" + "os" +) + +func compress(data io.Reader) io.Reader { + pr, pw := io.Pipe() + gw := gzip.NewWriter(pw) + + go func() { + _, _ = io.Copy(gw, data) + gw.Close() + pw.Close() + }() + + return pr +} + +func main() { + database := os.Getenv("CLICKHOUSE_DATABASE") + p, err := url.Parse("http://localhost:8123/") + if err != nil { + panic(err) + } + q := p.Query() + + q.Set("query", "INSERT INTO "+database+".graphite FORMAT RowBinary") + p.RawQuery = q.Encode() + queryUrl := p.String() + + var req *http.Request + + req, err = http.NewRequest("POST", queryUrl, compress(os.Stdin)) + req.Header.Add("Content-Encoding", "gzip") + + if err != nil { + panic(err) + } + + client := &http.Client{ + Transport: &http.Transport{DisableKeepAlives: true}, + } + resp, err := client.Do(req) + if err != nil { + panic(err) + } + defer resp.Body.Close() + + body, _ := ioutil.ReadAll(resp.Body) + + if resp.StatusCode != 200 { + panic(fmt.Errorf("clickhouse response status %d: %s", resp.StatusCode, string(body))) + } +} diff --git a/tests/queries/0_stateless/02013_zlib_read_after_eof.reference b/tests/queries/0_stateless/02013_zlib_read_after_eof.reference new file mode 100644 index 000000000000..5caff40c4a0c --- /dev/null +++ b/tests/queries/0_stateless/02013_zlib_read_after_eof.reference @@ -0,0 +1 @@ +10000 diff --git a/tests/queries/0_stateless/02013_zlib_read_after_eof.sh b/tests/queries/0_stateless/02013_zlib_read_after_eof.sh new file mode 100755 index 000000000000..d74dca6cc61c --- /dev/null +++ b/tests/queries/0_stateless/02013_zlib_read_after_eof.sh @@ -0,0 +1,18 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +DATA_FILE=$CUR_DIR/data_zlib/02013_zlib_read_after_eof_data + +$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS graphite;" + +$CLICKHOUSE_CLIENT -q "CREATE TABLE graphite(\`Path\` String, \`Value\` Float64, \`Time\` UInt32, \`Date\` Date, \`Timestamp\` UInt32) \ + ENGINE = MergeTree PARTITION BY toYYYYMM(Date) ORDER BY (Path, Time) SETTINGS index_granularity = 8192;" + +cat "$DATA_FILE" | go run $CUR_DIR/02013_zlib_read_after_eof.go + +$CLICKHOUSE_CLIENT -q "SELECT count() FROM graphite;" + +$CLICKHOUSE_CLIENT -q "drop table graphite;" diff --git a/tests/queries/0_stateless/data_zlib/02013_zlib_read_after_eof_data b/tests/queries/0_stateless/data_zlib/02013_zlib_read_after_eof_data new file mode 100644 index 0000000000000000000000000000000000000000..3e57c0824627b65a15b2314249c7ace9b71b4ee6 GIT binary patch literal 348894 zcmaf+Ns?tpmK}2gBx@z0TW`h|>{;xmIV+JA3^2e1#0IzqE`@uda2Yf~BD_RQPq^kZ zuc&?8IC$~YkA27A{PxrT{OyO*_x$0PpMLxG=fC{v_y7I*$ML`a-{1YOzx_Y|_y7If z-+UWcKC*q}=*ao-k>VrON1Bgx9~nO4+daPBg@uAxtAG+P~q1zoF zy4~@i+Z`Xe-SMH@9Us2k;@d60-QwFVzTM*6Exz62+bzD`;@d60-Rj$|zTN8Et-jsr z+pWIc>f5cp-Rj$|zTM{AZNA;++ikwx=G$$)-R9eEzTM{AZNA;^+wH#H?%VCY-R|4% zzTNKI?Y`aa+wH#H;oBX)-Qn9EzTM&59lqV++a12$;oBX)-Raw%zTN5Doxa`a+nv7M z>D!&Y-Raw%zTM^9UB2Dr+g-lh<=b7p-R0X|zTM^9UB2Dz+ugq1?c3eH-R;}mzFl{< zv3^xyV`VDo9=4UU2VFnO?S2Ft~TA(rn}m7SDWr?(_L-4t4(*c>8>{2)uy}J zbXS}1YSUe9x~olhwdt-l-PNYM+H_Z&?rPIrZMv&XceUxRHr>^xyV`VDo9=4UU2VFn zO?S2Ft~TA(rn}m7SDWr?(_L-4t4(*c>8>{2)uy}JbXS}1YSUe9x~olhwdt-l-PNYM z+H_Z&?rPIrZMv&XceUxRHr>^xyV`VDo9=4UU2VFnO?S2Ft~TA(rn}m7SDWr?(_L-4 zt4(*c>8>{2)uy}JbXS}1YSUe9x~olhwdt-l-PNYM+H_Z2?rO_jZMmy0ceUlNw%pa0 zyV`PBTkdMhU2VCmEqAr$uD0COmb=<=S6l9C%Ux}`t1Wl6<*v5e)t0;3a#vgKYRg@1 zxvMRAwdJn1+|`!5+HzN0?rO_jZMmy0ceUlNw%pa0yV`PBTkdMhU2VCmEqApASDWuG zngv%2BkxZj!^r#d-7xb0d^e1|Ki}mDTrG^eKi>@_@6UI`$ouo%F!KI~6SPplEhCTrE&EyBn@X3S2FWcy{4xf#TVPs|AW@7p@j4o?Wakl-PJaFwQ!JU7rk1bcy`gN1&U`Ey;`7ncHPxBdNm&8uC~#u1&U`Ey;`7n zcG0T^if0$S8Y%8-+g)v=R|^MucG0T^if0$STA+A#(W?cDX7{+O9rS8A=(wvL^lE{k z**)mh0!6cX(5nTCX7`|1BgI|qxT_uXYT+Qw?m@2>D4N}aUM)~Gy9d2mpm=uO)ed?! 
z9^|fe(5nTCXBWL%pm=uCs|AW@7rh!O?rO(f?Vwi+2YGhUs|AW@7rk1bcy`gN1&U|a zUG1P(<3a9f2fbRLcy`^@j=S2y9mw!mJiE99Ns7DLaaTL;Y6o{9!)NjB;tph>_>;kV zwd1aKyjMH!YR6sexT_s^wd1|oaaTLus~vZ>L4j@aL7cf{Vv?uh-Zc1P@u?2gzw!yU0V zvihN}c1L`>>S~wb+f`S)6yL78+NJn*;cDx-b#*E4b~yr93nTA#!^oT6F!F9UjJ(+m zBky*@$eZ0T@@_Ybyx9#SzFoK)DR8wg;@d^97AU@5^lE|P+eNPyD860vYNWu`!iZ-V zt`;bsUG!>!;@O3(1&U`Ey;`7ncHwHIz}3QtXBVy(D4t!oTA+A#;c9{6*@df-0#^$o zo?WV5q`=j}h-SCI)dEGcTi|MeqS-BQ zwLsDA7I(Gcu2#^i@w1>;3nQLg+<^=f&o1sj28w4FcOV1Bv+J%_+|>$tHGUR%wSrzP zP<*@S)dI!0i(V~Ie7o*y1-%*%a#t(p)dIzz4CvJY#h(o5)dIzz4CvKJaaSwuY6ZPo zILMz2xC0p|o?YC53>42UdbL3D?7FKJ^lCiFU9F&33lz^TdbL3D?4nl-6wfYtHB#Kw zio04tuNDsS?4nl-6wfYtwLtOgqE`zP&#t>#L9fPx+|>$twLtOgqE`zP&n|kkK=JIN zS0lw;t+=Zd^lITC&n|kkK=JINR|^!+E_$^<(d^!K==o0!fO0DiA5ojA=+(kOn%#{j$@fuh;1=+y#6vs=AatL|#mU9GySRd=aJGZ)vCK%^aJG3 zSF7%7)m^Q+t5tWk>b+WZSF7HuRd==Oy;^lwtL|#mU9GySRqxfRyIMuBw%_Yr6|NRW z-uK-w^1hLUk=NBQ(%@=g!;@d^97AU@5^lE{k*==w&Qs8P~#Ip-m3lz^TTrE&MyXe&d#j}fEEl@nW za5YlkYGK5)3s(yi&n{doP&~VEwLtOg!qrHDtA!EIE?g~8JiBnUK=JIt)dIz{3s(yi z&n{ez6u4R#@$ACY0>!fnR|^!+E?g~8JiBl;Qs8P~#Ip-m3lz^TTrE&MyKuEY@$ACY z0>!fnS0e?k7Dha~aJ4}3?84Oo#j^`n3lz^TT#XdCS{U){!qo!BvkO-X6wfYPEl@nW zaJ4}3?84PZfvbfP&n{doP&~WtYRz4(;SOZ@ES_E5fdr+yt9A5hfuh^(=+y#6x7*RH z1&TfyI(oH0(d~BhYNWWUb$7LnUM(D?Plk?OEl~8y(9x>}iar@SdbL2&Cqs8v>*&>Z zkh@w(uNEktUG!>!;@L&77AT%w^lGHIt95s^j$SPs~97AT%w^lE|P*+s7wD4tz+ zwT@nm2f3?t^lE|P*+s7wD4t#PYJuX}MXyGRyIOZw>*&?OL7rXoYJuX}MXwepo?Y~6 zf#TV9SL^82c#ykVN3Rwro?Y~6f#TUkuNEktUG!?CxT|${wT@mb9OT(WuNEktUG!>! z;@L&77AT%wceRdQjR(1_b@Xb1;@L&77AT%w^lE|P*+s8Lio05OSL^82!a<&0^lE|P z*>zXz?rI%(Aj4<)Le;jT9DeKmZR;jT9DeYHT*?GAijEl~8yFuYeA?rOtbZMdrq zceUZY+HhAJ-m49Fwc)+ma911Ns||Ox;jT8^)rPy;@Lp}Ws}1kfhP&GEUTwIm4R^KS zt~T7&hWBd2U2S-;Hr&;Q_iDpkZFsLX+|`D=+HhAJ?rOt(wc)NdyjL6UYQuZA;jT8^ z)rPy;a911Ns||Ox;l0{$R~z1|4R^KSz1nbB8}4euU2V9l4e!;4yV~$xZMdrq@70F8 z+HhAJ?rOtbZJ<{>?+vg4R|_NW8(A26Z6b`kZ)9QQwTUqDzVC7bt`!fnR|^!+E_yXkCR{Cy zXm%%DEl@PO6Rs90n%xOk3lz=ngsTOLW_QBXNP(+`5zX#|s|AW?cf!>IMYB8MYJsBJ zop3c$;A&ySvkO-X6wfYPEl@nWaJ4}3?84Oo#j^`nBL%J&Mm)Q4wLtOg!qo!BvkO-X z6wfYPjTE?A81d}F)dIz{3s(yi&n{doP&~VEwLtOg!qrHDtA!EIE?g~8JiBnUK=JIt z)dIz{3s)lrt`!fnR|^!+E?g~8JiBl;Qs8P~#Ix(JHr>@G?m&jm z;@QO=$UyPz;tnJ!?rIagTA=uL(W?cDKN--g1&VJMy;`97cG0Vm;;uH`)h2qiaF9P4 z(5nTCKN--g1&Tiz(5nTCJ{gv~+Cr~}gO!;@L&7MvA-Ia#vgE)xtrZUG!>!;@L&77AT%w^lE|P*>zW2=+$_TyV^pp7AT%w z^lE|P*+s7wD4t#PYNWWUEqAqrUM(Et*>zW2?rIBnAj4w%ygX_iEc+ zZM&;&ceU-Vw!K%|?rPh6we7C9y;s}rYTI3HyQ^(?we7vyc30cpt8I6+?Y-KDtNoMr z>d;27#u2z$7~v`$iT<-uK-w^1hLA1g;iF ze7kV9K=JLu)dI!03s(yi-!5D&P<*>^HB#VeVZ^fwR|^!+E?g~8JiBnUK=JIt)kuM> zg%Qs#TrE&MyKuEY@$ACY0>!fnR|^!+E?kWixLO$T?84Oo#j}fEEl@nWaJ4}3?84PZ zfvbfP&n{doP&~VEwLtOg!qo!BvkO-X6wU5|tATRhYGFjPd*Et;qS-xgwLsDA9=KYd zXm$@=jTE?A7}4w=xLTlSb`M-FP&B&-t`;bo-2+z(6wfYPjTE?A81d}F)dIz{3s(yi z&n{doP&~VEHB#VeVZ^fwR|^!+E?g~8JiBnUK=JIls~vZ>n4t$nDg-4V~Oy4t09cGcA`#j~rfb}61+b+z`P zu69Q}yXtC};@MSKyA;o^y4t09cGcA`#j~rf);`qL?ucg>cOdzIS690S`F7RSF2%R2 zu68NDU3Ingp{{mEe7ovum*P(bb+t?JCxg1$rTCLUU9EkntKAWQGN`Lvia#0D)h@-e ztFCq_o?UgdOY!WgtF;exwL9Y3Rad(d&#t=KrFeGL)h@-etFCtcQ%wH8tFCq_zFqCr zF2%R2z1pStcC}Z#6yL7)YVAW^?T+}9L0#=q{K=rb+NJoDL3_1J@h5}!YL}u!rrS0e?k7Djx#aJ4}3?ZVXp#j^`n3lz^TT#XdCS{U){!qo!B zvkO-X6wfYPEl@nWaJ4}3?84PZfvbfP&n{doP&~VEwLtOg!qo!BvkO-v1+ErGJiBnU zK=JIt)dIz{3s(yi&n{doP&~VEHB#VeVZ^fwR|^!+E?g~8JiBnUK=JIt)kuM>g%Qs# zTrE&MyKuEY@$ACY0>!fnR|^!+E?kWixLO$T?84Oo#j^`n3lz^TTrE&MyKps9;A&yS zvkO-X6wfYPEl@nWaJ4}3?84OoMYCJsYM@lOS{TvnR=8TAXm+c+T1BrG4$|#b^lE|P z+jUo~=+$@-dbKd3*{$f+0!6c1-PJ03wQ!I=87g`;Qry+5yIMuB77p_5qE`zP-!6K! 
zK=JINR|^!+uDe=Auf~Ji)hc?mK=CI7dbL2&ztt*wwLtOgqE{ouU9GySRrG4%AkQv( zwLtOgqE`zP&n|kkK=JIlt5x)BJjh+GqE`zP&n|kkK=JINR|^!+E_yXm+|{bPT1BrG z4)W}xR|^!+E_$^<@$8~k3lz_;yIMuB#)I6|Dtfg*@$8~k3lz^TdbL3D?4nmA#a*qs zt5x)B;ULd0dbL3D?4nl-6wfYtwLtOgx~o<6YCOnYt)f>86wfYtwLtOgqE`zP&n|j3 zQ0`5>AAb4iw_kt$%b$M#-=BXBBWl9;+Lco@yZ3gBQ#8Bxo`F*|ySEf}ie~rrfJ$*! zYwl_dcOd!4=&sgq2QpB6yY6bud$s1S*4)*ayIONsYu>9hceUodT60%x-m5itwdTEA zb60EbYRz4(xvMqr)tb9n^IomFt2OV{n!8$aS8MKS&0Vc|uh!hvn)hnWU9EYq*4)*a z_iD{ut+}f;ceUoO*1T71?rP0@wdStYyjN@PYRz4(xvMpIwdTEAb60ELt2KAE=Dk{T zS8Lv@HFveOYCj)x5K=JLOR|^#1E_$^<@$I^+ zb@XaH$X%_YR|^z>GN4xr6n`?HR|^z>GN4xjWw@&iceR0DEgYoT9q82pMYB85s|AW? zcc51b6wU5%R~zWnc#yl=K(7`kn%#k3El@PO1HD?HXm$sBHB#KwhP&E8uNDsS?4nl- z6wfYtwLtOgqE`zP&#t@LK(EGw+|>qpwLtOgqE`zP&n|kkK=JINS0lw;ZMdrq^lITC z&n|kkK=JINR|^!+E_$^<@$9;*4fJX}$X#uqR|^!+E_$^<@$8~k3lz^TdNoqq)rPy; zK(7`K^6a8l3lz^TdbL3D?4nl-6wj`^+CZD5@LBxH;Jw;#R~z1|4R^KSz1nbB8}4euU2V9l4e!;4yV~$xZMdrq z@70F8+H_Z&?rPIrZF;XZ-PNY|YSUe9dapL!)u#7q(_L-4t4(*c>8>`tSDWr?(|fh) zt~R|_o9=4UU2VFnO?S2Fz1nnFo8GHUceUxg+H_Z&-m6V_wdt-l-PNYM+Voy+x~om^ z)uy}J^j>Yct4(*c>8>{2)h2qifBs&EnsBu+^1knek@t-(jJ)r=VdQ-y3nTCQE=S;M zVdU*5!pPhFgpt=K!pQqZ7Djx#=+y$nw~JmaP<*>^HB#VeVZ^tKUM*03yXe&d#j}fE zEl@nW=+#JptA!EIE?g~8JiBnUK=JINR|^!+E_$^<@$ACYNP(+`5zj7MEl@nWaJ4}3 z?84Oo#j^`nBL%J&Mm)Q4wLtOg!qo!BvkO-X6wfYPEl@PO3$6yrf~$oQ&F+G$1&U^O z!PNpqv%BDGfuh-6a5YlkYGFjPyWnbpqS;+=wLsDAF1T8tXm%G|El@nWa5YlkYGK5) z3s(yi&n{doP&~VEwLtOg!qrHDtA!EIE?g~8JiBnUK=JIt)dIz{3s(yi&n{ez6u4R# z@$9;*EqAqrJCNbCcy@6IGEh9bxC2RwyV^pp7AU@5^lE|PPX_dAf#TamuNEl2UG!?C zxT`I9wS`_S9OO?1^lE|PPX_dAf#OdF^lE|PPX>3jg42UdbL3D?4nl-6wj`^+Cs0!gWS~?dbL3D?4nl- z6wfYtwLtOgqE`cDyQ^(?wT)ga9HiOZ=+y#6v%Ar&1&U^OqgM+Q&F*$r+vwGJkh|JO zuNEkp-Hl!?P&B(6y;`7Xb~k!8Qry+HyV^#t77p_4qE`zP&n|kkK=JINR|^!+uDjYs zuf~Ji)i!#yK=JINR|^!+E_$^<@$8~kBgI{9yQ^*VYT+QyuDjZHSKGJ)89s|=7k3~7 z#k1?Kw%ye>?m+UhxT|e^UoBAl$>6=(c30cpt8I6+?XI@n)wa9Z_Fiqft8MSqw!7N) zUTwRpZSU2#yV`bF+wN-HU2S`>w%ygX_iEc+ZF{e_-PN|c+ICml?rPh6we7C9y;s}r zYTJ9Y?XI@HSKIDt+g)wDt8I6+?Y-J|SKHpJZFjZpz1ntHJML=7UG2E59q-kSyV~(y z?YOHQ@70dG+VNiPxT_s^wd1aK+|`cvYR6sec&~Qc)sFXS$6f8Xs~vZ>16TVO?~vYZGDQec$B>TrG^ey;>M~yNNLJ+C&)f?ZVXp#kUJr z3l!fjT#XdCS{U)|!qo!Bw+mMb6wfYPEl@nWaJ4}3?84PZfvbfP&n{doP&~Wn)dIz{ z3s(yi&n{ez6u4R#@$ACY0>!fnR|^!+E?g~8JiBnUK=JIt)kuM>g%Qs#TrE&MyKuEY z@$ACY0>!fnS0e?k7Dha~aJ4}3?84Oo#j^`n3lz^TTrE&MyKps9;A&ySvkO-X6wfYP zEl@nWaJ4}3?84Pv%KbHv?{Ae4BbMDuvF+Xw%kCYq?cNc~?j5o1-Vw|09kK1+5zFrV z4Vu1PxElWe;A&ySw+mMb6yGjfEl_;BaJ4}3?ZVYafvbfP&#t;!|3I%64)X1yR|^#1 zE_$^<@$IUs-CynMPX_dA{4D6z!iYZ^)Yb0C^(TY6+NJoDL0zqXsH@!(->$marTCLU zUF}jlyXtC};@MSKyA;o^y4wAP!oFSfY7C~j+C9j#tFCq_o?UgdOY!WgtMw0cwL9Y5 zRad(d&#t=KrFeGL)h@-etFCq_o?Ugd`)jOyyXe)JU3Im4kY`t2?NU6u>S~wb*;QBT zAL?p%#J8)ib}61+b+t?J?5e9>if30{?NU6u>T36wr~7u%t1-LkYWE<|uDaT#cy`s* zF2%F!u2$UD3VJoHuDGieceUcKR@~K!yIOHqEADE=U9GsQ6?e7bu2#^i@z2Ozt)N#6 z6wPiyuNEkp-GW|?6nC}au2!^HyZ_AueeM=_wSw=fg?}R7F21i8D85~MUyT%Zwc@T; z+|`P^TEX|#!pGIW)rz}XaaSwuYQ!fnS0e?k z7Dha~aJ4}3?84Oo#j^`n3lz^TTrE&MyKps9;A&ySvkO-X6wfYPEl@nWaJ4}3?84PZ zfvbfP&n{doP&~VEwLtOg!qo!BvkO-X6wfYPjTE?A81d}F)dIz{3s(yi&n{doP&~VE zHB#VeVZ^fwR|^!+E?g~8JiBnUK=JIt)dIz{3s)lrt`!iIu2$XE zDta}1mgcV3(5nTCZnvRV3l!aML$4Mny4{9eEl~8y(9o-q;;z=*)f#%WaF9M38hW)r z(I-R09mqh@Cqu&>$UxC2LvvSa=+$_TyIMo97AT%w^lE|P*+s7wD4t#PYNWWUHFvd! 
zUM(Et*+s7wD4t#PYJuX}MXwepo?UmfhF*;axvMqwYJuX}MXwepo?Y~6f#TUkuSSZy zT60%x=+(kOo?Y~6f#TUkuNEktUG!>!;@NdqYv|Q@kh@w#uNEktUG!>!;@L&77AT%w z^lGHIt2KAEhF&ck~97AT%w^lE|P*+s7wD4tz+wT51e2f3>?^lE|P*+s7wD4t#P zYJuX}MXyGRyIONsYv|R&L7rXoYJuX}MXwepo?Umf=B{?Xb@jt9KmGRW&wu&T@BjPr zkN7P2TUS1!u6DoY;S|m8y>agp&F;PX>=e!Jy@je2ceU=WcJJW%LB3sgweG!IcUSA) zt95s^+Z*dYw7Xh&SL^O--CeDFuh!kwy7y|`U9Ees*4@>*yIOZw>+WjZd$sPa*1cEj z?rPn8weGIgy;tk*YTaF}yQ_6~weG!IcUSA)t95s^?!8)fSL^O--CeD_t99?yy1QEU zUah;Ub??=>yIS{Nt-GsrceU=W*4@>*_iEi;t$VN5-POAHYTaF}yQ_7$+K=z8s}5HS zBkwyy7jVz43?+hG)tA&yGeK(A}Z)9P_w~JmaP<*@S z)dI!0i(V~Ie7kTpQs8P~#J7uHEl@nW=+y$nvx{CWP&~VEHBbgzEsSV(2fnWsD4N}Y z@2drhW_RHGYJsBJ9r(UlplEgnT#XdCS{Tvn4%~qZ6wU5{s|AW?cfi#GMYB8LYNWu` z!iZ-Vt`;bsUAS7Hcy{4xf#TVPs|AW@7p_JMTrG@vcHwG);@O3(1&U`Et`;bsUAP)4 zaJ4Yv*@deGif0$D7AT%wxLTlicHwG);@O3(kpfo>Bc5HjTA+A#;c9{6*@deGif0$D zMhaXljCgk8YJuX}g{uXMXBVy(D4t!oTA+A#;cBF~s||Oxfjf}lAkQxDKn99u7k3~7 z#j}e$kfgY)4fJY(;!g(jYJuY0MXwepzFqWcf#TamuSSZy+HhAJ=+(kO{$xO}7AXE? zK(7`k{$xO}7AX2;nC@y5y&4Xh?rIagTA*llCwjF&(d!;@Ndqo9NYekh|JMuNEktUG!>!;@L&77AT%w z^lGHIt4(*ciC!%n~97AT%w^lE|P*+s7wD4tz+wTWJh2f3?F^lE|P*+s7wD4t#P zYJuX}MXyGRyV`VDo9=4UU2WnHWcawAUEF~T6wfa1Kn99u*IjM8t4-X2Yct4;6Kmb=<=S6l9C%Ux}G zueRLPmiKDQU2S=wdJn1yjNT9YRg@1xvMRA zwdK9qa#vg4t1Wl6<-OW+S6l9C%Ux}`t1a)WUTwLnE$`KqyV~+zZMmy0ceUlN zw%pa0_iD>sZF#S@;A;Qsy}Py0tA&yGjf^93wJ`EFKVjtk>3JA=T`i2fHW5bN_uVk^ z+C&(6-*>~vYZGC_w+mMz1+ErGe7kV9K=JLu)dI!03s(yi-!5E@6u4R#@$ACY0>!fn zR|^!+E?g~8JiBnUK=JIt)kuM>g%Qs#TrE&MyKuEY@$ACY0>!fnS0e?k7Dha~aJ4}3 z?84Oo#j^`n3lz^TTrE&EyBn?s%7&|j5zX#~s|AW?cf-{JMYFr%YJsBJ-EcKh;A&w+ zv%BGHfuh;naJ4|u>~6SPplEhCTrE&MyKps9;A&ySvkO-X6wfYPEl@nWaJ4}3?84PZ zfvbfP&n{doP&~VEwLtOg!qo!BvkO-X6wfYPjTE?A81d}F)dIz{3s(yi&#t@Lc30cz z)%aQ5)i!#yK=JM34rHMCc5w$XP<*?%0~sj(WWXIrQry+HyV^#t77p?!1MWZuia!}} z2QpCn$$&eMf#OdFceRaPjR(1_ZS-n^;@QO=$UyPz;tph>cy@6Ik`#Bf?XI@btA&F+ zyXe&d#j}fEEl@nW=+y$nv+J(5(W~(wceRaPEl@nW=+y$nvx{CWP&~Wn)j&D!YR6se zpjQhAX?72KwLsDA9`tH~qS-y@)dEGcd)(CydNm&8u6EF?1&U_(pjQhN&F(?37ATtC zgI!iIu6EF?@gR4#gI+CAJiF-C0>!h7 zUM)~OyXe(OaaTL;Y6rbqILNb$UM)~OyXe&d#k1?KcHGsDyV`MAJGcYMe_>;k1?YOHQceUfa+HqGq-m4vVwd1|oaaTLus~vZ>Am69=9kHLD-x2!>S}kyx2vvpDZX9p)h@-itG(K#_;$5dyA&t7TX*EmZWwvD8%EyjhLLx>VdTwj7g%Qs#TrE&M zyKuEY@$ACY0>!fnR|^!)Zh@=w9MplEgrTrE&Ey9KTmD4N{@R|^!+E?kWixLO$T?7FKJceR2$km0j!h7UM)~OyXe&d#k1?KR?w^QAa}KbUM)~OyXe&d#j}fEEl@nW=+#JZS1ayn1-)80 z$g_)HEl@nW=+y$nvx{CWP&~WtY6ZO-4{}#4=+y$nvx{CWP&~Wn)dIz{i(UaJGZ z)hc?maFAxVqE`zP&2B}n7ATtCie4>HG`rPZt)f@sLGEf5y;`7Xb}M?dK+)`0^lE{k z*{$f+NO4!I?rIgiS~$qF>#kPa)hg~lhR@>J#U02%@$9;*Rd=aJGZ)vCK%byut2t5tWk>b+WZSF7HuRd==Oy;^lwtL|#mU9GySRqxfR zyIS>Lt-7mK@71ciT6I^e?rPOtt$MFk-PNl1YSmq>daqX9)vEVu)m^Q+t5tWk>aJG3 zSF7%7)qAz-u2#KQtL|#mU9GySRd==Oy;^lwtKO?sceU!hT6I^e-m6u2wd$@`-PNkQ zTJ>J7x~o<1)vCK%^!fn zR|^!+E_$^<@$8~kBL%J&Mm)Q4wLtOg!qo!BvkO-X6wfYPEl@nWa5YlkYGK5)3s(yi z&n{doP&~VEwLtOg!qrHDtA!EIE?g~8JiBnUK=JIt)dIz{3s(yi&n{ez6u4R#@$ACY z0>!fnR|^!+E?g~8JiBl;Qs8P~#Ip-m3lz^TTrE&MyKuEY@$ACY0>!fnS0e?k7Dha~ zaJ4}3?84Oo#j^`n3lz^TT#XdCS{U){!qo!BvkO-X6wfYPEl@nWaJ4|u>~^>sDBWGH zyQ_8FfeZ&}c02As28w34;|^q?Xm&g9K$7CF*3qj4iar@SdbL3D?V?u;6yGj-wLtOh zqE{ouU9G#Tb@XcCAbm1)^lE{kPlk?OEl~8y(9x>}ia#0L)jE1L9^|gp(W?cDXBWL% zpm=uCs|AW@7rh!O?rPm#t)o{92YGhUs|AW@7rk1bcy`gN1&U|aU9F>6<3a9f9lcth zcy`gN1&U`Ey;`7ncG0Vm;;z=+)jE2$aFAygy;`7ncG0T^if0$STA+A#-PJmJH6G-y z*3qj4if0$STA+A#(W?cDXBWL1Deh|BU9F>63kP|2(W?cDXBWL%pm=uCs|AW@*Ilin zSK~qMY8|~=pm=uCs|AW@7rk1bcy`gNfpULC#t*;z^xLmL|K(4=|L@N~h7mR4`>Pe4 zqS+m|0~tPxW_REYWT0qv2kt-yie`7Xs||Oxfjf}=EbeN~vYZGDQecufuuT6xJ_kA~v_;%5& zkpfo>Bfed@TA=uL(W?cDZx_8`ndsHRh-P=9R|^!)?nJK^D4N}g 
zUM)~GyA!<{DR8wgqS>8rwLsDAPPkg2Xm%%DEl@PO6Rs90o?W;aDR8wg;@O3(1&U`E zt`;bsUAS7Hcy{4xq`=j}h-VkB7AT%wxLTlicHwG);@O3(1&U`Eu0{%6EsS_};c9{6 z*@deGif0$D7AT%wxEd*NwJ_q@g{uXMXBVy(D4t!oTA+A#;c9{6*@df-0#^$oo?W#jD@tMMT8 zYGK5;i(V~Ie7oq?0>z&U=+y$nx9hGp(W~(wceROLEl~W)fL<+7{KNO4zN?rICYS~$qFi(V~IJiF-C0>!h7UM)~OyY6ZWy&4a4S6k@S0>!h7 zUM)~OyXe&d#j}fEjTCpa<*v5StA&F+yXe&d#j}fEEl@nW=+y$nv+J(5(5vwvceRCH zEl@nW=+y$nvx{CWP&~Wn)ktwyTkdKLy;?ZPvx{CWP&~Wn)dIz{i(V~IJiG2{3%wc- za#vgE)dIz{i(V~IJiG2{%Ux~Z4kSN|yV`PBTkdKL-&YHt#kY&^s|AX07vEP46n`>! zueRLPmb=<=S6l9C%X_uuuC}~aTkdMhd$r}Rw%ygXyV`bF+uo~fceU-k+ICml-m7hQ zwe7vyc30c(YTI3HyQ^*Q)wa9Z_Fiqft8MSqw!7MPSKIDt+g)vYueROQw)bk=U2S`> zw%ygX_iEc+ZM&;&ceU-Vw!K%|?rPh6we7C9y;s}rYTI3HyQ^(?we7vyc30cpt8I6+ z?Y-J|SKHpJZFjZpuD0Q7|L(oJwc%=E>jvUplEgvTrE&Ey9cgD3S2FWXm$@=El@PO2d)+< zn%x6e3lz=nfvW|IXBVzU3S2FWcy{4xf#TVPs|AW@7p@j4o?W;aDR8wg;@O3(1&U`E zt`;bsUAS7Hcy{4xf#TVPtC0d%3nQLgxLTlicHwG);@O3(1&U`Eu0{%6EsS_}-PMk} z+QA*j@L4>&xC0p|o?YC53>43S~wb*;QA&6wj`@+Wl6uXBWL1v#YLl5Ay7)t6hp` zS6%H=JiF>@m*Uw~SDPQ|YInr5i|?!P&q!VE9^~6qSGyG7uDaT#_;%IR?$_=8jZ9tb zQarobt6hq3S9`Tf@$G7_b}7DHb+!4Su69Q}yV|Q=ia&R?SGyE{?rN`gDgNBmUhVz{ z2Y>FWt6hq3S6%H=e7ovum*U%1SGyG7uDe=sS1a19&El?B+|`P^T5(q^?rOzdt+=Zd zceUcKR@~K!yIOHqEADE=U9GsQ6?e7bu2$UDio05IS1ayn#a*qqs}*;(;;vTQ)rz}X zfvf%d=X%#HaJ4Y!fnR|^!+E?kWi zxLO$T?84Oo#j^`n3lz^TTrE&MyKuEY@$ACYNP(+`5zj7MEl@nWaJ4}3?84Oo#j^`n zBL%J&Mm)Q4wLtOg!qo!BvkO-X6wfYPEl@nWa5YlkYGK5)3s(yi&n{doP&~VEwLtOg z!qq^jaJ4X^*{yK3K+)`0xLTlSb}L*hP&B(0t`;bo-3nJD1+ErGG`kh97ATtC3Reph z&2ELO1&U_3!qrHDtA!EIE?g~8JiBnUK=JIt)dIz{3s(yi&#t>#MX$z#(5rGN4xr6n`?HR|^z>GN4x@#a*qst5x)B z;ULd0dbL3D?4nl-6wfYtwLtOgx~o<6YCOnYt)f>86wfYtwLtOgqE`zP&n|j3Qry+5 zyIMuB77p_4qE`zP&n|kkK=JINR|^!+uDe=Auf~Ji)hc?mK=JINR|^!+E_$^<@$8~k zBgI{^66`hF%Q^HFvd!UM)~GyA8csplEg* zdbL2&>^Agjq`0d!ceRFIEgYoTZRphkMYG$`s|AW?x1m=H6wj`^T0^hKgWS~`dbL3D z?4nl-6wfYtwLtOgdau^p)tb9nb60D)1Id3PceRE)kb&ac#U02%@$KRcBq{D{&0Vdz zt2KAE=Dk{TS8Lv@HFveSTJv75xvMpI zwdStY+|`=*YRz4(d9T*o)tdKe&0Vdzt2KAE=C0PfS8MKS&3m=xuGYL)Ywl{zd$s1S z*4)*ayIONsYu>9hceUodT60%x-m5itwdStY+|`=9TJv75xvMqr)tb9n^IomFt2OV{ zy1QC;SL^O--CeDFuh!kwI(oJL@LuogaJ4Y!fnS0e?k z7Dha~aJ4}3?84Oo#j^`n3lz^TT#XdCS{U){!qo!BvkO-X6wfYPEl@nWaJ4}3?84PZ zfvbfP&n{doP&~VEwLtOg!qo!BvkO-v1+ErGJiBnUK=JIt)dIz{3s(yi&n{doP&~VE zHB#VeVZ^fwR|^!+E?g~8JiBnUK=JIt)kuM>g%Qs#TrE&MyKuEY@$ACY0>!fnR|^!+ zE?kWixLO$T?84Oo#j^`n3lz^TTrE&MyKps723#$SXm$r&El@PO1FjY*n%&{9Hr&+) z?m&jmqS+npY6HC*4??dNMtr;I)dI!0i(V~I^vN*Ls|AX0*IjL(SK~qMY6HDmpy-of zpjQhNeKHL6YJs9phJjv<6nC}Zt~SuCg@Zi1=+y$nvx_^Bf#TW49mqiO?7FKB^lCiF zU2ULO3lz^TdbL3D?4nl-6wfYtHB#KwhP&E8uNDsS?4nl-6wfYtwLtOgqE`zP&#t@L zK(EGw+|>qpwLtOgqE`zP&n|kkK=JINS0lw;ZMdrq^lITC&n|kkK=JINR|^!+E_$^< z@$9;*4fJX}$X#uqR|^!+E_$^<@$8~k3lz^TdNoqq)rPy;K(7`K^6a8l3lz^TdbL3D z?4nl-6wU7aZH7Pm^3!j>{`{9e{r;x~olhwdt-l-PNY|YSUe9 zdapL!)u#7q(_L-4t4(*c>8>`tSDWr?(|fh)t~R|_o9=4Ud$sAVHr>^xyV`VDo8GHU zceUxg+H_Z&-m6V_wdt-l-PNYM+Voy+x~om^)uy}J^j>Yct4;6Krn}m7SDWr?(_L+P zuQuJ)ruS;oU2S@=Hr>^xyV`VDo9=4Ud$sAVHoaGy?rPI}wFy`IkMDh{iC&E(aJ4Y< zzVC*S_kA~vyl-S-!fnR|^!+E?g~8JiBnUK=JIt)kuM>g%Qs#TrE&MyKuEY@$ACY0>!fnS0e?k z7Dha~aJ4}3?84Oo#j^`n3lz^TTrE&MyKps9;A&ySvkO-X6wfYPEl@nWaJ4}3?84PZ zfvbfP&n{doP&~VEwLtOg!qo!Bv+J(5+|`!5+Cs0!kBeR{jQDnO2QpB6ySM`xD860X zfeaLXGPtWP^lCiFU2WkGWT5zy0e2t+#h(ng0~sj(WWXIrP`10;c30cD0~rp|>~7qF z3>3}o#vRB&(d=&AfeaMQ?siw(=+$_TyV^#t7ATtCjb1HKG`kzUTA*llH+nTv+|{ zw%ygX_iEc+ZF{e_-PMk}+HqGq?rO(-wd1aKyjMH!YR7xE zPJB$im2L6C8o7g^{=U2_tW>7DirI3nQ;hgpt?P z!pQr+8%BJ)aJ4}3?ZVYafvbfP-!5D&P<*>^wLtOh!qo!BvkO-v1+ErGJiBnUK=JIt z)dIz{i(V~IJiF-C0>!fnS0e?k7Dha~aJ4}3?84Oo#j^`n3lz^TT!fnR|^!+E?kWixLO$T?84Oo#j^`n3lz^TTrE&MyKps9;A&ySvkO-X6wfYPEl@nW 
z>T37v0iIplfefF;v#YMQKG3W2AoOZs#J7uHEl_;B=+y$nx2vvp_u|`CSG!-~@a&>j z<3EwQ+C9jh4C-o^;!g&3wM+3QgSy)KP*=MnzFl>-OY!Wgt6hp`S6%H=JiF>@m*Uw~ zSG!*v^6jEmV|LZm?m?bib+t?J?5e9>if7kdt+=Zd^lDgLaaSwuYQ$tH6G-yR?w>jiavJ>dbL3D?4nl-6wfYtHB#Kwio04t zuNDsS?4nl-6wfYtwLtOgqE`zP&#t>#L9fPx+|>$twLtOgYOl76yIOHqEADCqcOdys zHnEl~W);I3BO)rz}XaaSwuYQaJG5aJ7H%+fV=Vw;x`a{>Pt}JpTIszds*-|Nr-eblMuLflh8K|*d|f4fnb|N#Rh_H67?Dgwnk1RAw&$ z5{3cGBm@iu%Or|25G<1rFAyw~kS-7`lMt?#0NuiXWfH0df@Kn#1%hP~iUopY5_7{z|`1=uXjL;hzN+7AJJ4D6q)Toua-X zLwAbuiVWQ;sw*%cKAg~;qPHSLbBflA49zJzD>5{vXspQ4oI-)XKwZTN%_+(%GBl^C zs>slsqNpN6bBdY@49E~CG^gmO$k3dkp&~hUOGy6B(LQR822~O%o?9r|g-?u$)%h2qeRDT5%tc49jW7Z9ppaEf|VMTbO&{kvb$B9USL?pO3k zWZ1v^6-^Qt>QPlas-jHdDf;g|>LfBWrzn)j(43-DB13bk9#zpQ;S}|#ie8Bf%_*8C zGBl^?mdMbYqFn+*J*uilRTNA-MRSUZi44stN+vQir>L38(44AARWwaFMLnvbYa&B) zinfUi%_;gOGBl@XoWM|zs_Ib{r4vuloT7FjLvxDai44stswXlur|MA^?GsK>kE-aO z$k3dkfg(e5iVlhl%_&+aFw~=}dQ?Rb#Zxq=UsO?l?JTN#RK=}5eh$qk?(C6aIc>PH z_c9tT>~X@}wBf5&WLQoczF0+u<+R~zRb*IB8@^NphI&*}k80{sO+BiqM>XxEntD{z zKB}olHSMFCdQ?-7YU)uQPPmsHPs(w2x}) zQBC`(rXJO_k80{sO+BiqM>X}RrhQaXk80XSHT9^beN7!Y%8B<~iW9F#apHZK;>7!Y$_d>m`Y15qQJm17!lTI0ouZE-LwAZk ziVWQ;`Y19qr|>8+;8C2=oT85+Lvsp`B13bEK8g&@Df+0F(cw{?u$*>$?}-e{X-6ML zhUK)Qk0Qfz+R;amVL9#aC@|nroUojBcoZ3y(+-a!!*bf;QDj(7J3I;ucoZizr|>8; zG^g+=GBl^~C^9ss@F+4gr|>8+;8C2=oWi5X(44}f$k3d^qsY*l!lS@|M{z=P3XdW~ za|(|lLvsp`B13Zuk0L{J3XcK<9>odGDLje{%_%&J49zJ#iVV#uJPHhW6el#N@F+4g zr|>8;G^grOT|KJfULQY)=2ShZqmRNV=%YBHJ4GKwhVB%76dC&OKKdv!bf@Z39eor| zQIG2AqsY*I_t8g@q5tlqk0L|=-A5kQMuI)O*TMj~eKs$grFa^igD3P6zrZGAyS9eH0k#QA0gyppW7ymeYYgiVVx?Kp#bh z<#eErB13bk9yQQM;S}|#fj){1%_;gQGBl^?qsY*lqK^VYJ!+^&4fIhwMRSTiiVV#u z`Y19qr|6@|(44AA4fIhsMLlYuk0L{Jiav@A%_;gQGBl^?qrgy)8tPF4eH2g8oT85+ zLvxBgiVV#u`Y19qr|MAyeH2bnj~eKs$k3dkk0L{Jsvb4eqXzEv;d7`*4fUv@9yRd& zD1HvzDZU>?hVB&Kk0L|=-Pb;9s7DR;sG%M;)T4&>QA0gyXdgAyqlWfTLp^G!M-BC; zp&m7~j~ePxL;I+q9yPR&8tPF)`>3fNHPxf0del^pn%YNA^{AGA2rpZruI=&J!)znHPxf0 zdel^pn(9$g`=|+zs?WWgi9U)G@8A2Jc;8Ps@&3KfiTC{!6YwZbyg%pX#M_;4;(h1m z#QQGAiT9nK6S`A)6dAfxcoZ1$C{E~3;ZbDhPT^5x=uY8LWN1#|QDkUN;Zb0~qd1{C zg-4O0IfX}&p*e*|k)b(-M}Yy4;)LcD9z}-c6nzvKnp5;qWN1#&N0Fg9g-3w_kK%;p z6dpx}<`f=9hUOF=MTX`S9t8$GiW8bscoZ3$Q+N~^np1cb8Jbgg6d9J&``^3$;g_F& z`}OC){OR}q{rSh2asP9-I$=3o@F<>QIlccSMEx9=)BB&BQ-I-*8M;$=6dAfxcoZ4BQ+N~@@F-4bPT^5x*uVP=9z}-c6dpx}<`f=9hUOF=1qM8d z6PiQM`Q6i!i(TIi$5(43-=B13bEK8g&@Df%cd)T5Sq)IuM{Q#7aOqsY*lqK_g& zbBaES49%%})IuMHQ`Dmt`Y19qr|6@|(43-=B13bEKI&y`^{A~Lwb4iM6wB#GA4P`c zbfb?V!*aUON0DJU-ReB13nIK8g(8se05#AB9uYqc-{| zGVI^|jXsJD`*(k%k0QhV-QVb=z)+9c>QNhg6i?BA_qC7O>QP%gYUBG+{G_^5d_M{d z^{A~Lwbi3G?)C9=Xijmjj||PJ_EB3sYHJ_0)uXn0)K-t$>QP(!sI4BgwU65BQCs_{ ztsb@2qqcg~R*%}+M{V_}t$oy1kJ{QtZS|QP%gYHJ_0)uXodQCmG~Yag}Mqqg=@TRm#4M{V_}tsb?tkJ{={Tl=W3 z9<{ZP+Uik9J?f}O9rdWAebiBpI@(7a^{AtL)KQN*+D9GrsG}Zr)T54i)PYB}=f2cI zAH|9HU5XR0M{(kPKjp;hQJ8>7apLWxIPrEToOs{)Iq|-qa^iJUPUueIQDo>&;Zb0~ zqd1{Eg-4O0JB3G)p*w{~k)b(7A4P`d6dnZzJc<*VQ+N~^np1cb8Jbgg6d9URcoZ1$ zC{Acj;ZbC0PT^5xXini#WN1#|QDkUN;Zb0~qd1{Cg-4O0IfX}&p*e*|k)b(-M}Yy4 z;)LcD9z}-c6dpx}<`f=9hUOF=MTX`S9t8$GiW8bscoZ3$Q+N~^np1cb8Jbgg6d3R* zPH0ZyQDkUN;ZbC0PT^5xXini#WPCrTfAj6X!~0(B`+Lae$oIGUIr9CeevW*9ub(5| z59{a1_c!}F^8L7ej(mT&pCjK7?B|GY7fQwL!l}ZDZx>b-D8Ah!vU{K@czb$wF&x%-88eNz0n z`-ORZqkN%WpCkUa+86TmN%8D{ppA^sT z7n=4-@$7yfYM&I(?iZ@|Nzv>UlwNqUZ+64TyWKGIW;cwy+r>zQtA&x*gu}?|YGLHfZWwu= z3}HmKTX9V>P;|Q$7Zn4=w+mMz1+ErGe7oq?0>!rrR|^!)ZiTA_ie|UM)kuM>g%Qs# zTrE&MyXe&d#j}fEEl@nWaJ4}3?84PZfvbfP&n{doP&~VEwLtOg!qo!BvkO-v1+ErG zJiBnUK=JIt)dIz{3s(yi&n{doP&~VEHB#VeVZ^fwR|^!+E?g~8JiBnUK=JIt)kuM> zg%Qs#TrE&MyKuEY@$ACY0>!fnR|^!+E?kWixLO$T?84Oo#j^`n3lz^TTrE&MyKps9 
z;A&ySvkO-X6wfYPEl@nWaJ4}3?84Oo#j^`nBL%J&Mm)RjYSmq>;(KJ_vv_v#O|n4o z?Bcs*pfq>2hF&dDbh{0`TA=8Yp`lj`6y0t^uNEk}-G*L`6nC}euGY}2g@g3T(9o*| ziar?{dbL2&Cqu&>$UxC2LvvSa=+$_TyIMo97AT%w^lE|P*+s7wD4t#PYNWWUHFvd! zUM(Et*+s7wD4t#1feaMSF77}Eif7kdt)W-rLGEe|cOV1Bvx_^Bf#TW49mqiO?BWh2 zDeh{`U9F*43kP|2aR)L`JiE9987Q7z+<^=f&#t>#L$Ahz+|?THKn99u7k3~7#j}e$ zkb&aa#T`ge+|`=9T0^fE4)W~c4rHKsc5w$XP&~W10~siuU3ayHUX2I2t2Nw#3>42U z?mz~LXBT%M1I4q8JCLNft2KAEhF&ck+WjZd$sPa*1cEj z?rPm#t-GsrceU=lT6b6L-m7(YweG!IcUSA~YTaF}yQ_8Y)w;V{_g<~Lt99?yy1QEU zUah;Ub$7MyuGZbvy7y|`U9Ees*4@>*_iEi;t-GsrceU=W*1cEj?rPn8weGIgy;tk* zYTbLa?ylC|)w;V{cUSA)t95s^?!8)fSL@!Zb$7K6R~zs3t`1iVBkxbo!^r!-8%AE6 z2qW(sSr~b3B8q;o82(-Za0j)*$pG_b~%C<>oD?WH;lYbhA`sW#fx>I_;&GP9VotCyjTZ{ zZx=7tq~OImjQDo(VjU=+UA$Naif0!u)`8;L#fvp5c(D#6o?X0H2a0DGFV=zL*~N=> zpm=uiVjU=+UA$P6f*0#B;@QQ8wLtOg;=)>>cy@7NEl@nWxUfbFUaZ52XBQ<%pm=t1 zVJ%QRyST6xD4tzhSPK--E=mwmP=bUJ&n_;k1&U`EB}kxnc5z`XP&~W1um%b*tc4NH zZbJzYD4N{{R|^!)ZbJzYD4N}d5+qPGyA7^J3QCYLqS|gy+u&+}qSak#3u}Sm+eHZyD85~kAc5lB zMF|opzFl`UURdKnC_%!AZ`WO|p#%vB`I7-9NTB$W0VN12?rOZS7AXGQbysUBLBc_v zU6de!;@L$B5-6TscQsyE<3a9fys#E1{MC2ILNb$3u}Sm*~NvmK+){-!Wt;NuogyiyS%U#D4N}lFBb)hW|tS%0!6dS z3u~mftMS5Gp!jy()p%ho9HiOhg|$G@?DE1|pm=uO)p%iz2f3^9!djqsc5z`XP&~W1 zuoftuU0hfr#a)dT)&j+|iwkRk;@icAwLtOh;=)>>_;zt&jTCn^URVnh-!3k!1&VLi zU5yvk!a@FI@Lr7<)_9P+8ZWE`if`9ljThF!LH^uzSL21XaFA!$do^BI<3YHv7Djx# zxUd!|o?Uk}URVnUd3L>5eO4pm=uO)w=g;-CeDFuh!kwy1QEUUajNHMO>Y`T6b6L-m7(YweG!IcUSA) zs||d)2zwd$a#0x3C&R!U$UxET4tKSIUhO}>7qbSwTogv$ztzIXYZDxStA&xb`3WQM zcEiZ~)AKO$Za0j)?+jt&ecufu@84=+#J7uHjTE?A81e1G)dI!0i(V~Ie7oq?0>!tB zUM)~OyKps9(5r!h7UM)~OySM`x zD4t!oTA+A#;cBGd4rCbd?84Oo#j}e$kb&aa#U02%@$ACYNI|a_Mm)Rd)dIz{i#w2k z;@QO=$UyPz!qo!BvkO-v1$Q9Bh-VkB7AT%wxLTlicHwG);@O3(kpfo>Bc5HjTA+A# zaR)L`JiBnUK=JIt)dIz{3s)lrt`!fnR|^!+E?f9mqiO?4nl-6wfa1Kn99u*IjL*SK~qMY7@O$pm=uCs|AW@7k3~7#j}e$ zkfgY)O?S14U)mQA^6a8l3lz^TdbL3D?4nl-6wj`^+C;C$gWT07dbL3D?4nl-6wfYt zwLtOgqE{ouU2VFnP4sHvAkQv(wLtOgqE`zP&n|kkK+)_jceRCH4F@fEwS`_SP&B&> zy;`7Xc9*-_a#vgUrG5M??rO_jZMmy0+<^?A#kY&^s|AX07vEP46n!!*@73-vqxs>N zpMLxG=fC{v_y7I*M;_#^w%pa0yV~+zZMmy0@70#O+VWm)xvMRAwdJn1+|`!%YRg@1 zd9SwI)t2{a%Ux}GueRLPmb=<=S6l9C%X_uuuC}~aTkdMhd$r}Rw%pa0yV`PBTi&ZJ zceUlc+HzN0-m5KlwdK9qa#vgKYRg@1xvMSj)t0;3@?LGZt1a)n4?NWTZ z>S~wb+f`S)eXnmnYwM+5sYOi)FzFl>-{h_XQNBqg4z1pStlRn4?NWTZ>S~wb+f`S)6yL78+Wt^iyCeQ&P*=MY ze=@+;{_At=>Qde(Ll}9t8%EwILl}9t8%EwI14rO$VdULz7!tBUM)~OyXe&d#j}fEjTE?A81d}F)dIz{3s(yi z&n|kkK=JINR|^!+E?kWixLO$T?84Oo#j^`n3lz^TTrE&MyKps73S2FWXm$%+El@PO z1+Eq-n%x3d3lz<6fvW|IX1BoANP(+`5zTIas|AW?x4_i`MYCJrYJsBJEpRnb;A&yS zvkO-X6wfYPEl@nWaJ4}3?84Oo#j^`nBL%J&Mm)Q4wLtOg!qo!BvkO-X6wfYPjTE?A z81d}F)dIz{3s(yi&n{doP&~VEwLtOg!qrHDtA!EIE?g~8JiG2{1-)80$hV7LEl_;B z?rH_S8V^FR7Dha~xC0p|o?Umff?h2g#MX$z#+|??2wLtOgqE`zP&n|kkK=JIN zS0lw;t-7mK^lITC&#w1s)m^Q+t5tkoEqq+xF21iuio05MSF7%76?Y)RXYuUf4rHKs zcHPyg_iEK$t-7mKceU!SR=roN?rPP0wd$@`y;rO5YSnwS>aJGZ)vCK%byut2t5tWk z>b+WZSF7HuRd==Ou2$XEs=HeCUah*TRqxfRyIS>Lt-7mK@70>ST60%x?rP0lt$DB3 z+|`=*YRz4(d9T*o)tb9nb60EbYR!AK=C0PfS8MKS&3m=xuGYL)Ywl{zU9GvRHFve< zy;^fuYu>9hceUodT60%x?rIIL_TS#?T@9`lM&9?`F!H{Ug^}0Q!pQqZ7DirI3nTCQ zZWwu8jU#ZiF!J_lVdQOo!iaAdy;`97cG0T^if~^>sDR8wg zqS@_mwLsDAcDP!gXm&eXEl@PO9j-o zR~zWn0!6bs(5nTCW_O@h3lz=nK(9uMyV`J98|c--L7LrxUM)~Gy92#iplEgndbL3D z?7FKB^lCiFU2ULO3lz^TdbL3D?4nl-6wfYtHB#KwhP&EuR~znX19u?9$Mx*u4rHKs zc5w$XP&~WtYQtS^;0`1|i@VzJUTwIm4e!;4yV~$xZMdrqceUZJHr&;Q_iDpkZFsLX z+|`EnYQtS^c&|3x)rPy;a911dYQuZA;jT8kR~znX!+W*it~T7&hP&EuR~z1|4R^KS zz1nbB8{Vr8ceUZY+HhAJ?rOtbZMdrq@70F8+VEa&xT_8C)rPy;a911dYQtS^c&|3x z)rR+K!(DB7uQuG(hWBdIU2VFnO?S2Ft~R|_o9=4Ud$sAVHoaGy?rPIrZMv&XceUxg 
z+H_Z&-m6Wx+JAqqy-oCLVdS-mF!H|das;jxM&4d6jJ(}M7D85~|TA+A#;cBG7)xwBp7p@j4o?W>#nxYtMMRrwS`_SQ2fb&JCK3mPX^q9 z3>1Gd;0`1y?rO_jZJ}2S2YGhUs|AW@7rk1bcy`gN1&U|aU2UOP<3a9f3%y#Pcy`gN z1&U`Ey;`7ncG0Vm;;y#b)fRfSaFAygy;`7ncG0T^if0$STA+A#-PIO)H6G-yw$Q5u zif0$STA+A#(W?cDXBWL1Deh{^U2UOP3kP|2(W?cDXBWL%pm=uCs|AW@*IjL)SK~qM zY74zupm=uCs|AW@7rk1bcy`gNfpY&VVL$xx({I22{Fgud{=Yx}7)I14?tibzDVp8; zAJ=h;W_P1k3;#r#-Hl!?P&B*SU2UUR<3a9f8@*bf_;%5&1&U^OyQ^(?wT_o~|4tG$#vV%xnV zmfbsI+r1-}-8*92y(5<0J7U|tBbMDe;@gF*al3G}Fyh;Vs|AX07p@j4zFoLlp!jy- zYNWu`!iZ-Vt`;bsUAS7Hcy{4xf#TU!SGzx3@$9Or-5~97AT%w^lE|P*+s7wD4tz)wfjRq->$marTBK$ z)h@-itFCq_zFl>-OY!ZhtKFZM`gYN)@lRS^?H=S$26eSd@h5}2+NJoDL0#?s9NFK< z(5rEE>T35O&#t=KrFeGL)h@-etFCq_o?Ugd`%`+)E_yZo0jR6pgFL(HYM0{KRad(d z&#t=KZ7)2#=+(G7b+vntXIEYAQarorYM0{KRad(d&2Dj5EADE=U9GsQ6?e7bu2$UD zio05IS1ayn#a*qqs}=NWh<9;UE9lh%MYCJbs|AW?x1d)G6wj`^T0yVIgWS~$dbL2& z=Wao-7AX4ME!wNy8djgX#a*qqs}*;(;;vTQ)e7!Fa&_)%1$Q6=#j}e$kb&aabyq9y zYQn)ch9c&YM0{K)n4sVJiFSf-TMLh&QRRdio05IS1ayn#a*qqs}*;(;;vTQ z)rz}XaaSwuYQ}fTgz-r;-{C+o_oQr!nIlqAnC#TiI$@wi#I618rPEHAjlhbP9#NRHg#uQjBocP;? z)k2EDU05xo_}himLW;j#SdA&LS~&6U!fGMKy9=v@6z?vq7E-*suv$p*?!szJfz`r^ zcNbO*Dc)UJEu?sNVYQIr-G$Yd0;`1+?=GwsQoOscT1fHk!fGMKy9=v@6z?vq#uQjB zoOpL(wUFZ7h1EiecNbO*Dc)UJ4JkLQ7EZLgH>?&?w7WN~7E-jkH>?&?w7WN~7E-jk zH>}1KSS_4rcW+oNq-b|}ny!yX#iF-D)>>HU3-NYBzSZkm7F_yIM%`w~JjZr1;x)tKHbu_#n61ja@CI z_$LE)wUFYU4A|8|ihnX-S7VA>?RKl(*ww;={F4E@T1fHkVpj_(-d*f!A;r7vR=csQ z@j-628@pOa@$O<*3n|`R>}ny!yNg|oDQ>mft#)Hq3lH+{Vpj_(-d*f!A;r6kT`i<& zcgw9-*wyf$a;p_~wUDCSE$nI`MY~(r)k2DPx3H@*#jRFuwZg6z9;DqZ>}nxJyIa`R zLW*{`u&ad>@2*>|u&ePwZneU$7E-*s*wsRccNe=_Nb&AsS7VA>t=wvbT`fGwyNg{d zqkR&KR&tCd@=d{-;C zTKTS4Zng4Vt=wwmyV`cEZMWKXt8KU1_FZke)wb_y+pV^JSKDs2?N-}vwe42hzN>Aw z+V)*-yVbVuYTK>0eOKFVwe42hZnf=J+rF!9x7zkyZM)UB?`qqvw%uymt+w52+jq6? zR@=U-ZMWL?U2VJ7wp(qx)wWx0yVbT^ZM)UBTW$NUw%uwQyV@`3FHASA#uHdAoSch$ zI5{`raB_an5Khi-al*;@JwrG-B^*x9?-|0$DdBK(en-X=SS_6R+lAFaioac0Eu{F{ zh1Eiezg<`@r1;x~)tCaSg%j^CtQJzdyRceF@$SNEA;r53t1$&u3n$)PSS_S@cVV@V z;@ySSLW*}6RtqWKU097Nuv$3r?!syz#k&iug%s~DtQJzdyRaHk9#}1$Xm=l2Eu?67 zA6PA}uh}-!68wkm7F_yIM%`w~JjZ zr1&R;TkXNF#s|689_(r%#XlLatA!N*WWcT#Qv8zvyBbs6YL8p(!LAk_e2`o1!LAlkyt~-dLW*}6yIM%`?qXMCid*e*t3BA&!h^iK z*wsRccNe=_Nb&AsR|_fH-FB-rb~QYx-D-_pEu?678@pOa(e5^OwUDCSZR~1HajUgk zt+A_x2WfX3yIM%m?lyL{kfPmf>}ny!yX#hK>}q_FTdlFHg%s~DcD0b=-NmjJQoOs^ z)tKT|YqwfsR|^mF?qXL9Dc)V|Y9YnDi(M_Gcz4}uja`ioa;r6VwUFZ7#jX}oyt~-d zLW+0St=4X}cB{2pt??U3{vWy38oz-IDgJix8_1C2Zx_FTWQtp@-D>StYqwhauGVg~ z_Fb*rYVEsPyVcsQ)^4?StF`ZH?N)2w)!MDrzN@uct$kN(w_3Z^+O5`Zwf0@D-D>T- zTD#TSceQq_wOg&-YVB5Q-__c!*1oH?TdjRpYqwhat~PG9ajT76ZQN?(yV|(b#&@-G ztBvn!<5nBD+PKxmtv0@^jazMeR~xt5_^vi?weekT+-l=i8@JlH)y8+VajT8*YU5TL z-_^#gHg2_XtBqT2d{-N{+W4+EZng1UZQN?(yV|(b#;rDPwQ;MB?`q>#8{gH&tv0@^ zjazNpYU5TLx7xVX#;rDPweekT+-iee?bq`crUR?-1Xc?t=O!FZ&c!{PoZsSvlkbzDgJh0wUFX(7gh@?{&rzC zrod|9#JdZtg%s~DtQJzdyRceF@$SNEOo7$HiFX%P3n|`RSS_S@cVV@V;@ySSLW*|x z^B0}J{rUatuRnhO;otv$`w1y8tQJnRyPrRt>krcIe*RRar)YOSe>Blkw7Z`_U#BUs z8ebi(7Eb)_!fGMK-!7~cQvB`0Y9YnnF095BSS_6BC&LS?g%s~DtQJzdyRceF@$SNE zA;r53t1$&u3n$)PSS_S@cVV@V;@ySSLW*}6R$~gR7EZjouv$p*?!syz#k&iug%s~D ztQJzdyRaHlV6|}K-G$Xcigy=Q3n|`RSS_S@cin2QTkXZJ#(xWTwQ%Cy#jX}oyt~-d zLW*}6yIM%`?z+`p>}q_FTkXZJ7E-*s*wsRccNe=_Nb&AsS7VA>?RBfY*ww;=yt~-d zLW*}6yIM%`?qXL9Dc)VT+KXL{4|1!$*wsRccNe=_Nb&AsR|_fLUF>TArTqBy{qL`D LpFf5DKUn(*c!PlY literal 0 HcmV?d00001 From 303bf97289d09a5024929f7ffa256e51cdd469bb Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 27 Aug 2021 13:48:46 +0300 Subject: [PATCH 020/472] Backport #28213 to 21.9: Fix reading of custom TLD w/o new line at EOF --- 
src/Common/TLDListsHolder.cpp | 3 ++- tests/config/config.d/top_level_domains_lists.xml | 1 + tests/config/top_level_domains/no_new_line_list.dat | 1 + tests/queries/0_stateless/01601_custom_tld.reference | 4 ++++ tests/queries/0_stateless/01601_custom_tld.sql | 5 +++++ 5 files changed, 13 insertions(+), 1 deletion(-) create mode 100644 tests/config/top_level_domains/no_new_line_list.dat diff --git a/src/Common/TLDListsHolder.cpp b/src/Common/TLDListsHolder.cpp index 34bef8248b5a..db0a762f8267 100644 --- a/src/Common/TLDListsHolder.cpp +++ b/src/Common/TLDListsHolder.cpp @@ -64,7 +64,8 @@ size_t TLDListsHolder::parseAndAddTldList(const std::string & name, const std::s while (!in.eof()) { readEscapedStringUntilEOL(line, in); - ++in.position(); + if (!in.eof()) + ++in.position(); /// Skip comments if (line.size() > 2 && line[0] == '/' && line[1] == '/') continue; diff --git a/tests/config/config.d/top_level_domains_lists.xml b/tests/config/config.d/top_level_domains_lists.xml index 7b5e6a5638a1..a10cbae1b43e 100644 --- a/tests/config/config.d/top_level_domains_lists.xml +++ b/tests/config/config.d/top_level_domains_lists.xml @@ -1,5 +1,6 @@ public_suffix_list.dat + no_new_line_list.dat diff --git a/tests/config/top_level_domains/no_new_line_list.dat b/tests/config/top_level_domains/no_new_line_list.dat new file mode 100644 index 000000000000..4d5f9756e551 --- /dev/null +++ b/tests/config/top_level_domains/no_new_line_list.dat @@ -0,0 +1 @@ +foo.bar \ No newline at end of file diff --git a/tests/queries/0_stateless/01601_custom_tld.reference b/tests/queries/0_stateless/01601_custom_tld.reference index 04204ebf02a5..ee326a778349 100644 --- a/tests/queries/0_stateless/01601_custom_tld.reference +++ b/tests/queries/0_stateless/01601_custom_tld.reference @@ -28,3 +28,7 @@ foo -- vector xx.blogspot.co.at +-- no new line +foo.bar +a.foo.bar +foo.baz diff --git a/tests/queries/0_stateless/01601_custom_tld.sql b/tests/queries/0_stateless/01601_custom_tld.sql index ceb00d5ff197..92ce28828f8e 100644 --- a/tests/queries/0_stateless/01601_custom_tld.sql +++ b/tests/queries/0_stateless/01601_custom_tld.sql @@ -37,3 +37,8 @@ select cutToFirstSignificantSubdomainCustom('http://www.foo', 'public_suffix_lis select '-- vector'; select cutToFirstSignificantSubdomainCustom('http://xx.blogspot.co.at/' || toString(number), 'public_suffix_list') from numbers(1); select cutToFirstSignificantSubdomainCustom('there-is-no-such-domain' || toString(number), 'public_suffix_list') from numbers(1); + +select '-- no new line'; +select cutToFirstSignificantSubdomainCustom('foo.bar', 'no_new_line_list'); +select cutToFirstSignificantSubdomainCustom('a.foo.bar', 'no_new_line_list'); +select cutToFirstSignificantSubdomainCustom('a.foo.baz', 'no_new_line_list'); From adfe9642c2c165fa979ca247aca1e4f464f9f60b Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 27 Aug 2021 13:49:51 +0300 Subject: [PATCH 021/472] Backport #28190 to 21.9: More correct list watches semantics in ClickHouse Keeper --- src/Coordination/KeeperStorage.cpp | 42 ++++++++---- .../test_keeper_back_to_back/test.py | 66 ++++++++++++++++++- 2 files changed, 95 insertions(+), 13 deletions(-) diff --git a/src/Coordination/KeeperStorage.cpp b/src/Coordination/KeeperStorage.cpp index 320754c7d312..46905c53113c 100644 --- a/src/Coordination/KeeperStorage.cpp +++ b/src/Coordination/KeeperStorage.cpp @@ -151,19 +151,39 @@ static KeeperStorage::ResponsesForSessions processWatchesImpl(const String & pat } auto parent_path = parentPath(path); - it = 
list_watches.find(parent_path); - if (it != list_watches.end()) + + Strings paths_to_check_for_list_watches; + if (event_type == Coordination::Event::CREATED) { - std::shared_ptr watch_list_response = std::make_shared(); - watch_list_response->path = parent_path; - watch_list_response->xid = Coordination::WATCH_XID; - watch_list_response->zxid = -1; - watch_list_response->type = Coordination::Event::CHILD; - watch_list_response->state = Coordination::State::CONNECTED; - for (auto watcher_session : it->second) - result.push_back(KeeperStorage::ResponseForSession{watcher_session, watch_list_response}); + paths_to_check_for_list_watches.push_back(parent_path); /// Trigger list watches for parent + } + else if (event_type == Coordination::Event::DELETED) + { + paths_to_check_for_list_watches.push_back(path); /// Trigger both list watches for this path + paths_to_check_for_list_watches.push_back(parent_path); /// And for parent path + } + /// CHANGED event never trigger list wathes - list_watches.erase(it); + for (const auto & path_to_check : paths_to_check_for_list_watches) + { + it = list_watches.find(path_to_check); + if (it != list_watches.end()) + { + std::shared_ptr watch_list_response = std::make_shared(); + watch_list_response->path = path_to_check; + watch_list_response->xid = Coordination::WATCH_XID; + watch_list_response->zxid = -1; + if (path_to_check == parent_path) + watch_list_response->type = Coordination::Event::CHILD; + else + watch_list_response->type = Coordination::Event::DELETED; + + watch_list_response->state = Coordination::State::CONNECTED; + for (auto watcher_session : it->second) + result.push_back(KeeperStorage::ResponseForSession{watcher_session, watch_list_response}); + + list_watches.erase(it); + } } return result; } diff --git a/tests/integration/test_keeper_back_to_back/test.py b/tests/integration/test_keeper_back_to_back/test.py index 64f2f42d71e5..f73b46717988 100644 --- a/tests/integration/test_keeper_back_to_back/test.py +++ b/tests/integration/test_keeper_back_to_back/test.py @@ -218,6 +218,10 @@ def fake_callback(event): print("Fake data", fake_data_watch_data) assert genuine_data_watch_data == fake_data_watch_data + + genuine_zk.create("/test_data_watches/child", b"a") + fake_zk.create("/test_data_watches/child", b"a") + genuine_children = None def genuine_child_callback(event): print("Genuine child watch called") @@ -233,16 +237,74 @@ def fake_child_callback(event): genuine_zk.get_children("/test_data_watches", watch=genuine_child_callback) fake_zk.get_children("/test_data_watches", watch=fake_child_callback) + print("Calling non related genuine child") + genuine_zk.set("/test_data_watches/child", b"q") + genuine_zk.set("/test_data_watches", b"q") + + print("Calling non related fake child") + fake_zk.set("/test_data_watches/child", b"q") + fake_zk.set("/test_data_watches", b"q") + + time.sleep(3) + + assert genuine_children == None + assert fake_children == None + print("Calling genuine child") - genuine_zk.create("/test_data_watches/child", b"b") + genuine_zk.create("/test_data_watches/child_new", b"b") print("Calling fake child") - fake_zk.create("/test_data_watches/child", b"b") + fake_zk.create("/test_data_watches/child_new", b"b") time.sleep(3) print("Genuine children", genuine_children) print("Fake children", fake_children) assert genuine_children == fake_children + + genuine_children_delete = None + def genuine_child_delete_callback(event): + print("Genuine child watch called") + nonlocal genuine_children_delete + genuine_children_delete = event + 
+ fake_children_delete = None + def fake_child_delete_callback(event): + print("Fake child watch called") + nonlocal fake_children_delete + fake_children_delete = event + + genuine_child_delete = None + def genuine_own_delete_callback(event): + print("Genuine child watch called") + nonlocal genuine_child_delete + genuine_child_delete = event + + fake_child_delete = None + def fake_own_delete_callback(event): + print("Fake child watch called") + nonlocal fake_child_delete + fake_child_delete = event + + genuine_zk.get_children("/test_data_watches", watch=genuine_child_delete_callback) + fake_zk.get_children("/test_data_watches", watch=fake_child_delete_callback) + genuine_zk.get_children("/test_data_watches/child", watch=genuine_own_delete_callback) + fake_zk.get_children("/test_data_watches/child", watch=fake_own_delete_callback) + + print("Calling genuine child delete") + genuine_zk.delete("/test_data_watches/child") + print("Calling fake child delete") + fake_zk.delete("/test_data_watches/child") + + time.sleep(3) + + print("Genuine children delete", genuine_children_delete) + print("Fake children delete", fake_children_delete) + assert genuine_children_delete == fake_children_delete + + print("Genuine child delete", genuine_child_delete) + print("Fake child delete", fake_child_delete) + assert genuine_child_delete == fake_child_delete + finally: for zk in [genuine_zk, fake_zk]: stop_zk(zk) From 9e03e81831c60f131694b3570f5d12872b624ab7 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 27 Aug 2021 15:44:55 +0300 Subject: [PATCH 022/472] Backport #28178 to 21.9: Projection bug fixes and refactoring. --- programs/local/LocalServer.cpp | 3 + programs/server/Server.cpp | 6 +- src/Interpreters/InterpreterCreateQuery.cpp | 9 +- src/Interpreters/InterpreterCreateQuery.h | 2 +- .../MergeTreeBaseSelectProcessor.cpp | 4 +- src/Storages/MergeTree/MergeTreeData.cpp | 93 +++++++---- src/Storages/MergeTree/MergeTreeData.h | 3 + .../MergeTree/MergeTreeDataMergerMutator.cpp | 34 ++-- .../MergeTree/MergeTreeDataPartInMemory.cpp | 36 ++++ .../MergeTree/MergeTreeDataSelectExecutor.cpp | 14 +- .../MergeTree/MergeTreeDataWriter.cpp | 155 +++++++++++------- src/Storages/MergeTree/MergeTreeDataWriter.h | 28 +++- .../MergeTree/MergeTreeWriteAheadLog.cpp | 14 +- .../MergeTree/MergeTreeWriteAheadLog.h | 2 + .../MergeTree/MergedBlockOutputStream.h | 1 + src/Storages/ProjectionsDescription.cpp | 24 +++ src/Storages/ProjectionsDescription.h | 2 + src/Storages/SelectQueryInfo.h | 1 + src/Storages/StorageReplicatedMergeTree.cpp | 3 + src/Storages/System/StorageSystemParts.cpp | 11 +- 20 files changed, 312 insertions(+), 133 deletions(-) diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 44e9880fabb5..6d2cac41692c 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -269,6 +269,9 @@ try /// Load global settings from default_profile and system_profile. global_context->setDefaultProfiles(config()); + /// We load temporary database first, because projections need it. 
+ DatabaseCatalog::instance().initializeAndLoadTemporaryDatabase(); + /** Init dummy default DB * NOTE: We force using isolated default database to avoid conflicts with default database from server environment * Otherwise, metadata of temporary File(format, EXPLICIT_PATH) tables will pollute metadata/ directory; diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 5ae0e905b53c..e3ec6feb1a50 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -1095,15 +1095,15 @@ if (ThreadFuzzer::instance().isEffective()) try { + auto & database_catalog = DatabaseCatalog::instance(); + /// We load temporary database first, because projections need it. + database_catalog.initializeAndLoadTemporaryDatabase(); loadMetadataSystem(global_context); /// After attaching system databases we can initialize system log. global_context->initializeSystemLogs(); global_context->setSystemZooKeeperLogAfterInitializationIfNeeded(); - auto & database_catalog = DatabaseCatalog::instance(); /// After the system database is created, attach virtual system tables (in addition to query_log and part_log) attachSystemTablesServer(*database_catalog.getSystemDatabase(), has_zookeeper); - /// We load temporary database first, because projections need it. - database_catalog.initializeAndLoadTemporaryDatabase(); /// Then, load remaining databases loadMetadata(global_context, default_database); database_catalog.loadDatabases(); diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 76cb6c783bab..a1313a84c36f 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -550,7 +550,7 @@ ConstraintsDescription InterpreterCreateQuery::getConstraintsDescription(const A } -InterpreterCreateQuery::TableProperties InterpreterCreateQuery::setProperties(ASTCreateQuery & create) const +InterpreterCreateQuery::TableProperties InterpreterCreateQuery::getTablePropertiesAndNormalizeCreateQuery(ASTCreateQuery & create) const { TableProperties properties; TableLockHolder as_storage_lock; @@ -589,10 +589,13 @@ InterpreterCreateQuery::TableProperties InterpreterCreateQuery::setProperties(AS auto as_storage_metadata = as_storage->getInMemoryMetadataPtr(); properties.columns = as_storage_metadata->getColumns(); - /// Secondary indices make sense only for MergeTree family of storage engines. + /// Secondary indices and projections make sense only for MergeTree family of storage engines. /// We should not copy them for other storages. if (create.storage && endsWith(create.storage->engine->name, "MergeTree")) + { properties.indices = as_storage_metadata->getSecondaryIndices(); + properties.projections = as_storage_metadata->getProjections().clone(); + } properties.constraints = as_storage_metadata->getConstraints(); } @@ -910,7 +913,7 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) } /// Set and retrieve list of columns, indices and constraints. Set table engine if needed. Rewrite query in canonical way. 
- TableProperties properties = setProperties(create); + TableProperties properties = getTablePropertiesAndNormalizeCreateQuery(create); DatabasePtr database; bool need_add_to_database = !create.temporary; diff --git a/src/Interpreters/InterpreterCreateQuery.h b/src/Interpreters/InterpreterCreateQuery.h index 7bd3ef257462..92f2929ea7b4 100644 --- a/src/Interpreters/InterpreterCreateQuery.h +++ b/src/Interpreters/InterpreterCreateQuery.h @@ -74,7 +74,7 @@ class InterpreterCreateQuery : public IInterpreter, WithMutableContext BlockIO createTable(ASTCreateQuery & create); /// Calculate list of columns, constraints, indices, etc... of table. Rewrite query in canonical way. - TableProperties setProperties(ASTCreateQuery & create) const; + TableProperties getTablePropertiesAndNormalizeCreateQuery(ASTCreateQuery & create) const; void validateTableStructure(const ASTCreateQuery & create, const TableProperties & properties) const; void setEngine(ASTCreateQuery & create) const; AccessRightsElements getRequiredAccess() const; diff --git a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp index c91d60c5de78..2f46543b03cc 100644 --- a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp @@ -287,7 +287,7 @@ static void injectVirtualColumnsImpl( { ColumnPtr column; if (rows) - column = DataTypeUUID().createColumnConst(rows, task->data_part->uuid)->convertToFullColumnIfConst(); + column = DataTypeUUID().createColumnConst(rows, part->uuid)->convertToFullColumnIfConst(); else column = DataTypeUUID().createColumn(); @@ -306,7 +306,7 @@ static void injectVirtualColumnsImpl( else if (virtual_column_name == "_partition_value") { if (rows) - inserter.insertPartitionValueColumn(rows, task->data_part->partition.value, partition_value_type, virtual_column_name); + inserter.insertPartitionValueColumn(rows, part->partition.value, partition_value_type, virtual_column_name); else inserter.insertPartitionValueColumn(rows, {}, partition_value_type, virtual_column_name); } diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 7b07f4aba766..90bb0b593cf6 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -751,16 +751,20 @@ DataTypePtr MergeTreeData::getPartitionValueType() const } -Block MergeTreeData::getBlockWithVirtualPartColumns(const MergeTreeData::DataPartsVector & parts, bool one_part) const +Block MergeTreeData::getSampleBlockWithVirtualColumns() const { DataTypePtr partition_value_type = getPartitionValueType(); - bool has_partition_value = typeid_cast(partition_value_type.get()); - Block block{ + return { ColumnWithTypeAndName(ColumnString::create(), std::make_shared(), "_part"), ColumnWithTypeAndName(ColumnString::create(), std::make_shared(), "_partition_id"), ColumnWithTypeAndName(ColumnUUID::create(), std::make_shared(), "_part_uuid"), ColumnWithTypeAndName(partition_value_type->createColumn(), partition_value_type, "_partition_value")}; +} + +Block MergeTreeData::getBlockWithVirtualPartColumns(const MergeTreeData::DataPartsVector & parts, bool one_part) const +{ + auto block = getSampleBlockWithVirtualColumns(); MutableColumns columns = block.mutateColumns(); auto & part_column = columns[0]; @@ -768,6 +772,7 @@ Block MergeTreeData::getBlockWithVirtualPartColumns(const MergeTreeData::DataPar auto & part_uuid_column = columns[2]; auto & partition_value_column = columns[3]; + bool 
has_partition_value = typeid_cast(partition_value_column.get()); for (const auto & part_or_projection : parts) { const auto * part = part_or_projection->isProjectionPart() ? part_or_projection->getParentPart() : part_or_projection.get(); @@ -3319,7 +3324,7 @@ MergeTreeData::DataPartsVector MergeTreeData::getDataPartsVector( { for (const auto & part : range) { - for (const auto & [p_name, projection_part] : part->getProjectionParts()) + for (const auto & [_, projection_part] : part->getProjectionParts()) res.push_back(projection_part); } } @@ -4005,6 +4010,10 @@ bool MergeTreeData::getQueryProcessingStageWithAggregateProjection( if (auto * select = query_ptr->as(); select && select->final()) return false; + // Currently projections don't support sampling yet. + if (settings.parallel_replicas_count > 1) + return false; + InterpreterSelectQuery select( query_ptr, query_context, SelectQueryOptions{QueryProcessingStage::WithMergeableState}.ignoreProjections().ignoreAlias()); const auto & analysis_result = select.getAnalysisResult(); @@ -4048,13 +4057,13 @@ bool MergeTreeData::getQueryProcessingStageWithAggregateProjection( candidate.remove_where_filter = analysis_result.remove_where_filter; candidate.before_where = analysis_result.before_where->clone(); - required_columns = candidate.before_where->foldActionsByProjection( + auto new_required_columns = candidate.before_where->foldActionsByProjection( required_columns, projection.sample_block_for_keys, candidate.where_column_name); - - if (required_columns.empty()) + if (new_required_columns.empty() && !required_columns.empty()) return false; + required_columns = std::move(new_required_columns); candidate.before_where->addAggregatesViaProjection(aggregates); } @@ -4068,33 +4077,35 @@ bool MergeTreeData::getQueryProcessingStageWithAggregateProjection( for (const auto & column : prewhere_actions->getResultColumns()) required_columns.erase(column.name); - // Prewhere_action should not add missing keys. - prewhere_required_columns = prewhere_actions->foldActionsByProjection( - prewhere_required_columns, projection.sample_block_for_keys, candidate.prewhere_info->prewhere_column_name, false); - - if (prewhere_required_columns.empty()) - return false; - candidate.prewhere_info->prewhere_actions = prewhere_actions; + { + // Prewhere_action should not add missing keys. 
+ auto new_prewhere_required_columns = prewhere_actions->foldActionsByProjection( + prewhere_required_columns, projection.sample_block_for_keys, candidate.prewhere_info->prewhere_column_name, false); + if (new_prewhere_required_columns.empty() && !prewhere_required_columns.empty()) + return false; + prewhere_required_columns = std::move(new_prewhere_required_columns); + candidate.prewhere_info->prewhere_actions = prewhere_actions; + } if (candidate.prewhere_info->row_level_filter) { auto row_level_filter_actions = candidate.prewhere_info->row_level_filter->clone(); - prewhere_required_columns = row_level_filter_actions->foldActionsByProjection( + auto new_prewhere_required_columns = row_level_filter_actions->foldActionsByProjection( prewhere_required_columns, projection.sample_block_for_keys, candidate.prewhere_info->row_level_column_name, false); - - if (prewhere_required_columns.empty()) + if (new_prewhere_required_columns.empty() && !prewhere_required_columns.empty()) return false; + prewhere_required_columns = std::move(new_prewhere_required_columns); candidate.prewhere_info->row_level_filter = row_level_filter_actions; } if (candidate.prewhere_info->alias_actions) { auto alias_actions = candidate.prewhere_info->alias_actions->clone(); - prewhere_required_columns + auto new_prewhere_required_columns = alias_actions->foldActionsByProjection(prewhere_required_columns, projection.sample_block_for_keys, {}, false); - - if (prewhere_required_columns.empty()) + if (new_prewhere_required_columns.empty() && !prewhere_required_columns.empty()) return false; + prewhere_required_columns = std::move(new_prewhere_required_columns); candidate.prewhere_info->alias_actions = alias_actions; } required_columns.insert(prewhere_required_columns.begin(), prewhere_required_columns.end()); @@ -4113,11 +4124,20 @@ bool MergeTreeData::getQueryProcessingStageWithAggregateProjection( return match; }; - for (const auto & projection : metadata_snapshot->projections) + auto virtual_block = getSampleBlockWithVirtualColumns(); + auto add_projection_candidate = [&](const ProjectionDescription & projection) { ProjectionCandidate candidate{}; candidate.desc = &projection; + auto sample_block = projection.sample_block; + auto sample_block_for_keys = projection.sample_block_for_keys; + for (const auto & column : virtual_block) + { + sample_block.insertUnique(column); + sample_block_for_keys.insertUnique(column); + } + if (projection.type == ProjectionDescription::Type::Aggregate && analysis_result.need_aggregate && can_use_aggregate_projection) { bool match = true; @@ -4125,7 +4145,7 @@ bool MergeTreeData::getQueryProcessingStageWithAggregateProjection( // Let's first check if all aggregates are provided by current projection for (const auto & aggregate : select.getQueryAnalyzer()->aggregates()) { - const auto * column = projection.sample_block.findByName(aggregate.column_name); + const auto * column = sample_block.findByName(aggregate.column_name); if (column) { aggregates.insert(*column); @@ -4138,25 +4158,25 @@ bool MergeTreeData::getQueryProcessingStageWithAggregateProjection( } if (!match) - continue; + return; // Check if all aggregation keys can be either provided by some action, or by current // projection directly. Reshape the `before_aggregation` action DAG so that it only - // needs to provide aggregation keys, and certain children DAG might be substituted by - // some keys in projection. + // needs to provide aggregation keys, and the DAG of certain child might be substituted + // by some keys in projection. 
candidate.before_aggregation = analysis_result.before_aggregation->clone(); - auto required_columns = candidate.before_aggregation->foldActionsByProjection(keys, projection.sample_block_for_keys); + auto required_columns = candidate.before_aggregation->foldActionsByProjection(keys, sample_block_for_keys); // TODO Let's find out the exact required_columns for keys. if (required_columns.empty() && (!keys.empty() && !candidate.before_aggregation->getRequiredColumns().empty())) - continue; + return; if (analysis_result.optimize_aggregation_in_order) { for (const auto & key : keys) { auto actions_dag = analysis_result.before_aggregation->clone(); - actions_dag->foldActionsByProjection({key}, projection.sample_block_for_keys); + actions_dag->foldActionsByProjection({key}, sample_block_for_keys); candidate.group_by_elements_actions.emplace_back(std::make_shared(actions_dag, actions_settings)); } } @@ -4165,7 +4185,7 @@ bool MergeTreeData::getQueryProcessingStageWithAggregateProjection( candidate.before_aggregation->reorderAggregationKeysForProjection(key_name_pos_map); candidate.before_aggregation->addAggregatesViaProjection(aggregates); - if (rewrite_before_where(candidate, projection, required_columns, projection.sample_block_for_keys, aggregates)) + if (rewrite_before_where(candidate, projection, required_columns, sample_block_for_keys, aggregates)) { candidate.required_columns = {required_columns.begin(), required_columns.end()}; for (const auto & aggregate : aggregates) @@ -4182,13 +4202,16 @@ bool MergeTreeData::getQueryProcessingStageWithAggregateProjection( for (const auto & column : actions->getRequiredColumns()) required_columns.insert(column.name); - if (rewrite_before_where(candidate, projection, required_columns, projection.sample_block, {})) + if (rewrite_before_where(candidate, projection, required_columns, sample_block, {})) { candidate.required_columns = {required_columns.begin(), required_columns.end()}; candidates.push_back(std::move(candidate)); } } - } + }; + + for (const auto & projection : metadata_snapshot->projections) + add_projection_candidate(projection); // Let's select the best projection to execute the query. if (!candidates.empty()) @@ -4263,6 +4286,14 @@ bool MergeTreeData::getQueryProcessingStageWithAggregateProjection( if (!selected_candidate) return false; + else if (min_sum_marks == 0) + { + /// If selected_projection indicated an empty result set. Remember it in query_info but + /// don't use projection to run the query, because projection pipeline with empty result + /// set will not work correctly with empty_result_for_aggregation_by_empty_set. + query_info.merge_tree_empty_result = true; + return false; + } if (selected_candidate->desc->type == ProjectionDescription::Type::Aggregate) { diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 02d1f5e264ea..2871c845ac8c 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -782,6 +782,9 @@ class MergeTreeData : public IStorage, public WithMutableContext /// Return the partition expression types as a Tuple type. Return DataTypeUInt8 if partition expression is empty. DataTypePtr getPartitionValueType() const; + /// Construct a sample block of virtual columns. + Block getSampleBlockWithVirtualColumns() const; + /// Construct a block consisting only of possible virtual columns for part pruning. /// If one_part is true, fill in at most one part. 
Block getBlockWithVirtualPartColumns(const MergeTreeData::DataPartsVector & parts, bool one_part) const; diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index b6d55828e857..ab774b95212d 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -88,6 +88,7 @@ void FutureMergedMutatedPart::assign(MergeTreeData::DataPartsVector parts_) future_part_type = std::min(future_part_type, part->getType()); } + /// NOTE: We don't support merging into an in-memory part yet. auto chosen_type = parts_.front()->storage.choosePartTypeOnDisk(sum_bytes_uncompressed, sum_rows); future_part_type = std::min(future_part_type, chosen_type); assign(std::move(parts_), future_part_type); @@ -2021,10 +2022,19 @@ void MergeTreeDataMergerMutator::writeWithProjections( std::map projection_parts; Block block; std::vector projection_squashes; + const auto & settings = context->getSettingsRef(); for (size_t i = 0, size = projections_to_build.size(); i < size; ++i) { - projection_squashes.emplace_back(65536, 65536 * 256); + // If the parent part is an in-memory part, squash projection output into one block and + // build in-memory projection because we don't support merging into a new in-memory part. + // Otherwise we split the materialization into multiple stages similar to the process of + // INSERT SELECT query. + if (new_data_part->getType() == MergeTreeDataPartType::IN_MEMORY) + projection_squashes.emplace_back(0, 0); + else + projection_squashes.emplace_back(settings.min_insert_block_size_rows, settings.min_insert_block_size_bytes); } + while (checkOperationIsNotCanceled(merge_entry) && (block = mutating_stream->read())) { if (minmax_idx) @@ -2035,26 +2045,10 @@ void MergeTreeDataMergerMutator::writeWithProjections( for (size_t i = 0, size = projections_to_build.size(); i < size; ++i) { const auto & projection = projections_to_build[i]->projection; - auto in = InterpreterSelectQuery( - projection.query_ast, - context, - Pipe(std::make_shared(block, Chunk(block.getColumns(), block.rows()))), - SelectQueryOptions{ - projection.type == ProjectionDescription::Type::Normal ? 
QueryProcessingStage::FetchColumns : QueryProcessingStage::WithMergeableState}) - .execute() - .getInputStream(); - in = std::make_shared(in, block.rows(), std::numeric_limits::max()); - in->readPrefix(); - auto & projection_squash = projection_squashes[i]; - auto projection_block = projection_squash.add(in->read()); - if (in->read()) - throw Exception("Projection cannot increase the number of rows in a block", ErrorCodes::LOGICAL_ERROR); - in->readSuffix(); + auto projection_block = projection_squashes[i].add(projection.calculate(block, context)); if (projection_block) - { - projection_parts[projection.name].emplace_back( - MergeTreeDataWriter::writeTempProjectionPart(data, log, projection_block, projection, new_data_part.get(), ++block_num)); - } + projection_parts[projection.name].emplace_back(MergeTreeDataWriter::writeTempProjectionPart( + data, log, projection_block, projection, new_data_part.get(), ++block_num)); } merge_entry->rows_written += block.rows(); diff --git a/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp b/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp index e929bfc68620..635da7e2ede7 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp @@ -94,6 +94,42 @@ void MergeTreeDataPartInMemory::flushToDisk(const String & base_path, const Stri MergedBlockOutputStream out(new_data_part, metadata_snapshot, columns, indices, compression_codec); out.writePrefix(); out.write(block); + const auto & projections = metadata_snapshot->getProjections(); + for (const auto & [projection_name, projection] : projection_parts) + { + if (projections.has(projection_name)) + { + String projection_destination_path = fs::path(destination_path) / projection_name / ".proj"; + if (disk->exists(projection_destination_path)) + { + throw Exception( + ErrorCodes::DIRECTORY_ALREADY_EXISTS, + "Could not flush projection part {}. 
Projection part in {} already exists", + projection_name, + fullPath(disk, projection_destination_path)); + } + + auto projection_part = asInMemoryPart(projection); + auto projection_type = storage.choosePartTypeOnDisk(projection_part->block.bytes(), rows_count); + MergeTreePartInfo projection_info("all", 0, 0, 0); + auto projection_data_part + = storage.createPart(projection_name, projection_type, projection_info, volume, projection_name + ".proj", parent_part); + projection_data_part->is_temp = false; // clean up will be done on parent part + projection_data_part->setColumns(projection->getColumns()); + + disk->createDirectories(projection_destination_path); + const auto & desc = projections.get(name); + auto projection_compression_codec = storage.getContext()->chooseCompressionCodec(0, 0); + auto projection_indices = MergeTreeIndexFactory::instance().getMany(desc.metadata->getSecondaryIndices()); + MergedBlockOutputStream projection_out( + projection_data_part, desc.metadata, projection_part->columns, projection_indices, projection_compression_codec); + projection_out.writePrefix(); + projection_out.write(projection_part->block); + projection_out.writeSuffixAndFinalizePart(projection_data_part); + new_data_part->addProjectionPart(projection_name, std::move(projection_data_part)); + } + } + out.writeSuffixAndFinalizePart(new_data_part); } diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index f5c1890154a6..004eaa6254cd 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -132,6 +132,9 @@ QueryPlanPtr MergeTreeDataSelectExecutor::read( QueryProcessingStage::Enum processed_stage, std::shared_ptr max_block_numbers_to_read) const { + if (query_info.merge_tree_empty_result) + return std::make_unique(); + const auto & settings = context->getSettingsRef(); if (!query_info.projection) { @@ -181,7 +184,7 @@ QueryPlanPtr MergeTreeDataSelectExecutor::read( max_block_numbers_to_read, query_info.projection->merge_tree_projection_select_result_ptr); - if (plan) + if (plan->isInitialized()) { // If `before_where` is not empty, transform input blocks by adding needed columns // originated from key columns. 
We already project the block at the end, using @@ -237,7 +240,8 @@ QueryPlanPtr MergeTreeDataSelectExecutor::read( ordinary_query_plan.addStep(std::move(where_step)); } - ordinary_pipe = QueryPipeline::getPipe(interpreter.execute().pipeline); + ordinary_pipe = ordinary_query_plan.convertToPipe( + QueryPlanOptimizationSettings::fromContext(context), BuildQueryPipelineSettings::fromContext(context)); } if (query_info.projection->desc->type == ProjectionDescription::Type::Aggregate) @@ -351,12 +355,14 @@ QueryPlanPtr MergeTreeDataSelectExecutor::read( pipes.emplace_back(std::move(projection_pipe)); pipes.emplace_back(std::move(ordinary_pipe)); auto pipe = Pipe::unitePipes(std::move(pipes)); - pipe.resize(1); + auto plan = std::make_unique(); + if (pipe.empty()) + return plan; + pipe.resize(1); auto step = std::make_unique( std::move(pipe), fmt::format("MergeTree(with {} projection {})", query_info.projection->desc->type, query_info.projection->desc->name)); - auto plan = std::make_unique(); plan->addStep(std::move(step)); return plan; } diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp index 0b05650b42c5..180c18ed1b5f 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -386,31 +386,6 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart( sync_guard = disk->getDirectorySyncGuard(full_path); } - if (metadata_snapshot->hasProjections()) - { - for (const auto & projection : metadata_snapshot->getProjections()) - { - auto in = InterpreterSelectQuery( - projection.query_ast, - context, - Pipe(std::make_shared(block, Chunk(block.getColumns(), block.rows()))), - SelectQueryOptions{ - projection.type == ProjectionDescription::Type::Normal ? 
QueryProcessingStage::FetchColumns : QueryProcessingStage::WithMergeableState}) - .execute() - .getInputStream(); - in = std::make_shared(in, block.rows(), std::numeric_limits::max()); - in->readPrefix(); - auto projection_block = in->read(); - if (in->read()) - throw Exception("Projection cannot grow block rows", ErrorCodes::LOGICAL_ERROR); - in->readSuffix(); - if (projection_block.rows()) - { - new_data_part->addProjectionPart(projection.name, writeProjectionPart(projection_block, projection, new_data_part.get())); - } - } - } - if (metadata_snapshot->hasRowsTTL()) updateTTL(metadata_snapshot->getRowsTTL(), new_data_part->ttl_infos, new_data_part->ttl_infos.table_ttl, block, true); @@ -439,6 +414,14 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart( out.writePrefix(); out.writeWithPermutation(block, perm_ptr); + + for (const auto & projection : metadata_snapshot->getProjections()) + { + auto projection_block = projection.calculate(block, context); + if (projection_block.rows()) + new_data_part->addProjectionPart( + projection.name, writeProjectionPart(data, log, projection_block, projection, new_data_part.get())); + } out.writeSuffixAndFinalizePart(new_data_part, sync_on_insert); ProfileEvents::increment(ProfileEvents::MergeTreeDataWriterRows, block.rows()); @@ -449,18 +432,28 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart( } MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeProjectionPartImpl( - MergeTreeData & data, + const String part_name, + MergeTreeDataPartType part_type, + const String & relative_path, + bool is_temp, + const IMergeTreeDataPart * parent_part, + const MergeTreeData & data, Poco::Logger * log, Block block, - const StorageMetadataPtr & metadata_snapshot, - MergeTreeData::MutableDataPartPtr && new_data_part) + const StorageMetadataPtr & metadata_snapshot) { + MergeTreePartInfo new_part_info("all", 0, 0, 0); + auto new_data_part = data.createPart( + part_name, + part_type, + new_part_info, + parent_part->volume, + relative_path, + parent_part); + new_data_part->is_temp = is_temp; + NamesAndTypesList columns = metadata_snapshot->getColumns().getAllPhysical().filter(block.getNames()); - MergeTreePartition partition{}; - IMergeTreeDataPart::MinMaxIndex minmax_idx{}; new_data_part->setColumns(columns); - new_data_part->partition = std::move(partition); - new_data_part->minmax_idx = std::move(minmax_idx); if (new_data_part->isStoredOnDisk()) { @@ -523,27 +516,41 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeProjectionPartImpl( ProfileEvents::increment(ProfileEvents::MergeTreeDataProjectionWriterUncompressedBytes, block.bytes()); ProfileEvents::increment(ProfileEvents::MergeTreeDataProjectionWriterCompressedBytes, new_data_part->getBytesOnDisk()); - return std::move(new_data_part); + return new_data_part; } -MergeTreeData::MutableDataPartPtr -MergeTreeDataWriter::writeProjectionPart(Block block, const ProjectionDescription & projection, const IMergeTreeDataPart * parent_part) +MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeProjectionPart( + MergeTreeData & data, Poco::Logger * log, Block block, const ProjectionDescription & projection, const IMergeTreeDataPart * parent_part) { - /// Size of part would not be greater than block.bytes() + epsilon - size_t expected_size = block.bytes(); - - // just check if there is enough space on parent volume - data.reserveSpace(expected_size, parent_part->volume); - String part_name = projection.name; - MergeTreePartInfo new_part_info("all", 0, 0, 0); - 
auto new_data_part = data.createPart( - part_name, data.choosePartType(expected_size, block.rows()), new_part_info, parent_part->volume, part_name + ".proj", parent_part); - new_data_part->is_temp = false; // clean up will be done on parent part + MergeTreeDataPartType part_type; + if (parent_part->getType() == MergeTreeDataPartType::IN_MEMORY) + { + part_type = MergeTreeDataPartType::IN_MEMORY; + } + else + { + /// Size of part would not be greater than block.bytes() + epsilon + size_t expected_size = block.bytes(); + // just check if there is enough space on parent volume + data.reserveSpace(expected_size, parent_part->volume); + part_type = data.choosePartTypeOnDisk(expected_size, block.rows()); + } - return writeProjectionPartImpl(data, log, block, projection.metadata, std::move(new_data_part)); + return writeProjectionPartImpl( + part_name, + part_type, + part_name + ".proj" /* relative_path */, + false /* is_temp */, + parent_part, + data, + log, + block, + projection.metadata); } +/// This is used for projection materialization process which may contain multiple stages of +/// projection part merges. MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempProjectionPart( MergeTreeData & data, Poco::Logger * log, @@ -552,24 +559,50 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempProjectionPart( const IMergeTreeDataPart * parent_part, size_t block_num) { - /// Size of part would not be greater than block.bytes() + epsilon - size_t expected_size = block.bytes(); - - // just check if there is enough space on parent volume - data.reserveSpace(expected_size, parent_part->volume); - String part_name = fmt::format("{}_{}", projection.name, block_num); - MergeTreePartInfo new_part_info("all", 0, 0, 0); - auto new_data_part = data.createPart( + MergeTreeDataPartType part_type; + if (parent_part->getType() == MergeTreeDataPartType::IN_MEMORY) + { + part_type = MergeTreeDataPartType::IN_MEMORY; + } + else + { + /// Size of part would not be greater than block.bytes() + epsilon + size_t expected_size = block.bytes(); + // just check if there is enough space on parent volume + data.reserveSpace(expected_size, parent_part->volume); + part_type = data.choosePartTypeOnDisk(expected_size, block.rows()); + } + + return writeProjectionPartImpl( part_name, - data.choosePartType(expected_size, block.rows()), - new_part_info, - parent_part->volume, - "tmp_insert_" + part_name + ".proj", - parent_part); - new_data_part->is_temp = true; // It's part for merge + part_type, + "tmp_insert_" + part_name + ".proj" /* relative_path */, + true /* is_temp */, + parent_part, + data, + log, + block, + projection.metadata); +} - return writeProjectionPartImpl(data, log, block, projection.metadata, std::move(new_data_part)); +MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeInMemoryProjectionPart( + const MergeTreeData & data, + Poco::Logger * log, + Block block, + const ProjectionDescription & projection, + const IMergeTreeDataPart * parent_part) +{ + return writeProjectionPartImpl( + projection.name, + MergeTreeDataPartType::IN_MEMORY, + projection.name + ".proj" /* relative_path */, + false /* is_temp */, + parent_part, + data, + log, + block, + projection.metadata); } } diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.h b/src/Storages/MergeTree/MergeTreeDataWriter.h index feb2f1e2b123..006f897c3e2e 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.h +++ b/src/Storages/MergeTree/MergeTreeDataWriter.h @@ -49,9 +49,15 @@ class MergeTreeDataWriter 
MergeTreeData::MutableDataPartPtr writeTempPart(BlockWithPartition & block, const StorageMetadataPtr & metadata_snapshot, ContextPtr context); - MergeTreeData::MutableDataPartPtr writeProjectionPart( - Block block, const ProjectionDescription & projection, const IMergeTreeDataPart * parent_part); + /// For insertion. + static MergeTreeData::MutableDataPartPtr writeProjectionPart( + MergeTreeData & data, + Poco::Logger * log, + Block block, + const ProjectionDescription & projection, + const IMergeTreeDataPart * parent_part); + /// For mutation: MATERIALIZE PROJECTION. static MergeTreeData::MutableDataPartPtr writeTempProjectionPart( MergeTreeData & data, Poco::Logger * log, @@ -60,15 +66,27 @@ class MergeTreeDataWriter const IMergeTreeDataPart * parent_part, size_t block_num); + /// For WriteAheadLog AddPart. + static MergeTreeData::MutableDataPartPtr writeInMemoryProjectionPart( + const MergeTreeData & data, + Poco::Logger * log, + Block block, + const ProjectionDescription & projection, + const IMergeTreeDataPart * parent_part); + Block mergeBlock(const Block & block, SortDescription sort_description, Names & partition_key_columns, IColumn::Permutation *& permutation); private: static MergeTreeData::MutableDataPartPtr writeProjectionPartImpl( - MergeTreeData & data, + const String part_name, + MergeTreeDataPartType part_type, + const String & relative_path, + bool is_temp, + const IMergeTreeDataPart * parent_part, + const MergeTreeData & data, Poco::Logger * log, Block block, - const StorageMetadataPtr & metadata_snapshot, - MergeTreeData::MutableDataPartPtr && new_data_part); + const StorageMetadataPtr & metadata_snapshot); MergeTreeData & data; diff --git a/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp b/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp index 1fcd28b70e33..d8fb50a866c0 100644 --- a/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp +++ b/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include #include #include @@ -31,6 +32,7 @@ MergeTreeWriteAheadLog::MergeTreeWriteAheadLog( , name(name_) , path(storage.getRelativeDataPath() + name_) , pool(storage.getContext()->getSchedulePool()) + , log(&Poco::Logger::get(storage.getLogName() + " (WriteAheadLog)")) { init(); sync_task = pool.createTask("MergeTreeWriteAheadLog::sync", [this] @@ -172,8 +174,7 @@ MergeTreeData::MutableDataPartsVector MergeTreeWriteAheadLog::restore(const Stor || e.code() == ErrorCodes::BAD_DATA_PART_NAME || e.code() == ErrorCodes::CORRUPTED_DATA) { - LOG_WARNING(&Poco::Logger::get(storage.getLogName() + " (WriteAheadLog)"), - "WAL file '{}' is broken. {}", path, e.displayText()); + LOG_WARNING(log, "WAL file '{}' is broken. {}", path, e.displayText()); /// If file is broken, do not write new parts to it. /// But if it contains any part rotate and save them. 
@@ -203,6 +204,15 @@ MergeTreeData::MutableDataPartsVector MergeTreeWriteAheadLog::restore(const Stor part_out.writePrefix(); part_out.write(block); + + for (const auto & projection : metadata_snapshot->getProjections()) + { + auto projection_block = projection.calculate(block, context); + if (projection_block.rows()) + part->addProjectionPart( + projection.name, + MergeTreeDataWriter::writeInMemoryProjectionPart(storage, log, projection_block, projection, part.get())); + } part_out.writeSuffixAndFinalizePart(part); min_block_number = std::min(min_block_number, part->info.min_block); diff --git a/src/Storages/MergeTree/MergeTreeWriteAheadLog.h b/src/Storages/MergeTree/MergeTreeWriteAheadLog.h index e01911aa8b88..8d1ea3c332e5 100644 --- a/src/Storages/MergeTree/MergeTreeWriteAheadLog.h +++ b/src/Storages/MergeTree/MergeTreeWriteAheadLog.h @@ -91,6 +91,8 @@ class MergeTreeWriteAheadLog bool sync_scheduled = false; mutable std::mutex write_mutex; + + Poco::Logger * log; }; } diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.h b/src/Storages/MergeTree/MergedBlockOutputStream.h index d04df5982185..4c36508ebf5e 100644 --- a/src/Storages/MergeTree/MergedBlockOutputStream.h +++ b/src/Storages/MergeTree/MergedBlockOutputStream.h @@ -34,6 +34,7 @@ class MergedBlockOutputStream final : public IMergedBlockOutputStream void writeSuffix() override; /// Finilize writing part and fill inner structures + /// If part is new and contains projections, they should be added before invoking this method. void writeSuffixAndFinalizePart( MergeTreeData::MutableDataPartPtr & new_part, bool sync = false, diff --git a/src/Storages/ProjectionsDescription.cpp b/src/Storages/ProjectionsDescription.cpp index dd48b23ecc3b..5fc44bc044fd 100644 --- a/src/Storages/ProjectionsDescription.cpp +++ b/src/Storages/ProjectionsDescription.cpp @@ -14,6 +14,7 @@ #include #include +#include namespace DB { @@ -23,6 +24,7 @@ namespace ErrorCodes extern const int NO_SUCH_PROJECTION_IN_TABLE; extern const int ILLEGAL_PROJECTION; extern const int NOT_IMPLEMENTED; + extern const int LOGICAL_ERROR; }; const char * ProjectionDescription::typeToString(Type type) @@ -192,6 +194,28 @@ void ProjectionDescription::recalculateWithNewColumns(const ColumnsDescription & *this = getProjectionFromAST(definition_ast, new_columns, query_context); } + +Block ProjectionDescription::calculate(const Block & block, ContextPtr context) const +{ + auto in = InterpreterSelectQuery( + query_ast, + context, + Pipe(std::make_shared(block, Chunk(block.getColumns(), block.rows()))), + SelectQueryOptions{ + type == ProjectionDescription::Type::Normal ? 
QueryProcessingStage::FetchColumns + : QueryProcessingStage::WithMergeableState}) + .execute() + .getInputStream(); + in = std::make_shared(in, block.rows(), 0); + in->readPrefix(); + auto ret = in->read(); + if (in->read()) + throw Exception("Projection cannot increase the number of rows in a block", ErrorCodes::LOGICAL_ERROR); + in->readSuffix(); + return ret; +} + + String ProjectionsDescription::toString() const { if (empty()) diff --git a/src/Storages/ProjectionsDescription.h b/src/Storages/ProjectionsDescription.h index fd505c4fe069..2b279c711fec 100644 --- a/src/Storages/ProjectionsDescription.h +++ b/src/Storages/ProjectionsDescription.h @@ -85,6 +85,8 @@ struct ProjectionDescription void recalculateWithNewColumns(const ColumnsDescription & new_columns, ContextPtr query_context); bool isPrimaryKeyColumnPossiblyWrappedInFunctions(const ASTPtr & node) const; + + Block calculate(const Block & block, ContextPtr context) const; }; /// All projections in storage diff --git a/src/Storages/SelectQueryInfo.h b/src/Storages/SelectQueryInfo.h index a4536e1ff582..a2db66552237 100644 --- a/src/Storages/SelectQueryInfo.h +++ b/src/Storages/SelectQueryInfo.h @@ -163,6 +163,7 @@ struct SelectQueryInfo std::optional projection; bool ignore_projections = false; bool is_projection_query = false; + bool merge_tree_empty_result = false; MergeTreeDataSelectAnalysisResultPtr merge_tree_select_result_ptr; }; diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 67a1ba7fc786..222a66bc4f65 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -7526,6 +7526,9 @@ bool StorageReplicatedMergeTree::createEmptyPartInsteadOfLost(zkutil::ZooKeeperP out.writePrefix(); out.write(block); + /// TODO(ab): What projections should we add to the empty part? How can we make sure that it + /// won't block future merges? Perhaps we should also check part emptiness when selecting parts + /// to merge. out.writeSuffixAndFinalizePart(new_data_part, sync_on_insert); try diff --git a/src/Storages/System/StorageSystemParts.cpp b/src/Storages/System/StorageSystemParts.cpp index 6a643dbe1b97..dba05d449690 100644 --- a/src/Storages/System/StorageSystemParts.cpp +++ b/src/Storages/System/StorageSystemParts.cpp @@ -75,7 +75,9 @@ StorageSystemParts::StorageSystemParts(const StorageID & table_id_) {"rows_where_ttl_info.expression", std::make_shared(std::make_shared())}, {"rows_where_ttl_info.min", std::make_shared(std::make_shared())}, - {"rows_where_ttl_info.max", std::make_shared(std::make_shared())} + {"rows_where_ttl_info.max", std::make_shared(std::make_shared())}, + + {"projections", std::make_shared(std::make_shared())}, } ) { @@ -253,6 +255,13 @@ void StorageSystemParts::processNextStorage( add_ttl_info_map(part->ttl_infos.group_by_ttl); add_ttl_info_map(part->ttl_infos.rows_where_ttl); + Array projections; + for (const auto & [name, _] : part->getProjectionParts()) + projections.push_back(name); + + if (columns_mask[src_index++]) + columns[res_index++]->insert(projections); + /// _state column should be the latest. 
/// Do not use part->getState*, it can be changed from different thread if (has_state_column) From c66de301333bfbc02192f0d94db01f3bdd04e2c3 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 27 Aug 2021 15:45:17 +0300 Subject: [PATCH 023/472] Backport #28205 to 21.9: Fix insertion of fields with Infinity values in nullable low cardinality columns --- src/Columns/ColumnUnique.h | 2 +- .../0_stateless/2013_lc_nullable_and_infinity.reference | 4 ++++ tests/queries/0_stateless/2013_lc_nullable_and_infinity.sql | 3 +++ 3 files changed, 8 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/2013_lc_nullable_and_infinity.reference create mode 100644 tests/queries/0_stateless/2013_lc_nullable_and_infinity.sql diff --git a/src/Columns/ColumnUnique.h b/src/Columns/ColumnUnique.h index bfa80b5e3b21..72904c5ab8fc 100644 --- a/src/Columns/ColumnUnique.h +++ b/src/Columns/ColumnUnique.h @@ -301,7 +301,7 @@ size_t ColumnUnique::getNullValueIndex() const template size_t ColumnUnique::uniqueInsert(const Field & x) { - if (x.getType() == Field::Types::Null) + if (x.isNull()) return getNullValueIndex(); if (valuesHaveFixedSize()) diff --git a/tests/queries/0_stateless/2013_lc_nullable_and_infinity.reference b/tests/queries/0_stateless/2013_lc_nullable_and_infinity.reference new file mode 100644 index 000000000000..ef5038b2236f --- /dev/null +++ b/tests/queries/0_stateless/2013_lc_nullable_and_infinity.reference @@ -0,0 +1,4 @@ +0 \N + +0 \N +0 \N diff --git a/tests/queries/0_stateless/2013_lc_nullable_and_infinity.sql b/tests/queries/0_stateless/2013_lc_nullable_and_infinity.sql new file mode 100644 index 000000000000..c1c8a9c00b1a --- /dev/null +++ b/tests/queries/0_stateless/2013_lc_nullable_and_infinity.sql @@ -0,0 +1,3 @@ +set receive_timeout = '10', receive_data_timeout_ms = '10000', extremes = '1', allow_suspicious_low_cardinality_types = '1', force_primary_key = '1', join_use_nulls = '1', max_rows_to_read = '1', join_algorithm = 'partial_merge'; + +SELECT * FROM (SELECT dummy AS val FROM system.one) AS s1 ANY LEFT JOIN (SELECT toLowCardinality(dummy) AS rval FROM system.one) AS s2 ON (val + 9223372036854775806) = (rval * 1); From 17f2cbc1c6ea68689706ac56d1a6bc77a75817f1 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Sat, 28 Aug 2021 03:48:59 +0300 Subject: [PATCH 024/472] Backport #28266 to 21.9: Fix order by for `Merge` tables with `optimize_read_in_order` --- src/Storages/StorageMerge.cpp | 5 +++-- .../02014_storage_merge_order_by.reference | 5 +++++ .../02014_storage_merge_order_by.sql | 22 +++++++++++++++++++ 3 files changed, 30 insertions(+), 2 deletions(-) create mode 100644 tests/queries/0_stateless/02014_storage_merge_order_by.reference create mode 100644 tests/queries/0_stateless/02014_storage_merge_order_by.sql diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index ed203db1a354..cbe4a287919f 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -338,9 +338,10 @@ Pipe StorageMerge::read( auto pipe = Pipe::unitePipes(std::move(pipes)); - if (!pipe.empty()) + if (!pipe.empty() && !query_info.input_order_info) // It's possible to have many tables read from merge, resize(num_streams) might open too many files at the same time. - // Using narrowPipe instead. + // Using narrowPipe instead. But in case of reading in order of primary key, we cannot do it, + // because narrowPipe doesn't preserve order. 
narrowPipe(pipe, num_streams); return pipe; diff --git a/tests/queries/0_stateless/02014_storage_merge_order_by.reference b/tests/queries/0_stateless/02014_storage_merge_order_by.reference new file mode 100644 index 000000000000..0bb816b39871 --- /dev/null +++ b/tests/queries/0_stateless/02014_storage_merge_order_by.reference @@ -0,0 +1,5 @@ +20 +20 +20 +20 +20 diff --git a/tests/queries/0_stateless/02014_storage_merge_order_by.sql b/tests/queries/0_stateless/02014_storage_merge_order_by.sql new file mode 100644 index 000000000000..5b9789ae1d97 --- /dev/null +++ b/tests/queries/0_stateless/02014_storage_merge_order_by.sql @@ -0,0 +1,22 @@ +DROP TABLE IF EXISTS short; +DROP TABLE IF EXISTS long; +DROP TABLE IF EXISTS merged; + +CREATE TABLE short (e Int64, t DateTime ) ENGINE = MergeTree PARTITION BY e ORDER BY t; +CREATE TABLE long (e Int64, t DateTime ) ENGINE = MergeTree PARTITION BY (e, toStartOfMonth(t)) ORDER BY t; + +insert into short select number % 11, toDateTime('2021-01-01 00:00:00') + number from numbers(1000); +insert into long select number % 11, toDateTime('2021-01-01 00:00:00') + number from numbers(1000); + +CREATE TABLE merged as short ENGINE = Merge(currentDatabase(), 'short|long'); + +select sum(e) from (select * from merged order by t limit 10) SETTINGS optimize_read_in_order = 0; + +select sum(e) from (select * from merged order by t limit 10) SETTINGS max_threads = 1; +select sum(e) from (select * from merged order by t limit 10) SETTINGS max_threads = 3; +select sum(e) from (select * from merged order by t limit 10) SETTINGS max_threads = 10; +select sum(e) from (select * from merged order by t limit 10) SETTINGS max_threads = 50; + +DROP TABLE IF EXISTS short; +DROP TABLE IF EXISTS long; +DROP TABLE IF EXISTS merged; From 0af30830946a955d432085f6877f6a2f131601e1 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Mon, 30 Aug 2021 14:02:41 +0300 Subject: [PATCH 025/472] Backport #28197 to 21.9: Fix rare case when watch response received before request response --- src/Coordination/KeeperStateMachine.cpp | 52 +++++++++++-------------- src/Coordination/KeeperStateMachine.h | 8 +++- 2 files changed, 29 insertions(+), 31 deletions(-) diff --git a/src/Coordination/KeeperStateMachine.cpp b/src/Coordination/KeeperStateMachine.cpp index a76b86a81712..431ad41c6d17 100644 --- a/src/Coordination/KeeperStateMachine.cpp +++ b/src/Coordination/KeeperStateMachine.cpp @@ -101,31 +101,26 @@ nuraft::ptr KeeperStateMachine::commit(const uint64_t log_idx, n { const Coordination::ZooKeeperSessionIDRequest & session_id_request = dynamic_cast(*request_for_session.request); int64_t session_id; - { - std::lock_guard lock(storage_lock); - session_id = storage->getSessionID(session_id_request.session_timeout_ms); - } - LOG_DEBUG(log, "Session ID response {} with timeout {}", session_id, session_id_request.session_timeout_ms); - std::shared_ptr response = std::make_shared(); response->internal_id = session_id_request.internal_id; - response->session_id = session_id; response->server_id = session_id_request.server_id; - KeeperStorage::ResponseForSession response_for_session; response_for_session.session_id = -1; response_for_session.response = response; - responses_queue.push(response_for_session); + { + std::lock_guard lock(storage_and_responses_lock); + session_id = storage->getSessionID(session_id_request.session_timeout_ms); + LOG_DEBUG(log, "Session ID response {} with timeout {}", session_id, session_id_request.session_timeout_ms); + response->session_id = session_id; + 
responses_queue.push(response_for_session); + } } else { - KeeperStorage::ResponsesForSessions responses_for_sessions; - { - std::lock_guard lock(storage_lock); - responses_for_sessions = storage->processRequest(request_for_session.request, request_for_session.session_id, log_idx); - for (auto & response_for_session : responses_for_sessions) - responses_queue.push(response_for_session); - } + std::lock_guard lock(storage_and_responses_lock); + KeeperStorage::ResponsesForSessions responses_for_sessions = storage->processRequest(request_for_session.request, request_for_session.session_id, log_idx); + for (auto & response_for_session : responses_for_sessions) + responses_queue.push(response_for_session); } last_committed_idx = log_idx; @@ -144,8 +139,8 @@ bool KeeperStateMachine::apply_snapshot(nuraft::snapshot & s) latest_snapshot_ptr = latest_snapshot_buf; } - { - std::lock_guard lock(storage_lock); + { /// deserialize and apply snapshot to storage + std::lock_guard lock(storage_and_responses_lock); std::tie(latest_snapshot_meta, storage) = snapshot_manager.deserializeSnapshotFromBuffer(latest_snapshot_ptr); } last_committed_idx = s.get_last_log_idx(); @@ -168,8 +163,8 @@ void KeeperStateMachine::create_snapshot( nuraft::ptr snp_buf = s.serialize(); auto snapshot_meta_copy = nuraft::snapshot::deserialize(*snp_buf); CreateSnapshotTask snapshot_task; - { - std::lock_guard lock(storage_lock); + { /// lock storage for a short period time to turn on "snapshot mode". After that we can read consistent storage state without locking. + std::lock_guard lock(storage_and_responses_lock); snapshot_task.snapshot = std::make_shared(storage.get(), snapshot_meta_copy); } @@ -191,7 +186,8 @@ void KeeperStateMachine::create_snapshot( { /// Must do it with lock (clearing elements from list) - std::lock_guard lock(storage_lock); + std::lock_guard lock(storage_and_responses_lock); + /// Turn off "snapshot mode" and clear outdate part of storage state storage->clearGarbageAfterSnapshot(); /// Destroy snapshot with lock snapshot.reset(); @@ -226,7 +222,7 @@ void KeeperStateMachine::save_logical_snp_obj( nuraft::ptr cloned_meta; if (obj_id == 0) { - std::lock_guard lock(storage_lock); + std::lock_guard lock(storage_and_responses_lock); KeeperStorageSnapshot snapshot(storage.get(), s.get_last_log_idx()); cloned_buffer = snapshot_manager.serializeSnapshotToBuffer(snapshot); } @@ -286,24 +282,22 @@ int KeeperStateMachine::read_logical_snp_obj( void KeeperStateMachine::processReadRequest(const KeeperStorage::RequestForSession & request_for_session) { - KeeperStorage::ResponsesForSessions responses; - { - std::lock_guard lock(storage_lock); - responses = storage->processRequest(request_for_session.request, request_for_session.session_id, std::nullopt); - } + /// Pure local request, just process it with storage + std::lock_guard lock(storage_and_responses_lock); + auto responses = storage->processRequest(request_for_session.request, request_for_session.session_id, std::nullopt); for (const auto & response : responses) responses_queue.push(response); } std::unordered_set KeeperStateMachine::getDeadSessions() { - std::lock_guard lock(storage_lock); + std::lock_guard lock(storage_and_responses_lock); return storage->getDeadSessions(); } void KeeperStateMachine::shutdownStorage() { - std::lock_guard lock(storage_lock); + std::lock_guard lock(storage_and_responses_lock); storage->finalize(); } diff --git a/src/Coordination/KeeperStateMachine.h b/src/Coordination/KeeperStateMachine.h index fb46f507baf3..c34981b1252d 100644 --- 
a/src/Coordination/KeeperStateMachine.h +++ b/src/Coordination/KeeperStateMachine.h @@ -81,8 +81,12 @@ class KeeperStateMachine : public nuraft::state_machine /// Mutex for snapshots std::mutex snapshots_lock; - /// Lock for storage - std::mutex storage_lock; + /// Lock for storage and responses_queue. It's important to process requests + /// and push them to the responses queue while holding this lock. Otherwise + /// we can get strange cases when, for example client send read request with + /// watch and after that receive watch response and only receive response + /// for request. + std::mutex storage_and_responses_lock; /// Last committed Raft log number. std::atomic last_committed_idx; From b4a4f2fbb3d502d90460deb670ef0fe1861fb5a3 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 31 Aug 2021 00:09:09 +0300 Subject: [PATCH 026/472] Backport #28063 to 21.9: Set version of tzlocal to 2.1 --- docker/test/integration/runner/Dockerfile | 3 ++- docker/test/testflows/runner/Dockerfile | 2 +- tests/integration/README.md | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/docker/test/integration/runner/Dockerfile b/docker/test/integration/runner/Dockerfile index 4130fc101786..cb69a00fc63d 100644 --- a/docker/test/integration/runner/Dockerfile +++ b/docker/test/integration/runner/Dockerfile @@ -79,8 +79,9 @@ RUN python3 -m pip install \ pytest-timeout \ pytest-xdist \ pytest-repeat \ + pytz \ redis \ - tzlocal \ + tzlocal==2.1 \ urllib3 \ requests-kerberos \ pyhdfs diff --git a/docker/test/testflows/runner/Dockerfile b/docker/test/testflows/runner/Dockerfile index f170adf10471..81d431635b76 100644 --- a/docker/test/testflows/runner/Dockerfile +++ b/docker/test/testflows/runner/Dockerfile @@ -37,7 +37,7 @@ RUN apt-get update \ ENV TZ=Europe/Moscow RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone -RUN pip3 install urllib3 testflows==1.7.20 docker-compose==1.29.1 docker==5.0.0 dicttoxml kazoo tzlocal python-dateutil numpy +RUN pip3 install urllib3 testflows==1.7.20 docker-compose==1.29.1 docker==5.0.0 dicttoxml kazoo tzlocal==2.1 pytz python-dateutil numpy ENV DOCKER_CHANNEL stable ENV DOCKER_VERSION 20.10.6 diff --git a/tests/integration/README.md b/tests/integration/README.md index 8c3536587051..ed96eafdef8e 100644 --- a/tests/integration/README.md +++ b/tests/integration/README.md @@ -38,7 +38,7 @@ sudo -H pip install \ pytest \ pytest-timeout \ redis \ - tzlocal \ + tzlocal==2.1 \ urllib3 \ requests-kerberos \ dict2xml \ From 7069e1915d2c2408e1315201beaee615bf1dc424 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 31 Aug 2021 14:12:14 +0300 Subject: [PATCH 027/472] Backport #28353 to 21.9: Another try to fix BackgroundPoolTask decrement. --- .../MergeTree/BackgroundJobsExecutor.cpp | 35 ++++++++++++++++--- 1 file changed, 31 insertions(+), 4 deletions(-) diff --git a/src/Storages/MergeTree/BackgroundJobsExecutor.cpp b/src/Storages/MergeTree/BackgroundJobsExecutor.cpp index f3d957117e8e..7c784de9ebb6 100644 --- a/src/Storages/MergeTree/BackgroundJobsExecutor.cpp +++ b/src/Storages/MergeTree/BackgroundJobsExecutor.cpp @@ -84,28 +84,55 @@ bool incrementMetricIfLessThanMax(std::atomic & atomic_value, Int64 max_v } +/// This is a RAII class which only decrements metric. +/// It is added because after all other fixes a bug non-executing merges was occurred again. +/// Last hypothesis: task was successfully added to pool, however, was not executed because of internal exception in it. 
+class ParanoidMetricDecrementor +{ +public: + explicit ParanoidMetricDecrementor(CurrentMetrics::Metric metric_) : metric(metric_) {} + void alarm() { is_alarmed = true; } + void decrement() + { + if (is_alarmed.exchange(false)) + { + CurrentMetrics::values[metric]--; + } + } + + ~ParanoidMetricDecrementor() { decrement(); } + +private: + + CurrentMetrics::Metric metric; + std::atomic_bool is_alarmed = false; +}; + void IBackgroundJobExecutor::execute(JobAndPool job_and_pool) try { auto & pool_config = pools_configs[job_and_pool.pool_type]; const auto max_pool_size = pool_config.get_max_pool_size(); + auto metric_decrementor = std::make_shared(pool_config.tasks_metric); + /// If corresponding pool is not full increment metric and assign new job if (incrementMetricIfLessThanMax(CurrentMetrics::values[pool_config.tasks_metric], max_pool_size)) { + metric_decrementor->alarm(); try /// this try required because we have to manually decrement metric { /// Synchronize pool size, because config could be reloaded pools[job_and_pool.pool_type].setMaxThreads(max_pool_size); pools[job_and_pool.pool_type].setQueueSize(max_pool_size); - pools[job_and_pool.pool_type].scheduleOrThrowOnError([this, pool_config, job{std::move(job_and_pool.job)}] () + pools[job_and_pool.pool_type].scheduleOrThrowOnError([this, metric_decrementor, job{std::move(job_and_pool.job)}] () { try /// We don't want exceptions in background pool { bool job_success = job(); /// Job done, decrement metric and reset no_work counter - CurrentMetrics::values[pool_config.tasks_metric]--; + metric_decrementor->decrement(); if (job_success) { @@ -121,7 +148,7 @@ try } catch (...) { - CurrentMetrics::values[pool_config.tasks_metric]--; + metric_decrementor->decrement(); tryLogCurrentException(__PRETTY_FUNCTION__); scheduleTask(/* with_backoff = */ true); } @@ -133,7 +160,7 @@ try catch (...) 
{ /// With our Pool settings scheduleOrThrowOnError shouldn't throw exceptions, but for safety catch added here - CurrentMetrics::values[pool_config.tasks_metric]--; + metric_decrementor->decrement(); tryLogCurrentException(__PRETTY_FUNCTION__); scheduleTask(/* with_backoff = */ true); } From 0202eb7ecf09dd3653bf681f37c24c537a1d20e9 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 31 Aug 2021 14:12:52 +0300 Subject: [PATCH 028/472] Backport #28310 to 21.9: Fix intersecting parts due to new part had been replaced with an empty part --- src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp index 797d0570fbc2..0efa83237ca4 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp @@ -192,7 +192,7 @@ void ReplicatedMergeTreePartCheckThread::searchForMissingPartAndFetchIfPossible( if (missing_part_search_result == MissingPartSearchResult::LostForever) { auto lost_part_info = MergeTreePartInfo::fromPartName(part_name, storage.format_version); - if (lost_part_info.level != 0) + if (lost_part_info.level != 0 || lost_part_info.mutation != 0) { Strings source_parts; bool part_in_queue = storage.queue.checkPartInQueueAndGetSourceParts(part_name, source_parts); From 4601dc6fb751d4bc519fc17b95239e888b3f5375 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 31 Aug 2021 18:15:14 +0300 Subject: [PATCH 029/472] Backport #28298 to 21.9: ODBC connection holder fix dangling reference --- programs/odbc-bridge/ODBCConnectionFactory.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/programs/odbc-bridge/ODBCConnectionFactory.h b/programs/odbc-bridge/ODBCConnectionFactory.h index e1cdf1f09bb3..773b65eb27de 100644 --- a/programs/odbc-bridge/ODBCConnectionFactory.h +++ b/programs/odbc-bridge/ODBCConnectionFactory.h @@ -6,6 +6,7 @@ #include #include + namespace DB { namespace ErrorCodes @@ -28,8 +29,8 @@ class ConnectionHolder { public: ConnectionHolder(PoolPtr pool_, - ConnectionPtr connection_, - const String & connection_string_) + ConnectionPtr connection_, + const String & connection_string_) : pool(pool_) , connection(std::move(connection_)) , connection_string(connection_string_) @@ -57,7 +58,7 @@ class ConnectionHolder private: PoolPtr pool; ConnectionPtr connection; - const String & connection_string; + String connection_string; }; using ConnectionHolderPtr = std::shared_ptr; From ee6b086ac076caeeab4ad1f5d093679ec1dd1e1c Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 1 Sep 2021 10:13:36 +0300 Subject: [PATCH 030/472] Backport #28349 to 21.9: Fix non joined rows from nullable column --- src/Interpreters/join_common.cpp | 38 ++++++++++++++++--- .../Transforms/JoiningTransform.cpp | 3 ++ .../00445_join_nullable_keys.reference | 10 +++++ .../0_stateless/00445_join_nullable_keys.sql | 10 +++++ ...1142_join_lc_and_nullable_in_key.reference | 5 +++ .../01142_join_lc_and_nullable_in_key.sql | 9 +++++ 6 files changed, 70 insertions(+), 5 deletions(-) diff --git a/src/Interpreters/join_common.cpp b/src/Interpreters/join_common.cpp index e9f3e4f3fdd1..b653e4b8f2ee 100644 --- a/src/Interpreters/join_common.cpp +++ b/src/Interpreters/join_common.cpp @@ -9,7 +9,6 @@ #include #include #include -#include #include @@ -30,12 +29,41 @@ namespace ErrorCodes namespace { +void 
insertFromNullableOrDefault(MutableColumnPtr & dst, const ColumnNullable * nullable_col) +{ + const auto & nested = nullable_col->getNestedColumn(); + const auto & nullmap = nullable_col->getNullMapColumn().getData(); + if (auto * lc = typeid_cast(dst.get()); lc && !nested.lowCardinality()) + { + for (size_t i = 0; i < nullable_col->size(); ++i) + { + if (nullmap[i]) + lc->insertDefault(); + else + lc->insertRangeFromFullColumn(nested, i, 1); + } + } + else + { + for (size_t i = 0; i < nullable_col->size(); ++i) + { + if (nullmap[i]) + dst->insertDefault(); + else + dst->insertFrom(nested, i); + } + } +} + ColumnPtr changeLowCardinality(const ColumnPtr & column, const ColumnPtr & dst_sample) { if (dst_sample->lowCardinality()) { MutableColumnPtr lc = dst_sample->cloneEmpty(); - typeid_cast(*lc).insertRangeFromFullColumn(*column, 0, column->size()); + if (const auto * nullable_col = typeid_cast(column.get())) + insertFromNullableOrDefault(lc, nullable_col); + else + typeid_cast(*lc).insertRangeFromFullColumn(*column, 0, column->size()); return lc; } @@ -190,9 +218,9 @@ void removeColumnNullability(ColumnWithTypeAndName & column) if (column.column && column.column->isNullable()) { - const auto * nullable_column = checkAndGetColumn(*column.column); - ColumnPtr nested_column = nullable_column->getNestedColumnPtr(); - MutableColumnPtr mutable_column = IColumn::mutate(std::move(nested_column)); + const auto * nullable_col = checkAndGetColumn(*column.column); + MutableColumnPtr mutable_column = nullable_col->getNestedColumn().cloneEmpty(); + insertFromNullableOrDefault(mutable_column, nullable_col); column.column = std::move(mutable_column); } } diff --git a/src/Processors/Transforms/JoiningTransform.cpp b/src/Processors/Transforms/JoiningTransform.cpp index e402fd788bc6..03817f15257b 100644 --- a/src/Processors/Transforms/JoiningTransform.cpp +++ b/src/Processors/Transforms/JoiningTransform.cpp @@ -4,6 +4,7 @@ #include #include +#include namespace DB { @@ -16,7 +17,9 @@ namespace ErrorCodes Block JoiningTransform::transformHeader(Block header, const JoinPtr & join) { ExtraBlockPtr tmp; + LOG_DEBUG(&Poco::Logger::get("JoiningTransform"), "Before join block: '{}'", header.dumpStructure()); join->joinBlock(header, tmp); + LOG_DEBUG(&Poco::Logger::get("JoiningTransform"), "After join block: '{}'", header.dumpStructure()); return header; } diff --git a/tests/queries/0_stateless/00445_join_nullable_keys.reference b/tests/queries/0_stateless/00445_join_nullable_keys.reference index f7675766dc97..afc8003910cf 100644 --- a/tests/queries/0_stateless/00445_join_nullable_keys.reference +++ b/tests/queries/0_stateless/00445_join_nullable_keys.reference @@ -22,3 +22,13 @@ 13 13 14 14 \N 8 +0 0 +0 2 +0 4 +0 6 +0 8 +1 1 +3 3 +5 5 +7 7 +9 9 diff --git a/tests/queries/0_stateless/00445_join_nullable_keys.sql b/tests/queries/0_stateless/00445_join_nullable_keys.sql index 2b8f2ca5f44b..a0453356e983 100644 --- a/tests/queries/0_stateless/00445_join_nullable_keys.sql +++ b/tests/queries/0_stateless/00445_join_nullable_keys.sql @@ -30,3 +30,13 @@ ANY RIGHT JOIN ( SELECT nullIf(number, 8) AS k, toString(number) AS b FROM system.numbers LIMIT 5, 10 ) js2 USING (k) ORDER BY k; + +SELECT k, b +FROM +( + SELECT number + 1 AS k FROM numbers(10) +) js1 +RIGHT JOIN +( + SELECT nullIf(number, if(number % 2 == 0, number, 0)) AS k, number AS b FROM numbers(10) +) js2 USING (k) ORDER BY k, b; diff --git a/tests/queries/0_stateless/01142_join_lc_and_nullable_in_key.reference 
b/tests/queries/0_stateless/01142_join_lc_and_nullable_in_key.reference index c6bdcb773b23..01efbb7c64b8 100644 --- a/tests/queries/0_stateless/01142_join_lc_and_nullable_in_key.reference +++ b/tests/queries/0_stateless/01142_join_lc_and_nullable_in_key.reference @@ -13,6 +13,8 @@ 0 \N Nullable(String) 1 l \N Nullable(String) - +0 +- 1 l \N Nullable(String) 2 \N \N Nullable(String) 1 l \N Nullable(String) @@ -27,3 +29,6 @@ \N \N \N Nullable(String) 1 l \N Nullable(String) \N \N \N Nullable(String) +- +\N \N +- diff --git a/tests/queries/0_stateless/01142_join_lc_and_nullable_in_key.sql b/tests/queries/0_stateless/01142_join_lc_and_nullable_in_key.sql index edaf2870e898..38b728371742 100644 --- a/tests/queries/0_stateless/01142_join_lc_and_nullable_in_key.sql +++ b/tests/queries/0_stateless/01142_join_lc_and_nullable_in_key.sql @@ -27,6 +27,10 @@ SELECT x, lc, materialize(r.lc) y, toTypeName(y) FROM t AS l FULL JOIN nr AS r U SELECT '-'; +SELECT x, lc FROM t AS l RIGHT JOIN nr AS r USING (lc); + +SELECT '-'; + SET join_use_nulls = 1; SELECT x, lc, r.lc, toTypeName(r.lc) FROM t AS l LEFT JOIN nr AS r USING (x) ORDER BY x; @@ -45,6 +49,11 @@ SELECT x, lc, materialize(r.lc) y, toTypeName(y) FROM t AS l LEFT JOIN nr AS r U SELECT x, lc, materialize(r.lc) y, toTypeName(y) FROM t AS l RIGHT JOIN nr AS r USING (lc) ORDER BY x; SELECT x, lc, materialize(r.lc) y, toTypeName(y) FROM t AS l FULL JOIN nr AS r USING (lc) ORDER BY x; +SELECT '-'; + +SELECT x, lc FROM t AS l RIGHT JOIN nr AS r USING (lc); + +SELECT '-'; DROP TABLE t; DROP TABLE nr; From 37adf9945c931f6d2688bffc2bf3190d2daa43b9 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 1 Sep 2021 14:14:40 +0300 Subject: [PATCH 031/472] Backport #28412 to 21.9: More accurate check that zk root exists. --- src/Common/ZooKeeper/ZooKeeper.cpp | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/src/Common/ZooKeeper/ZooKeeper.cpp b/src/Common/ZooKeeper/ZooKeeper.cpp index 9b3c3191b5df..220b52104b52 100644 --- a/src/Common/ZooKeeper/ZooKeeper.cpp +++ b/src/Common/ZooKeeper/ZooKeeper.cpp @@ -130,8 +130,27 @@ void ZooKeeper::init(const std::string & implementation_, const Strings & hosts_ throw DB::Exception("Unknown implementation of coordination service: " + implementation, DB::ErrorCodes::NOT_IMPLEMENTED); } - if (!chroot.empty() && !exists("/")) - throw KeeperException("Zookeeper root doesn't exist. You should create root node " + chroot + " before start.", Coordination::Error::ZNONODE); + if (!chroot.empty()) + { + /// Here we check that zk root exists. + /// This check is clumsy. The reason is we do this request under common mutex, and never want to hung here. + /// Otherwise, all threads which need zk will wait for this mutex eternally. + /// + /// Usually, this was possible in case of memory limit exception happened inside zk implementation. + /// This should not happen now, when memory tracker is disabled. + /// But let's keep it just in case (it is also easy to backport). + auto future = asyncExists("/"); + auto res = future.wait_for(std::chrono::milliseconds(operation_timeout_ms)); + if (res != std::future_status::ready) + throw KeeperException("Cannot check if zookeeper root exists.", Coordination::Error::ZOPERATIONTIMEOUT); + + auto code = future.get().error; + if (!(code == Coordination::Error::ZOK || code == Coordination::Error::ZNONODE)) + throw KeeperException(code, "/"); + + if (code == Coordination::Error::ZNONODE) + throw KeeperException("ZooKeeper root doesn't exist. 
You should create root node " + chroot + " before start.", Coordination::Error::ZNONODE); + } } ZooKeeper::ZooKeeper(const std::string & hosts_string, const std::string & identity_, int32_t session_timeout_ms_, From 7f1b668cc0b950e4ea560d75fe98761420a05360 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 1 Sep 2021 23:27:15 +0300 Subject: [PATCH 032/472] Auto version update to [21.9.1.2] [54454] --- cmake/autogenerated_versions.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cmake/autogenerated_versions.txt b/cmake/autogenerated_versions.txt index 2435335f6694..6b96c720bc5c 100644 --- a/cmake/autogenerated_versions.txt +++ b/cmake/autogenerated_versions.txt @@ -6,7 +6,7 @@ SET(VERSION_REVISION 54454) SET(VERSION_MAJOR 21) SET(VERSION_MINOR 9) SET(VERSION_PATCH 1) -SET(VERSION_GITHASH f063e44131a048ba2d9af8075f03700fd5ec3e69) -SET(VERSION_DESCRIBE v21.9.1.7770-prestable) -SET(VERSION_STRING 21.9.1.7770) +SET(VERSION_GITHASH f6fa3218282532b94a73508de05fd00d0cd65e86) +SET(VERSION_DESCRIBE v21.9.1.2-prestable) +SET(VERSION_STRING 21.9.1.2) # end of autochange From 057f1921f6c40478eb10f60ec21eceb8ab869d69 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 1 Sep 2021 23:31:11 +0300 Subject: [PATCH 033/472] Auto version update to [21.9.2.1] [54454] --- cmake/autogenerated_versions.txt | 6 +-- debian/changelog | 4 +- docker/client/Dockerfile | 2 +- docker/server/Dockerfile | 2 +- docker/test/Dockerfile | 2 +- .../StorageSystemContributors.generated.cpp | 47 +++++++++++++++++++ 6 files changed, 55 insertions(+), 8 deletions(-) diff --git a/cmake/autogenerated_versions.txt b/cmake/autogenerated_versions.txt index 6b96c720bc5c..470c49efcec0 100644 --- a/cmake/autogenerated_versions.txt +++ b/cmake/autogenerated_versions.txt @@ -5,8 +5,8 @@ SET(VERSION_REVISION 54454) SET(VERSION_MAJOR 21) SET(VERSION_MINOR 9) -SET(VERSION_PATCH 1) +SET(VERSION_PATCH 2) SET(VERSION_GITHASH f6fa3218282532b94a73508de05fd00d0cd65e86) -SET(VERSION_DESCRIBE v21.9.1.2-prestable) -SET(VERSION_STRING 21.9.1.2) +SET(VERSION_DESCRIBE v21.9.2.1-prestable) +SET(VERSION_STRING 21.9.2.1) # end of autochange diff --git a/debian/changelog b/debian/changelog index 38f740ae062e..f7a7ba9d9d0b 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,5 +1,5 @@ -clickhouse (21.9.1.1) unstable; urgency=low +clickhouse (21.9.2.1) unstable; urgency=low * Modified source code - -- clickhouse-release Sat, 10 Jul 2021 08:22:49 +0300 + -- clickhouse-release Wed, 01 Sep 2021 23:31:07 +0300 diff --git a/docker/client/Dockerfile b/docker/client/Dockerfile index 2391256ec6ac..6b3ef8a23a4b 100644 --- a/docker/client/Dockerfile +++ b/docker/client/Dockerfile @@ -1,7 +1,7 @@ FROM ubuntu:18.04 ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/" -ARG version=21.9.1.* +ARG version=21.9.2.* RUN sed -i 's|http://archive|http://ru.archive|g' /etc/apt/sources.list diff --git a/docker/server/Dockerfile b/docker/server/Dockerfile index 474ebaaee88d..eaf9db032094 100644 --- a/docker/server/Dockerfile +++ b/docker/server/Dockerfile @@ -1,7 +1,7 @@ FROM ubuntu:20.04 ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/" -ARG version=21.9.1.* +ARG version=21.9.2.* ARG gosu_ver=1.10 # set non-empty deb_location_url url to create a docker image diff --git a/docker/test/Dockerfile b/docker/test/Dockerfile index 5768753cd7cb..4b8385ed639b 100644 --- a/docker/test/Dockerfile +++ b/docker/test/Dockerfile @@ -1,7 +1,7 @@ FROM ubuntu:18.04 ARG repository="deb 
https://repo.clickhouse.tech/deb/stable/ main/" -ARG version=21.9.1.* +ARG version=21.9.2.* RUN apt-get update && \ apt-get install -y apt-transport-https dirmngr && \ diff --git a/src/Storages/System/StorageSystemContributors.generated.cpp b/src/Storages/System/StorageSystemContributors.generated.cpp index bed8eadc19c3..351dc173e063 100644 --- a/src/Storages/System/StorageSystemContributors.generated.cpp +++ b/src/Storages/System/StorageSystemContributors.generated.cpp @@ -33,6 +33,7 @@ const char * auto_contributors[] { "alesapin", "Alex", "Alexander Avdonkin", + "Alexander Bezpiatov", "Alexander Burmak", "Alexander Ermolaev", "Alexander Gololobov", @@ -43,6 +44,7 @@ const char * auto_contributors[] { "Alexander Krasheninnikov", "Alexander Kuranoff", "Alexander Kuzmenkov", + "Alexander Lodin", "Alexander Lukin", "Alexander Makarov", "Alexander Marshalov", @@ -54,6 +56,7 @@ const char * auto_contributors[] { "Alexander Sapin", "Alexander Tokmakov", "Alexander Tretiakov", + "Alexandra", "Alexandra Latysheva", "Alexandre Snarskii", "Alexandr Kondratev", @@ -63,6 +66,7 @@ const char * auto_contributors[] { "Alexei Averchenko", "Alexey", "Alexey Arno", + "Alexey Boykov", "Alexey Dushechkin", "Alexey Elymanov", "Alexey Ilyukhov", @@ -95,6 +99,7 @@ const char * auto_contributors[] { "Anatoly Pugachev", "ana-uvarova", "AnaUvarova", + "Andr0901", "Andreas Hunkeler", "AndreevDm", "Andrei Bodrov", @@ -115,10 +120,12 @@ const char * auto_contributors[] { "Andrey Skobtsov", "Andrey Urusov", "Andrey Z", + "Andy Liang", "Andy Yang", "Anmol Arora", "Anna", "Anna Shakhova", + "anneji-dev", "annvsh", "anrodigina", "Anthony N. Simon", @@ -140,6 +147,7 @@ const char * auto_contributors[] { "aprudaev", "Ariel Robaldo", "Arsen Hakobyan", + "Arslan G", "ArtCorp", "Artem Andreenko", "Artemeey", @@ -155,6 +163,7 @@ const char * auto_contributors[] { "artpaul", "Artur", "Artur Beglaryan", + "Artur Filatenkov", "AsiaKorushkina", "asiana21", "atereh", @@ -192,11 +201,14 @@ const char * auto_contributors[] { "Boris Granveaud", "Bowen Masco", "bo zeng", + "Braulio Valdivielso", "Brett Hoerner", "BSD_Conqueror", "bseng", "Bulat Gaifullin", "Carbyn", + "caspian", + "Caspian", "cekc", "centos7", "champtar", @@ -263,6 +275,7 @@ const char * auto_contributors[] { "Dmitrii Raev", "dmitriiut", "Dmitriy", + "Dmitriy Lushnikov", "Dmitry", "Dmitry Belyavtsev", "Dmitry Bilunov", @@ -328,6 +341,7 @@ const char * auto_contributors[] { "favstovol", "FawnD2", "FeehanG", + "feihengye", "felixoid", "felixxdu", "feng lv", @@ -335,6 +349,7 @@ const char * auto_contributors[] { "fessmage", "FgoDt", "fibersel", + "Filatenkov Artur", "filimonov", "filipe", "Filipe Caixeta", @@ -361,6 +376,7 @@ const char * auto_contributors[] { "George", "George3d6", "George G", + "Georgy Ginzburg", "Gervasio Varela", "ggerogery", "giordyb", @@ -386,15 +402,18 @@ const char * auto_contributors[] { "hchen9", "hcz", "heng zhao", + "hermano", "hexiaoting", "Hiroaki Nakamura", "hotid", + "huangzhaowei", "HuFuwang", "Hui Wang", "hustnn", "huzhichengdd", "ice1x", "idfer", + "igomac", "igor", "Igor", "Igor Hatarist", @@ -404,6 +423,7 @@ const char * auto_contributors[] { "Igr", "Igr Mineev", "ikarishinjieva", + "Ikko Ashimine", "ikopylov", "Ildar Musin", "Ildus Kurbangaliev", @@ -437,11 +457,13 @@ const char * auto_contributors[] { "Ivan Starkov", "ivanzhukov", "Ivan Zhukov", + "ivoleg", "Jack Song", "JackyWoo", "Jacob Hayes", "jakalletti", "JaosnHsieh", + "jasine", "Jason", "javartisan", "javi", @@ -449,6 +471,7 @@ const char * auto_contributors[] { "Javi 
Santana", "Javi santana bot", "Jean Baptiste Favre", + "Jeffrey Dang", "jennyma", "jetgm", "Jiading Guo", @@ -467,15 +490,18 @@ const char * auto_contributors[] { "Keiji Yoshida", "Ken Chen", "Kevin Chiang", + "Kevin Michel", "kevin wan", "Kiran", "Kirill Danshin", + "Kirill Ershov", "kirillikoff", "Kirill Malev", "Kirill Shvakov", "kmeaw", "Koblikov Mihail", "KochetovNicolai", + "kolsys", "Konstantin Grabar", "Konstantin Lebedev", "Konstantin Malanchev", @@ -492,6 +518,7 @@ const char * auto_contributors[] { "Kseniia Sumarokova", "kshvakov", "kssenii", + "Ky Li", "l", "l1tsolaiki", "lalex", @@ -502,6 +529,7 @@ const char * auto_contributors[] { "Leopold Schabel", "leozhang", "Lev Borodin", + "levie", "levushkin aleksej", "levysh", "Lewinma", @@ -535,6 +563,7 @@ const char * auto_contributors[] { "Maksim Kita", "Maks Skorokhod", "malkfilipp", + "Malte", "manmitya", "maqroll", "Marat IDRISOV", @@ -595,6 +624,7 @@ const char * auto_contributors[] { "Michal Lisowski", "michon470", "MicrochipQ", + "Miguel Fernández", "miha-g", "Mihail Fandyushin", "Mikahil Nacharov", @@ -617,10 +647,12 @@ const char * auto_contributors[] { "MikuSugar", "Milad Arabi", "millb", + "Misko Lee", "mnkonkova", "Mohammad Hossein Sekhavat", "morty", "moscas", + "Mostafa Dahab", "MovElb", "Mr.General", "Murat Kabilov", @@ -634,7 +666,9 @@ const char * auto_contributors[] { "nauta", "nautaa", "Neeke Gao", + "neng.liu", "Neng Liu", + "NengLiu", "never lee", "NeZeD [Mac Pro]", "nicelulu", @@ -684,6 +718,8 @@ const char * auto_contributors[] { "Olga Khvostikova", "olgarev", "Olga Revyakina", + "OmarBazaraa", + "Onehr7", "orantius", "Orivej Desh", "Oskar Wojciski", @@ -701,6 +737,7 @@ const char * auto_contributors[] { "Pavel Yakunin", "Pavlo Bashynskiy", "Pawel Rog", + "pdv-ru", "Peng Jian", "Persiyanov Dmitriy Andreevich", "Pervakov Grigorii", @@ -755,6 +792,7 @@ const char * auto_contributors[] { "Ruslan Savchenko", "Russ Frank", "Ruzal Ibragimov", + "ryzuo", "Sabyanin Maxim", "SaltTan", "Sami Kerola", @@ -763,6 +801,7 @@ const char * auto_contributors[] { "satanson", "Saulius Valatka", "sdk2", + "Sean Haynes", "Sébastien Launay", "serebrserg", "Sergei Bocharov", @@ -839,12 +878,14 @@ const char * auto_contributors[] { "TCeason", "Tema Novikov", "templarzq", + "terrylin", "The-Alchemist", "Tiaonmmn", "tiger.yan", "tison", "TiunovNN", "Tobias Adamson", + "Tobias Lins", "Tom Bombadil", "topvisor", "Tsarkova Anastasia", @@ -863,6 +904,7 @@ const char * auto_contributors[] { "VadimPE", "Vadim Plakhtinskiy", "Vadim Skipin", + "Vadim Volodin", "Val", "Valera Ryaboshapko", "Vasilyev Nikita", @@ -877,6 +919,7 @@ const char * auto_contributors[] { "Veloman Yunkan", "Veniamin Gvozdikov", "Veselkov Konstantin", + "vgocoder", "vic", "vicdashkov", "Victor", @@ -891,12 +934,14 @@ const char * auto_contributors[] { "Vitaly", "Vitaly Baranov", "Vitaly Samigullin", + "Vitaly Stoyan", "vitstn", "vivarum", "Vivien Maisonneuve", "Vlad Arkhipov", "Vladimir", "Vladimir Bunchuk", + "Vladimir C", "Vladimir Ch", "Vladimir Chebotarev", "vladimir golovchenko", @@ -925,6 +970,7 @@ const char * auto_contributors[] { "wzl", "Xianda Ke", "Xiang Zhou", + "xiedeyantu", "xPoSx", "Yağızcan Değirmenci", "yang", @@ -994,6 +1040,7 @@ const char * auto_contributors[] { "曲正鹏", "未来星___费", "极客青年", + "董海镔", "谢磊", "贾顺名(Jarvis)", "陈小玉", From 24a4fbfbaa9d656f71d8d7cca88a13647509f6d0 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 2 Sep 2021 02:23:42 +0300 Subject: [PATCH 034/472] Backport #28446 to 21.9: Fix watch leak in DDLWorker --- 
src/Interpreters/DDLWorker.cpp | 24 +----------------------- 1 file changed, 1 insertion(+), 23 deletions(-) diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp index c2de6ecbaf14..7abe78472b0d 100644 --- a/src/Interpreters/DDLWorker.cpp +++ b/src/Interpreters/DDLWorker.cpp @@ -1154,29 +1154,7 @@ void DDLWorker::runMainThread() LOG_DEBUG(log, "Waiting for queue updates (stat: {}, {}, {}, {})", queue_node_stat.version, queue_node_stat.cversion, queue_node_stat.numChildren, queue_node_stat.pzxid); - /// FIXME It may hang for unknown reason. Timeout is just a hotfix. - constexpr int queue_wait_timeout_ms = 10000; - bool updated = queue_updated_event->tryWait(queue_wait_timeout_ms); - if (!updated) - { - Coordination::Stat new_stat; - tryGetZooKeeper()->get(queue_dir, &new_stat); - bool queue_changed = memcmp(&queue_node_stat, &new_stat, sizeof(Coordination::Stat)) != 0; - bool watch_triggered = queue_updated_event->tryWait(0); - if (queue_changed && !watch_triggered) - { - /// It should never happen. - /// Maybe log message, abort() and system.zookeeper_log will help to debug it and remove timeout (#26036). - LOG_TRACE( - log, - "Queue was not updated (stat: {}, {}, {}, {})", - new_stat.version, - new_stat.cversion, - new_stat.numChildren, - new_stat.pzxid); - context->getZooKeeperLog()->flush(); - } - } + queue_updated_event->wait(); } catch (const Coordination::Exception & e) { From eb783614ea2e574ad7e52b883a64a19720b027ef Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 2 Sep 2021 12:26:10 +0300 Subject: [PATCH 035/472] Backport #28433 to 21.9: Fix cannot use non-ordinary table names in materialized postgresql --- .../PostgreSQLReplicationHandler.cpp | 4 +-- .../test.py | 26 ++++++++++++++++--- 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp index 3477397adb72..c8c74d2ddaa0 100644 --- a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp +++ b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp @@ -212,7 +212,7 @@ StoragePtr PostgreSQLReplicationHandler::loadFromSnapshot(String & snapshot_name /// Load from snapshot, which will show table state before creation of replication slot. /// Already connected to needed database, no need to add it to query. 
- query_str = fmt::format("SELECT * FROM {}", table_name); + query_str = fmt::format("SELECT * FROM {}", doubleQuoteString(table_name)); materialized_storage->createNestedIfNeeded(fetchTableStructure(*tx, table_name)); auto nested_storage = materialized_storage->getNested(); @@ -321,7 +321,7 @@ void PostgreSQLReplicationHandler::createPublicationIfNeeded(pqxx::work & tx) { if (!tables_list.empty()) tables_list += ", "; - tables_list += storage_data.first; + tables_list += doubleQuoteString(storage_data.first); } } diff --git a/tests/integration/test_postgresql_replica_database_engine/test.py b/tests/integration/test_postgresql_replica_database_engine/test.py index 40324089b1b2..68b42d91fb65 100644 --- a/tests/integration/test_postgresql_replica_database_engine/test.py +++ b/tests/integration/test_postgresql_replica_database_engine/test.py @@ -19,15 +19,15 @@ with_postgres=True, stay_alive=True) postgres_table_template = """ - CREATE TABLE IF NOT EXISTS {} ( + CREATE TABLE IF NOT EXISTS "{}" ( key Integer NOT NULL, value Integer, PRIMARY KEY(key)) """ postgres_table_template_2 = """ - CREATE TABLE IF NOT EXISTS {} ( + CREATE TABLE IF NOT EXISTS "{}" ( key Integer NOT NULL, value1 Integer, value2 Integer, value3 Integer, PRIMARY KEY(key)) """ postgres_table_template_3 = """ - CREATE TABLE IF NOT EXISTS {} ( + CREATE TABLE IF NOT EXISTS "{}" ( key1 Integer NOT NULL, value1 Integer, key2 Integer NOT NULL, value2 Integer NOT NULL) """ @@ -76,8 +76,11 @@ def drop_materialized_db(materialized_database='test_database'): instance.query('DROP DATABASE IF EXISTS {}'.format(materialized_database)) assert materialized_database not in instance.query('SHOW DATABASES') +def drop_postgres_table(cursor, table_name): + cursor.execute("""DROP TABLE IF EXISTS "{}" """.format(table_name)) + def create_postgres_table(cursor, table_name, replica_identity_full=False, template=postgres_table_template): - cursor.execute("DROP TABLE IF EXISTS {}".format(table_name)) + drop_postgres_table(cursor, table_name) cursor.execute(template.format(table_name)) if replica_identity_full: cursor.execute('ALTER TABLE {} REPLICA IDENTITY FULL;'.format(table_name)) @@ -923,6 +926,21 @@ def transaction(thread_id): cursor.execute('drop table if exists postgresql_replica_{};'.format(i)) +def test_quoting(started_cluster): + drop_materialized_db() + conn = get_postgres_conn(ip=started_cluster.postgres_ip, + port=started_cluster.postgres_port, + database=True) + cursor = conn.cursor() + table_name = 'user' + create_postgres_table(cursor, table_name); + instance.query("INSERT INTO postgres_database.{} SELECT number, number from numbers(50)".format(table_name)) + create_materialized_db(ip=started_cluster.postgres_ip, port=started_cluster.postgres_port) + check_tables_are_synchronized(table_name); + drop_postgres_table(cursor, table_name) + drop_materialized_db() + + if __name__ == '__main__': cluster.start() input("Cluster created, press any key to destroy...") From 973662f4923d437791827f1ac3d38defec97d78b Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 3 Sep 2021 14:32:59 +0300 Subject: [PATCH 036/472] Backport #28543 to 21.9: Fix endless loop for truncated bzip2 archive --- src/IO/Bzip2ReadBuffer.cpp | 7 +++++++ src/IO/Bzip2WriteBuffer.cpp | 2 +- .../0_stateless/02022_bzip2_truncated.reference | 0 .../queries/0_stateless/02022_bzip2_truncated.sh | 15 +++++++++++++++ 4 files changed, 23 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/02022_bzip2_truncated.reference create mode 100755 
tests/queries/0_stateless/02022_bzip2_truncated.sh diff --git a/src/IO/Bzip2ReadBuffer.cpp b/src/IO/Bzip2ReadBuffer.cpp index 99798bca3258..098e829188f3 100644 --- a/src/IO/Bzip2ReadBuffer.cpp +++ b/src/IO/Bzip2ReadBuffer.cpp @@ -12,6 +12,7 @@ namespace DB namespace ErrorCodes { extern const int BZIP2_STREAM_DECODER_FAILED; + extern const int UNEXPECTED_END_OF_FILE; } @@ -90,6 +91,12 @@ bool Bzip2ReadBuffer::nextImpl() "bzip2 stream decoder failed: error code: {}", ret); + if (in->eof()) + { + eof = true; + throw Exception(ErrorCodes::UNEXPECTED_END_OF_FILE, "Unexpected end of bzip2 archive"); + } + return true; } } diff --git a/src/IO/Bzip2WriteBuffer.cpp b/src/IO/Bzip2WriteBuffer.cpp index 39c5356b792c..6e63779d3cf5 100644 --- a/src/IO/Bzip2WriteBuffer.cpp +++ b/src/IO/Bzip2WriteBuffer.cpp @@ -2,7 +2,7 @@ # include #endif -#if USE_BROTLI +#if USE_BZIP2 # include # include // Y_IGNORE diff --git a/tests/queries/0_stateless/02022_bzip2_truncated.reference b/tests/queries/0_stateless/02022_bzip2_truncated.reference new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/queries/0_stateless/02022_bzip2_truncated.sh b/tests/queries/0_stateless/02022_bzip2_truncated.sh new file mode 100755 index 000000000000..16d3e22feb96 --- /dev/null +++ b/tests/queries/0_stateless/02022_bzip2_truncated.sh @@ -0,0 +1,15 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +tmp_path=$(mktemp "$CURDIR/02022_bzip2_truncate.XXXXXX.bz2") +trap 'rm -f $tmp_path' EXIT + +${CLICKHOUSE_LOCAL} -q "SELECT * FROM numbers(1e6) FORMAT TSV" | bzip2 > "$tmp_path" +truncate -s10000 "$tmp_path" +# just ensure that it will exit eventually +${CLICKHOUSE_LOCAL} -q "SELECT count() FROM file('$tmp_path', 'TSV', 'n UInt64') FORMAT Null" >& /dev/null + +exit 0 From 05b955738a24d404dff087ab76afe554ac0669f6 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 3 Sep 2021 14:34:08 +0300 Subject: [PATCH 037/472] Backport #28528 to 21.9: Fix detach/attach for ReplicatedVersionedCollapsingMergeTree after alter --- .../ReplicatedMergeTreeTableMetadata.cpp | 10 +++++++++- ...lter_add_and_modify_order_zookeeper.reference | 4 ++-- ...1526_alter_add_and_modify_order_zookeeper.sql | 16 +++++++++++++++- 3 files changed, 26 insertions(+), 4 deletions(-) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp index db1c2bc89af0..0637a6bb0272 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp @@ -46,7 +46,15 @@ ReplicatedMergeTreeTableMetadata::ReplicatedMergeTreeTableMetadata(const MergeTr primary_key = formattedAST(metadata_snapshot->getPrimaryKey().expression_list_ast); if (metadata_snapshot->isPrimaryKeyDefined()) - sorting_key = formattedAST(metadata_snapshot->getSortingKey().expression_list_ast); + { + /// We don't use preparsed AST `sorting_key.expression_list_ast` because + /// it contain version column for VersionedCollapsingMergeTree, which + /// is not stored in ZooKeeper for compatibility reasons. So the best + /// compatible way is just to convert definition_ast to list and + /// serialize it. In all other places key.expression_list_ast should be + /// used. 
+ sorting_key = formattedAST(extractKeyExpressionList(metadata_snapshot->getSortingKey().definition_ast)); + } data_format_version = data.format_version; diff --git a/tests/queries/0_stateless/01526_alter_add_and_modify_order_zookeeper.reference b/tests/queries/0_stateless/01526_alter_add_and_modify_order_zookeeper.reference index 4063d93d542c..1dcd3543d4e9 100644 --- a/tests/queries/0_stateless/01526_alter_add_and_modify_order_zookeeper.reference +++ b/tests/queries/0_stateless/01526_alter_add_and_modify_order_zookeeper.reference @@ -1,6 +1,6 @@ 2019-10-01 a 1 aa 1 1 1 2019-10-01 a 1 aa 1 1 1 0 -CREATE TABLE default.table_for_alter\n(\n `d` Date,\n `a` String,\n `b` UInt8,\n `x` String,\n `y` Int8,\n `version` UInt64,\n `sign` Int8 DEFAULT 1,\n `order` UInt32\n)\nENGINE = ReplicatedVersionedCollapsingMergeTree(\'/clickhouse/tables/01526_alter_add/t1\', \'1\', sign, version)\nPARTITION BY y\nPRIMARY KEY d\nORDER BY (d, order)\nSETTINGS index_granularity = 8192 +CREATE TABLE default.table_for_alter\n(\n `d` Date,\n `a` String,\n `b` UInt8,\n `x` String,\n `y` Int8,\n `version` UInt64,\n `sign` Int8 DEFAULT 1,\n `order` UInt32\n)\nENGINE = ReplicatedVersionedCollapsingMergeTree(\'/clickhouse/tables/default/01526_alter_add/t1\', \'1\', sign, version)\nPARTITION BY y\nPRIMARY KEY d\nORDER BY (d, order)\nSETTINGS index_granularity = 8192 2019-10-01 a 1 aa 1 1 1 0 0 2019-10-02 b 2 bb 2 2 2 1 2 -CREATE TABLE default.table_for_alter\n(\n `d` Date,\n `a` String,\n `b` UInt8,\n `x` String,\n `y` Int8,\n `version` UInt64,\n `sign` Int8 DEFAULT 1,\n `order` UInt32,\n `datum` UInt32\n)\nENGINE = ReplicatedVersionedCollapsingMergeTree(\'/clickhouse/tables/01526_alter_add/t1\', \'1\', sign, version)\nPARTITION BY y\nPRIMARY KEY d\nORDER BY (d, order, datum)\nSETTINGS index_granularity = 8192 +CREATE TABLE default.table_for_alter\n(\n `d` Date,\n `a` String,\n `b` UInt8,\n `x` String,\n `y` Int8,\n `version` UInt64,\n `sign` Int8 DEFAULT 1,\n `order` UInt32,\n `datum` UInt32\n)\nENGINE = ReplicatedVersionedCollapsingMergeTree(\'/clickhouse/tables/default/01526_alter_add/t1\', \'1\', sign, version)\nPARTITION BY y\nPRIMARY KEY d\nORDER BY (d, order, datum)\nSETTINGS index_granularity = 8192 diff --git a/tests/queries/0_stateless/01526_alter_add_and_modify_order_zookeeper.sql b/tests/queries/0_stateless/01526_alter_add_and_modify_order_zookeeper.sql index b718ba199c17..db8c2de09504 100644 --- a/tests/queries/0_stateless/01526_alter_add_and_modify_order_zookeeper.sql +++ b/tests/queries/0_stateless/01526_alter_add_and_modify_order_zookeeper.sql @@ -12,17 +12,27 @@ CREATE TABLE table_for_alter `version` UInt64, `sign` Int8 DEFAULT 1 ) -ENGINE = ReplicatedVersionedCollapsingMergeTree('/clickhouse/tables/01526_alter_add/t1', '1', sign, version) +ENGINE = ReplicatedVersionedCollapsingMergeTree('/clickhouse/tables/{database}/01526_alter_add/t1', '1', sign, version) PARTITION BY y ORDER BY d SETTINGS index_granularity = 8192; INSERT INTO table_for_alter VALUES(toDate('2019-10-01'), 'a', 1, 'aa', 1, 1, 1); +DETACH TABLE table_for_alter; + +ATTACH TABLE table_for_alter; + + SELECT * FROM table_for_alter; ALTER TABLE table_for_alter ADD COLUMN order UInt32, MODIFY ORDER BY (d, order); + +DETACH TABLE table_for_alter; + +ATTACH TABLE table_for_alter; + SELECT * FROM table_for_alter; SHOW CREATE TABLE table_for_alter; @@ -35,4 +45,8 @@ SELECT * FROM table_for_alter ORDER BY d; SHOW CREATE TABLE table_for_alter; +DETACH TABLE table_for_alter; + +ATTACH TABLE table_for_alter; + DROP TABLE IF EXISTS table_for_alter; From 
8129281d705846b8dd7e7ffd2d1f2ac9ee00dae6 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 3 Sep 2021 22:39:53 +0300 Subject: [PATCH 038/472] Backport #28531 to 21.9: Fix usage of nested columns with non-array columns with the same prefix --- src/DataTypes/NestedUtils.cpp | 5 ++- src/DataTypes/tests/gtest_NestedUtils.cpp | 43 +++++++++++++++++++ .../02017_columns_with_dot.reference | 3 ++ .../0_stateless/02017_columns_with_dot.sql | 24 +++++++++++ 4 files changed, 74 insertions(+), 1 deletion(-) create mode 100644 src/DataTypes/tests/gtest_NestedUtils.cpp create mode 100644 tests/queries/0_stateless/02017_columns_with_dot.reference create mode 100644 tests/queries/0_stateless/02017_columns_with_dot.sql diff --git a/src/DataTypes/NestedUtils.cpp b/src/DataTypes/NestedUtils.cpp index 94b3b2f3cf70..4f804a0ca50a 100644 --- a/src/DataTypes/NestedUtils.cpp +++ b/src/DataTypes/NestedUtils.cpp @@ -141,7 +141,7 @@ NamesAndTypesList collect(const NamesAndTypesList & names_and_types) auto nested_types = getSubcolumnsOfNested(names_and_types); for (const auto & name_type : names_and_types) - if (!nested_types.count(splitName(name_type.name).first)) + if (!isArray(name_type.type) || !nested_types.count(splitName(name_type.name).first)) res.push_back(name_type); for (const auto & name_type : nested_types) @@ -157,6 +157,9 @@ NamesAndTypesList convertToSubcolumns(const NamesAndTypesList & names_and_types) for (auto & name_type : res) { + if (!isArray(name_type.type)) + continue; + auto split = splitName(name_type.name); if (name_type.isSubcolumn() || split.second.empty()) continue; diff --git a/src/DataTypes/tests/gtest_NestedUtils.cpp b/src/DataTypes/tests/gtest_NestedUtils.cpp new file mode 100644 index 000000000000..c01758b8f0ff --- /dev/null +++ b/src/DataTypes/tests/gtest_NestedUtils.cpp @@ -0,0 +1,43 @@ +#include +#include +#include +#include +#include + +using namespace DB; + +GTEST_TEST(NestedUtils, collect) +{ + DataTypePtr uint_type = std::make_shared(); + DataTypePtr array_type = std::make_shared(std::make_shared()); + + const NamesAndTypesList source_columns = + { + {"id", uint_type}, + {"arr1", array_type}, + {"b.id", uint_type}, + {"b.arr1", array_type}, + {"b.arr2", array_type} + }; + + auto nested_type = createNested({uint_type, uint_type}, {"arr1", "arr2"}); + const NamesAndTypesList columns_with_subcolumns = + { + {"id", uint_type}, + {"arr1", array_type}, + {"b.id", uint_type}, + {"b", "arr1", nested_type, array_type}, + {"b", "arr2", nested_type, array_type} + }; + + const NamesAndTypesList columns_with_nested = + { + {"id", uint_type}, + {"arr1", array_type}, + {"b.id", uint_type}, + {"b", nested_type}, + }; + + ASSERT_EQ(Nested::convertToSubcolumns(source_columns).toString(), columns_with_subcolumns.toString()); + ASSERT_EQ(Nested::collect(source_columns).toString(), columns_with_nested.toString()); +} diff --git a/tests/queries/0_stateless/02017_columns_with_dot.reference b/tests/queries/0_stateless/02017_columns_with_dot.reference new file mode 100644 index 000000000000..5922e56fb567 --- /dev/null +++ b/tests/queries/0_stateless/02017_columns_with_dot.reference @@ -0,0 +1,3 @@ +1 [0,0] 2 [1,1,3] +1 [0,0] 2 [1,1,3] +1 [0,0] 2 [1,1,3] diff --git a/tests/queries/0_stateless/02017_columns_with_dot.sql b/tests/queries/0_stateless/02017_columns_with_dot.sql new file mode 100644 index 000000000000..ae901214d757 --- /dev/null +++ b/tests/queries/0_stateless/02017_columns_with_dot.sql @@ -0,0 +1,24 @@ +DROP TABLE IF EXISTS t_with_dots; +CREATE TABLE t_with_dots (id UInt32, arr 
Array(UInt32), `b.id` UInt32, `b.arr` Array(UInt32)) ENGINE = Log; + +INSERT INTO t_with_dots VALUES (1, [0, 0], 2, [1, 1, 3]); +SELECT * FROM t_with_dots; + +DROP TABLE t_with_dots; + +CREATE TABLE t_with_dots (id UInt32, arr Array(UInt32), `b.id` UInt32, `b.arr` Array(UInt32)) +ENGINE = MergeTree ORDER BY id; + +INSERT INTO t_with_dots VALUES (1, [0, 0], 2, [1, 1, 3]); +SELECT * FROM t_with_dots; + +DROP TABLE t_with_dots; + +CREATE TABLE t_with_dots (id UInt32, arr Array(UInt32), `b.id` UInt32, `b.arr` Array(UInt32)) +ENGINE = MergeTree ORDER BY id +SETTINGS min_bytes_for_wide_part = 0; + +INSERT INTO t_with_dots VALUES (1, [0, 0], 2, [1, 1, 3]); +SELECT * FROM t_with_dots; + +DROP TABLE t_with_dots; From a98eaacf2f8088452112aec84d859b8bf22938b8 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Sat, 4 Sep 2021 00:31:54 +0300 Subject: [PATCH 039/472] Backport #28236 to 21.9: CHJIT custom memory manager --- src/Interpreters/JIT/CHJIT.cpp | 229 ++++++++++++++++++++++++++++----- 1 file changed, 195 insertions(+), 34 deletions(-) diff --git a/src/Interpreters/JIT/CHJIT.cpp b/src/Interpreters/JIT/CHJIT.cpp index c06b4132309a..3f2a078f58ed 100644 --- a/src/Interpreters/JIT/CHJIT.cpp +++ b/src/Interpreters/JIT/CHJIT.cpp @@ -2,6 +2,10 @@ #if USE_EMBEDDED_COMPILER +#include + +#include + #include #include #include @@ -22,7 +26,10 @@ #include #include +#include #include +#include + namespace DB { @@ -31,6 +38,8 @@ namespace ErrorCodes { extern const int CANNOT_COMPILE_CODE; extern const int LOGICAL_ERROR; + extern const int CANNOT_ALLOCATE_MEMORY; + extern const int CANNOT_MPROTECT; } /** Simple module to object file compiler. @@ -80,6 +89,161 @@ class JITCompiler llvm::TargetMachine & target_machine; }; +/** Arena that allocate all memory with system page_size. + * All allocated pages can be protected with protection_flags using protect method. + * During destruction all allocated pages protection_flags will be reset. + */ +class PageArena : private boost::noncopyable +{ +public: + PageArena() : page_size(::getPageSize()) {} + + char * allocate(size_t size, size_t alignment) + { + /** First check if in some allocated page blocks there are enough free memory to make allocation. + * If there is no such block create it and then allocate from it. + */ + + for (size_t i = 0; i < page_blocks.size(); ++i) + { + char * result = tryAllocateFromPageBlockWithIndex(size, alignment, i); + if (result) + return result; + } + + allocateNextPageBlock(size); + size_t allocated_page_index = page_blocks.size() - 1; + char * result = tryAllocateFromPageBlockWithIndex(size, alignment, allocated_page_index); + assert(result); + + return result; + } + + inline size_t getAllocatedSize() const { return allocated_size; } + + inline size_t getPageSize() const { return page_size; } + + ~PageArena() + { + protect(PROT_READ | PROT_WRITE); + + for (auto & page_block : page_blocks) + free(page_block.base()); + } + + void protect(int protection_flags) + { + /** The code is partially based on the LLVM codebase + * The LLVM Project is under the Apache License v2.0 with LLVM Exceptions. + */ + +# if defined(__NetBSD__) && defined(PROT_MPROTECT) + protection_flags |= PROT_MPROTECT(PROT_READ | PROT_WRITE | PROT_EXEC); +# endif + + bool invalidate_cache = (protection_flags & PROT_EXEC); + + for (const auto & block : page_blocks) + { +# if defined(__arm__) || defined(__aarch64__) + /// Certain ARM implementations treat icache clear instruction as a memory read, + /// and CPU segfaults on trying to clear cache on !PROT_READ page. 
+ /// Therefore we need to temporarily add PROT_READ for the sake of flushing the instruction caches. + if (invalidate_cache && !(protection_flags & PROT_READ)) + { + int res = mprotect(block.base(), block.blockSize(), protection_flags | PROT_READ); + if (res != 0) + throwFromErrno("Cannot mprotect memory region", ErrorCodes::CANNOT_MPROTECT); + + llvm::sys::Memory::InvalidateInstructionCache(block.base(), block.blockSize()); + InvalidateCache = false; + } +# endif + int res = mprotect(block.base(), block.blockSize(), protection_flags); + if (res != 0) + throwFromErrno("Cannot mprotect memory region", ErrorCodes::CANNOT_MPROTECT); + + if (invalidate_cache) + llvm::sys::Memory::InvalidateInstructionCache(block.base(), block.blockSize()); + } + } + +private: + struct PageBlock + { + public: + PageBlock(void * pages_base_, size_t pages_size_, size_t page_size_) + : pages_base(pages_base_), pages_size(pages_size_), page_size(page_size_) + { + } + + inline void * base() const { return pages_base; } + inline size_t pagesSize() const { return pages_size; } + inline size_t pageSize() const { return page_size; } + inline size_t blockSize() const { return pages_size * page_size; } + + private: + void * pages_base; + size_t pages_size; + size_t page_size; + }; + + std::vector page_blocks; + + std::vector page_blocks_allocated_size; + + size_t page_size = 0; + + size_t allocated_size = 0; + + char * tryAllocateFromPageBlockWithIndex(size_t size, size_t alignment, size_t page_block_index) + { + assert(page_block_index < page_blocks.size()); + auto & pages_block = page_blocks[page_block_index]; + + size_t block_size = pages_block.blockSize(); + size_t & block_allocated_size = page_blocks_allocated_size[page_block_index]; + size_t block_free_size = block_size - block_allocated_size; + + uint8_t * pages_start = static_cast(pages_block.base()); + void * pages_offset = pages_start + block_allocated_size; + + auto * result = std::align(alignment, size, pages_offset, block_free_size); + + if (result) + { + block_allocated_size = reinterpret_cast(result) - pages_start; + block_allocated_size += size; + + return static_cast(result); + } + else + { + return nullptr; + } + } + + void allocateNextPageBlock(size_t size) + { + size_t pages_to_allocate_size = ((size / page_size) + 1) * 2; + size_t allocate_size = page_size * pages_to_allocate_size; + + void * buf = nullptr; + int res = posix_memalign(&buf, page_size, allocate_size); + + if (res != 0) + throwFromErrno( + fmt::format("Cannot allocate memory (posix_memalign) alignment {} size {}.", page_size, ReadableSize(allocate_size)), + ErrorCodes::CANNOT_ALLOCATE_MEMORY, + res); + + page_blocks.emplace_back(buf, pages_to_allocate_size, page_size); + page_blocks_allocated_size.emplace_back(0); + + allocated_size += allocate_size; + } +}; + // class AssemblyPrinter // { // public: @@ -104,46 +268,43 @@ class JITCompiler /** MemoryManager for module. * Keep total allocated size during RuntimeDyld linker execution. - * Actual compiled code memory is stored in llvm::SectionMemoryManager member, we cannot use ZeroBase optimization here - * because it is required for llvm::SectionMemoryManager::MemoryMapper to live longer than llvm::SectionMemoryManager. 
*/ -class JITModuleMemoryManager +class JITModuleMemoryManager : public llvm::RTDyldMemoryManager { - class DefaultMMapper final : public llvm::SectionMemoryManager::MemoryMapper - { - public: - llvm::sys::MemoryBlock allocateMappedMemory( - llvm::SectionMemoryManager::AllocationPurpose Purpose [[maybe_unused]], - size_t NumBytes, - const llvm::sys::MemoryBlock * const NearBlock, - unsigned Flags, - std::error_code & EC) override - { - auto allocated_memory_block = llvm::sys::Memory::allocateMappedMemory(NumBytes, NearBlock, Flags, EC); - allocated_size += allocated_memory_block.allocatedSize(); - return allocated_memory_block; - } - - std::error_code protectMappedMemory(const llvm::sys::MemoryBlock & Block, unsigned Flags) override - { - return llvm::sys::Memory::protectMappedMemory(Block, Flags); - } +public: - std::error_code releaseMappedMemory(llvm::sys::MemoryBlock & M) override { return llvm::sys::Memory::releaseMappedMemory(M); } + uint8_t * allocateCodeSection(uintptr_t size, unsigned alignment, unsigned, llvm::StringRef) override + { + return reinterpret_cast(ex_page_arena.allocate(size, alignment)); + } - size_t allocated_size = 0; - }; + uint8_t * allocateDataSection(uintptr_t size, unsigned alignment, unsigned, llvm::StringRef, bool is_read_only) override + { + if (is_read_only) + return reinterpret_cast(ro_page_arena.allocate(size, alignment)); + else + return reinterpret_cast(rw_page_arena.allocate(size, alignment)); + } -public: - JITModuleMemoryManager() : manager(&mmaper) { } + bool finalizeMemory(std::string *) override + { + ro_page_arena.protect(PROT_READ); + ex_page_arena.protect(PROT_READ | PROT_EXEC); + return true; + } - inline size_t getAllocatedSize() const { return mmaper.allocated_size; } + inline size_t allocatedSize() const + { + size_t data_size = rw_page_arena.getAllocatedSize() + ro_page_arena.getAllocatedSize(); + size_t code_size = ex_page_arena.getAllocatedSize(); - inline llvm::SectionMemoryManager & getManager() { return manager; } + return data_size + code_size; + } private: - DefaultMMapper mmaper; - llvm::SectionMemoryManager manager; + PageArena rw_page_arena; + PageArena ro_page_arena; + PageArena ex_page_arena; }; class JITSymbolResolver : public llvm::LegacyJITSymbolResolver @@ -249,12 +410,12 @@ CHJIT::CompiledModule CHJIT::compileModule(std::unique_ptr module) } std::unique_ptr module_memory_manager = std::make_unique(); - llvm::RuntimeDyld dynamic_linker = {module_memory_manager->getManager(), *symbol_resolver}; + llvm::RuntimeDyld dynamic_linker = {*module_memory_manager, *symbol_resolver}; std::unique_ptr linked_object = dynamic_linker.loadObject(*object.get()); dynamic_linker.resolveRelocations(); - module_memory_manager->getManager().finalizeMemory(); + module_memory_manager->finalizeMemory(nullptr); CompiledModule compiled_module; @@ -275,7 +436,7 @@ CHJIT::CompiledModule CHJIT::compileModule(std::unique_ptr module) compiled_module.function_name_to_symbol.emplace(std::move(function_name), jit_symbol_address); } - compiled_module.size = module_memory_manager->getAllocatedSize(); + compiled_module.size = module_memory_manager->allocatedSize(); compiled_module.identifier = current_module_key; module_identifier_to_memory_manager[current_module_key] = std::move(module_memory_manager); From c24a42a58da08fca222ce453ae6ab9ee42ed43e7 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Sat, 4 Sep 2021 16:36:44 +0300 Subject: [PATCH 040/472] Backport #28530 to 21.9: Function dictGet default implementation for nulls --- 
src/Functions/FunctionsExternalDictionaries.h | 65 ++++++++++++++++--- .../2014_dict_get_nullable_key.reference | 13 ++++ .../2014_dict_get_nullable_key.sql | 29 +++++++++ 3 files changed, 98 insertions(+), 9 deletions(-) create mode 100644 tests/queries/0_stateless/2014_dict_get_nullable_key.reference create mode 100644 tests/queries/0_stateless/2014_dict_get_nullable_key.sql diff --git a/src/Functions/FunctionsExternalDictionaries.h b/src/Functions/FunctionsExternalDictionaries.h index 5f94a1e1f4bd..4f79b06b44a5 100644 --- a/src/Functions/FunctionsExternalDictionaries.h +++ b/src/Functions/FunctionsExternalDictionaries.h @@ -321,21 +321,32 @@ class FunctionDictGetNoType final : public IFunction Strings attribute_names = getAttributeNamesFromColumn(arguments[1].column, arguments[1].type); - DataTypes types; - auto dictionary_structure = helper.getDictionaryStructure(dictionary_name); + DataTypes attribute_types; + attribute_types.reserve(attribute_names.size()); for (auto & attribute_name : attribute_names) { /// We're extracting the return type from the dictionary's config, without loading the dictionary. - auto attribute = dictionary_structure.getAttribute(attribute_name); - types.emplace_back(attribute.type); + const auto & attribute = dictionary_structure.getAttribute(attribute_name); + attribute_types.emplace_back(attribute.type); } - if (types.size() > 1) - return std::make_shared(types, attribute_names); + bool key_is_nullable = arguments[2].type->isNullable(); + if (attribute_types.size() > 1) + { + if (key_is_nullable) + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Function {} support nullable key only for single dictionary attribute", getName()); + + return std::make_shared(attribute_types, attribute_names); + } else - return types.front(); + { + if (key_is_nullable) + return makeNullable(attribute_types.front()); + else + return attribute_types.front(); + } } ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override @@ -418,7 +429,9 @@ class FunctionDictGetNoType final : public IFunction default_cols = tuple_column->getColumnsCopy(); } else + { default_cols.emplace_back(result); + } } else { @@ -426,7 +439,16 @@ class FunctionDictGetNoType final : public IFunction default_cols.emplace_back(nullptr); } - const auto & key_col_with_type = arguments[2]; + auto key_col_with_type = arguments[2]; + + bool key_is_only_null = key_col_with_type.type->onlyNull(); + if (key_is_only_null) + return result_type->createColumnConstWithDefaultValue(input_rows_count); + + bool key_is_nullable = key_col_with_type.type->isNullable(); + if (key_is_nullable) + key_col_with_type = columnGetNested(key_col_with_type); + auto key_column = key_col_with_type.column; Columns key_columns; @@ -482,7 +504,26 @@ class FunctionDictGetNoType final : public IFunction key_types.emplace_back(range_col_type); } - return executeDictionaryRequest(dictionary, attribute_names, key_columns, key_types, result_type, default_cols); + DataTypePtr attribute_type = result_type; + if (key_is_nullable) + { + DataTypes attribute_types; + attribute_types.reserve(attribute_names.size()); + for (auto & attribute_name : attribute_names) + { + const auto & attribute = dictionary->getStructure().getAttribute(attribute_name); + attribute_types.emplace_back(attribute.type); + } + + attribute_type = attribute_types.front(); + } + + auto result_column = executeDictionaryRequest(dictionary, attribute_names, key_columns, key_types, attribute_type, 
default_cols); + + if (key_is_nullable) + result_column = wrapInNullable(result_column, {arguments[2]}, result_type, input_rows_count); + + return result_column; } private: @@ -511,12 +552,14 @@ class FunctionDictGetNoType final : public IFunction result = ColumnTuple::create(std::move(result_columns)); } else + { result = dictionary->getColumn( attribute_names[0], result_type, key_columns, key_types, default_cols.front()); + } return result; } @@ -526,7 +569,9 @@ class FunctionDictGetNoType final : public IFunction Strings attribute_names; if (const auto * name_col = checkAndGetColumnConst(column.get())) + { attribute_names.emplace_back(name_col->getValue()); + } else if (const auto * tuple_col_const = checkAndGetColumnConst(column.get())) { const ColumnTuple & tuple_col = assert_cast(tuple_col_const->getDataColumn()); @@ -551,10 +596,12 @@ class FunctionDictGetNoType final : public IFunction } } else + { throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of second argument of function {}, expected a const string or const tuple of const strings.", type->getName(), getName()); + } return attribute_names; } diff --git a/tests/queries/0_stateless/2014_dict_get_nullable_key.reference b/tests/queries/0_stateless/2014_dict_get_nullable_key.reference new file mode 100644 index 000000000000..08127d35829a --- /dev/null +++ b/tests/queries/0_stateless/2014_dict_get_nullable_key.reference @@ -0,0 +1,13 @@ +Non nullable value only null key +\N +Non nullable value nullable key +Test +\N + +Nullable value only null key +\N +Nullable value nullable key +Test +\N +\N +\N diff --git a/tests/queries/0_stateless/2014_dict_get_nullable_key.sql b/tests/queries/0_stateless/2014_dict_get_nullable_key.sql new file mode 100644 index 000000000000..d6c058b285f8 --- /dev/null +++ b/tests/queries/0_stateless/2014_dict_get_nullable_key.sql @@ -0,0 +1,29 @@ +DROP TABLE IF EXISTS dictionary_non_nullable_source_table; +CREATE TABLE dictionary_non_nullable_source_table (id UInt64, value String) ENGINE=TinyLog; +INSERT INTO dictionary_non_nullable_source_table VALUES (0, 'Test'); + +DROP DICTIONARY IF EXISTS test_dictionary_non_nullable; +CREATE DICTIONARY test_dictionary_non_nullable (id UInt64, value String) PRIMARY KEY id LAYOUT(DIRECT()) SOURCE(CLICKHOUSE(TABLE 'dictionary_non_nullable_source_table')); + +SELECT 'Non nullable value only null key '; +SELECT dictGet('test_dictionary_non_nullable', 'value', NULL); +SELECT 'Non nullable value nullable key'; +SELECT dictGet('test_dictionary_non_nullable', 'value', arrayJoin([toUInt64(0), NULL, 1])); + +DROP DICTIONARY test_dictionary_non_nullable; +DROP TABLE dictionary_non_nullable_source_table; + +DROP TABLE IF EXISTS dictionary_nullable_source_table; +CREATE TABLE dictionary_nullable_source_table (id UInt64, value Nullable(String)) ENGINE=TinyLog; +INSERT INTO dictionary_nullable_source_table VALUES (0, 'Test'), (1, NULL); + +DROP DICTIONARY IF EXISTS test_dictionary_nullable; +CREATE DICTIONARY test_dictionary_nullable (id UInt64, value Nullable(String)) PRIMARY KEY id LAYOUT(DIRECT()) SOURCE(CLICKHOUSE(TABLE 'dictionary_nullable_source_table')); + +SELECT 'Nullable value only null key '; +SELECT dictGet('test_dictionary_nullable', 'value', NULL); +SELECT 'Nullable value nullable key'; +SELECT dictGet('test_dictionary_nullable', 'value', arrayJoin([toUInt64(0), NULL, 1, 2])); + +DROP DICTIONARY test_dictionary_nullable; +DROP TABLE dictionary_nullable_source_table; From b1d83a45e648d49e60ec0304851777d350f4ab6c Mon Sep 17 00:00:00 2001 From: 
robot-clickhouse Date: Mon, 6 Sep 2021 12:44:55 +0300 Subject: [PATCH 041/472] Backport #28519 to 21.9: Simplier sessions expiration in Keeper and add comments --- src/Coordination/KeeperServer.cpp | 2 +- src/Coordination/KeeperServer.h | 3 +- src/Coordination/KeeperStateMachine.cpp | 2 +- src/Coordination/KeeperStateMachine.h | 2 +- src/Coordination/KeeperStorage.cpp | 6 +- src/Coordination/KeeperStorage.h | 7 +- src/Coordination/SessionExpiryQueue.cpp | 74 +++++++++------ src/Coordination/SessionExpiryQueue.h | 32 +++++-- ...t_for_build.cpp => gtest_coordination.cpp} | 95 +++++++++++++++++++ 9 files changed, 178 insertions(+), 45 deletions(-) rename src/Coordination/tests/{gtest_for_build.cpp => gtest_coordination.cpp} (93%) diff --git a/src/Coordination/KeeperServer.cpp b/src/Coordination/KeeperServer.cpp index b6b0ab8cb729..9caea5354bf2 100644 --- a/src/Coordination/KeeperServer.cpp +++ b/src/Coordination/KeeperServer.cpp @@ -357,7 +357,7 @@ void KeeperServer::waitInit() throw Exception(ErrorCodes::RAFT_ERROR, "Failed to wait RAFT initialization"); } -std::unordered_set KeeperServer::getDeadSessions() +std::vector KeeperServer::getDeadSessions() { return state_machine->getDeadSessions(); } diff --git a/src/Coordination/KeeperServer.h b/src/Coordination/KeeperServer.h index 282a7b48dfb6..485da144e959 100644 --- a/src/Coordination/KeeperServer.h +++ b/src/Coordination/KeeperServer.h @@ -63,7 +63,8 @@ class KeeperServer RaftAppendResult putRequestBatch(const KeeperStorage::RequestsForSessions & requests); - std::unordered_set getDeadSessions(); + /// Return set of the non-active sessions + std::vector getDeadSessions(); bool isLeader() const; diff --git a/src/Coordination/KeeperStateMachine.cpp b/src/Coordination/KeeperStateMachine.cpp index 431ad41c6d17..2a8e9a55c66b 100644 --- a/src/Coordination/KeeperStateMachine.cpp +++ b/src/Coordination/KeeperStateMachine.cpp @@ -289,7 +289,7 @@ void KeeperStateMachine::processReadRequest(const KeeperStorage::RequestForSessi responses_queue.push(response); } -std::unordered_set KeeperStateMachine::getDeadSessions() +std::vector KeeperStateMachine::getDeadSessions() { std::lock_guard lock(storage_and_responses_lock); return storage->getDeadSessions(); diff --git a/src/Coordination/KeeperStateMachine.h b/src/Coordination/KeeperStateMachine.h index c34981b1252d..f9cfb8c41fe4 100644 --- a/src/Coordination/KeeperStateMachine.h +++ b/src/Coordination/KeeperStateMachine.h @@ -60,7 +60,7 @@ class KeeperStateMachine : public nuraft::state_machine void processReadRequest(const KeeperStorage::RequestForSession & request_for_session); - std::unordered_set getDeadSessions(); + std::vector getDeadSessions(); void shutdownStorage(); diff --git a/src/Coordination/KeeperStorage.cpp b/src/Coordination/KeeperStorage.cpp index 46905c53113c..e287af061dcf 100644 --- a/src/Coordination/KeeperStorage.cpp +++ b/src/Coordination/KeeperStorage.cpp @@ -1078,8 +1078,10 @@ KeeperStorage::ResponsesForSessions KeeperStorage::processRequest(const Coordina zxid = *new_last_zxid; } - session_expiry_queue.update(session_id, session_and_timeout[session_id]); - if (zk_request->getOpNum() == Coordination::OpNum::Close) + /// ZooKeeper update sessions expirity for each request, not only for heartbeats + session_expiry_queue.addNewSessionOrUpdate(session_id, session_and_timeout[session_id]); + + if (zk_request->getOpNum() == Coordination::OpNum::Close) /// Close request is special { auto it = ephemerals.find(session_id); if (it != ephemerals.end()) diff --git 
a/src/Coordination/KeeperStorage.h b/src/Coordination/KeeperStorage.h index e3cb0f59fdc4..611bc6de69ac 100644 --- a/src/Coordination/KeeperStorage.h +++ b/src/Coordination/KeeperStorage.h @@ -106,14 +106,14 @@ class KeeperStorage { auto result = session_id_counter++; session_and_timeout.emplace(result, session_timeout_ms); - session_expiry_queue.update(result, session_timeout_ms); + session_expiry_queue.addNewSessionOrUpdate(result, session_timeout_ms); return result; } void addSessionID(int64_t session_id, int64_t session_timeout_ms) { session_and_timeout.emplace(session_id, session_timeout_ms); - session_expiry_queue.update(session_id, session_timeout_ms); + session_expiry_queue.addNewSessionOrUpdate(session_id, session_timeout_ms); } ResponsesForSessions processRequest(const Coordination::ZooKeeperRequestPtr & request, int64_t session_id, std::optional new_last_zxid, bool check_acl = true); @@ -145,7 +145,8 @@ class KeeperStorage return session_and_timeout; } - std::unordered_set getDeadSessions() + /// Get all dead sessions + std::vector getDeadSessions() { return session_expiry_queue.getExpiredSessions(); } diff --git a/src/Coordination/SessionExpiryQueue.cpp b/src/Coordination/SessionExpiryQueue.cpp index 51837087af5e..b6d3843f1d7d 100644 --- a/src/Coordination/SessionExpiryQueue.cpp +++ b/src/Coordination/SessionExpiryQueue.cpp @@ -1,82 +1,96 @@ #include #include + namespace DB { bool SessionExpiryQueue::remove(int64_t session_id) { - auto session_it = session_to_timeout.find(session_id); - if (session_it != session_to_timeout.end()) + auto session_it = session_to_expiration_time.find(session_id); + if (session_it != session_to_expiration_time.end()) { auto set_it = expiry_to_sessions.find(session_it->second); if (set_it != expiry_to_sessions.end()) set_it->second.erase(session_id); + /// No more sessions in this bucket + if (set_it->second.empty()) + expiry_to_sessions.erase(set_it); + + session_to_expiration_time.erase(session_it); + return true; } return false; } -bool SessionExpiryQueue::update(int64_t session_id, int64_t timeout_ms) +void SessionExpiryQueue::addNewSessionOrUpdate(int64_t session_id, int64_t timeout_ms) { - auto session_it = session_to_timeout.find(session_id); int64_t now = getNowMilliseconds(); + /// round up to next interval int64_t new_expiry_time = roundToNextInterval(now + timeout_ms); - if (session_it != session_to_timeout.end()) + auto session_it = session_to_expiration_time.find(session_id); + /// We already registered this session + if (session_it != session_to_expiration_time.end()) { - if (new_expiry_time == session_it->second) - return false; + int64_t prev_expiry_time = session_it->second; + session_it->second = new_expiry_time; + /// Nothing changed, session stay in the some bucket + if (new_expiry_time == prev_expiry_time) + return; + /// This bucket doesn't exist, let's create it auto set_it = expiry_to_sessions.find(new_expiry_time); if (set_it == expiry_to_sessions.end()) std::tie(set_it, std::ignore) = expiry_to_sessions.emplace(new_expiry_time, std::unordered_set()); + /// Add session to the next bucket set_it->second.insert(session_id); - int64_t prev_expiry_time = session_it->second; - if (prev_expiry_time != new_expiry_time) - { - auto prev_set_it = expiry_to_sessions.find(prev_expiry_time); - if (prev_set_it != expiry_to_sessions.end()) - prev_set_it->second.erase(session_id); - } - session_it->second = new_expiry_time; - return true; + auto prev_set_it = expiry_to_sessions.find(prev_expiry_time); + /// Remove session from previous 
bucket + if (prev_set_it != expiry_to_sessions.end()) + prev_set_it->second.erase(session_id); + + /// No more sessions in this bucket + if (prev_set_it->second.empty()) + expiry_to_sessions.erase(prev_set_it); } else { - session_to_timeout[session_id] = new_expiry_time; + /// Just add sessions to the new bucket + session_to_expiration_time[session_id] = new_expiry_time; + auto set_it = expiry_to_sessions.find(new_expiry_time); if (set_it == expiry_to_sessions.end()) std::tie(set_it, std::ignore) = expiry_to_sessions.emplace(new_expiry_time, std::unordered_set()); + set_it->second.insert(session_id); - return false; } } -std::unordered_set SessionExpiryQueue::getExpiredSessions() +std::vector SessionExpiryQueue::getExpiredSessions() const { int64_t now = getNowMilliseconds(); - if (now < next_expiration_time) - return {}; + std::vector result; - auto set_it = expiry_to_sessions.find(next_expiration_time); - int64_t new_expiration_time = next_expiration_time + expiration_interval; - next_expiration_time = new_expiration_time; - if (set_it != expiry_to_sessions.end()) + /// Check all buckets + for (const auto & [expire_time, expired_sessions] : expiry_to_sessions) { - auto result = set_it->second; - expiry_to_sessions.erase(set_it); - return result; + if (expire_time <= now) + result.insert(result.end(), expired_sessions.begin(), expired_sessions.end()); + else + break; } - return {}; + + return result; } void SessionExpiryQueue::clear() { - session_to_timeout.clear(); + session_to_expiration_time.clear(); expiry_to_sessions.clear(); } diff --git a/src/Coordination/SessionExpiryQueue.h b/src/Coordination/SessionExpiryQueue.h index dff629a24322..8581800834da 100644 --- a/src/Coordination/SessionExpiryQueue.h +++ b/src/Coordination/SessionExpiryQueue.h @@ -1,19 +1,32 @@ #pragma once +#include #include #include +#include #include namespace DB { +/// Simple class for checking expired sessions. Main idea -- to round sessions +/// timeouts and place all sessions into buckets rounded by their expired time. +/// So we will have not too many different buckets and can check expired +/// sessions quite fast. +/// So buckets looks like this: +/// [1630580418000] -> {1, 5, 6} +/// [1630580418500] -> {2, 3} +/// ... +/// When new session appear it's added to the existing bucket or create new bucket. class SessionExpiryQueue { private: - std::unordered_map session_to_timeout; - std::unordered_map> expiry_to_sessions; + /// Session -> timeout ms + std::unordered_map session_to_expiration_time; + + /// Expire time -> session expire near this time + std::map> expiry_to_sessions; int64_t expiration_interval; - int64_t next_expiration_time; static int64_t getNowMilliseconds() { @@ -21,23 +34,30 @@ class SessionExpiryQueue return duration_cast(system_clock::now().time_since_epoch()).count(); } + /// Round time to the next expiration interval. The result used as a key for + /// expiry_to_sessions map. int64_t roundToNextInterval(int64_t time) const { return (time / expiration_interval + 1) * expiration_interval; } public: + /// expiration_interval -- how often we will check new sessions and how small + /// buckets we will have. In ZooKeeper normal session timeout is around 30 seconds + /// and expiration_interval is about 500ms. 
explicit SessionExpiryQueue(int64_t expiration_interval_) : expiration_interval(expiration_interval_) - , next_expiration_time(roundToNextInterval(getNowMilliseconds())) { } + /// Session was actually removed bool remove(int64_t session_id); - bool update(int64_t session_id, int64_t timeout_ms); + /// Update session expiry time (must be called on hearbeats) + void addNewSessionOrUpdate(int64_t session_id, int64_t timeout_ms); - std::unordered_set getExpiredSessions(); + /// Get all expired sessions + std::vector getExpiredSessions() const; void clear(); }; diff --git a/src/Coordination/tests/gtest_for_build.cpp b/src/Coordination/tests/gtest_coordination.cpp similarity index 93% rename from src/Coordination/tests/gtest_for_build.cpp rename to src/Coordination/tests/gtest_coordination.cpp index 9a744d2bbedd..da0f39edb02c 100644 --- a/src/Coordination/tests/gtest_for_build.cpp +++ b/src/Coordination/tests/gtest_coordination.cpp @@ -1299,6 +1299,101 @@ TEST(CoordinationTest, TestEphemeralNodeRemove) } +TEST(CoordinationTest, TestRotateIntervalChanges) +{ + using namespace Coordination; + ChangelogDirTest snapshots("./logs"); + { + DB::KeeperLogStore changelog("./logs", 100, true); + + changelog.init(0, 3); + for (size_t i = 1; i < 55; ++i) + { + std::shared_ptr request = std::make_shared(); + request->path = "/hello_" + std::to_string(i); + auto entry = getLogEntryFromZKRequest(0, 1, request); + changelog.append(entry); + changelog.end_of_append_batch(0, 0); + } + } + + + EXPECT_TRUE(fs::exists("./logs/changelog_1_100.bin")); + + DB::KeeperLogStore changelog_1("./logs", 10, true); + changelog_1.init(0, 50); + for (size_t i = 0; i < 55; ++i) + { + std::shared_ptr request = std::make_shared(); + request->path = "/hello_" + std::to_string(100 + i); + auto entry = getLogEntryFromZKRequest(0, 1, request); + changelog_1.append(entry); + changelog_1.end_of_append_batch(0, 0); + } + + EXPECT_TRUE(fs::exists("./logs/changelog_1_100.bin")); + EXPECT_TRUE(fs::exists("./logs/changelog_101_110.bin")); + + DB::KeeperLogStore changelog_2("./logs", 7, true); + changelog_2.init(98, 55); + + for (size_t i = 0; i < 17; ++i) + { + std::shared_ptr request = std::make_shared(); + request->path = "/hello_" + std::to_string(200 + i); + auto entry = getLogEntryFromZKRequest(0, 1, request); + changelog_2.append(entry); + changelog_2.end_of_append_batch(0, 0); + } + + changelog_2.compact(105); + + EXPECT_FALSE(fs::exists("./logs/changelog_1_100.bin")); + EXPECT_TRUE(fs::exists("./logs/changelog_101_110.bin")); + EXPECT_TRUE(fs::exists("./logs/changelog_111_117.bin")); + EXPECT_TRUE(fs::exists("./logs/changelog_118_124.bin")); + EXPECT_TRUE(fs::exists("./logs/changelog_125_131.bin")); + + DB::KeeperLogStore changelog_3("./logs", 5, true); + changelog_3.init(116, 3); + for (size_t i = 0; i < 17; ++i) + { + std::shared_ptr request = std::make_shared(); + request->path = "/hello_" + std::to_string(300 + i); + auto entry = getLogEntryFromZKRequest(0, 1, request); + changelog_3.append(entry); + changelog_3.end_of_append_batch(0, 0); + } + + changelog_3.compact(125); + EXPECT_FALSE(fs::exists("./logs/changelog_101_110.bin")); + EXPECT_FALSE(fs::exists("./logs/changelog_111_117.bin")); + EXPECT_FALSE(fs::exists("./logs/changelog_118_124.bin")); + + EXPECT_TRUE(fs::exists("./logs/changelog_125_131.bin")); + EXPECT_TRUE(fs::exists("./logs/changelog_132_136.bin")); + EXPECT_TRUE(fs::exists("./logs/changelog_137_141.bin")); + EXPECT_TRUE(fs::exists("./logs/changelog_142_146.bin")); +} + +TEST(CoordinationTest, 
TestSessionExpiryQueue) +{ + using namespace Coordination; + SessionExpiryQueue queue(500); + + queue.addNewSessionOrUpdate(1, 1000); + + for (size_t i = 0; i < 2; ++i) + { + EXPECT_EQ(queue.getExpiredSessions(), std::vector({})); + std::this_thread::sleep_for(std::chrono::milliseconds(400)); + } + + std::this_thread::sleep_for(std::chrono::milliseconds(700)); + EXPECT_EQ(queue.getExpiredSessions(), std::vector({1})); +} + + int main(int argc, char ** argv) { Poco::AutoPtr channel(new Poco::ConsoleChannel(std::cerr)); From 8e75a0a9d37781827be26f5e50a25d0bee157677 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Mon, 6 Sep 2021 12:45:54 +0300 Subject: [PATCH 042/472] Backport #28360 to 21.9: Fix keeper logs disappearance --- src/Coordination/Changelog.cpp | 199 +++++++++++++----- src/Coordination/Changelog.h | 17 +- src/Coordination/KeeperStorageDispatcher.cpp | 13 +- ...t_for_build.cpp => gtest_coordination.cpp} | 76 +++++++ src/Server/KeeperTCPHandler.cpp | 2 +- 5 files changed, 240 insertions(+), 67 deletions(-) rename src/Coordination/tests/{gtest_for_build.cpp => gtest_coordination.cpp} (94%) diff --git a/src/Coordination/Changelog.cpp b/src/Coordination/Changelog.cpp index 6ec9b17d0a79..5518a5399c99 100644 --- a/src/Coordination/Changelog.cpp +++ b/src/Coordination/Changelog.cpp @@ -142,8 +142,22 @@ class ChangelogWriter struct ChangelogReadResult { - uint64_t entries_read; + /// Total entries read from log including skipped. + /// Useful when we decide to continue to write in the same log and want to know + /// how many entries was already written in it. + uint64_t total_entries_read_from_log; + + /// First index in log + uint64_t log_start_index; + + /// First entry actually read log (not including skipped) uint64_t first_read_index; + /// Last entry read from log (last entry in log) + /// When we don't skip anything last_read_index - first_read_index = total_entries_read_from_log. + /// But when some entries from the start of log can be skipped because they are not required. 
+ uint64_t last_read_index; + + /// last offset we were able to read from log off_t last_position; bool error; }; @@ -156,6 +170,7 @@ class ChangelogReader , read_buf(filepath) {} + /// start_log_index -- all entries with index < start_log_index will be skipped, but accounted into total_entries_read_from_log ChangelogReadResult readChangelog(IndexToLogEntry & logs, uint64_t start_log_index, IndexToOffset & index_to_offset, Poco::Logger * log) { uint64_t previous_index = 0; @@ -205,7 +220,7 @@ class ChangelogReader if (logs.count(record.header.index) != 0) throw Exception(ErrorCodes::CORRUPTED_DATA, "Duplicated index id {} in log {}", record.header.index, filepath); - result.entries_read += 1; + result.total_entries_read_from_log += 1; if (record.header.index < start_log_index) { @@ -218,8 +233,10 @@ class ChangelogReader logs.emplace(record.header.index, log_entry); index_to_offset[record.header.index] = result.last_position; - if (result.entries_read % 50000 == 0) - LOG_TRACE(log, "Reading changelog from path {}, entries {}", filepath, result.entries_read); + result.last_read_index = record.header.index; + + if (result.total_entries_read_from_log % 50000 == 0) + LOG_TRACE(log, "Reading changelog from path {}, entries {}", filepath, result.total_entries_read_from_log); } } catch (const Exception & ex) @@ -235,7 +252,7 @@ class ChangelogReader result.error = true; tryLogCurrentException(log); } - LOG_TRACE(log, "Totally read from changelog {} {} entries", filepath, result.entries_read); + LOG_TRACE(log, "Totally read from changelog {} {} entries", filepath, result.total_entries_read_from_log); return result; } @@ -268,11 +285,10 @@ Changelog::Changelog( void Changelog::readChangelogAndInitWriter(uint64_t last_commited_log_index, uint64_t logs_to_keep) { - uint64_t total_read = 0; - uint64_t entries_in_last = 0; - uint64_t incomplete_log_index = 0; - ChangelogReadResult result{}; - uint64_t first_read_index = 0; + std::optional last_log_read_result; + + /// Last log has some free space to write + bool last_log_is_not_complete = false; uint64_t start_to_read_from = last_commited_log_index; if (start_to_read_from > logs_to_keep) @@ -280,20 +296,22 @@ void Changelog::readChangelogAndInitWriter(uint64_t last_commited_log_index, uin else start_to_read_from = 1; - bool started = false; + /// Got through changelog files in order of start_index for (const auto & [changelog_start_index, changelog_description] : existing_changelogs) { - entries_in_last = changelog_description.to_log_index - changelog_description.from_log_index + 1; - if (changelog_description.to_log_index >= start_to_read_from) { - if (!started) + if (!last_log_read_result) /// still nothing was read { if (changelog_description.from_log_index > last_commited_log_index && (changelog_description.from_log_index - last_commited_log_index) > 1) { LOG_ERROR(log, "Some records was lost, last committed log index {}, smallest available log index on disk {}. Hopefully will receive missing records from leader.", last_commited_log_index, changelog_description.from_log_index); - incomplete_log_index = changelog_start_index; - break; + /// Nothing to do with our more fresh log, leader will overwrite them, so remove everything and just start from last_commited_index + removeAllLogs(); + min_log_id = last_commited_log_index; + max_log_id = last_commited_log_index == 0 ? 
0 : last_commited_log_index - 1; + rotate(max_log_id + 1); + return; } else if (changelog_description.from_log_index > start_to_read_from) LOG_WARNING(log, "Don't have required amount of reserved log records. Need to read from {}, smallest available log index on disk {}.", start_to_read_from, changelog_description.from_log_index); @@ -302,60 +320,99 @@ void Changelog::readChangelogAndInitWriter(uint64_t last_commited_log_index, uin started = true; ChangelogReader reader(changelog_description.path); - result = reader.readChangelog(logs, start_to_read_from, index_to_start_pos, log); - if (first_read_index == 0) - first_read_index = result.first_read_index; + last_log_read_result = reader.readChangelog(logs, start_to_read_from, index_to_start_pos, log); + + /// Otherwise we have already initialized it + if (min_log_id == 0) + min_log_id = last_log_read_result->first_read_index; - total_read += result.entries_read; + if (last_log_read_result->last_read_index != 0) + max_log_id = last_log_read_result->last_read_index; + last_log_read_result->log_start_index = changelog_description.from_log_index; + + /// How many entries we have in the last changelog + uint64_t expected_entries_in_log = changelog_description.expectedEntriesCountInLog(); /// May happen after truncate, crash or simply unfinished log - if (result.entries_read < entries_in_last) + if (last_log_read_result->total_entries_read_from_log < expected_entries_in_log) { - incomplete_log_index = changelog_start_index; + last_log_is_not_complete = true; break; } } } - if (first_read_index != 0) - start_index = first_read_index; - else - start_index = last_commited_log_index; + /// we can have empty log (with zero entries) and last_log_read_result will be initialized + if (!last_log_read_result || min_log_id == 0) /// We just may have no logs (only snapshot or nothing) + { + /// Just to be sure they don't exist + removeAllLogs(); - if (incomplete_log_index != 0) + min_log_id = last_commited_log_index; + max_log_id = last_commited_log_index == 0 ? 0 : last_commited_log_index - 1; + } + else if (last_log_is_not_complete) /// if it's complete just start new one { - auto start_remove_from = existing_changelogs.begin(); - if (started) - start_remove_from = existing_changelogs.upper_bound(incomplete_log_index); + assert(last_log_read_result != std::nullopt); + /// Actually they shouldn't exist, but to be sure we remove them + removeAllLogsAfter(last_log_read_result->log_start_index); - /// All subsequent logs shouldn't exist. But they may exist if we crashed after writeAt started. Remove them. 
- for (auto itr = start_remove_from; itr != existing_changelogs.end();) - { - LOG_WARNING(log, "Removing changelog {}, because it's goes after broken changelog entry", itr->second.path); - std::filesystem::remove(itr->second.path); - itr = existing_changelogs.erase(itr); - } + assert(!existing_changelogs.empty()); + assert(existing_changelogs.find(last_log_read_result->log_start_index)->first == existing_changelogs.rbegin()->first); - /// Continue to write into existing log - if (!existing_changelogs.empty()) - { - auto description = existing_changelogs.rbegin()->second; - LOG_TRACE(log, "Continue to write into {}", description.path); - current_writer = std::make_unique(description.path, WriteMode::Append, description.from_log_index); - current_writer->setEntriesWritten(result.entries_read); + /// Continue to write into incomplete existing log + auto description = existing_changelogs[last_log_read_result->log_start_index]; - /// Truncate all broken entries from log - if (result.error) - { - LOG_WARNING(log, "Read finished with error, truncating all broken log entries"); - current_writer->truncateToLength(result.last_position); - } - } + if (last_log_read_result->error) + initWriter(description, last_log_read_result->total_entries_read_from_log, /* truncate_to_offset = */ last_log_read_result->last_position); + else + initWriter(description, last_log_read_result->total_entries_read_from_log); } - /// Start new log if we don't initialize writer from previous log + /// Start new log if we don't initialize writer from previous log. All logs can be "complete". if (!current_writer) - rotate(start_index + total_read); + rotate(max_log_id + 1); +} + + +void Changelog::initWriter(const ChangelogFileDescription & description, uint64_t entries_already_written, std::optional truncate_to_offset) +{ + if (description.expectedEntriesCountInLog() != rotate_interval) + LOG_TRACE(log, "Looks like rotate_logs_interval was changed, current {}, expected entries in last log {}", rotate_interval, description.expectedEntriesCountInLog()); + + LOG_TRACE(log, "Continue to write into {}", description.path); + current_writer = std::make_unique(description.path, WriteMode::Append, description.from_log_index); + current_writer->setEntriesWritten(entries_already_written); + + if (truncate_to_offset) + { + LOG_WARNING(log, "Changelog {} contain broken enties, truncating all broken log entries", description.path); + current_writer->truncateToLength(*truncate_to_offset); + } +} + +void Changelog::removeAllLogsAfter(uint64_t start_to_remove_from_id) +{ + auto start_to_remove_from = existing_changelogs.upper_bound(start_to_remove_from_id); + + /// All subsequent logs shouldn't exist. But they may exist if we crashed after writeAt started. Remove them. 
+ for (auto itr = start_to_remove_from; itr != existing_changelogs.end();) + { + LOG_WARNING(log, "Removing changelog {}, because it's goes after broken changelog entry", itr->second.path); + std::filesystem::remove(itr->second.path); + itr = existing_changelogs.erase(itr); + } +} + +void Changelog::removeAllLogs() +{ + LOG_WARNING(log, "Removing all changelogs"); + for (auto itr = existing_changelogs.begin(); itr != existing_changelogs.end();) + { + LOG_WARNING(log, "Removing changelog {}, because it's goes after broken changelog entry", itr->second.path); + std::filesystem::remove(itr->second.path); + itr = existing_changelogs.erase(itr); + } } void Changelog::rotate(uint64_t new_start_log_index) @@ -399,7 +456,7 @@ void Changelog::appendEntry(uint64_t index, const LogEntryPtr & log_entry) throw Exception(ErrorCodes::LOGICAL_ERROR, "Changelog must be initialized before appending records"); if (logs.empty()) - start_index = index; + min_log_id = index; if (current_writer->getEntriesWritten() == rotate_interval) rotate(index); @@ -409,6 +466,7 @@ void Changelog::appendEntry(uint64_t index, const LogEntryPtr & log_entry) throw Exception(ErrorCodes::LOGICAL_ERROR, "Record with index {} already exists", index); logs[index] = makeClone(log_entry); + max_log_id = index; } void Changelog::writeAt(uint64_t index, const LogEntryPtr & log_entry) @@ -462,11 +520,29 @@ void Changelog::writeAt(uint64_t index, const LogEntryPtr & log_entry) void Changelog::compact(uint64_t up_to_log_index) { + LOG_INFO(log, "Compact logs up to log index {}, our max log id is {}", up_to_log_index, max_log_id); + + bool remove_all_logs = false; + if (up_to_log_index > max_log_id) + { + LOG_INFO(log, "Seems like this node recovers from leaders snapshot, removing all logs"); + /// If we received snapshot from leader we may compact up to more fresh log + max_log_id = up_to_log_index; + remove_all_logs = true; + } + + bool need_rotate = false; for (auto itr = existing_changelogs.begin(); itr != existing_changelogs.end();) { /// Remove all completely outdated changelog files - if (itr->second.to_log_index <= up_to_log_index) + if (remove_all_logs || itr->second.to_log_index <= up_to_log_index) { + if (current_writer && itr->second.from_log_index == current_writer->getStartIndex()) + { + LOG_INFO(log, "Trying to remove log {} which is current active log for write. Possibly this node recovers from snapshot", itr->second.path); + need_rotate = true; + current_writer.reset(); + } LOG_INFO(log, "Removing changelog {} because of compaction", itr->second.path); std::erase_if(index_to_start_pos, [right_index = itr->second.to_log_index] (const auto & item) { return item.first <= right_index; }); @@ -476,18 +552,25 @@ void Changelog::compact(uint64_t up_to_log_index) else /// Files are ordered, so all subsequent should exist break; } - start_index = up_to_log_index + 1; + /// Compaction from the past is possible, so don't make our min_log_id smaller. 
+ min_log_id = std::max(min_log_id, up_to_log_index + 1); std::erase_if(logs, [up_to_log_index] (const auto & item) { return item.first <= up_to_log_index; }); + + if (need_rotate) + rotate(up_to_log_index + 1); + + LOG_INFO(log, "Compaction up to {} finished new min index {}, new max index {}", up_to_log_index, min_log_id, max_log_id); } LogEntryPtr Changelog::getLastEntry() const { static LogEntryPtr fake_entry = nuraft::cs_new(0, nuraft::buffer::alloc(sizeof(uint64_t))); - uint64_t next_index = getNextEntryIndex() - 1; - auto entry = logs.find(next_index); + auto entry = logs.find(max_log_id); if (entry == logs.end()) + { return fake_entry; + } return entry->second; } diff --git a/src/Coordination/Changelog.h b/src/Coordination/Changelog.h index 893fe16abdf6..6c75fba46639 100644 --- a/src/Coordination/Changelog.h +++ b/src/Coordination/Changelog.h @@ -2,6 +2,7 @@ #include // Y_IGNORE #include +#include #include #include #include @@ -81,12 +82,12 @@ class Changelog uint64_t getNextEntryIndex() const { - return start_index + logs.size(); + return max_log_id + 1; } uint64_t getStartIndex() const { - return start_index; + return min_log_id; } /// Last entry in log, or fake entry with term 0 if log is empty @@ -122,6 +123,13 @@ class Changelog /// Starts new file [new_start_log_index, new_start_log_index + rotate_interval] void rotate(uint64_t new_start_log_index); + /// Remove all changelogs from disk with start_index bigger than start_to_remove_from_id + void removeAllLogsAfter(uint64_t start_to_remove_from_id); + /// Remove all logs from disk + void removeAllLogs(); + /// Init writer for existing log with some entries already written + void initWriter(const ChangelogFileDescription & description, uint64_t entries_already_written, std::optional truncate_to_offset = {}); + private: const std::string changelogs_dir; const uint64_t rotate_interval; @@ -132,7 +140,10 @@ class Changelog std::unique_ptr current_writer; IndexToOffset index_to_start_pos; IndexToLogEntry logs; - uint64_t start_index = 0; + /// Start log_id which exists in all "active" logs + /// min_log_id + 1 == max_log_id means empty log storage for NuRaft + uint64_t min_log_id = 0; + uint64_t max_log_id = 0; }; } diff --git a/src/Coordination/KeeperStorageDispatcher.cpp b/src/Coordination/KeeperStorageDispatcher.cpp index 7c416b38d8ba..dbb8ca1183f3 100644 --- a/src/Coordination/KeeperStorageDispatcher.cpp +++ b/src/Coordination/KeeperStorageDispatcher.cpp @@ -288,10 +288,12 @@ void KeeperStorageDispatcher::shutdown() if (session_cleaner_thread.joinable()) session_cleaner_thread.join(); - /// FIXME not the best way to notify - requests_queue->push({}); - if (request_thread.joinable()) - request_thread.join(); + if (requests_queue) + { + requests_queue->push({}); + if (request_thread.joinable()) + request_thread.join(); + } responses_queue.push({}); if (responses_thread.joinable()) @@ -306,7 +308,8 @@ void KeeperStorageDispatcher::shutdown() server->shutdown(); KeeperStorage::RequestForSession request_for_session; - while (requests_queue->tryPop(request_for_session)) + /// Set session expired for all pending requests + while (requests_queue && requests_queue->tryPop(request_for_session)) { if (request_for_session.request) { diff --git a/src/Coordination/tests/gtest_for_build.cpp b/src/Coordination/tests/gtest_coordination.cpp similarity index 94% rename from src/Coordination/tests/gtest_for_build.cpp rename to src/Coordination/tests/gtest_coordination.cpp index 9a744d2bbedd..d3fa4511fa66 100644 --- 
a/src/Coordination/tests/gtest_for_build.cpp +++ b/src/Coordination/tests/gtest_coordination.cpp @@ -404,6 +404,7 @@ TEST(CoordinationTest, ChangelogTestCompaction) /// And we able to read it DB::KeeperLogStore changelog_reader("./logs", 5, true); changelog_reader.init(7, 0); + EXPECT_EQ(changelog_reader.size(), 1); EXPECT_EQ(changelog_reader.start_index(), 7); EXPECT_EQ(changelog_reader.next_slot(), 8); @@ -1298,6 +1299,81 @@ TEST(CoordinationTest, TestEphemeralNodeRemove) EXPECT_EQ(storage.ephemerals.size(), 0); } +TEST(CoordinationTest, TestRotateIntervalChanges) +{ + using namespace Coordination; + ChangelogDirTest snapshots("./logs"); + { + DB::KeeperLogStore changelog("./logs", 100, true); + + changelog.init(0, 3); + for (size_t i = 1; i < 55; ++i) + { + std::shared_ptr request = std::make_shared(); + request->path = "/hello_" + std::to_string(i); + auto entry = getLogEntryFromZKRequest(0, 1, request); + changelog.append(entry); + changelog.end_of_append_batch(0, 0); + } + } + + EXPECT_TRUE(fs::exists("./logs/changelog_1_100.bin")); + + DB::KeeperLogStore changelog_1("./logs", 10, true); + changelog_1.init(0, 50); + for (size_t i = 0; i < 55; ++i) + { + std::shared_ptr request = std::make_shared(); + request->path = "/hello_" + std::to_string(100 + i); + auto entry = getLogEntryFromZKRequest(0, 1, request); + changelog_1.append(entry); + changelog_1.end_of_append_batch(0, 0); + } + + EXPECT_TRUE(fs::exists("./logs/changelog_1_100.bin")); + EXPECT_TRUE(fs::exists("./logs/changelog_101_110.bin")); + + DB::KeeperLogStore changelog_2("./logs", 7, true); + changelog_2.init(98, 55); + + for (size_t i = 0; i < 17; ++i) + { + std::shared_ptr request = std::make_shared(); + request->path = "/hello_" + std::to_string(200 + i); + auto entry = getLogEntryFromZKRequest(0, 1, request); + changelog_2.append(entry); + changelog_2.end_of_append_batch(0, 0); + } + + changelog_2.compact(105); + EXPECT_FALSE(fs::exists("./logs/changelog_1_100.bin")); + EXPECT_TRUE(fs::exists("./logs/changelog_101_110.bin")); + EXPECT_TRUE(fs::exists("./logs/changelog_111_117.bin")); + EXPECT_TRUE(fs::exists("./logs/changelog_118_124.bin")); + EXPECT_TRUE(fs::exists("./logs/changelog_125_131.bin")); + + DB::KeeperLogStore changelog_3("./logs", 5, true); + changelog_3.init(116, 3); + for (size_t i = 0; i < 17; ++i) + { + std::shared_ptr request = std::make_shared(); + request->path = "/hello_" + std::to_string(300 + i); + auto entry = getLogEntryFromZKRequest(0, 1, request); + changelog_3.append(entry); + changelog_3.end_of_append_batch(0, 0); + } + + changelog_3.compact(125); + EXPECT_FALSE(fs::exists("./logs/changelog_101_110.bin")); + EXPECT_FALSE(fs::exists("./logs/changelog_111_117.bin")); + EXPECT_FALSE(fs::exists("./logs/changelog_118_124.bin")); + + EXPECT_TRUE(fs::exists("./logs/changelog_125_131.bin")); + EXPECT_TRUE(fs::exists("./logs/changelog_132_136.bin")); + EXPECT_TRUE(fs::exists("./logs/changelog_137_141.bin")); + EXPECT_TRUE(fs::exists("./logs/changelog_142_146.bin")); +} + int main(int argc, char ** argv) { diff --git a/src/Server/KeeperTCPHandler.cpp b/src/Server/KeeperTCPHandler.cpp index df40a78749bf..c94c3ed1874e 100644 --- a/src/Server/KeeperTCPHandler.cpp +++ b/src/Server/KeeperTCPHandler.cpp @@ -192,7 +192,7 @@ struct SocketInterruptablePollWrapper KeeperTCPHandler::KeeperTCPHandler(IServer & server_, const Poco::Net::StreamSocket & socket_) : Poco::Net::TCPServerConnection(socket_) , server(server_) - , log(&Poco::Logger::get("NuKeeperTCPHandler")) + , log(&Poco::Logger::get("KeeperTCPHandler")) 
, global_context(Context::createCopy(server.context())) , keeper_dispatcher(global_context->getKeeperStorageDispatcher()) , operation_timeout(0, global_context->getConfigRef().getUInt("keeper_server.operation_timeout_ms", Coordination::DEFAULT_OPERATION_TIMEOUT_MS) * 1000) From d2b83039e641b1192b16a214409686ca68d384f7 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Mon, 6 Sep 2021 16:44:43 +0300 Subject: [PATCH 043/472] Backport #28533 to 21.9: Fix UUID overlap in DROP TABLE for internal DDL from MaterializeMySQL --- programs/server/Server.cpp | 4 ++++ src/Interpreters/DatabaseCatalog.cpp | 1 - src/Interpreters/DatabaseCatalog.h | 2 +- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index e3ec6feb1a50..f97c8d314e99 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -1104,6 +1104,10 @@ if (ThreadFuzzer::instance().isEffective()) global_context->setSystemZooKeeperLogAfterInitializationIfNeeded(); /// After the system database is created, attach virtual system tables (in addition to query_log and part_log) attachSystemTablesServer(*database_catalog.getSystemDatabase(), has_zookeeper); + /// Firstly remove partially dropped databases, to avoid race with MaterializedMySQLSyncThread, + /// that may execute DROP before loadMarkedAsDroppedTables() in background, + /// and so loadMarkedAsDroppedTables() will find it and try to add, and UUID will overlap. + database_catalog.loadMarkedAsDroppedTables(); /// Then, load remaining databases loadMetadata(global_context, default_database); database_catalog.loadDatabases(); diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index fd6b5b9a8103..0cf85fdde681 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -146,7 +146,6 @@ void DatabaseCatalog::initializeAndLoadTemporaryDatabase() void DatabaseCatalog::loadDatabases() { - loadMarkedAsDroppedTables(); auto task_holder = getContext()->getSchedulePool().createTask("DatabaseCatalog", [this](){ this->dropTableDataTask(); }); drop_task = std::make_unique(std::move(task_holder)); (*drop_task)->activate(); diff --git a/src/Interpreters/DatabaseCatalog.h b/src/Interpreters/DatabaseCatalog.h index 74bfb814ce43..071b80690df2 100644 --- a/src/Interpreters/DatabaseCatalog.h +++ b/src/Interpreters/DatabaseCatalog.h @@ -130,6 +130,7 @@ class DatabaseCatalog : boost::noncopyable, WithMutableContext void initializeAndLoadTemporaryDatabase(); void loadDatabases(); + void loadMarkedAsDroppedTables(); /// Get an object that protects the table from concurrently executing multiple DDL operations. DDLGuardPtr getDDLGuard(const String & database, const String & table); @@ -240,7 +241,6 @@ class DatabaseCatalog : boost::noncopyable, WithMutableContext }; using TablesMarkedAsDropped = std::list; - void loadMarkedAsDroppedTables(); void dropTableDataTask(); void dropTableFinally(const TableMarkedAsDropped & table); From 89f73336563eb1afaa935037e0cbe5f5fc5468cb Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 7 Sep 2021 14:51:18 +0300 Subject: [PATCH 044/472] Backport #28315 to 21.9: Fix NOT-IN index optimization when not all key columns are used. 
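The guard added to MergeTreeSetIndex only allows the "single element range gives a definite answer" shortcut when the set tuple constrains every used key column; in the added 01891_not_in_partition_prune case the sorting key is only `date` while the tuple is (date, a, b), so the shortcut must not fire. A simplified sketch of that rule, with a local stand-in for BoolMask rather than the real checkInRange code:

    #include <iostream>

    /// Simplified stand-in for the real BoolMask: which outcomes remain possible.
    struct BoolMask
    {
        bool can_be_true = true;
        bool can_be_false = true;
    };

    /// One-element key range: collapse to a definite answer only when the set
    /// tuple constrains every key column (has_all_keys); otherwise columns that
    /// are not part of the key may still differ inside the range.
    BoolMask checkOneElementRange(bool element_in_set, bool has_all_keys)
    {
        if (!has_all_keys)
            return {};                                /// stay conservative: {true, true}
        return {element_in_set, !element_in_set};     /// definite TRUE or definite FALSE
    }

    int main()
    {
        auto mask = checkOneElementRange(/*element_in_set=*/ true, /*has_all_keys=*/ false);
        std::cout << mask.can_be_true << ' ' << mask.can_be_false << '\n';   /// 1 1: NOT IN cannot prune this range
    }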
--- src/Interpreters/Set.cpp | 8 ++++---- src/Interpreters/Set.h | 4 +++- .../01891_not_in_partition_prune.reference | 2 ++ .../0_stateless/01891_not_in_partition_prune.sql | 15 +++++++++++++++ 4 files changed, 24 insertions(+), 5 deletions(-) diff --git a/src/Interpreters/Set.cpp b/src/Interpreters/Set.cpp index 5ab59ba3f079..5304859aeea6 100644 --- a/src/Interpreters/Set.cpp +++ b/src/Interpreters/Set.cpp @@ -402,8 +402,8 @@ void Set::checkTypesEqual(size_t set_type_idx, const DataTypePtr & other_type) c + data_types[set_type_idx]->getName() + " on the right", ErrorCodes::TYPE_MISMATCH); } -MergeTreeSetIndex::MergeTreeSetIndex(const Columns & set_elements, std::vector && index_mapping_) - : indexes_mapping(std::move(index_mapping_)) +MergeTreeSetIndex::MergeTreeSetIndex(const Columns & set_elements, std::vector && indexes_mapping_) + : has_all_keys(set_elements.size() == indexes_mapping_.size()), indexes_mapping(std::move(indexes_mapping_)) { std::sort(indexes_mapping.begin(), indexes_mapping.end(), [](const KeyTuplePositionMapping & l, const KeyTuplePositionMapping & r) @@ -548,11 +548,11 @@ BoolMask MergeTreeSetIndex::checkInRange(const std::vector & key_ranges, break; } } - if (one_element_range) + if (one_element_range && has_all_keys) { /// Here we know that there is one element in range. /// The main difference with the normal case is that we can definitely say that - /// condition in this range always TRUE (can_be_false = 0) xor always FALSE (can_be_true = 0). + /// condition in this range is always TRUE (can_be_false = 0) or always FALSE (can_be_true = 0). /// Check if it's an empty range if (!left_included || !right_included) diff --git a/src/Interpreters/Set.h b/src/Interpreters/Set.h index 727a2c144a19..578913dd0d2d 100644 --- a/src/Interpreters/Set.h +++ b/src/Interpreters/Set.h @@ -208,7 +208,7 @@ class MergeTreeSetIndex std::vector functions; }; - MergeTreeSetIndex(const Columns & set_elements, std::vector && index_mapping_); + MergeTreeSetIndex(const Columns & set_elements, std::vector && indexes_mapping_); size_t size() const { return ordered_set.at(0)->size(); } @@ -217,6 +217,8 @@ class MergeTreeSetIndex BoolMask checkInRange(const std::vector & key_ranges, const DataTypes & data_types) const; private: + // If all arguments in tuple are key columns, we can optimize NOT IN when there is only one element. 
+ bool has_all_keys; Columns ordered_set; std::vector indexes_mapping; diff --git a/tests/queries/0_stateless/01891_not_in_partition_prune.reference b/tests/queries/0_stateless/01891_not_in_partition_prune.reference index 628053cd4f88..9d2517ad760e 100644 --- a/tests/queries/0_stateless/01891_not_in_partition_prune.reference +++ b/tests/queries/0_stateless/01891_not_in_partition_prune.reference @@ -4,3 +4,5 @@ 7 107 8 108 9 109 +1970-01-01 1 one +1970-01-01 3 three diff --git a/tests/queries/0_stateless/01891_not_in_partition_prune.sql b/tests/queries/0_stateless/01891_not_in_partition_prune.sql index edbfad93e5df..5bf90fdd65c0 100644 --- a/tests/queries/0_stateless/01891_not_in_partition_prune.sql +++ b/tests/queries/0_stateless/01891_not_in_partition_prune.sql @@ -8,3 +8,18 @@ set max_rows_to_read = 5; select * from test1 where i not in (1,2,3,4,5) order by i; drop table test1; + +drop table if exists t1; +drop table if exists t2; + +create table t1 (date Date, a Float64, b String) Engine=MergeTree ORDER BY date; +create table t2 (date Date, a Float64, b String) Engine=MergeTree ORDER BY date; + +insert into t1(a, b) values (1, 'one'), (2, 'two'); +insert into t2(a, b) values (2, 'two'), (3, 'three'); + +select date, a, b from t1 where (date, a, b) NOT IN (select date,a,b from t2); +select date, a, b from t2 where (date, a, b) NOT IN (select date,a,b from t1); + +drop table t1; +drop table t2; From cdcfd4f61a3ee8bac37773b49f3299060b304f57 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 7 Sep 2021 18:55:54 +0300 Subject: [PATCH 045/472] Backport #28663 to 21.9: fix getNumberOfArguments() for s2Rect functions --- src/Functions/s2RectAdd.cpp | 2 +- src/Functions/s2RectContains.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Functions/s2RectAdd.cpp b/src/Functions/s2RectAdd.cpp index 90578567da23..d9b12ce22a31 100644 --- a/src/Functions/s2RectAdd.cpp +++ b/src/Functions/s2RectAdd.cpp @@ -41,7 +41,7 @@ class FunctionS2RectAdd : public IFunction return name; } - size_t getNumberOfArguments() const override { return 4; } + size_t getNumberOfArguments() const override { return 3; } bool useDefaultImplementationForConstants() const override { return true; } diff --git a/src/Functions/s2RectContains.cpp b/src/Functions/s2RectContains.cpp index 5f556c3ec14f..27fed9e20310 100644 --- a/src/Functions/s2RectContains.cpp +++ b/src/Functions/s2RectContains.cpp @@ -41,7 +41,7 @@ class FunctionS2RectContains : public IFunction return name; } - size_t getNumberOfArguments() const override { return 4; } + size_t getNumberOfArguments() const override { return 3; } bool useDefaultImplementationForConstants() const override { return true; } From 12ad669b6a6e08d9bb08520bf830aa9c9be44d1a Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 8 Sep 2021 04:53:55 +0300 Subject: [PATCH 046/472] Backport #28685 to 21.9: Add Settings.Names, Settings.Values aliases for system.processes table --- src/Storages/System/StorageSystemProcesses.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/Storages/System/StorageSystemProcesses.cpp b/src/Storages/System/StorageSystemProcesses.cpp index e2685af77181..5e6ba37226cc 100644 --- a/src/Storages/System/StorageSystemProcesses.cpp +++ b/src/Storages/System/StorageSystemProcesses.cpp @@ -73,7 +73,9 @@ NamesAndAliases StorageSystemProcesses::getNamesAndAliases() return { {"ProfileEvents.Names", {std::make_shared(std::make_shared())}, "mapKeys(ProfileEvents)"}, - {"ProfileEvents.Values", 
{std::make_shared(std::make_shared())}, "mapValues(ProfileEvents)"} + {"ProfileEvents.Values", {std::make_shared(std::make_shared())}, "mapValues(ProfileEvents)"}, + {"Settings.Names", {std::make_shared(std::make_shared())}, "mapKeys(Settings)" }, + {"Settings.Values", {std::make_shared(std::make_shared())}, "mapValues(Settings)"} }; } From 5b2f3726e57f620cd020346dab5ad4dccbb72655 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 8 Sep 2021 14:20:23 +0300 Subject: [PATCH 047/472] Fix build --- src/Coordination/Changelog.cpp | 2 -- src/Coordination/Changelog.h | 6 ++++++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/Coordination/Changelog.cpp b/src/Coordination/Changelog.cpp index 5518a5399c99..9b89ab22fa28 100644 --- a/src/Coordination/Changelog.cpp +++ b/src/Coordination/Changelog.cpp @@ -317,8 +317,6 @@ void Changelog::readChangelogAndInitWriter(uint64_t last_commited_log_index, uin LOG_WARNING(log, "Don't have required amount of reserved log records. Need to read from {}, smallest available log index on disk {}.", start_to_read_from, changelog_description.from_log_index); } - started = true; - ChangelogReader reader(changelog_description.path); last_log_read_result = reader.readChangelog(logs, start_to_read_from, index_to_start_pos, log); diff --git a/src/Coordination/Changelog.h b/src/Coordination/Changelog.h index 6c75fba46639..cde99fb738c4 100644 --- a/src/Coordination/Changelog.h +++ b/src/Coordination/Changelog.h @@ -54,6 +54,12 @@ struct ChangelogFileDescription uint64_t to_log_index; std::string path; + + /// How many entries should be stored in this log + uint64_t expectedEntriesCountInLog() const + { + return to_log_index - from_log_index + 1; + } }; class ChangelogWriter; From 7299c1687ed72df8ce85d2ac0f5d03eb8ee6f263 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 8 Sep 2021 14:57:42 +0300 Subject: [PATCH 048/472] Backport #28686 to 21.9: Fix coredump in creating distributed table --- src/Storages/StorageDistributed.cpp | 7 ++++++- ...02017_create_distributed_table_coredump.reference | 0 .../02017_create_distributed_table_coredump.sql | 12 ++++++++++++ 3 files changed, 18 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/02017_create_distributed_table_coredump.reference create mode 100644 tests/queries/0_stateless/02017_create_distributed_table_coredump.sql diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index df7d568deb95..1ad80f8aea66 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -1332,7 +1332,12 @@ void registerStorageDistributed(StorageFactory & factory) String remote_table = engine_args[2]->as().value.safeGet(); const auto & sharding_key = engine_args.size() >= 4 ? engine_args[3] : nullptr; - const auto & storage_policy = engine_args.size() >= 5 ? engine_args[4]->as().value.safeGet() : "default"; + String storage_policy = "default"; + if (engine_args.size() >= 5) + { + engine_args[4] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[4], local_context); + storage_policy = engine_args[4]->as().value.safeGet(); + } /// Check that sharding_key exists in the table and has numeric type. 
if (sharding_key) diff --git a/tests/queries/0_stateless/02017_create_distributed_table_coredump.reference b/tests/queries/0_stateless/02017_create_distributed_table_coredump.reference new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/queries/0_stateless/02017_create_distributed_table_coredump.sql b/tests/queries/0_stateless/02017_create_distributed_table_coredump.sql new file mode 100644 index 000000000000..27c98c3e2372 --- /dev/null +++ b/tests/queries/0_stateless/02017_create_distributed_table_coredump.sql @@ -0,0 +1,12 @@ +drop table if exists t; +drop table if exists td1; +drop table if exists td2; +drop table if exists td3; +create table t (val UInt32) engine = MergeTree order by val; +create table td1 engine = Distributed(test_shard_localhost, currentDatabase(), 't') as t; +create table td2 engine = Distributed(test_shard_localhost, currentDatabase(), 't', xxHash32(val), default) as t; +create table td3 engine = Distributed(test_shard_localhost, currentDatabase(), 't', xxHash32(val), 'default') as t; +drop table if exists t; +drop table if exists td1; +drop table if exists td2; +drop table if exists td3; From b75a2b27498c9388807ca28c20f99d51b1c2f76f Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 8 Sep 2021 16:57:52 +0300 Subject: [PATCH 049/472] Backport #28658 to 21.9: Fix optimization of disjunctions chain in distributed queries --- .../LogicalExpressionsOptimizer.cpp | 24 +++++++------------ .../02023_transform_or_to_in.reference | 2 ++ .../0_stateless/02023_transform_or_to_in.sql | 15 ++++++++++++ 3 files changed, 26 insertions(+), 15 deletions(-) create mode 100644 tests/queries/0_stateless/02023_transform_or_to_in.reference create mode 100644 tests/queries/0_stateless/02023_transform_or_to_in.sql diff --git a/src/Interpreters/LogicalExpressionsOptimizer.cpp b/src/Interpreters/LogicalExpressionsOptimizer.cpp index c0d5a16fa65a..936ed0149d2f 100644 --- a/src/Interpreters/LogicalExpressionsOptimizer.cpp +++ b/src/Interpreters/LogicalExpressionsOptimizer.cpp @@ -225,22 +225,19 @@ void LogicalExpressionsOptimizer::addInExpression(const DisjunctiveEqualityChain /// 1. Create a new IN expression based on information from the OR-chain. - /// Construct a list of literals `x1, ..., xN` from the string `expr = x1 OR ... OR expr = xN` - ASTPtr value_list = std::make_shared(); + /// Construct a tuple of literals `x1, ..., xN` from the string `expr = x1 OR ... OR expr = xN` + + Tuple tuple; + tuple.reserve(equality_functions.size()); + for (const auto * function : equality_functions) { const auto & operands = getFunctionOperands(function); - value_list->children.push_back(operands[1]); + tuple.push_back(operands[1]->as()->value); } /// Sort the literals so that they are specified in the same order in the IN expression. - /// Otherwise, they would be specified in the order of the ASTLiteral addresses, which is nondeterministic. - std::sort(value_list->children.begin(), value_list->children.end(), [](const DB::ASTPtr & lhs, const DB::ASTPtr & rhs) - { - const auto * val_lhs = lhs->as(); - const auto * val_rhs = rhs->as(); - return val_lhs->value < val_rhs->value; - }); + std::sort(tuple.begin(), tuple.end()); /// Get the expression `expr` from the chain `expr = x1 OR ... 
OR expr = xN` ASTPtr equals_expr_lhs; @@ -250,14 +247,11 @@ void LogicalExpressionsOptimizer::addInExpression(const DisjunctiveEqualityChain equals_expr_lhs = operands[0]; } - auto tuple_function = std::make_shared(); - tuple_function->name = "tuple"; - tuple_function->arguments = value_list; - tuple_function->children.push_back(tuple_function->arguments); + auto tuple_literal = std::make_shared(std::move(tuple)); ASTPtr expression_list = std::make_shared(); expression_list->children.push_back(equals_expr_lhs); - expression_list->children.push_back(tuple_function); + expression_list->children.push_back(tuple_literal); /// Construct the expression `expr IN (x1, ..., xN)` auto in_function = std::make_shared(); diff --git a/tests/queries/0_stateless/02023_transform_or_to_in.reference b/tests/queries/0_stateless/02023_transform_or_to_in.reference new file mode 100644 index 000000000000..aa47d0d46d47 --- /dev/null +++ b/tests/queries/0_stateless/02023_transform_or_to_in.reference @@ -0,0 +1,2 @@ +0 +0 diff --git a/tests/queries/0_stateless/02023_transform_or_to_in.sql b/tests/queries/0_stateless/02023_transform_or_to_in.sql new file mode 100644 index 000000000000..c4ceeb76931e --- /dev/null +++ b/tests/queries/0_stateless/02023_transform_or_to_in.sql @@ -0,0 +1,15 @@ +DROP TABLE IF EXISTS t_transform_or; + +CREATE TABLE t_transform_or(B AggregateFunction(uniq, String), A String) Engine=MergeTree ORDER BY (A); + +INSERT INTO t_transform_or SELECT uniqState(''), '0'; + +SELECT uniqMergeIf(B, (A = '1') OR (A = '2') OR (A = '3')) +FROM cluster(test_cluster_two_shards, currentDatabase(), t_transform_or) +SETTINGS legacy_column_name_of_tuple_literal = 0; + +SELECT uniqMergeIf(B, (A = '1') OR (A = '2') OR (A = '3')) +FROM cluster(test_cluster_two_shards, currentDatabase(), t_transform_or) +SETTINGS legacy_column_name_of_tuple_literal = 1; + +DROP TABLE t_transform_or; From b03f4fb373525591fd3446ef925b688c940cfb60 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 9 Sep 2021 11:09:27 +0300 Subject: [PATCH 050/472] Auto version update to [21.9.2.17] [54454] --- cmake/autogenerated_versions.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cmake/autogenerated_versions.txt b/cmake/autogenerated_versions.txt index 470c49efcec0..27c4c9426dd2 100644 --- a/cmake/autogenerated_versions.txt +++ b/cmake/autogenerated_versions.txt @@ -6,7 +6,7 @@ SET(VERSION_REVISION 54454) SET(VERSION_MAJOR 21) SET(VERSION_MINOR 9) SET(VERSION_PATCH 2) -SET(VERSION_GITHASH f6fa3218282532b94a73508de05fd00d0cd65e86) -SET(VERSION_DESCRIBE v21.9.2.1-prestable) -SET(VERSION_STRING 21.9.2.1) +SET(VERSION_GITHASH 871ee96fd4a30eb1c544d9855e01aebd01053df5) +SET(VERSION_DESCRIBE v21.9.2.17-stable) +SET(VERSION_STRING 21.9.2.17) # end of autochange From 0dfdd122bc55d416988a4735785897257aa2f535 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 9 Sep 2021 11:13:23 +0300 Subject: [PATCH 051/472] Auto version update to [21.9.3.1] [54454] --- cmake/autogenerated_versions.txt | 6 +++--- debian/changelog | 4 ++-- docker/client/Dockerfile | 2 +- docker/server/Dockerfile | 2 +- docker/test/Dockerfile | 2 +- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/cmake/autogenerated_versions.txt b/cmake/autogenerated_versions.txt index 27c4c9426dd2..58c08138027f 100644 --- a/cmake/autogenerated_versions.txt +++ b/cmake/autogenerated_versions.txt @@ -5,8 +5,8 @@ SET(VERSION_REVISION 54454) SET(VERSION_MAJOR 21) SET(VERSION_MINOR 9) -SET(VERSION_PATCH 2) +SET(VERSION_PATCH 3) SET(VERSION_GITHASH 
871ee96fd4a30eb1c544d9855e01aebd01053df5) -SET(VERSION_DESCRIBE v21.9.2.17-stable) -SET(VERSION_STRING 21.9.2.17) +SET(VERSION_DESCRIBE v21.9.3.1-stable) +SET(VERSION_STRING 21.9.3.1) # end of autochange diff --git a/debian/changelog b/debian/changelog index f7a7ba9d9d0b..c1ece6dd348a 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,5 +1,5 @@ -clickhouse (21.9.2.1) unstable; urgency=low +clickhouse (21.9.3.1) unstable; urgency=low * Modified source code - -- clickhouse-release Wed, 01 Sep 2021 23:31:07 +0300 + -- clickhouse-release Thu, 09 Sep 2021 11:13:20 +0300 diff --git a/docker/client/Dockerfile b/docker/client/Dockerfile index 6b3ef8a23a4b..080f2c6da225 100644 --- a/docker/client/Dockerfile +++ b/docker/client/Dockerfile @@ -1,7 +1,7 @@ FROM ubuntu:18.04 ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/" -ARG version=21.9.2.* +ARG version=21.9.3.* RUN sed -i 's|http://archive|http://ru.archive|g' /etc/apt/sources.list diff --git a/docker/server/Dockerfile b/docker/server/Dockerfile index eaf9db032094..36ef7dad33d0 100644 --- a/docker/server/Dockerfile +++ b/docker/server/Dockerfile @@ -1,7 +1,7 @@ FROM ubuntu:20.04 ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/" -ARG version=21.9.2.* +ARG version=21.9.3.* ARG gosu_ver=1.10 # set non-empty deb_location_url url to create a docker image diff --git a/docker/test/Dockerfile b/docker/test/Dockerfile index 4b8385ed639b..4a8c4739fb2c 100644 --- a/docker/test/Dockerfile +++ b/docker/test/Dockerfile @@ -1,7 +1,7 @@ FROM ubuntu:18.04 ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/" -ARG version=21.9.2.* +ARG version=21.9.3.* RUN apt-get update && \ apt-get install -y apt-transport-https dirmngr && \ From 0c3bd6a293944b6897e76c7b2eb54b3eea1a9a7d Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 9 Sep 2021 15:02:18 +0300 Subject: [PATCH 052/472] Backport #28734 to 21.9: Fix race on mutation_pointer update --- src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index c71a79d20097..4c5838cde9bb 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -144,7 +144,10 @@ bool ReplicatedMergeTreeQueue::load(zkutil::ZooKeeperPtr zookeeper) updated = true; } - zookeeper->tryGet(fs::path(replica_path) / "mutation_pointer", mutation_pointer); + { /// Mutation pointer is a part of "state" and must be updated with state mutex + std::lock_guard lock(state_mutex); + zookeeper->tryGet(fs::path(replica_path) / "mutation_pointer", mutation_pointer); + } } updateTimesInZooKeeper(zookeeper, min_unprocessed_insert_time_changed, {}); From 6f765c5fe4df6842e77be9a59680390a4baec080 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 9 Sep 2021 19:05:59 +0300 Subject: [PATCH 053/472] Backport #28700 to 21.9: Fix crash on exception with projection aggregate --- src/Interpreters/Aggregator.cpp | 20 +++++----- src/Interpreters/Aggregator.h | 27 +++++++------- .../Transforms/AggregatingTransform.cpp | 15 +++++--- .../Transforms/AggregatingTransform.h | 37 ++++++++++++++++--- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 8 +++- ...ference => 01707_join_use_nulls.reference} | 0 ...use_nulls.sql => 01707_join_use_nulls.sql} | 0 ...0_projection_with_mixed_pipeline.reference | 0 .../01710_projection_with_mixed_pipeline.sql | 9 +++++ 9 files changed, 81 
insertions(+), 35 deletions(-) rename tests/queries/0_stateless/{01710_join_use_nulls.reference => 01707_join_use_nulls.reference} (100%) rename tests/queries/0_stateless/{01710_join_use_nulls.sql => 01707_join_use_nulls.sql} (100%) create mode 100644 tests/queries/0_stateless/01710_projection_with_mixed_pipeline.reference create mode 100644 tests/queries/0_stateless/01710_projection_with_mixed_pipeline.sql diff --git a/src/Interpreters/Aggregator.cpp b/src/Interpreters/Aggregator.cpp index c26eb10e697e..a2896127d04e 100644 --- a/src/Interpreters/Aggregator.cpp +++ b/src/Interpreters/Aggregator.cpp @@ -786,7 +786,7 @@ void NO_INLINE Aggregator::executeWithoutKeyImpl( AggregatedDataWithoutKey & res, size_t rows, AggregateFunctionInstruction * aggregate_instructions, - Arena * arena) + Arena * arena) const { #if USE_EMBEDDED_COMPILER if constexpr (use_compiled_functions) @@ -865,7 +865,7 @@ void NO_INLINE Aggregator::executeOnIntervalWithoutKeyImpl( void Aggregator::prepareAggregateInstructions(Columns columns, AggregateColumns & aggregate_columns, Columns & materialized_columns, - AggregateFunctionInstructions & aggregate_functions_instructions, NestedColumnsHolder & nested_columns_holder) + AggregateFunctionInstructions & aggregate_functions_instructions, NestedColumnsHolder & nested_columns_holder) const { for (size_t i = 0; i < params.aggregates_size; ++i) aggregate_columns[i].resize(params.aggregates[i].arguments.size()); @@ -917,7 +917,7 @@ void Aggregator::prepareAggregateInstructions(Columns columns, AggregateColumns bool Aggregator::executeOnBlock(const Block & block, AggregatedDataVariants & result, - ColumnRawPtrs & key_columns, AggregateColumns & aggregate_columns, bool & no_more_keys) + ColumnRawPtrs & key_columns, AggregateColumns & aggregate_columns, bool & no_more_keys) const { UInt64 num_rows = block.rows(); return executeOnBlock(block.getColumns(), num_rows, result, key_columns, aggregate_columns, no_more_keys); @@ -925,7 +925,7 @@ bool Aggregator::executeOnBlock(const Block & block, AggregatedDataVariants & re bool Aggregator::executeOnBlock(Columns columns, UInt64 num_rows, AggregatedDataVariants & result, - ColumnRawPtrs & key_columns, AggregateColumns & aggregate_columns, bool & no_more_keys) + ColumnRawPtrs & key_columns, AggregateColumns & aggregate_columns, bool & no_more_keys) const { /// `result` will destroy the states of aggregate functions in the destructor result.aggregator = this; @@ -1058,7 +1058,7 @@ bool Aggregator::executeOnBlock(Columns columns, UInt64 num_rows, AggregatedData } -void Aggregator::writeToTemporaryFile(AggregatedDataVariants & data_variants, const String & tmp_path) +void Aggregator::writeToTemporaryFile(AggregatedDataVariants & data_variants, const String & tmp_path) const { Stopwatch watch; size_t rows = data_variants.size(); @@ -1130,7 +1130,7 @@ void Aggregator::writeToTemporaryFile(AggregatedDataVariants & data_variants, co } -void Aggregator::writeToTemporaryFile(AggregatedDataVariants & data_variants) +void Aggregator::writeToTemporaryFile(AggregatedDataVariants & data_variants) const { String tmp_path = params.tmp_volume->getDisk()->getPath(); return writeToTemporaryFile(data_variants, tmp_path); @@ -1192,7 +1192,7 @@ template void Aggregator::writeToTemporaryFileImpl( AggregatedDataVariants & data_variants, Method & method, - IBlockOutputStream & out) + IBlockOutputStream & out) const { size_t max_temporary_block_size_rows = 0; size_t max_temporary_block_size_bytes = 0; @@ -2311,7 +2311,7 @@ void NO_INLINE 
Aggregator::mergeWithoutKeyStreamsImpl( block.clear(); } -bool Aggregator::mergeBlock(Block block, AggregatedDataVariants & result, bool & no_more_keys) +bool Aggregator::mergeOnBlock(Block block, AggregatedDataVariants & result, bool & no_more_keys) const { /// `result` will destroy the states of aggregate functions in the destructor result.aggregator = this; @@ -2661,7 +2661,7 @@ void NO_INLINE Aggregator::convertBlockToTwoLevelImpl( } -std::vector Aggregator::convertBlockToTwoLevel(const Block & block) +std::vector Aggregator::convertBlockToTwoLevel(const Block & block) const { if (!block) return {}; @@ -2753,7 +2753,7 @@ void Aggregator::destroyWithoutKey(AggregatedDataVariants & result) const } -void Aggregator::destroyAllAggregateStates(AggregatedDataVariants & result) +void Aggregator::destroyAllAggregateStates(AggregatedDataVariants & result) const { if (result.empty()) return; diff --git a/src/Interpreters/Aggregator.h b/src/Interpreters/Aggregator.h index fde6ba219dff..e72fe4baea31 100644 --- a/src/Interpreters/Aggregator.h +++ b/src/Interpreters/Aggregator.h @@ -506,7 +506,7 @@ struct AggregatedDataVariants : private boost::noncopyable * But this can hardly be done simply because it is planned to put variable-length strings into the same pool. * In this case, the pool will not be able to know with what offsets objects are stored. */ - Aggregator * aggregator = nullptr; + const Aggregator * aggregator = nullptr; size_t keys_size{}; /// Number of keys. NOTE do we need this field? Sizes key_sizes; /// Dimensions of keys, if keys of fixed length @@ -975,11 +975,14 @@ class Aggregator final /// Process one block. Return false if the processing should be aborted (with group_by_overflow_mode = 'break'). bool executeOnBlock(const Block & block, AggregatedDataVariants & result, ColumnRawPtrs & key_columns, AggregateColumns & aggregate_columns, /// Passed to not create them anew for each block - bool & no_more_keys); + bool & no_more_keys) const; bool executeOnBlock(Columns columns, UInt64 num_rows, AggregatedDataVariants & result, ColumnRawPtrs & key_columns, AggregateColumns & aggregate_columns, /// Passed to not create them anew for each block - bool & no_more_keys); + bool & no_more_keys) const; + + /// Used for aggregate projection. + bool mergeOnBlock(Block block, AggregatedDataVariants & result, bool & no_more_keys) const; /** Convert the aggregation data structure into a block. * If overflow_row = true, then aggregates for rows that are not included in max_rows_to_group_by are put in the first block. @@ -996,8 +999,6 @@ class Aggregator final /// Merge partially aggregated blocks separated to buckets into one data structure. void mergeBlocks(BucketToBlocks bucket_to_blocks, AggregatedDataVariants & result, size_t max_threads); - bool mergeBlock(Block block, AggregatedDataVariants & result, bool & no_more_keys); - /// Merge several partially aggregated blocks into one. /// Precondition: for all blocks block.info.is_overflows flag must be the same. /// (either all blocks are from overflow data or none blocks are). @@ -1007,11 +1008,11 @@ class Aggregator final /** Split block with partially-aggregated data to many blocks, as if two-level method of aggregation was used. * This is needed to simplify merging of that data with other results, that are already two-level. */ - std::vector convertBlockToTwoLevel(const Block & block); + std::vector convertBlockToTwoLevel(const Block & block) const; /// For external aggregation. 
- void writeToTemporaryFile(AggregatedDataVariants & data_variants, const String & tmp_path); - void writeToTemporaryFile(AggregatedDataVariants & data_variants); + void writeToTemporaryFile(AggregatedDataVariants & data_variants, const String & tmp_path) const; + void writeToTemporaryFile(AggregatedDataVariants & data_variants) const; bool hasTemporaryFiles() const { return !temporary_files.empty(); } @@ -1083,7 +1084,7 @@ class Aggregator final Poco::Logger * log = &Poco::Logger::get("Aggregator"); /// For external aggregation. - TemporaryFiles temporary_files; + mutable TemporaryFiles temporary_files; #if USE_EMBEDDED_COMPILER std::shared_ptr compiled_aggregate_functions_holder; @@ -1106,7 +1107,7 @@ class Aggregator final /** Call `destroy` methods for states of aggregate functions. * Used in the exception handler for aggregation, since RAII in this case is not applicable. */ - void destroyAllAggregateStates(AggregatedDataVariants & result); + void destroyAllAggregateStates(AggregatedDataVariants & result) const; /// Process one data block, aggregate the data into a hash table. @@ -1136,7 +1137,7 @@ class Aggregator final AggregatedDataWithoutKey & res, size_t rows, AggregateFunctionInstruction * aggregate_instructions, - Arena * arena); + Arena * arena) const; static void executeOnIntervalWithoutKeyImpl( AggregatedDataWithoutKey & res, @@ -1149,7 +1150,7 @@ class Aggregator final void writeToTemporaryFileImpl( AggregatedDataVariants & data_variants, Method & method, - IBlockOutputStream & out); + IBlockOutputStream & out) const; /// Merge NULL key data from hash table `src` into `dst`. template @@ -1304,7 +1305,7 @@ class Aggregator final AggregateColumns & aggregate_columns, Columns & materialized_columns, AggregateFunctionInstructions & instructions, - NestedColumnsHolder & nested_columns_holder); + NestedColumnsHolder & nested_columns_holder) const; void addSingleKeyToAggregateColumns( const AggregatedDataVariants & data_variants, diff --git a/src/Processors/Transforms/AggregatingTransform.cpp b/src/Processors/Transforms/AggregatingTransform.cpp index 7802bf6e3bf3..a8a93e536630 100644 --- a/src/Processors/Transforms/AggregatingTransform.cpp +++ b/src/Processors/Transforms/AggregatingTransform.cpp @@ -395,9 +395,14 @@ AggregatingTransform::AggregatingTransform(Block header, AggregatingTransformPar } AggregatingTransform::AggregatingTransform( - Block header, AggregatingTransformParamsPtr params_, ManyAggregatedDataPtr many_data_, - size_t current_variant, size_t max_threads_, size_t temporary_data_merge_threads_) - : IProcessor({std::move(header)}, {params_->getHeader()}), params(std::move(params_)) + Block header, + AggregatingTransformParamsPtr params_, + ManyAggregatedDataPtr many_data_, + size_t current_variant, + size_t max_threads_, + size_t temporary_data_merge_threads_) + : IProcessor({std::move(header)}, {params_->getHeader()}) + , params(std::move(params_)) , key_columns(params->params.keys_size) , aggregate_columns(params->params.aggregates_size) , many_data(std::move(many_data_)) @@ -525,7 +530,7 @@ void AggregatingTransform::consume(Chunk chunk) { auto block = getInputs().front().getHeader().cloneWithColumns(chunk.detachColumns()); block = materializeBlock(block); - if (!params->aggregator.mergeBlock(block, variants, no_more_keys)) + if (!params->aggregator.mergeOnBlock(block, variants, no_more_keys)) is_consume_finished = true; } else @@ -547,7 +552,7 @@ void AggregatingTransform::initGenerate() if (variants.empty() && params->params.keys_size == 0 && 
!params->params.empty_result_for_aggregation_by_empty_set)
     {
         if (params->only_merge)
-            params->aggregator.mergeBlock(getInputs().front().getHeader(), variants, no_more_keys);
+            params->aggregator.mergeOnBlock(getInputs().front().getHeader(), variants, no_more_keys);
         else
             params->aggregator.executeOnBlock(getInputs().front().getHeader(), variants, key_columns, aggregate_columns, no_more_keys);
     }
diff --git a/src/Processors/Transforms/AggregatingTransform.h b/src/Processors/Transforms/AggregatingTransform.h
index 9512a7a2811f..1639bc4df4b1 100644
--- a/src/Processors/Transforms/AggregatingTransform.h
+++ b/src/Processors/Transforms/AggregatingTransform.h
@@ -27,15 +27,38 @@ class AggregatedChunkInfo : public ChunkInfo
 class IBlockInputStream;
 using BlockInputStreamPtr = std::shared_ptr<IBlockInputStream>;
 
+using AggregatorList = std::list<Aggregator>;
+using AggregatorListPtr = std::shared_ptr<AggregatorList>;
+
 struct AggregatingTransformParams
 {
     Aggregator::Params params;
-    Aggregator aggregator;
+
+    /// Each params holds a list of aggregators which are used in query. It's needed because we need
+    /// to use a pointer of aggregator to proper destroy complex aggregation states on exception
+    /// (See comments in AggregatedDataVariants). However, this pointer might not be valid because
+    /// we can have two different aggregators at the same time due to mixed pipeline of aggregate
+    /// projections, and one of them might gets destroyed before used.
+    AggregatorListPtr aggregator_list_ptr;
+    Aggregator & aggregator;
     bool final;
     bool only_merge = false;
 
     AggregatingTransformParams(const Aggregator::Params & params_, bool final_)
-        : params(params_), aggregator(params), final(final_) {}
+        : params(params_)
+        , aggregator_list_ptr(std::make_shared<AggregatorList>())
+        , aggregator(*aggregator_list_ptr->emplace(aggregator_list_ptr->end(), params))
+        , final(final_)
+    {
+    }
+
+    AggregatingTransformParams(const Aggregator::Params & params_, const AggregatorListPtr & aggregator_list_ptr_, bool final_)
+        : params(params_)
+        , aggregator_list_ptr(aggregator_list_ptr_)
+        , aggregator(*aggregator_list_ptr->emplace(aggregator_list_ptr->end(), params))
+        , final(final_)
+    {
+    }
 
     Block getHeader() const { return aggregator.getHeader(final); }
 
@@ -82,9 +105,13 @@ class AggregatingTransform : public IProcessor
     AggregatingTransform(Block header, AggregatingTransformParamsPtr params_);
 
     /// For Parallel aggregating.
- AggregatingTransform(Block header, AggregatingTransformParamsPtr params_, - ManyAggregatedDataPtr many_data, size_t current_variant, - size_t max_threads, size_t temporary_data_merge_threads); + AggregatingTransform( + Block header, + AggregatingTransformParamsPtr params_, + ManyAggregatedDataPtr many_data, + size_t current_variant, + size_t max_threads, + size_t temporary_data_merge_threads); ~AggregatingTransform() override; String getName() const override { return "AggregatingTransform"; } diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 004eaa6254cd..d57ccf645af9 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -260,6 +260,8 @@ QueryPlanPtr MergeTreeDataSelectExecutor::read( auto many_data = std::make_shared(projection_pipe.numOutputPorts() + ordinary_pipe.numOutputPorts()); size_t counter = 0; + AggregatorListPtr aggregator_list_ptr = std::make_shared(); + // TODO apply in_order_optimization here auto build_aggregate_pipe = [&](Pipe & pipe, bool projection) { @@ -299,7 +301,8 @@ QueryPlanPtr MergeTreeDataSelectExecutor::read( settings.min_count_to_compile_aggregate_expression, header_before_aggregation); // The source header is also an intermediate header - transform_params = std::make_shared(std::move(params), query_info.projection->aggregate_final); + transform_params = std::make_shared( + std::move(params), aggregator_list_ptr, query_info.projection->aggregate_final); /// This part is hacky. /// We want AggregatingTransform to work with aggregate states instead of normal columns. @@ -329,7 +332,8 @@ QueryPlanPtr MergeTreeDataSelectExecutor::read( settings.compile_aggregate_expressions, settings.min_count_to_compile_aggregate_expression); - transform_params = std::make_shared(std::move(params), query_info.projection->aggregate_final); + transform_params = std::make_shared( + std::move(params), aggregator_list_ptr, query_info.projection->aggregate_final); } pipe.resize(pipe.numOutputPorts(), true, true); diff --git a/tests/queries/0_stateless/01710_join_use_nulls.reference b/tests/queries/0_stateless/01707_join_use_nulls.reference similarity index 100% rename from tests/queries/0_stateless/01710_join_use_nulls.reference rename to tests/queries/0_stateless/01707_join_use_nulls.reference diff --git a/tests/queries/0_stateless/01710_join_use_nulls.sql b/tests/queries/0_stateless/01707_join_use_nulls.sql similarity index 100% rename from tests/queries/0_stateless/01710_join_use_nulls.sql rename to tests/queries/0_stateless/01707_join_use_nulls.sql diff --git a/tests/queries/0_stateless/01710_projection_with_mixed_pipeline.reference b/tests/queries/0_stateless/01710_projection_with_mixed_pipeline.reference new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/queries/0_stateless/01710_projection_with_mixed_pipeline.sql b/tests/queries/0_stateless/01710_projection_with_mixed_pipeline.sql new file mode 100644 index 000000000000..734aa659146f --- /dev/null +++ b/tests/queries/0_stateless/01710_projection_with_mixed_pipeline.sql @@ -0,0 +1,9 @@ +drop table if exists t; + +create table t (x UInt32) engine = MergeTree order by tuple() settings index_granularity = 8; +insert into t select number from numbers(100); +alter table t add projection p (select uniqHLL12(x)); +insert into t select number + 100 from numbers(100); +select uniqHLL12(x) from t settings allow_experimental_projection_optimization = 1, 
max_bytes_to_read=400, max_block_size=8; -- { serverError 307; } + +drop table if exists t; From 407bcbc59c761de949a5bc12d0cfd88777e68927 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 10 Sep 2021 01:03:02 +0300 Subject: [PATCH 054/472] Backport #28816 to 21.9: Lower compiled_expression_cache_size to 128MB --- programs/server/Server.cpp | 2 +- programs/server/config.xml | 2 +- programs/server/config.yaml.example | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index f97c8d314e99..3e1804d6fe6b 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -946,7 +946,7 @@ if (ThreadFuzzer::instance().isEffective()) global_context->setMMappedFileCache(mmap_cache_size); #if USE_EMBEDDED_COMPILER - constexpr size_t compiled_expression_cache_size_default = 1024 * 1024 * 1024; + constexpr size_t compiled_expression_cache_size_default = 1024 * 1024 * 128; size_t compiled_expression_cache_size = config().getUInt64("compiled_expression_cache_size", compiled_expression_cache_size_default); CompiledExpressionCacheFactory::instance().init(compiled_expression_cache_size); #endif diff --git a/programs/server/config.xml b/programs/server/config.xml index 510a5e230f8f..f0a89a34d24b 100644 --- a/programs/server/config.xml +++ b/programs/server/config.xml @@ -330,7 +330,7 @@ 1000 - 1073741824 + 134217728 /var/lib/clickhouse/ diff --git a/programs/server/config.yaml.example b/programs/server/config.yaml.example index 5b2da1d3128e..ae4eac49a641 100644 --- a/programs/server/config.yaml.example +++ b/programs/server/config.yaml.example @@ -280,7 +280,7 @@ mark_cache_size: 5368709120 mmap_cache_size: 1000 # Cache size for compiled expressions. -compiled_expression_cache_size: 1073741824 +compiled_expression_cache_size: 134217728 # Path to data directory, with trailing slash. path: /var/lib/clickhouse/ From ab62c98264da31ed8ee5143877c576e705b6c276 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 10 Sep 2021 05:03:44 +0300 Subject: [PATCH 055/472] Backport #28821 to 21.9: Fix expressions compilation with short circuit evaluation --- src/Interpreters/ExpressionJIT.cpp | 39 ++++++++++--------- ...ns_with_short_circuit_evaluation.reference | 3 ++ ...ressions_with_short_circuit_evaluation.sql | 2 + 3 files changed, 25 insertions(+), 19 deletions(-) create mode 100644 tests/queries/0_stateless/02024_compile_expressions_with_short_circuit_evaluation.reference create mode 100644 tests/queries/0_stateless/02024_compile_expressions_with_short_circuit_evaluation.sql diff --git a/src/Interpreters/ExpressionJIT.cpp b/src/Interpreters/ExpressionJIT.cpp index 27089ab8d373..9005b24b044e 100644 --- a/src/Interpreters/ExpressionJIT.cpp +++ b/src/Interpreters/ExpressionJIT.cpp @@ -353,7 +353,8 @@ static bool isCompilableFunction(const ActionsDAG::Node & node, const std::unord static CompileDAG getCompilableDAG( const ActionsDAG::Node * root, - ActionsDAG::NodeRawConstPtrs & children) + ActionsDAG::NodeRawConstPtrs & children, + const std::unordered_set & lazy_executed_nodes) { /// Extract CompileDAG from root actions dag node. 
@@ -376,29 +377,29 @@ static CompileDAG getCompilableDAG( const auto * node = frame.node; bool is_compilable_constant = isCompilableConstant(*node); - bool is_compilable_function = isCompilableFunction(*node, {}); + bool is_compilable_function = isCompilableFunction(*node, lazy_executed_nodes); if (!is_compilable_function || is_compilable_constant) { - CompileDAG::Node compile_node; - compile_node.function = node->function_base; - compile_node.result_type = node->result_type; - - if (is_compilable_constant) - { - compile_node.type = CompileDAG::CompileType::CONSTANT; - compile_node.column = node->column; - } - else - { + CompileDAG::Node compile_node; + compile_node.function = node->function_base; + compile_node.result_type = node->result_type; + + if (is_compilable_constant) + { + compile_node.type = CompileDAG::CompileType::CONSTANT; + compile_node.column = node->column; + } + else + { compile_node.type = CompileDAG::CompileType::INPUT; children.emplace_back(node); - } + } - visited_node_to_compile_dag_position[node] = dag.getNodesCount(); - dag.addNode(std::move(compile_node)); - stack.pop(); - continue; + visited_node_to_compile_dag_position[node] = dag.getNodesCount(); + dag.addNode(std::move(compile_node)); + stack.pop(); + continue; } while (frame.next_child_to_visit < node->children.size()) @@ -568,7 +569,7 @@ void ActionsDAG::compileFunctions(size_t min_count_to_compile_expression, const for (auto & node : nodes_to_compile) { NodeRawConstPtrs new_children; - auto dag = getCompilableDAG(node, new_children); + auto dag = getCompilableDAG(node, new_children, lazy_executed_nodes); if (dag.getInputNodesCount() == 0) continue; diff --git a/tests/queries/0_stateless/02024_compile_expressions_with_short_circuit_evaluation.reference b/tests/queries/0_stateless/02024_compile_expressions_with_short_circuit_evaluation.reference new file mode 100644 index 000000000000..af23232bb2ea --- /dev/null +++ b/tests/queries/0_stateless/02024_compile_expressions_with_short_circuit_evaluation.reference @@ -0,0 +1,3 @@ +-- { echo } +select 1+number+multiIf(number == 1, cityHash64(number), number) from numbers(1) settings compile_expressions=1, min_count_to_compile_expression=0; +1 diff --git a/tests/queries/0_stateless/02024_compile_expressions_with_short_circuit_evaluation.sql b/tests/queries/0_stateless/02024_compile_expressions_with_short_circuit_evaluation.sql new file mode 100644 index 000000000000..113d0d9d4f7f --- /dev/null +++ b/tests/queries/0_stateless/02024_compile_expressions_with_short_circuit_evaluation.sql @@ -0,0 +1,2 @@ +-- { echo } +select 1+number+multiIf(number == 1, cityHash64(number), number) from numbers(1) settings compile_expressions=1, min_count_to_compile_expression=0; From ff5649c99e6a34b8c48463e4968b4840b8f72f14 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 10 Sep 2021 19:15:44 +0300 Subject: [PATCH 056/472] Backport #27827 to 21.9: Add more checks for LC in native protocol. 
--- src/Columns/ColumnLowCardinality.cpp | 77 +++-- src/Columns/ColumnLowCardinality.h | 1 - src/DataStreams/NativeBlockInputStream.cpp | 1 + src/DataTypes/Serializations/ISerialization.h | 3 + .../SerializationLowCardinality.cpp | 29 +- .../queries/0_stateless/2010_lc_native.python | 302 ++++++++++++++++++ .../0_stateless/2010_lc_native.reference | 8 + tests/queries/0_stateless/2010_lc_native.sh | 13 + 8 files changed, 390 insertions(+), 44 deletions(-) create mode 100755 tests/queries/0_stateless/2010_lc_native.python create mode 100644 tests/queries/0_stateless/2010_lc_native.reference create mode 100755 tests/queries/0_stateless/2010_lc_native.sh diff --git a/src/Columns/ColumnLowCardinality.cpp b/src/Columns/ColumnLowCardinality.cpp index 5b16c7757e8d..a77bb7a4754e 100644 --- a/src/Columns/ColumnLowCardinality.cpp +++ b/src/Columns/ColumnLowCardinality.cpp @@ -18,6 +18,7 @@ namespace ErrorCodes { extern const int ILLEGAL_COLUMN; extern const int LOGICAL_ERROR; + extern const int INCORRECT_DATA; } namespace @@ -131,14 +132,14 @@ namespace ColumnLowCardinality::ColumnLowCardinality(MutableColumnPtr && column_unique_, MutableColumnPtr && indexes_, bool is_shared) : dictionary(std::move(column_unique_), is_shared), idx(std::move(indexes_)) { - idx.check(getDictionary().size()); + // idx.check(getDictionary().size()); } void ColumnLowCardinality::insert(const Field & x) { compactIfSharedDictionary(); idx.insertPosition(dictionary.getColumnUnique().uniqueInsert(x)); - idx.check(getDictionary().size()); + // idx.check(getDictionary().size()); } void ColumnLowCardinality::insertDefault() @@ -167,14 +168,14 @@ void ColumnLowCardinality::insertFrom(const IColumn & src, size_t n) idx.insertPosition(dictionary.getColumnUnique().uniqueInsertFrom(nested, position)); } - idx.check(getDictionary().size()); + // idx.check(getDictionary().size()); } void ColumnLowCardinality::insertFromFullColumn(const IColumn & src, size_t n) { compactIfSharedDictionary(); idx.insertPosition(dictionary.getColumnUnique().uniqueInsertFrom(src, n)); - idx.check(getDictionary().size()); + // idx.check(getDictionary().size()); } void ColumnLowCardinality::insertRangeFrom(const IColumn & src, size_t start, size_t length) @@ -204,7 +205,7 @@ void ColumnLowCardinality::insertRangeFrom(const IColumn & src, size_t start, si auto inserted_indexes = dictionary.getColumnUnique().uniqueInsertRangeFrom(*used_keys, 0, used_keys->size()); idx.insertPositionsRange(*inserted_indexes->index(*sub_idx, 0), 0, length); } - idx.check(getDictionary().size()); + // idx.check(getDictionary().size()); } void ColumnLowCardinality::insertRangeFromFullColumn(const IColumn & src, size_t start, size_t length) @@ -212,23 +213,55 @@ void ColumnLowCardinality::insertRangeFromFullColumn(const IColumn & src, size_t compactIfSharedDictionary(); auto inserted_indexes = dictionary.getColumnUnique().uniqueInsertRangeFrom(src, start, length); idx.insertPositionsRange(*inserted_indexes, 0, length); - idx.check(getDictionary().size()); + // idx.check(getDictionary().size()); +} + +static void checkPositionsAreLimited(const IColumn & positions, UInt64 limit) +{ + auto check_for_type = [&](auto type) + { + using ColumnType = decltype(type); + const auto * column_ptr = typeid_cast *>(&positions); + + if (!column_ptr) + return false; + + const auto & data = column_ptr->getData(); + size_t num_rows = data.size(); + UInt64 max_position = 0; + for (size_t i = 0; i < num_rows; ++i) + max_position = std::max(max_position, data[i]); + + if (max_position >= limit) + throw 
Exception(ErrorCodes::INCORRECT_DATA, + "Index for LowCardinality is out of range. Dictionary size is {}, " + "but found index with value {}", limit, max_position); + + return true; + }; + + if (!check_for_type(UInt8()) && + !check_for_type(UInt16()) && + !check_for_type(UInt32()) && + !check_for_type(UInt64())) + throw Exception("Invalid column for ColumnLowCardinality index. Expected UInt, got " + positions.getName(), + ErrorCodes::ILLEGAL_COLUMN); } void ColumnLowCardinality::insertRangeFromDictionaryEncodedColumn(const IColumn & keys, const IColumn & positions) { - Index(positions.getPtr()).check(keys.size()); + checkPositionsAreLimited(positions, keys.size()); compactIfSharedDictionary(); auto inserted_indexes = dictionary.getColumnUnique().uniqueInsertRangeFrom(keys, 0, keys.size()); idx.insertPositionsRange(*inserted_indexes->index(positions, 0), 0, positions.size()); - idx.check(getDictionary().size()); + // idx.check(getDictionary().size()); } void ColumnLowCardinality::insertData(const char * pos, size_t length) { compactIfSharedDictionary(); idx.insertPosition(dictionary.getColumnUnique().uniqueInsertData(pos, length)); - idx.check(getDictionary().size()); + // idx.check(getDictionary().size()); } StringRef ColumnLowCardinality::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const @@ -243,7 +276,7 @@ const char * ColumnLowCardinality::deserializeAndInsertFromArena(const char * po const char * new_pos; idx.insertPosition(dictionary.getColumnUnique().uniqueDeserializeAndInsertFromArena(pos, new_pos)); - idx.check(getDictionary().size()); + // idx.check(getDictionary().size()); return new_pos; } @@ -750,30 +783,6 @@ void ColumnLowCardinality::Index::insertPositionsRange(const IColumn & column, U checkSizeOfType(); } -void ColumnLowCardinality::Index::check(size_t /*max_dictionary_size*/) -{ - /// TODO: remove - /* - auto check = [&](auto cur_type) - { - using CurIndexType = decltype(cur_type); - auto & positions_data = getPositionsData(); - - for (size_t i = 0; i < positions_data.size(); ++i) - { - if (positions_data[i] >= max_dictionary_size) - { - throw Exception("Found index " + toString(positions_data[i]) + " at position " + toString(i) - + " which is grated or equal than dictionary size " + toString(max_dictionary_size), - ErrorCodes::LOGICAL_ERROR); - } - } - }; - - callForType(std::move(check), size_of_type); - */ -} - void ColumnLowCardinality::Index::checkSizeOfType() { if (size_of_type != getSizeOfIndexType(*positions, size_of_type)) diff --git a/src/Columns/ColumnLowCardinality.h b/src/Columns/ColumnLowCardinality.h index 1f84c2431e33..d578106e048d 100644 --- a/src/Columns/ColumnLowCardinality.h +++ b/src/Columns/ColumnLowCardinality.h @@ -266,7 +266,6 @@ class ColumnLowCardinality final : public COWHelper ReadBuffer * { return &istr; }; settings.avg_value_size_hint = avg_value_size_hint; settings.position_independent_encoding = false; + settings.native_format = true; ISerialization::DeserializeBinaryBulkStatePtr state; auto serialization = type.getDefaultSerialization(); diff --git a/src/DataTypes/Serializations/ISerialization.h b/src/DataTypes/Serializations/ISerialization.h index 03785fc07f4e..f1d82a2000af 100644 --- a/src/DataTypes/Serializations/ISerialization.h +++ b/src/DataTypes/Serializations/ISerialization.h @@ -136,6 +136,9 @@ class ISerialization bool continuous_reading = true; bool position_independent_encoding = true; + + bool native_format = false; + /// If not zero, may be used to avoid reallocations while reading column of String 
type. double avg_value_size_hint = 0; }; diff --git a/src/DataTypes/Serializations/SerializationLowCardinality.cpp b/src/DataTypes/Serializations/SerializationLowCardinality.cpp index 69a41d831380..e9bb62f74c53 100644 --- a/src/DataTypes/Serializations/SerializationLowCardinality.cpp +++ b/src/DataTypes/Serializations/SerializationLowCardinality.cpp @@ -18,6 +18,7 @@ namespace DB namespace ErrorCodes { extern const int LOGICAL_ERROR; + extern const int INCORRECT_DATA; } namespace @@ -64,7 +65,7 @@ struct KeysSerializationVersion static void checkVersion(UInt64 version) { if (version != SharedDictionariesWithAdditionalKeys) - throw Exception("Invalid version for SerializationLowCardinality key column.", ErrorCodes::LOGICAL_ERROR); + throw Exception("Invalid version for SerializationLowCardinality key column.", ErrorCodes::INCORRECT_DATA); } explicit KeysSerializationVersion(UInt64 version) : value(static_cast(version)) { checkVersion(version); } @@ -105,7 +106,7 @@ struct IndexesSerializationType if (value <= TUInt64) return; - throw Exception("Invalid type for SerializationLowCardinality index column.", ErrorCodes::LOGICAL_ERROR); + throw Exception("Invalid type for SerializationLowCardinality index column.", ErrorCodes::INCORRECT_DATA); } void serialize(WriteBuffer & buffer) const @@ -120,15 +121,24 @@ struct IndexesSerializationType writeIntBinary(val, buffer); } - void deserialize(ReadBuffer & buffer) + void deserialize(ReadBuffer & buffer, const ISerialization::DeserializeBinaryBulkSettings & settings) { SerializationType val; readIntBinary(val, buffer); + checkType(val); has_additional_keys = (val & HasAdditionalKeysBit) != 0; need_global_dictionary = (val & NeedGlobalDictionaryBit) != 0; need_update_dictionary = (val & NeedUpdateDictionary) != 0; type = static_cast(resetFlags(val)); + + if (settings.native_format) + { + if (need_global_dictionary) + throw Exception(ErrorCodes::INCORRECT_DATA, + "LowCardinality indexes serialization type for Native format " + "cannot use global dictionary"); + } } IndexesSerializationType(const IColumn & column, @@ -519,8 +529,8 @@ void SerializationLowCardinality::serializeBinaryBulkWithMultipleStreams( /// Insert used_keys into global dictionary and update sub_index. 
auto indexes_with_overflow = global_dictionary->uniqueInsertRangeWithOverflow(*keys, 0, keys->size(), settings.low_cardinality_max_dictionary_size); - size_t max_size = settings.low_cardinality_max_dictionary_size + indexes_with_overflow.overflowed_keys->size(); - ColumnLowCardinality::Index(indexes_with_overflow.indexes->getPtr()).check(max_size); + // size_t max_size = settings.low_cardinality_max_dictionary_size + indexes_with_overflow.overflowed_keys->size(); + // ColumnLowCardinality::Index(indexes_with_overflow.indexes->getPtr()).check(max_size); if (global_dictionary->size() > settings.low_cardinality_max_dictionary_size) throw Exception("Got dictionary with size " + toString(global_dictionary->size()) + @@ -614,6 +624,7 @@ void SerializationLowCardinality::deserializeBinaryBulkWithMultipleStreams( { UInt64 num_keys; readIntBinary(num_keys, *indexes_stream); + auto keys_type = removeNullable(dictionary_type); auto additional_keys = keys_type->createColumn(); dict_inner_serialization->deserializeBinaryBulk(*additional_keys, *indexes_stream, num_keys, 0); @@ -660,10 +671,10 @@ void SerializationLowCardinality::deserializeBinaryBulkWithMultipleStreams( { auto maps = mapIndexWithAdditionalKeys(*indexes_column, global_dictionary->size()); - ColumnLowCardinality::Index(maps.additional_keys_map->getPtr()).check(additional_keys->size()); + // ColumnLowCardinality::Index(maps.additional_keys_map->getPtr()).check(additional_keys->size()); - ColumnLowCardinality::Index(indexes_column->getPtr()).check( - maps.dictionary_map->size() + maps.additional_keys_map->size()); + // ColumnLowCardinality::Index(indexes_column->getPtr()).check( + // maps.dictionary_map->size() + maps.additional_keys_map->size()); auto used_keys = IColumn::mutate(global_dictionary->getNestedColumn()->index(*maps.dictionary_map, 0)); @@ -702,7 +713,7 @@ void SerializationLowCardinality::deserializeBinaryBulkWithMultipleStreams( auto & index_type = low_cardinality_state->index_type; auto & global_dictionary = low_cardinality_state->global_dictionary; - index_type.deserialize(*indexes_stream); + index_type.deserialize(*indexes_stream, settings); bool need_update_dictionary = !global_dictionary || index_type.need_update_dictionary || low_cardinality_state->need_update_dictionary; diff --git a/tests/queries/0_stateless/2010_lc_native.python b/tests/queries/0_stateless/2010_lc_native.python new file mode 100755 index 000000000000..c850bf3f9060 --- /dev/null +++ b/tests/queries/0_stateless/2010_lc_native.python @@ -0,0 +1,302 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +import socket +import os + +CLICKHOUSE_HOST = os.environ.get('CLICKHOUSE_HOST', '127.0.0.1') +CLICKHOUSE_PORT = int(os.environ.get('CLICKHOUSE_PORT_TCP', '900000')) +CLICKHOUSE_DATABASE = os.environ.get('CLICKHOUSE_DATABASE', 'default') + +def writeVarUInt(x, ba): + for _ in range(0, 9): + + byte = x & 0x7F + if x > 0x7F: + byte |= 0x80 + + ba.append(byte) + + x >>= 7 + if x == 0: + return + + +def writeStringBinary(s, ba): + b = bytes(s, 'utf-8') + writeVarUInt(len(s), ba) + ba.extend(b) + + +def readStrict(s, size = 1): + res = bytearray() + while size: + cur = s.recv(size) + # if not res: + # raise "Socket is closed" + size -= len(cur) + res.extend(cur) + + return res + + +def readUInt(s, size=1): + res = readStrict(s, size) + val = 0 + for i in range(len(res)): + val += res[i] << (i * 8) + return val + +def readUInt8(s): + return readUInt(s) + +def readUInt16(s): + return readUInt(s, 2) + +def readUInt32(s): + return readUInt(s, 4) + +def 
readUInt64(s): + return readUInt(s, 8) + +def readVarUInt(s): + x = 0 + for i in range(9): + byte = readStrict(s)[0] + x |= (byte & 0x7F) << (7 * i) + + if not byte & 0x80: + return x + + return x + + +def readStringBinary(s): + size = readVarUInt(s) + s = readStrict(s, size) + return s.decode('utf-8') + + +def sendHello(s): + ba = bytearray() + writeVarUInt(0, ba) # Hello + writeStringBinary('simple native protocol', ba) + writeVarUInt(21, ba) + writeVarUInt(9, ba) + writeVarUInt(54449, ba) + writeStringBinary('default', ba) # database + writeStringBinary('default', ba) # user + writeStringBinary('', ba) # pwd + s.sendall(ba) + + +def receiveHello(s): + p_type = readVarUInt(s) + assert (p_type == 0) # Hello + server_name = readStringBinary(s) + # print("Server name: ", server_name) + server_version_major = readVarUInt(s) + # print("Major: ", server_version_major) + server_version_minor = readVarUInt(s) + # print("Minor: ", server_version_minor) + server_revision = readVarUInt(s) + # print("Revision: ", server_revision) + server_timezone = readStringBinary(s) + # print("Timezone: ", server_timezone) + server_display_name = readStringBinary(s) + # print("Display name: ", server_display_name) + server_version_patch = readVarUInt(s) + # print("Version patch: ", server_version_patch) + + +def serializeClientInfo(ba): + writeStringBinary('default', ba) # initial_user + writeStringBinary('123456', ba) # initial_query_id + writeStringBinary('127.0.0.1:9000', ba) # initial_address + ba.extend([0] * 8) # initial_query_start_time_microseconds + ba.append(1) # TCP + writeStringBinary('os_user', ba) # os_user + writeStringBinary('client_hostname', ba) # client_hostname + writeStringBinary('client_name', ba) # client_name + writeVarUInt(21, ba) + writeVarUInt(9, ba) + writeVarUInt(54449, ba) + writeStringBinary('', ba) # quota_key + writeVarUInt(0, ba) # distributed_depth + writeVarUInt(1, ba) # client_version_patch + ba.append(0) # No telemetry + + +def sendQuery(s, query): + ba = bytearray() + writeVarUInt(1, ba) # query + writeStringBinary('123456', ba) + + ba.append(1) # INITIAL_QUERY + + # client info + serializeClientInfo(ba) + + writeStringBinary('', ba) # No settings + writeStringBinary('', ba) # No interserver secret + writeVarUInt(2, ba) # Stage - Complete + ba.append(0) # No compression + writeStringBinary(query + ' settings input_format_defaults_for_omitted_fields=0', ba) # query, finally + s.sendall(ba) + + +def serializeBlockInfo(ba): + writeVarUInt(1, ba) # 1 + ba.append(0) # is_overflows + writeVarUInt(2, ba) # 2 + writeVarUInt(0, ba) # 0 + ba.extend([0] * 4) # bucket_num + + +def sendEmptyBlock(s): + ba = bytearray() + writeVarUInt(2, ba) # Data + writeStringBinary('', ba) + serializeBlockInfo(ba) + writeVarUInt(0, ba) # rows + writeVarUInt(0, ba) # columns + s.sendall(ba) + + +def readHeader(s): + readVarUInt(s) # Data + readStringBinary(s) # external table name + # BlockInfo + readVarUInt(s) # 1 + readUInt8(s) # is_overflows + readVarUInt(s) # 2 + readUInt32(s) # bucket_num + readVarUInt(s) # 0 + columns = readVarUInt(s) # rows + rows = readVarUInt(s) # columns + print("Rows {} Columns {}".format(rows, columns)) + for _ in range(columns): + col_name = readStringBinary(s) + type_name = readStringBinary(s) + print("Column {} type {}".format(col_name, type_name)) + + +def readException(s): + assert(readVarUInt(s) == 2) + code = readUInt32(s) + name = readStringBinary(s) + text = readStringBinary(s) + readStringBinary(s) # trace + assert(readUInt8(s) == 0) # has_nested + print("code {}: 
{}".format(code, text.replace('DB::Exception:', ''))) + + +def insertValidLowCardinalityRow(): + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: + s.settimeout(10) + s.connect((CLICKHOUSE_HOST, CLICKHOUSE_PORT)) + sendHello(s) + receiveHello(s) + sendQuery(s, 'insert into {}.tab format TSV'.format(CLICKHOUSE_DATABASE)) + + # external tables + sendEmptyBlock(s) + readHeader(s) + + # Data + ba = bytearray() + writeVarUInt(2, ba) # Data + writeStringBinary('', ba) + serializeBlockInfo(ba) + writeVarUInt(1, ba) # rows + writeVarUInt(1, ba) # columns + writeStringBinary('x', ba) + writeStringBinary('LowCardinality(String)', ba) + ba.extend([1] + [0] * 7) # SharedDictionariesWithAdditionalKeys + ba.extend([3, 2] + [0] * 6) # indexes type: UInt64 [3], with additional keys [2] + ba.extend([1] + [0] * 7) # num_keys in dict + writeStringBinary('hello', ba) # key + ba.extend([1] + [0] * 7) # num_indexes + ba.extend([0] * 8) # UInt64 index (0 for 'hello') + s.sendall(ba) + + # Fin block + sendEmptyBlock(s) + + assert(readVarUInt(s) == 5) # End of stream + s.close() + + +def insertLowCardinalityRowWithIndexOverflow(): + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: + s.settimeout(10) + s.connect((CLICKHOUSE_HOST, CLICKHOUSE_PORT)) + sendHello(s) + receiveHello(s) + sendQuery(s, 'insert into {}.tab format TSV'.format(CLICKHOUSE_DATABASE)) + + # external tables + sendEmptyBlock(s) + readHeader(s) + + # Data + ba = bytearray() + writeVarUInt(2, ba) # Data + writeStringBinary('', ba) + serializeBlockInfo(ba) + writeVarUInt(1, ba) # rows + writeVarUInt(1, ba) # columns + writeStringBinary('x', ba) + writeStringBinary('LowCardinality(String)', ba) + ba.extend([1] + [0] * 7) # SharedDictionariesWithAdditionalKeys + ba.extend([3, 2] + [0] * 6) # indexes type: UInt64 [3], with additional keys [2] + ba.extend([1] + [0] * 7) # num_keys in dict + writeStringBinary('hello', ba) # key + ba.extend([1] + [0] * 7) # num_indexes + ba.extend([0] * 7 + [1]) # UInt64 index (overflow) + s.sendall(ba) + + readException(s) + s.close() + + +def insertLowCardinalityRowWithIncorrectDictType(): + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: + s.settimeout(10) + s.connect((CLICKHOUSE_HOST, CLICKHOUSE_PORT)) + sendHello(s) + receiveHello(s) + sendQuery(s, 'insert into {}.tab format TSV'.format(CLICKHOUSE_DATABASE)) + + # external tables + sendEmptyBlock(s) + readHeader(s) + + # Data + ba = bytearray() + writeVarUInt(2, ba) # Data + writeStringBinary('', ba) + serializeBlockInfo(ba) + writeVarUInt(1, ba) # rows + writeVarUInt(1, ba) # columns + writeStringBinary('x', ba) + writeStringBinary('LowCardinality(String)', ba) + ba.extend([1] + [0] * 7) # SharedDictionariesWithAdditionalKeys + ba.extend([3, 3] + [0] * 6) # indexes type: UInt64 [3], with global dict and add keys [1 + 2] + ba.extend([1] + [0] * 7) # num_keys in dict + writeStringBinary('hello', ba) # key + ba.extend([1] + [0] * 7) # num_indexes + ba.extend([0] * 8) # UInt64 index (overflow) + s.sendall(ba) + + readException(s) + s.close() + + +def main(): + insertValidLowCardinalityRow() + insertLowCardinalityRowWithIndexOverflow() + insertLowCardinalityRowWithIncorrectDictType() + +if __name__ == "__main__": + main() diff --git a/tests/queries/0_stateless/2010_lc_native.reference b/tests/queries/0_stateless/2010_lc_native.reference new file mode 100644 index 000000000000..0167f05c952b --- /dev/null +++ b/tests/queries/0_stateless/2010_lc_native.reference @@ -0,0 +1,8 @@ +Rows 0 Columns 1 +Column x type LowCardinality(String) +Rows 
0 Columns 1 +Column x type LowCardinality(String) +code 117: Index for LowCardinality is out of range. Dictionary size is 1, but found index with value 72057594037927936 +Rows 0 Columns 1 +Column x type LowCardinality(String) +code 117: LowCardinality indexes serialization type for Native format cannot use global dictionary diff --git a/tests/queries/0_stateless/2010_lc_native.sh b/tests/queries/0_stateless/2010_lc_native.sh new file mode 100755 index 000000000000..0890e271318c --- /dev/null +++ b/tests/queries/0_stateless/2010_lc_native.sh @@ -0,0 +1,13 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -q "drop table if exists tab;" +$CLICKHOUSE_CLIENT -q "create table tab(x LowCardinality(String)) engine = MergeTree order by tuple();" + +# We should have correct env vars from shell_config.sh to run this test +python3 "$CURDIR"/2010_lc_native.python + +$CLICKHOUSE_CLIENT -q "drop table if exists tab;" From 7f9862b1dfa76c10c2c5e96717abf5a570c5fbae Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 10 Sep 2021 21:11:16 +0300 Subject: [PATCH 057/472] Backport #28762 to 21.9: Fix usage of nested columns with non-array columns with the same prefix [2] --- src/Storages/MergeTree/IMergeTreeReader.cpp | 5 +++-- .../02017_columns_with_dot_2.reference | 2 ++ .../0_stateless/02017_columns_with_dot_2.sql | 18 ++++++++++++++++++ 3 files changed, 23 insertions(+), 2 deletions(-) create mode 100644 tests/queries/0_stateless/02017_columns_with_dot_2.reference create mode 100644 tests/queries/0_stateless/02017_columns_with_dot_2.sql diff --git a/src/Storages/MergeTree/IMergeTreeReader.cpp b/src/Storages/MergeTree/IMergeTreeReader.cpp index d659259e1a9d..d0d845ed6a31 100644 --- a/src/Storages/MergeTree/IMergeTreeReader.cpp +++ b/src/Storages/MergeTree/IMergeTreeReader.cpp @@ -135,10 +135,11 @@ void IMergeTreeReader::fillMissingColumns(Columns & res_columns, bool & should_e String offsets_name = Nested::extractTableName(name); auto offset_it = offset_columns.find(offsets_name); - if (offset_it != offset_columns.end()) + const auto * array_type = typeid_cast(type.get()); + if (offset_it != offset_columns.end() && array_type) { + const auto & nested_type = array_type->getNestedType(); ColumnPtr offsets_column = offset_it->second; - DataTypePtr nested_type = typeid_cast(*type).getNestedType(); size_t nested_rows = typeid_cast(*offsets_column).getData().back(); ColumnPtr nested_column = diff --git a/tests/queries/0_stateless/02017_columns_with_dot_2.reference b/tests/queries/0_stateless/02017_columns_with_dot_2.reference new file mode 100644 index 000000000000..8d43601632c4 --- /dev/null +++ b/tests/queries/0_stateless/02017_columns_with_dot_2.reference @@ -0,0 +1,2 @@ +123 asd [1,2] +123 asd [1,2] 0 diff --git a/tests/queries/0_stateless/02017_columns_with_dot_2.sql b/tests/queries/0_stateless/02017_columns_with_dot_2.sql new file mode 100644 index 000000000000..eefe52b74f34 --- /dev/null +++ b/tests/queries/0_stateless/02017_columns_with_dot_2.sql @@ -0,0 +1,18 @@ +DROP TABLE IF EXISTS test_nested; + +CREATE TABLE test_nested +( + `id` String, + `with_dot.str` String, + `with_dot.array` Array(Int32) +) +ENGINE = MergeTree() +ORDER BY id; + +INSERT INTO test_nested VALUES('123', 'asd', [1,2]); +SELECT * FROM test_nested; + +ALTER TABLE test_nested ADD COLUMN `with_dot.bool` UInt8; +SELECT * FROM test_nested; + +DROP TABLE test_nested; From 177697ef0a2d7156c7a9af553cea8aea961cac6d 
Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Sat, 11 Sep 2021 19:16:28 +0300 Subject: [PATCH 058/472] =?UTF-8?q?Backport=20#28299=20to=2021.9:=20Use=20?= =?UTF-8?q?real=20tmp=20file=20instead=20of=20predefined=20=E2=80=9Crows?= =?UTF-8?q?=5Fsources\"=20for=20vertical=20merges?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index ab774b95212d..7296e173ed10 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -791,7 +791,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor auto compression_codec = data.getCompressionCodecForPart(merge_entry->total_size_bytes_compressed, new_data_part->ttl_infos, time_of_merge); auto tmp_disk = context->getTemporaryVolume()->getDisk(); - String rows_sources_file_path; + std::unique_ptr rows_sources_file; std::unique_ptr rows_sources_uncompressed_write_buf; std::unique_ptr rows_sources_write_buf; std::optional column_sizes; @@ -800,9 +800,8 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor if (chosen_merge_algorithm == MergeAlgorithm::Vertical) { - tmp_disk->createDirectories(new_part_tmp_path); - rows_sources_file_path = new_part_tmp_path + "rows_sources"; - rows_sources_uncompressed_write_buf = tmp_disk->writeFile(rows_sources_file_path); + rows_sources_file = createTemporaryFile(tmp_disk->getPath()); + rows_sources_uncompressed_write_buf = tmp_disk->writeFile(fileName(rows_sources_file->path())); rows_sources_write_buf = std::make_unique(*rows_sources_uncompressed_write_buf); MergeTreeData::DataPart::ColumnToSize merged_column_to_size; @@ -1038,7 +1037,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor + ") differs from number of bytes written to rows_sources file (" + toString(rows_sources_count) + "). 
It is a bug.", ErrorCodes::LOGICAL_ERROR); - CompressedReadBufferFromFile rows_sources_read_buf(tmp_disk->readFile(rows_sources_file_path)); + CompressedReadBufferFromFile rows_sources_read_buf(tmp_disk->readFile(fileName(rows_sources_file->path()))); IMergedBlockOutputStream::WrittenOffsetColumns written_offset_columns; for (size_t column_num = 0, gathering_column_names_size = gathering_column_names.size(); @@ -1109,8 +1108,6 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor merge_entry->bytes_written_uncompressed += column_gathered_stream.getProfileInfo().bytes; merge_entry->progress.store(progress_before + column_sizes->columnWeight(column_name), std::memory_order_relaxed); } - - tmp_disk->removeFile(rows_sources_file_path); } for (const auto & part : parts) From 2b0f7e6e51c4989063fc213dc559c772497ff55d Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Sun, 12 Sep 2021 13:16:52 +0300 Subject: [PATCH 059/472] Backport #28887 to 21.9: Fix bug with LowCardinality in short-curcuit function evaluation --- src/Columns/MaskOperations.cpp | 12 +++++--- src/Core/Settings.h | 2 +- .../0_stateless/01822_short_circuit.reference | 30 +++++++++++++++++++ .../0_stateless/01822_short_circuit.sql | 5 ++++ 4 files changed, 44 insertions(+), 5 deletions(-) diff --git a/src/Columns/MaskOperations.cpp b/src/Columns/MaskOperations.cpp index 759d0af71272..b63f2d256653 100644 --- a/src/Columns/MaskOperations.cpp +++ b/src/Columns/MaskOperations.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include namespace DB @@ -177,19 +178,21 @@ MaskInfo extractMaskFromConstOrNull( template MaskInfo extractMaskImpl( PaddedPODArray & mask, - const ColumnPtr & column, + const ColumnPtr & col, UInt8 null_value, const PaddedPODArray * null_bytemap, PaddedPODArray * nulls = nullptr) { + auto column = col->convertToFullColumnIfLowCardinality(); + /// Special implementation for Null and Const columns. if (column->onlyNull() || checkAndGetColumn(*column)) return extractMaskFromConstOrNull(mask, column, null_value, nulls); - if (const auto * col = checkAndGetColumn(*column)) + if (const auto * nullable_column = checkAndGetColumn(*column)) { - const PaddedPODArray & null_map = col->getNullMapData(); - return extractMaskImpl(mask, col->getNestedColumnPtr(), null_value, &null_map, nulls); + const PaddedPODArray & null_map = nullable_column->getNullMapData(); + return extractMaskImpl(mask, nullable_column->getNestedColumnPtr(), null_value, &null_map, nulls); } MaskInfo mask_info; @@ -314,3 +317,4 @@ void copyMask(const PaddedPODArray & from, PaddedPODArray & to) } } + diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 30f3c69ff4ac..5b4c3d2d2d3a 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -492,7 +492,7 @@ class IColumn; M(UInt64, offset, 0, "Offset on read rows from the most 'end' result for select query", 0) \ \ M(UInt64, function_range_max_elements_in_block, 500000000, "Maximum number of values generated by function 'range' per block of data (sum of array sizes for every row in a block, see also 'max_block_size' and 'min_insert_block_size_rows'). It is a safety threshold.", 0) \ - M(ShortCircuitFunctionEvaluation, short_circuit_function_evaluation, ShortCircuitFunctionEvaluation::ENABLE, "Setting for short-circuit function evaluation configuration. 
Possible values: 'enable', 'disable', 'force_enable'", 0) \ + M(ShortCircuitFunctionEvaluation, short_circuit_function_evaluation, ShortCircuitFunctionEvaluation::ENABLE, "Setting for short-circuit function evaluation configuration. Possible values: 'enable' - use short-circuit function evaluation for functions that are suitable for it, 'disable' - disable short-circuit function evaluation, 'force_enable' - use short-circuit function evaluation for all functions.", 0) \ \ /** Experimental functions */ \ M(Bool, allow_experimental_funnel_functions, false, "Enable experimental functions for funnel analysis.", 0) \ diff --git a/tests/queries/0_stateless/01822_short_circuit.reference b/tests/queries/0_stateless/01822_short_circuit.reference index 96c4e1612447..949d2fa4985b 100644 --- a/tests/queries/0_stateless/01822_short_circuit.reference +++ b/tests/queries/0_stateless/01822_short_circuit.reference @@ -1803,3 +1803,33 @@ Decimal32 \N \N \N +0 +1 +0 +1 +0 +0 +1 +1 +1 +1 +1 +1 +3 +3 +5 +5 +7 +7 +9 +9 +1 +1 +3 +3 +5 +5 +7 +7 +9 +9 diff --git a/tests/queries/0_stateless/01822_short_circuit.sql b/tests/queries/0_stateless/01822_short_circuit.sql index fe8a0315d4a4..1f0e04cb4b5b 100644 --- a/tests/queries/0_stateless/01822_short_circuit.sql +++ b/tests/queries/0_stateless/01822_short_circuit.sql @@ -148,3 +148,8 @@ select if(isNull(x), Null, 42 / x) from (select CAST(materialize(Null), 'Nullabl select if(isNull(x), Null, x / 0) from (select CAST(materialize(Null), 'Nullable(Decimal32(2))') as x); select if(isNull(x), Null, intDiv(42, x)) from (select CAST(materialize(Null), 'Nullable(Int64)') as x); + +select number % 2 and toLowCardinality(number) from numbers(5); +select number % 2 or toLowCardinality(number) from numbers(5); +select if(toLowCardinality(number) % 2, number, number + 1) from numbers(10); +select multiIf(toLowCardinality(number) % 2, number, number + 1) from numbers(10); From 35d56491f7838ef4e7fc9dc60ebcd6dde19b94e1 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Sun, 12 Sep 2021 13:17:29 +0300 Subject: [PATCH 060/472] Backport #28904 to 21.9: Fix higher-order array functions (arrayCompact/arrayDifference/arrayCumSumNonNegative) with consts --- src/Functions/array/arrayCompact.cpp | 2 +- src/Functions/array/arrayCumSumNonNegative.cpp | 2 +- src/Functions/array/arrayDifference.cpp | 2 +- .../queries/0_stateless/01020_function_array_compact.reference | 2 ++ tests/queries/0_stateless/01020_function_array_compact.sql | 2 ++ tests/queries/0_stateless/02026_arrayDifference_const.reference | 1 + tests/queries/0_stateless/02026_arrayDifference_const.sql | 1 + .../0_stateless/02027_arrayCumSumNonNegative_const.reference | 1 + .../queries/0_stateless/02027_arrayCumSumNonNegative_const.sql | 1 + 9 files changed, 11 insertions(+), 3 deletions(-) create mode 100644 tests/queries/0_stateless/02026_arrayDifference_const.reference create mode 100644 tests/queries/0_stateless/02026_arrayDifference_const.sql create mode 100644 tests/queries/0_stateless/02027_arrayCumSumNonNegative_const.reference create mode 100644 tests/queries/0_stateless/02027_arrayCumSumNonNegative_const.sql diff --git a/src/Functions/array/arrayCompact.cpp b/src/Functions/array/arrayCompact.cpp index e0f73207da8d..4126f4035165 100644 --- a/src/Functions/array/arrayCompact.cpp +++ b/src/Functions/array/arrayCompact.cpp @@ -16,7 +16,6 @@ namespace ErrorCodes struct ArrayCompactImpl { - static bool useDefaultImplementationForConstants() { return true; } static bool needBoolean() { return false; } static bool 
needExpression() { return false; } static bool needOneArray() { return false; } @@ -129,6 +128,7 @@ struct ArrayCompactImpl { ColumnPtr res; + mapped = mapped->convertToFullColumnIfConst(); if (!(executeType< UInt8 >(mapped, array, res) || executeType< UInt16>(mapped, array, res) || executeType< UInt32>(mapped, array, res) || diff --git a/src/Functions/array/arrayCumSumNonNegative.cpp b/src/Functions/array/arrayCumSumNonNegative.cpp index 288422c1c9c1..2d4928f35b6a 100644 --- a/src/Functions/array/arrayCumSumNonNegative.cpp +++ b/src/Functions/array/arrayCumSumNonNegative.cpp @@ -19,7 +19,6 @@ namespace ErrorCodes */ struct ArrayCumSumNonNegativeImpl { - static bool useDefaultImplementationForConstants() { return true; } static bool needBoolean() { return false; } static bool needExpression() { return false; } static bool needOneArray() { return false; } @@ -100,6 +99,7 @@ struct ArrayCumSumNonNegativeImpl { ColumnPtr res; + mapped = mapped->convertToFullColumnIfConst(); if (executeType< UInt8 , UInt64>(mapped, array, res) || executeType< UInt16, UInt64>(mapped, array, res) || executeType< UInt32, UInt64>(mapped, array, res) || diff --git a/src/Functions/array/arrayDifference.cpp b/src/Functions/array/arrayDifference.cpp index 7d11c6e89c8b..ce223b5c9d86 100644 --- a/src/Functions/array/arrayDifference.cpp +++ b/src/Functions/array/arrayDifference.cpp @@ -20,7 +20,6 @@ namespace ErrorCodes */ struct ArrayDifferenceImpl { - static bool useDefaultImplementationForConstants() { return true; } static bool needBoolean() { return false; } static bool needExpression() { return false; } static bool needOneArray() { return false; } @@ -129,6 +128,7 @@ struct ArrayDifferenceImpl { ColumnPtr res; + mapped = mapped->convertToFullColumnIfConst(); if (executeType< UInt8 , Int16>(mapped, array, res) || executeType< UInt16, Int32>(mapped, array, res) || executeType< UInt32, Int64>(mapped, array, res) || diff --git a/tests/queries/0_stateless/01020_function_array_compact.reference b/tests/queries/0_stateless/01020_function_array_compact.reference index 6627a4372519..4a6265b4f55f 100644 --- a/tests/queries/0_stateless/01020_function_array_compact.reference +++ b/tests/queries/0_stateless/01020_function_array_compact.reference @@ -7,3 +7,5 @@ [1,2,1] [2,1] [1,2,3,4,5] +[0] +[0] diff --git a/tests/queries/0_stateless/01020_function_array_compact.sql b/tests/queries/0_stateless/01020_function_array_compact.sql index eea69dcb6da0..d4aaa4d3fca6 100644 --- a/tests/queries/0_stateless/01020_function_array_compact.sql +++ b/tests/queries/0_stateless/01020_function_array_compact.sql @@ -7,3 +7,5 @@ select arrayCompact([1,1,2]); select arrayCompact([1,2,1]); select arrayCompact([2,1,1]); select arrayCompact([1,2,2,3,3,3,4,4,4,4,5,5,5,5,5]); +SELECT arrayCompact(x->0, [NULL]); +SELECT toString(arrayCompact(x->0, [NULL])); diff --git a/tests/queries/0_stateless/02026_arrayDifference_const.reference b/tests/queries/0_stateless/02026_arrayDifference_const.reference new file mode 100644 index 000000000000..7a0beed21a5e --- /dev/null +++ b/tests/queries/0_stateless/02026_arrayDifference_const.reference @@ -0,0 +1 @@ +[0,0] diff --git a/tests/queries/0_stateless/02026_arrayDifference_const.sql b/tests/queries/0_stateless/02026_arrayDifference_const.sql new file mode 100644 index 000000000000..55a48d2bedb2 --- /dev/null +++ b/tests/queries/0_stateless/02026_arrayDifference_const.sql @@ -0,0 +1 @@ +SELECT toString(arrayDifference(x->0, [1, 2])); diff --git 
a/tests/queries/0_stateless/02027_arrayCumSumNonNegative_const.reference b/tests/queries/0_stateless/02027_arrayCumSumNonNegative_const.reference new file mode 100644 index 000000000000..7a0beed21a5e --- /dev/null +++ b/tests/queries/0_stateless/02027_arrayCumSumNonNegative_const.reference @@ -0,0 +1 @@ +[0,0] diff --git a/tests/queries/0_stateless/02027_arrayCumSumNonNegative_const.sql b/tests/queries/0_stateless/02027_arrayCumSumNonNegative_const.sql new file mode 100644 index 000000000000..f95220731544 --- /dev/null +++ b/tests/queries/0_stateless/02027_arrayCumSumNonNegative_const.sql @@ -0,0 +1 @@ +SELECT toString(arrayCumSumNonNegative(x->0, [1, 2])); From 73f8ac6b4c68e54f1c4a574d8806e103f4866c9c Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Sun, 12 Sep 2021 19:19:00 +0300 Subject: [PATCH 061/472] Backport #28873 to 21.9: Fix reading of subcolumns from compact parts --- src/Storages/MergeTree/MergeTreeReaderCompact.cpp | 9 ++++++++- .../02025_subcolumns_compact_parts.reference | 8 ++++++++ .../0_stateless/02025_subcolumns_compact_parts.sql | 14 ++++++++++++++ 3 files changed, 30 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/02025_subcolumns_compact_parts.reference create mode 100644 tests/queries/0_stateless/02025_subcolumns_compact_parts.sql diff --git a/src/Storages/MergeTree/MergeTreeReaderCompact.cpp b/src/Storages/MergeTree/MergeTreeReaderCompact.cpp index 783a37cce604..bdbec71e785c 100644 --- a/src/Storages/MergeTree/MergeTreeReaderCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderCompact.cpp @@ -226,7 +226,14 @@ void MergeTreeReaderCompact::readData( auto serialization = type_in_storage->getDefaultSerialization(); serialization->deserializeBinaryBulkStatePrefix(deserialize_settings, state); serialization->deserializeBinaryBulkWithMultipleStreams(temp_column, rows_to_read, deserialize_settings, state, nullptr); - column = type_in_storage->getSubcolumn(name_and_type.getSubcolumnName(), *temp_column); + + auto subcolumn = type_in_storage->getSubcolumn(name_and_type.getSubcolumnName(), *temp_column); + + /// TODO: Avoid extra copying. 
+ if (column->empty()) + column = subcolumn; + else + column->assumeMutable()->insertRangeFrom(*subcolumn, 0, subcolumn->size()); } else { diff --git a/tests/queries/0_stateless/02025_subcolumns_compact_parts.reference b/tests/queries/0_stateless/02025_subcolumns_compact_parts.reference new file mode 100644 index 000000000000..431d62da742e --- /dev/null +++ b/tests/queries/0_stateless/02025_subcolumns_compact_parts.reference @@ -0,0 +1,8 @@ +0 +0 +0 +0 +0 +0 +40000 +219970 diff --git a/tests/queries/0_stateless/02025_subcolumns_compact_parts.sql b/tests/queries/0_stateless/02025_subcolumns_compact_parts.sql new file mode 100644 index 000000000000..7d1957a1efc6 --- /dev/null +++ b/tests/queries/0_stateless/02025_subcolumns_compact_parts.sql @@ -0,0 +1,14 @@ +DROP TABLE IF EXISTS t_comp_subcolumns; + +CREATE TABLE t_comp_subcolumns (id UInt32, n Nullable(String), arr Array(Array(UInt32))) +ENGINE = MergeTree ORDER BY id; + +INSERT INTO t_comp_subcolumns SELECT number, 'a', [range(number % 11), range(number % 13)] FROM numbers(20000); + +SELECT sum(n.null) FROM t_comp_subcolumns; +SELECT n.null FROM t_comp_subcolumns LIMIT 10000, 5; + +SELECT sum(arr.size0) FROM t_comp_subcolumns; +SELECT sumArray(arr.size1) FROM t_comp_subcolumns; + +DROP TABLE t_comp_subcolumns; From 68662f268298566654d7a5cfe7d2165ec12fb9d9 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Mon, 13 Sep 2021 15:23:52 +0300 Subject: [PATCH 062/472] Backport #28863 to 21.9: Column default dictGet identifier fix --- src/Interpreters/inplaceBlockConversions.cpp | 5 ++- ...lumn_default_dict_get_identifier.reference | 1 + ...015_column_default_dict_get_identifier.sql | 37 +++++++++++++++++++ 3 files changed, 42 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/2015_column_default_dict_get_identifier.reference create mode 100644 tests/queries/0_stateless/2015_column_default_dict_get_identifier.sql diff --git a/src/Interpreters/inplaceBlockConversions.cpp b/src/Interpreters/inplaceBlockConversions.cpp index e40e0635a857..4ba924821d71 100644 --- a/src/Interpreters/inplaceBlockConversions.cpp +++ b/src/Interpreters/inplaceBlockConversions.cpp @@ -63,8 +63,11 @@ void addDefaultRequiredExpressionsRecursively( for (const auto & next_required_column_name : required_columns_names) addDefaultRequiredExpressionsRecursively(block, next_required_column_name, required_column_type, columns, default_expr_list_accum, added_columns, null_as_default); } - else + else if (columns.has(required_column_name)) { + /// In case of dictGet function we allow to use it with identifier dictGet(identifier, 'column_name', key_expression) + /// and this identifier will be in required columns. If such column is not in ColumnsDescription we ignore it. 
+ /// This column is required, but doesn't have default expression, so lets use "default default" auto column = columns.get(required_column_name); auto default_value = column.type->getDefault(); diff --git a/tests/queries/0_stateless/2015_column_default_dict_get_identifier.reference b/tests/queries/0_stateless/2015_column_default_dict_get_identifier.reference new file mode 100644 index 000000000000..29e04d559e12 --- /dev/null +++ b/tests/queries/0_stateless/2015_column_default_dict_get_identifier.reference @@ -0,0 +1 @@ +5 0 diff --git a/tests/queries/0_stateless/2015_column_default_dict_get_identifier.sql b/tests/queries/0_stateless/2015_column_default_dict_get_identifier.sql new file mode 100644 index 000000000000..292f53952d03 --- /dev/null +++ b/tests/queries/0_stateless/2015_column_default_dict_get_identifier.sql @@ -0,0 +1,37 @@ +DROP TABLE IF EXISTS test_table; +CREATE TABLE test_table +( + key_column UInt64, + data_column_1 UInt64, + data_column_2 UInt8 +) +ENGINE = MergeTree +ORDER BY key_column; + +INSERT INTO test_table VALUES (0, 0, 0); + +DROP DICTIONARY IF EXISTS test_dictionary; +CREATE DICTIONARY test_dictionary +( + key_column UInt64 DEFAULT 0, + data_column_1 UInt64 DEFAULT 1, + data_column_2 UInt8 DEFAULT 1 +) +PRIMARY KEY key_column +LAYOUT(DIRECT()) +SOURCE(CLICKHOUSE(TABLE 'test_table')); + +DROP TABLE IF EXISTS test_table_default; +CREATE TABLE test_table_default +( + data_1 DEFAULT dictGetUInt64('test_dictionary', 'data_column_1', toUInt64(0)), + data_2 DEFAULT dictGet(test_dictionary, 'data_column_2', toUInt64(0)) +) +ENGINE=TinyLog; + +INSERT INTO test_table_default(data_1) VALUES (5); +SELECT * FROM test_table_default; + +DROP DICTIONARY test_dictionary; +DROP TABLE test_table; +DROP TABLE test_table_default; From 88abcb0e8a4e5ea16587a2be31fec735574e0ea5 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Mon, 13 Sep 2021 19:25:46 +0300 Subject: [PATCH 063/472] Backport #28864 to 21.9: Fix reordering of REPLACE_RANGE and DROP PART --- .../MergeTree/ReplicatedMergeTreeQueue.cpp | 31 +++++++++++++++++++ src/Storages/StorageReplicatedMergeTree.cpp | 4 +++ 2 files changed, 35 insertions(+) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index 4c5838cde9bb..2c2542b6eb34 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -1254,6 +1254,37 @@ bool ReplicatedMergeTreeQueue::shouldExecuteLogEntry( out_postpone_reason = fmt::format(format_str, entry.znode_name, entry.typeToString(), entry.new_part_name); return false; } + + if (entry.isDropPart(format_version)) + { + /// We should avoid reordering of REPLACE_RANGE and DROP PART (DROP_RANGE), + /// because if replace_range_entry->new_part_names contains drop_range_entry->new_part_name + /// and we execute DROP PART before REPLACE_RANGE, then DROP PART will be no-op + /// (because part is not created yet, so there is nothing to drop; + /// DROP_RANGE does not cover all parts of REPLACE_RANGE, so removePartProducingOpsInRange(...) will not remove anything too) + /// and part will never be removed. Replicas may diverge due to such reordering. + /// We don't need to do anything for other entry types, because removePartProducingOpsInRange(...) will remove them as expected. 
+ + auto drop_part_info = MergeTreePartInfo::fromPartName(entry.new_part_name, format_version); + for (const auto & replace_entry : queue) + { + if (replace_entry->type != LogEntry::REPLACE_RANGE) + continue; + + for (const auto & new_part_name : replace_entry->replace_range_entry->new_part_names) + { + auto new_part_info = MergeTreePartInfo::fromPartName(new_part_name, format_version); + if (!new_part_info.isDisjoint(drop_part_info)) + { + const char * format_str = "Not executing log entry {} of type {} for part {} " + "because it probably depends on {} (REPLACE_RANGE)."; + LOG_TRACE(log, format_str, entry.znode_name, entry.typeToString(), entry.new_part_name, replace_entry->znode_name); + out_postpone_reason = fmt::format(format_str, entry.znode_name, entry.typeToString(), entry.new_part_name, replace_entry->znode_name); + return false; + } + } + } + } } return true; diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 222a66bc4f65..f3ac36b6660e 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -2233,7 +2233,11 @@ void StorageReplicatedMergeTree::executeDropRange(const LogEntry & entry) auto data_parts_lock = lockParts(); parts_to_remove = removePartsInRangeFromWorkingSet(drop_range_info, true, data_parts_lock); if (parts_to_remove.empty()) + { + if (!drop_range_info.isFakeDropRangePart()) + LOG_INFO(log, "Log entry {} tried to drop single part {}, but part does not exist", entry.znode_name, entry.new_part_name); return; + } } if (entry.detach) From 471649f5dc5b30c6ce732e163661809cb2829c7a Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 14 Sep 2021 17:32:29 +0300 Subject: [PATCH 064/472] Backport #28997 to 21.9: Fix num threads in global in subquery --- src/Interpreters/GlobalSubqueriesVisitor.h | 2 +- .../queries/0_stateless/2015_global_in_threads.reference | 2 ++ tests/queries/0_stateless/2015_global_in_threads.sh | 9 +++++++++ 3 files changed, 12 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/2015_global_in_threads.reference create mode 100755 tests/queries/0_stateless/2015_global_in_threads.sh diff --git a/src/Interpreters/GlobalSubqueriesVisitor.h b/src/Interpreters/GlobalSubqueriesVisitor.h index 6a87527dc9c0..99197e81f803 100644 --- a/src/Interpreters/GlobalSubqueriesVisitor.h +++ b/src/Interpreters/GlobalSubqueriesVisitor.h @@ -164,7 +164,7 @@ class GlobalSubqueriesMatcher return table_out; }); auto executor = io.pipeline.execute(); - executor->execute(io.pipeline.getNumStreams()); + executor->execute(io.pipeline.getNumThreads()); } else { diff --git a/tests/queries/0_stateless/2015_global_in_threads.reference b/tests/queries/0_stateless/2015_global_in_threads.reference new file mode 100644 index 000000000000..af81158ecae0 --- /dev/null +++ b/tests/queries/0_stateless/2015_global_in_threads.reference @@ -0,0 +1,2 @@ +10 +1 diff --git a/tests/queries/0_stateless/2015_global_in_threads.sh b/tests/queries/0_stateless/2015_global_in_threads.sh new file mode 100755 index 000000000000..c112e47fe92f --- /dev/null +++ b/tests/queries/0_stateless/2015_global_in_threads.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} --log_queries=1 --max_threads=32 --query_id "2015_${CLICKHOUSE_DATABASE}_query" -q "select count() from remote('127.0.0.{2,3}', numbers(10)) where number global in (select number % 5 from numbers_mt(1000000))" +${CLICKHOUSE_CLIENT} -q "system flush logs" +${CLICKHOUSE_CLIENT} -q "select length(thread_ids) >= 32 from system.query_log where event_date = today() and query_id = '2015_${CLICKHOUSE_DATABASE}_query' and type = 'QueryFinish' and current_database = currentDatabase()" From 002851188da09d6235ffcf85e32e0a33a7f340f2 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 15 Sep 2021 19:35:39 +0300 Subject: [PATCH 065/472] Backport #28636 to 21.9: Fix nullable/lowcardinality primary key with constant conversion --- src/Storages/MergeTree/KeyCondition.cpp | 28 ++++++++++++++----- .../02023_nullable_int_uint_where.reference | 1 + .../02023_nullable_int_uint_where.sql | 10 +++++++ 3 files changed, 32 insertions(+), 7 deletions(-) create mode 100644 tests/queries/0_stateless/02023_nullable_int_uint_where.reference create mode 100644 tests/queries/0_stateless/02023_nullable_int_uint_where.sql diff --git a/src/Storages/MergeTree/KeyCondition.cpp b/src/Storages/MergeTree/KeyCondition.cpp index b8896d535b4e..9521feabac5a 100644 --- a/src/Storages/MergeTree/KeyCondition.cpp +++ b/src/Storages/MergeTree/KeyCondition.cpp @@ -1342,34 +1342,48 @@ bool KeyCondition::tryParseAtomFromAST(const ASTPtr & node, ContextPtr context, } } + key_expr_type = recursiveRemoveLowCardinality(key_expr_type); + DataTypePtr key_expr_type_not_null; + bool key_expr_type_is_nullable = false; + if (const auto * nullable_type = typeid_cast(key_expr_type.get())) + { + key_expr_type_is_nullable = true; + key_expr_type_not_null = nullable_type->getNestedType(); + } + else + key_expr_type_not_null = key_expr_type; + bool cast_not_needed = is_set_const /// Set args are already casted inside Set::createFromAST - || ((isNativeNumber(key_expr_type) || isDateTime(key_expr_type)) + || ((isNativeNumber(key_expr_type_not_null) || isDateTime(key_expr_type_not_null)) && (isNativeNumber(const_type) || isDateTime(const_type))); /// Numbers and DateTime are accurately compared without cast. - if (!cast_not_needed && !key_expr_type->equals(*const_type)) + if (!cast_not_needed && !key_expr_type_not_null->equals(*const_type)) { if (const_value.getType() == Field::Types::String) { - const_value = convertFieldToType(const_value, *key_expr_type); + const_value = convertFieldToType(const_value, *key_expr_type_not_null); if (const_value.isNull()) return false; // No need to set is_constant_transformed because we're doing exact conversion } else { - DataTypePtr common_type = getLeastSupertype({key_expr_type, const_type}); + DataTypePtr common_type = getLeastSupertype({key_expr_type_not_null, const_type}); if (!const_type->equals(*common_type)) { castValueToType(common_type, const_value, const_type, node); // Need to set is_constant_transformed unless we're doing exact conversion - if (!key_expr_type->equals(*common_type)) + if (!key_expr_type_not_null->equals(*common_type)) is_constant_transformed = true; } - if (!key_expr_type->equals(*common_type)) + if (!key_expr_type_not_null->equals(*common_type)) { + auto common_type_maybe_nullable + = key_expr_type_is_nullable ? 
DataTypePtr(std::make_shared(common_type)) : common_type; ColumnsWithTypeAndName arguments{ - {nullptr, key_expr_type, ""}, {DataTypeString().createColumnConst(1, common_type->getName()), common_type, ""}}; + {nullptr, key_expr_type, ""}, + {DataTypeString().createColumnConst(1, common_type_maybe_nullable->getName()), common_type_maybe_nullable, ""}}; FunctionOverloadResolverPtr func_builder_cast = CastInternalOverloadResolver::createImpl(); auto func_cast = func_builder_cast->build(arguments); diff --git a/tests/queries/0_stateless/02023_nullable_int_uint_where.reference b/tests/queries/0_stateless/02023_nullable_int_uint_where.reference new file mode 100644 index 000000000000..0811eaa4efb9 --- /dev/null +++ b/tests/queries/0_stateless/02023_nullable_int_uint_where.reference @@ -0,0 +1 @@ +21585718595728998 diff --git a/tests/queries/0_stateless/02023_nullable_int_uint_where.sql b/tests/queries/0_stateless/02023_nullable_int_uint_where.sql new file mode 100644 index 000000000000..4318fbf506fc --- /dev/null +++ b/tests/queries/0_stateless/02023_nullable_int_uint_where.sql @@ -0,0 +1,10 @@ +drop table if exists t1; + +set allow_suspicious_low_cardinality_types = 1; +create table t1 (id LowCardinality(Nullable(Int64))) engine MergeTree order by id settings allow_nullable_key = 1, index_granularity = 1; + +insert into t1 values (21585718595728998), (null); + +select * from t1 where id = 21585718595728998; + +drop table t1; From aafd51a77cbb05242e1108555d97cc932d452ad8 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 16 Sep 2021 11:08:05 +0300 Subject: [PATCH 066/472] Merge pull request #29059 from ClickHouse/fix_images1 Fix some images in release PRs (cherry picked from commit bb686f5a60c6b46572c7106c52ea09ddf2954357) --- docker/packager/binary/Dockerfile | 2 +- docker/test/integration/kerberos_kdc/Dockerfile | 4 +++- docker/test/integration/mysql_golang_client/Dockerfile | 2 +- docker/test/pvs/Dockerfile | 2 +- docker/test/sqlancer/Dockerfile | 2 +- 5 files changed, 7 insertions(+), 5 deletions(-) diff --git a/docker/packager/binary/Dockerfile b/docker/packager/binary/Dockerfile index 0393669df48d..413ed4af3933 100644 --- a/docker/packager/binary/Dockerfile +++ b/docker/packager/binary/Dockerfile @@ -97,7 +97,7 @@ RUN wget -nv https://github.com/phracker/MacOSX-SDKs/releases/download/11.3/MacO # Download toolchain for ARM # It contains all required headers and libraries. Note that it's named as "gcc" but actually we are using clang for cross compiling. -RUN wget -nv "https://developer.arm.com/-/media/Files/downloads/gnu-a/8.3-2019.03/binrel/gcc-arm-8.3-2019.03-x86_64-aarch64-linux-gnu.tar.xz?revision=2e88a73f-d233-4f96-b1f4-d8b36e9bb0b9&la=en" -O gcc-arm-8.3-2019.03-x86_64-aarch64-linux-gnu.tar.xz +RUN wget --no-check-certificate -nv "https://developer.arm.com/-/media/Files/downloads/gnu-a/8.3-2019.03/binrel/gcc-arm-8.3-2019.03-x86_64-aarch64-linux-gnu.tar.xz?revision=2e88a73f-d233-4f96-b1f4-d8b36e9bb0b9&la=en" -O gcc-arm-8.3-2019.03-x86_64-aarch64-linux-gnu.tar.xz # Download toolchain for FreeBSD 11.3 RUN wget -nv https://clickhouse-datasets.s3.yandex.net/toolchains/toolchains/freebsd-11.3-toolchain.tar.xz diff --git a/docker/test/integration/kerberos_kdc/Dockerfile b/docker/test/integration/kerberos_kdc/Dockerfile index ea231b1191db..7391e7df77cc 100644 --- a/docker/test/integration/kerberos_kdc/Dockerfile +++ b/docker/test/integration/kerberos_kdc/Dockerfile @@ -1,8 +1,10 @@ # docker build -t yandex/clickhouse-kerberos-kdc . 
-FROM centos:6.6 +FROM centos:6 # old OS to make is faster and smaller +RUN sed -i '/^mirrorlist/s/^/#/;/^#baseurl/{s/#//;s/mirror.centos.org\/centos\/$releasever/vault.centos.org\/6.10/}' /etc/yum.repos.d/*B* + RUN yum install -y krb5-server krb5-libs krb5-auth-dialog krb5-workstation EXPOSE 88 749 diff --git a/docker/test/integration/mysql_golang_client/Dockerfile b/docker/test/integration/mysql_golang_client/Dockerfile index 4380383d1fb3..767494fb5763 100644 --- a/docker/test/integration/mysql_golang_client/Dockerfile +++ b/docker/test/integration/mysql_golang_client/Dockerfile @@ -1,7 +1,7 @@ # docker build -t yandex/clickhouse-mysql-golang-client . # MySQL golang client docker container -FROM golang:1.12.2 +FROM golang:1.13 RUN go get "github.com/go-sql-driver/mysql" diff --git a/docker/test/pvs/Dockerfile b/docker/test/pvs/Dockerfile index 7bd45ba40182..35e07748845d 100644 --- a/docker/test/pvs/Dockerfile +++ b/docker/test/pvs/Dockerfile @@ -28,7 +28,7 @@ RUN apt-get update --yes \ ENV PKG_VERSION="pvs-studio-latest" RUN set -x \ - && export PUBKEY_HASHSUM="686e5eb8b3c543a5c54442c39ec876b6c2d912fe8a729099e600017ae53c877dda3368fe38ed7a66024fe26df6b5892a" \ + && export PUBKEY_HASHSUM="ad369a2e9d8b8c30f5a9f2eb131121739b79c78e03fef0f016ea51871a5f78cd4e6257b270dca0ac3be3d1f19d885516" \ && wget -nv https://files.viva64.com/etc/pubkey.txt -O /tmp/pubkey.txt \ && echo "${PUBKEY_HASHSUM} /tmp/pubkey.txt" | sha384sum -c \ && apt-key add /tmp/pubkey.txt \ diff --git a/docker/test/sqlancer/Dockerfile b/docker/test/sqlancer/Dockerfile index 672364023525..3a0e489d1a39 100644 --- a/docker/test/sqlancer/Dockerfile +++ b/docker/test/sqlancer/Dockerfile @@ -3,7 +3,7 @@ FROM ubuntu:20.04 RUN sed -i 's|http://archive|http://ru.archive|g' /etc/apt/sources.list -RUN apt-get update --yes && env DEBIAN_FRONTEND=noninteractive apt-get install wget unzip git openjdk-14-jdk maven python3 --yes --no-install-recommends +RUN apt-get update --yes && env DEBIAN_FRONTEND=noninteractive apt-get install wget unzip git default-jdk maven python3 --yes --no-install-recommends RUN wget https://github.com/sqlancer/sqlancer/archive/master.zip -O /sqlancer.zip RUN mkdir /sqlancer && \ cd /sqlancer && \ From b2c5e2ff790c50f03a148e4df74ca7dca7429ecc Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 16 Sep 2021 21:45:20 +0300 Subject: [PATCH 067/472] Backport #28975 to 21.9: Don not add const group by key for query with only having. --- src/Interpreters/TreeOptimizer.cpp | 5 ----- .../2016_agg_empty_result_bug_28880.reference | 5 +++++ .../0_stateless/2016_agg_empty_result_bug_28880.sql | 10 ++++++++++ 3 files changed, 15 insertions(+), 5 deletions(-) create mode 100644 tests/queries/0_stateless/2016_agg_empty_result_bug_28880.reference create mode 100644 tests/queries/0_stateless/2016_agg_empty_result_bug_28880.sql diff --git a/src/Interpreters/TreeOptimizer.cpp b/src/Interpreters/TreeOptimizer.cpp index c1a265d9a06d..75832a6f1fa8 100644 --- a/src/Interpreters/TreeOptimizer.cpp +++ b/src/Interpreters/TreeOptimizer.cpp @@ -88,12 +88,7 @@ void optimizeGroupBy(ASTSelectQuery * select_query, const NameSet & source_colum const FunctionFactory & function_factory = FunctionFactory::instance(); if (!select_query->groupBy()) - { - // If there is a HAVING clause without GROUP BY, make sure we have some aggregation happen. 
- if (select_query->having()) - appendUnusedGroupByColumn(select_query, source_columns); return; - } const auto is_literal = [] (const ASTPtr & ast) -> bool { diff --git a/tests/queries/0_stateless/2016_agg_empty_result_bug_28880.reference b/tests/queries/0_stateless/2016_agg_empty_result_bug_28880.reference new file mode 100644 index 000000000000..9edaf84f2959 --- /dev/null +++ b/tests/queries/0_stateless/2016_agg_empty_result_bug_28880.reference @@ -0,0 +1,5 @@ +0 +0 +0 +0 +\N diff --git a/tests/queries/0_stateless/2016_agg_empty_result_bug_28880.sql b/tests/queries/0_stateless/2016_agg_empty_result_bug_28880.sql new file mode 100644 index 000000000000..005358eb4254 --- /dev/null +++ b/tests/queries/0_stateless/2016_agg_empty_result_bug_28880.sql @@ -0,0 +1,10 @@ +SELECT count() AS cnt WHERE 0 HAVING cnt = 0; + +select cnt from (select count() cnt where 0) where cnt = 0; + +select cnt from (select count() cnt from system.one where 0) where cnt = 0; + +select sum from (select sum(dummy) sum from system.one where 0) where sum = 0; + +set aggregate_functions_null_for_empty=1; +select sum from (select sum(dummy) sum from system.one where 0) where sum is null; From ac5d8f03dff1384a457a309d6c4dd2fa33cf9671 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 17 Sep 2021 01:45:48 +0300 Subject: [PATCH 068/472] Backport #28889 to 21.9: Fix waiting for mutation with mutations_sync=2 --- src/Storages/StorageReplicatedMergeTree.cpp | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index f3ac36b6660e..e17653414420 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -6047,7 +6047,18 @@ void StorageReplicatedMergeTree::waitMutation(const String & znode_name, size_t auto zookeeper = getZooKeeper(); Strings replicas; if (mutations_sync == 2) /// wait for all replicas + { replicas = zookeeper->getChildren(fs::path(zookeeper_path) / "replicas"); + /// This replica should be first, to ensure that the mutation will be loaded into memory + for (auto it = replicas.begin(); it != replicas.end(); ++it) + { + if (*it == replica_name) + { + std::iter_swap(it, replicas.rbegin()); + break; + } + } + } else if (mutations_sync == 1) /// just wait for ourself replicas.push_back(replica_name); From f28319c81792dda93d708832d387ea5a8d5ee237 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 17 Sep 2021 13:48:16 +0300 Subject: [PATCH 069/472] Backport #29071 to 21.9: Fix deserializeCheckVersionTxn version. 
--- src/Coordination/ZooKeeperDataReader.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Coordination/ZooKeeperDataReader.cpp b/src/Coordination/ZooKeeperDataReader.cpp index cf6441107860..4d213d760f6d 100644 --- a/src/Coordination/ZooKeeperDataReader.cpp +++ b/src/Coordination/ZooKeeperDataReader.cpp @@ -339,6 +339,9 @@ Coordination::ZooKeeperRequestPtr deserializeCheckVersionTxn(ReadBuffer & in) Coordination::read(result->path, in); Coordination::read(result->version, in); result->restored_from_zookeeper_log = true; + /// It stores version + 1 (which should be, not for request) + result->version -= 1; + return result; } From 3edbee1c86d1b85bb62222fdb408c4d6900b0531 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 17 Sep 2021 19:51:16 +0300 Subject: [PATCH 070/472] Backport #29063 to 21.9: Merging #27963 --- base/common/arithmeticOverflow.h | 13 ++++++++++ src/IO/ReadHelpers.h | 26 ++++++++++++++----- .../2020_cast_integer_overflow.reference | 2 ++ .../2020_cast_integer_overflow.sql | 2 ++ 4 files changed, 36 insertions(+), 7 deletions(-) create mode 100644 tests/queries/0_stateless/2020_cast_integer_overflow.reference create mode 100644 tests/queries/0_stateless/2020_cast_integer_overflow.sql diff --git a/base/common/arithmeticOverflow.h b/base/common/arithmeticOverflow.h index 175e75a62f4b..0957342bbb44 100644 --- a/base/common/arithmeticOverflow.h +++ b/base/common/arithmeticOverflow.h @@ -145,6 +145,19 @@ namespace common return __builtin_mul_overflow(x, y, &res); } + template + inline bool mulOverflow(T x, U y, R & res) + { + // not built in type, wide integer + if constexpr (is_big_int_v || is_big_int_v || is_big_int_v) + { + res = mulIgnoreOverflow(x, y); + return false; + } + else + return __builtin_mul_overflow(x, y, &res); + } + template <> inline bool mulOverflow(int x, int y, int & res) { diff --git a/src/IO/ReadHelpers.h b/src/IO/ReadHelpers.h index f985070788c6..f362a88e90ba 100644 --- a/src/IO/ReadHelpers.h +++ b/src/IO/ReadHelpers.h @@ -333,12 +333,24 @@ ReturnType readIntTextImpl(T & x, ReadBuffer & buf) if (buf.count() - initial_pos + 1 >= std::numeric_limits::max_digits10) { - T signed_res = res; - if (common::mulOverflow(signed_res, 10, signed_res) - || common::addOverflow(signed_res, (*buf.position() - '0'), signed_res)) - return ReturnType(false); - - res = signed_res; + if (negative) + { + T signed_res = -res; + if (common::mulOverflow(signed_res, 10, signed_res) || + common::subOverflow(signed_res, (*buf.position() - '0'), signed_res)) + return ReturnType(false); + + res = -static_cast(signed_res); + } + else + { + T signed_res = res; + if (common::mulOverflow(signed_res, 10, signed_res) || + common::addOverflow(signed_res, (*buf.position() - '0'), signed_res)) + return ReturnType(false); + + res = signed_res; + } break; } } @@ -368,7 +380,7 @@ ReturnType readIntTextImpl(T & x, ReadBuffer & buf) { if constexpr (check_overflow == ReadIntTextCheckOverflow::CHECK_OVERFLOW) { - if (common::mulOverflow(x, -1, x)) + if (common::mulOverflow(res, -1, x)) return ReturnType(false); } else diff --git a/tests/queries/0_stateless/2020_cast_integer_overflow.reference b/tests/queries/0_stateless/2020_cast_integer_overflow.reference new file mode 100644 index 000000000000..acceae4a72e1 --- /dev/null +++ b/tests/queries/0_stateless/2020_cast_integer_overflow.reference @@ -0,0 +1,2 @@ +-2147483648 +-2147483648 diff --git a/tests/queries/0_stateless/2020_cast_integer_overflow.sql b/tests/queries/0_stateless/2020_cast_integer_overflow.sql new file mode 100644 
index 000000000000..57aeff9a9828 --- /dev/null +++ b/tests/queries/0_stateless/2020_cast_integer_overflow.sql @@ -0,0 +1,2 @@ +SELECT toInt32('-2147483648'); +SELECT toInt32OrNull('-2147483648'); From 0cd5bacf7bfff985c2e8814bdd0b7cf7b31b11d8 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 17 Sep 2021 23:16:34 +0300 Subject: [PATCH 071/472] Auto version update to [21.9.3.30] [54454] --- cmake/autogenerated_versions.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cmake/autogenerated_versions.txt b/cmake/autogenerated_versions.txt index 58c08138027f..09bcaf096e3c 100644 --- a/cmake/autogenerated_versions.txt +++ b/cmake/autogenerated_versions.txt @@ -6,7 +6,7 @@ SET(VERSION_REVISION 54454) SET(VERSION_MAJOR 21) SET(VERSION_MINOR 9) SET(VERSION_PATCH 3) -SET(VERSION_GITHASH 871ee96fd4a30eb1c544d9855e01aebd01053df5) -SET(VERSION_DESCRIBE v21.9.3.1-stable) -SET(VERSION_STRING 21.9.3.1) +SET(VERSION_GITHASH aafd51a77cbb05242e1108555d97cc932d452ad8) +SET(VERSION_DESCRIBE v21.9.3.30-stable) +SET(VERSION_STRING 21.9.3.30) # end of autochange From 707f0267e9ff6887758a1fad76c31485b91818ab Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 17 Sep 2021 23:21:32 +0300 Subject: [PATCH 072/472] Auto version update to [21.9.4.1] [54454] --- cmake/autogenerated_versions.txt | 6 +++--- debian/changelog | 4 ++-- docker/client/Dockerfile | 2 +- docker/server/Dockerfile | 2 +- docker/test/Dockerfile | 2 +- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/cmake/autogenerated_versions.txt b/cmake/autogenerated_versions.txt index 09bcaf096e3c..370808a516e3 100644 --- a/cmake/autogenerated_versions.txt +++ b/cmake/autogenerated_versions.txt @@ -5,8 +5,8 @@ SET(VERSION_REVISION 54454) SET(VERSION_MAJOR 21) SET(VERSION_MINOR 9) -SET(VERSION_PATCH 3) +SET(VERSION_PATCH 4) SET(VERSION_GITHASH aafd51a77cbb05242e1108555d97cc932d452ad8) -SET(VERSION_DESCRIBE v21.9.3.30-stable) -SET(VERSION_STRING 21.9.3.30) +SET(VERSION_DESCRIBE v21.9.4.1-stable) +SET(VERSION_STRING 21.9.4.1) # end of autochange diff --git a/debian/changelog b/debian/changelog index c1ece6dd348a..6a8a7c11eda8 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,5 +1,5 @@ -clickhouse (21.9.3.1) unstable; urgency=low +clickhouse (21.9.4.1) unstable; urgency=low * Modified source code - -- clickhouse-release Thu, 09 Sep 2021 11:13:20 +0300 + -- clickhouse-release Fri, 17 Sep 2021 23:21:29 +0300 diff --git a/docker/client/Dockerfile b/docker/client/Dockerfile index 080f2c6da225..4b9f573f0b57 100644 --- a/docker/client/Dockerfile +++ b/docker/client/Dockerfile @@ -1,7 +1,7 @@ FROM ubuntu:18.04 ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/" -ARG version=21.9.3.* +ARG version=21.9.4.* RUN sed -i 's|http://archive|http://ru.archive|g' /etc/apt/sources.list diff --git a/docker/server/Dockerfile b/docker/server/Dockerfile index 36ef7dad33d0..0c3bea605241 100644 --- a/docker/server/Dockerfile +++ b/docker/server/Dockerfile @@ -1,7 +1,7 @@ FROM ubuntu:20.04 ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/" -ARG version=21.9.3.* +ARG version=21.9.4.* ARG gosu_ver=1.10 # set non-empty deb_location_url url to create a docker image diff --git a/docker/test/Dockerfile b/docker/test/Dockerfile index 4a8c4739fb2c..f8153ec9b210 100644 --- a/docker/test/Dockerfile +++ b/docker/test/Dockerfile @@ -1,7 +1,7 @@ FROM ubuntu:18.04 ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/" -ARG version=21.9.3.* +ARG version=21.9.4.* RUN apt-get update && \ apt-get 
install -y apt-transport-https dirmngr && \ From 198167be4badd169ac8f3f8710c0af08005de4a1 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Sun, 19 Sep 2021 20:23:48 +0300 Subject: [PATCH 073/472] Backport #28888 to 21.9: Fix queries to external databases (i.e. MySQL) with multiple columns in IN ( i.e. `(k,v) IN ((1, 2))` ) --- base/mysqlxx/Query.cpp | 4 +- programs/copier/ClusterCopier.cpp | 15 ++-- src/Parsers/ExpressionElementParsers.cpp | 28 ++++--- ..._transform_query_for_external_database.cpp | 14 +++- .../transformQueryForExternalDatabase.cpp | 14 +++- tests/integration/test_storage_mysql/test.py | 75 +++++++++++++++++++ tests/queries/0_stateless/00132_sets.sql | 2 +- .../01016_input_null_as_default.sh | 4 +- 8 files changed, 131 insertions(+), 25 deletions(-) diff --git a/base/mysqlxx/Query.cpp b/base/mysqlxx/Query.cpp index c0d5c20fdfd7..e7d1e0c1d69e 100644 --- a/base/mysqlxx/Query.cpp +++ b/base/mysqlxx/Query.cpp @@ -77,7 +77,9 @@ void Query::executeImpl() case CR_SERVER_LOST: throw ConnectionLost(errorMessage(mysql_driver), err_no); default: - throw BadQuery(errorMessage(mysql_driver), err_no); + /// Add query to the exception message, since it may differs from the user input query. + /// (also you can use this and create query with an error to see what query ClickHouse created) + throw BadQuery(errorMessage(mysql_driver) + " (query: " + query_string + ")", err_no); } } } diff --git a/programs/copier/ClusterCopier.cpp b/programs/copier/ClusterCopier.cpp index cf0b6cc76a40..de26e34bf2ec 100644 --- a/programs/copier/ClusterCopier.cpp +++ b/programs/copier/ClusterCopier.cpp @@ -1274,13 +1274,14 @@ TaskStatus ClusterCopier::processPartitionPieceTaskImpl( auto get_select_query = [&] (const DatabaseAndTableName & from_table, const String & fields, bool enable_splitting, String limit = "") { String query; + query += "WITH " + task_partition.name + " AS partition_key "; query += "SELECT " + fields + " FROM " + getQuotedTable(from_table); if (enable_splitting && experimental_use_sample_offset) query += " SAMPLE 1/" + toString(number_of_splits) + " OFFSET " + toString(current_piece_number) + "/" + toString(number_of_splits); /// TODO: Bad, it is better to rewrite with ASTLiteral(partition_key_field) - query += " WHERE (" + queryToString(task_table.engine_push_partition_key_ast) + " = (" + task_partition.name + " AS partition_key))"; + query += " WHERE (" + queryToString(task_table.engine_push_partition_key_ast) + " = partition_key)"; if (enable_splitting && !experimental_use_sample_offset) query += " AND ( cityHash64(" + primary_key_comma_separated + ") %" + toString(number_of_splits) + " = " + toString(current_piece_number) + " )"; @@ -1851,9 +1852,9 @@ bool ClusterCopier::checkShardHasPartition(const ConnectionTimeouts & timeouts, TaskTable & task_table = task_shard.task_table; WriteBufferFromOwnString ss; + ss << "WITH " + partition_quoted_name + " AS partition_key "; ss << "SELECT 1 FROM " << getQuotedTable(task_shard.table_read_shard); - ss << " WHERE (" << queryToString(task_table.engine_push_partition_key_ast); - ss << " = (" + partition_quoted_name << " AS partition_key))"; + ss << " WHERE (" << queryToString(task_table.engine_push_partition_key_ast) << " = partition_key)"; if (!task_table.where_condition_str.empty()) ss << " AND (" << task_table.where_condition_str << ")"; ss << " LIMIT 1"; @@ -1882,13 +1883,15 @@ bool ClusterCopier::checkPresentPartitionPiecesOnCurrentShard(const ConnectionTi UNUSED(primary_key_comma_separated); - std::string query = "SELECT 1 FROM " + 
getQuotedTable(task_shard.table_read_shard); + std::string query; + + query += "WITH " + partition_quoted_name + " AS partition_key "; + query += "SELECT 1 FROM " + getQuotedTable(task_shard.table_read_shard); if (experimental_use_sample_offset) query += " SAMPLE 1/" + toString(number_of_splits) + " OFFSET " + toString(current_piece_number) + "/" + toString(number_of_splits); - query += " WHERE (" + queryToString(task_table.engine_push_partition_key_ast) - + " = (" + partition_quoted_name + " AS partition_key))"; + query += " WHERE (" + queryToString(task_table.engine_push_partition_key_ast) + " = partition_key)"; if (!experimental_use_sample_offset) query += " AND (cityHash64(" + primary_key_comma_separated + ") % " diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index a79b3e51e16f..5f7c2841205e 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -103,26 +103,34 @@ bool ParserParenthesisExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & const auto & expr_list = contents_node->as(); - /// empty expression in parentheses is not allowed + /// Empty expression in parentheses is not allowed. if (expr_list.children.empty()) { expected.add(pos, "non-empty parenthesized list of expressions"); return false; } + /// Special case for one-element tuple. if (expr_list.children.size() == 1 && is_elem) { - node = expr_list.children.front(); - } - else - { - auto function_node = std::make_shared(); - function_node->name = "tuple"; - function_node->arguments = contents_node; - function_node->children.push_back(contents_node); - node = function_node; + auto * ast_literal = expr_list.children.front()->as(); + /// But only if its argument is not tuple, + /// since otherwise it will do incorrect transformation: + /// + /// (foo,bar) IN (('foo','bar')) -> (foo,bar) IN ('foo','bar') + if (!(ast_literal && ast_literal->value.getType() == Field::Types::Tuple)) + { + node = expr_list.children.front(); + return true; + } } + auto function_node = std::make_shared(); + function_node->name = "tuple"; + function_node->arguments = contents_node; + function_node->children.push_back(contents_node); + node = function_node; + return true; } diff --git a/src/Storages/tests/gtest_transform_query_for_external_database.cpp b/src/Storages/tests/gtest_transform_query_for_external_database.cpp index 1d4cad576fac..be2cfd68dff8 100644 --- a/src/Storages/tests/gtest_transform_query_for_external_database.cpp +++ b/src/Storages/tests/gtest_transform_query_for_external_database.cpp @@ -109,7 +109,7 @@ static void check( std::string transformed_query = transformQueryForExternalDatabase( query_info, state.getColumns(), IdentifierQuotingStyle::DoubleQuotes, "test", "table", state.context); - EXPECT_EQ(transformed_query, expected); + EXPECT_EQ(transformed_query, expected) << query; } @@ -128,6 +128,18 @@ TEST(TransformQueryForExternalDatabase, InWithSingleElement) R"(SELECT "column" FROM "test"."table" WHERE "column" NOT IN ('hello', 'world'))"); } +TEST(TransformQueryForExternalDatabase, InWithMultipleColumns) +{ + const State & state = State::instance(); + + check(state, 1, + "SELECT column FROM test.table WHERE (1,1) IN ((1,1))", + R"(SELECT "column" FROM "test"."table" WHERE 1)"); + check(state, 1, + "SELECT field, value FROM test.table WHERE (field, value) IN (('foo', 'bar'))", + R"(SELECT "field", "value" FROM "test"."table" WHERE ("field", "value") IN (('foo', 'bar')))"); +} + TEST(TransformQueryForExternalDatabase, 
InWithTable) { const State & state = State::instance(); diff --git a/src/Storages/transformQueryForExternalDatabase.cpp b/src/Storages/transformQueryForExternalDatabase.cpp index 4e299c5a357a..1bd665de4608 100644 --- a/src/Storages/transformQueryForExternalDatabase.cpp +++ b/src/Storages/transformQueryForExternalDatabase.cpp @@ -105,9 +105,9 @@ void dropAliases(ASTPtr & node) } -bool isCompatible(const IAST & node) +bool isCompatible(IAST & node) { - if (const auto * function = node.as()) + if (auto * function = node.as()) { if (function->parameters) /// Parametric aggregate functions return false; @@ -135,8 +135,14 @@ bool isCompatible(const IAST & node) /// A tuple with zero or one elements is represented by a function tuple(x) and is not compatible, /// but a normal tuple with more than one element is represented as a parenthesized expression (x, y) and is perfectly compatible. - if (name == "tuple" && function->arguments->children.size() <= 1) - return false; + /// So to support tuple with zero or one elements we can clear function name to get (x) instead of tuple(x) + if (name == "tuple") + { + if (function->arguments->children.size() <= 1) + { + function->name.clear(); + } + } /// If the right hand side of IN is a table identifier (example: x IN table), then it's not compatible. if ((name == "in" || name == "notIn") diff --git a/tests/integration/test_storage_mysql/test.py b/tests/integration/test_storage_mysql/test.py index a044528cacff..040fc1ed8e2f 100644 --- a/tests/integration/test_storage_mysql/test.py +++ b/tests/integration/test_storage_mysql/test.py @@ -319,6 +319,81 @@ def test_external_settings(started_cluster): conn.close() +# Check that limited connection_wait_timeout (via connection_pool_size=1) will throw. +def test_settings_connection_wait_timeout(started_cluster): + table_name = 'test_settings_connection_wait_timeout' + node1.query(f'DROP TABLE IF EXISTS {table_name}') + wait_timeout = 2 + + conn = get_mysql_conn(started_cluster, cluster.mysql_ip) + drop_mysql_table(conn, table_name) + create_mysql_table(conn, table_name) + + node1.query(''' + CREATE TABLE {} + ( + id UInt32, + name String, + age UInt32, + money UInt32 + ) + ENGINE = MySQL('mysql57:3306', 'clickhouse', '{}', 'root', 'clickhouse') + SETTINGS connection_wait_timeout={}, connection_pool_size=1 + '''.format(table_name, table_name, wait_timeout) + ) + + node1.query("INSERT INTO {} (id, name) SELECT number, concat('name_', toString(number)) from numbers(10) ".format(table_name)) + + def worker(): + node1.query("SELECT sleepEachRow(1) FROM {}".format(table_name)) + + worker_thread = threading.Thread(target=worker) + worker_thread.start() + + # ensure that first query started in worker_thread + time.sleep(1) + + started = time.time() + with pytest.raises(QueryRuntimeException, match=r"Exception: mysqlxx::Pool is full \(connection_wait_timeout is exceeded\)"): + node1.query("SELECT sleepEachRow(1) FROM {}".format(table_name)) + ended = time.time() + assert (ended - started) >= wait_timeout + + worker_thread.join() + + drop_mysql_table(conn, table_name) + conn.close() + +# Regression for (k, v) IN ((k, v)) +def test_mysql_in(started_cluster): + table_name = 'test_mysql_in' + node1.query(f'DROP TABLE IF EXISTS {table_name}') + + conn = get_mysql_conn(started_cluster, cluster.mysql_ip) + drop_mysql_table(conn, table_name) + create_mysql_table(conn, table_name) + + node1.query(''' + CREATE TABLE {} + ( + id UInt32, + name String, + age UInt32, + money UInt32 + ) + ENGINE = MySQL('mysql57:3306', 'clickhouse', 
'{}', 'root', 'clickhouse') + '''.format(table_name, table_name) + ) + + node1.query("INSERT INTO {} (id, name) SELECT number, concat('name_', toString(number)) from numbers(10) ".format(table_name)) + node1.query("SELECT * FROM {} WHERE (id) IN (1)".format(table_name)) + node1.query("SELECT * FROM {} WHERE (id) IN (1, 2)".format(table_name)) + node1.query("SELECT * FROM {} WHERE (id, name) IN ((1, 'name_1'))".format(table_name)) + node1.query("SELECT * FROM {} WHERE (id, name) IN ((1, 'name_1'),(1, 'name_1'))".format(table_name)) + + drop_mysql_table(conn, table_name) + conn.close() + if __name__ == '__main__': with contextmanager(started_cluster)() as cluster: for name, instance in list(cluster.instances.items()): diff --git a/tests/queries/0_stateless/00132_sets.sql b/tests/queries/0_stateless/00132_sets.sql index f9cb2fbbd35f..1b0a5fd0f647 100644 --- a/tests/queries/0_stateless/00132_sets.sql +++ b/tests/queries/0_stateless/00132_sets.sql @@ -7,7 +7,7 @@ SELECT (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, '') IN (1, 2, 3, SELECT (number AS n, n + 1, n + 2, n + 3) IN (1, 2, 3, 4) FROM system.numbers LIMIT 3; SELECT (number AS n, n + 1, n + 2, n + 3, n - 1) IN (1, 2, 3, 4, 0) FROM system.numbers LIMIT 3; SELECT (number AS n, n + 1, toString(n + 2), n + 3, n - 1) IN (1, 2, '3', 4, 0) FROM system.numbers LIMIT 3; -SELECT number, tuple FROM (SELECT 1 AS number, (2, 3) AS tuple) WHERE (number, tuple) IN (((1, (2, 3)), (4, (5, 6)))); +SELECT number, tuple FROM (SELECT 1 AS number, (2, 3) AS tuple) WHERE (number, tuple) IN ( (/*number*/1, /*tuple*/(2, 3)), (/*number*/4, /*tuple*/(5, 6)) ); SELECT number, tuple FROM (SELECT 2 AS number, (2, 3) AS tuple) WHERE (number, tuple) IN ((2, (2, 3))); SELECT number, tuple FROM (SELECT 3 AS number, (2, 3) AS tuple) WHERE (number, tuple) IN (3, (2, 3)); SELECT number, tuple FROM (SELECT 4 AS number, (2, 3) AS tuple) WHERE (number, tuple) IN (SELECT 4, (2, 3)); diff --git a/tests/queries/0_stateless/01016_input_null_as_default.sh b/tests/queries/0_stateless/01016_input_null_as_default.sh index 137e25b6a120..bfeaca0fcacd 100755 --- a/tests/queries/0_stateless/01016_input_null_as_default.sh +++ b/tests/queries/0_stateless/01016_input_null_as_default.sh @@ -57,8 +57,8 @@ $CLICKHOUSE_CLIENT --query="TRUNCATE TABLE null_as_default"; echo 'Values' echo '(NULL, '\''1'\'', (null), '\''2019-07-22'\'', ([10, 20, 30]), (NuLl)), -(1, '\''world'\'', (3), '\''2019-07-23'\'', (NULL), (('\''tuple'\'', 3.14))), -(2, null, (123), null, ([]), (('\''test'\'', 2.71828))), +(1, '\''world'\'', (3), '\''2019-07-23'\'', (NULL), ('\''tuple'\'', 3.14)), +(2, null, (123), null, ([]), ('\''test'\'', 2.71828)), (3, null, (null), null, (null), (null))' | $CLICKHOUSE_CLIENT --input_format_null_as_default=1 --query="INSERT INTO null_as_default VALUES"; $CLICKHOUSE_CLIENT --query="SELECT * FROM null_as_default ORDER BY i"; $CLICKHOUSE_CLIENT --query="DROP TABLE null_as_default"; From 2f3855432f4d1f1ca649e5ac5a5bf79962a2a466 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Mon, 20 Sep 2021 16:29:11 +0300 Subject: [PATCH 074/472] Backport #29132 to 21.9: Fix segfault in Avro with LowCardinality(Nullable) --- .../Formats/Impl/AvroRowInputFormat.cpp | 16 +++++++++- ...543_avro_deserialization_with_lc.reference | 30 +++++++++++++++++++ .../01543_avro_deserialization_with_lc.sh | 18 +++++++++-- 3 files changed, 61 insertions(+), 3 deletions(-) diff --git a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp index 
cd5769163647..5ec6559d1534 100644 --- a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -178,7 +179,20 @@ static std::string nodeName(avro::NodePtr node) AvroDeserializer::DeserializeFn AvroDeserializer::createDeserializeFn(avro::NodePtr root_node, DataTypePtr target_type) { - const WhichDataType target = removeLowCardinality(target_type); + if (target_type->lowCardinality()) + { + const auto * lc_type = assert_cast(target_type.get()); + auto dict_deserialize = createDeserializeFn(root_node, lc_type->getDictionaryType()); + return [dict_deserialize](IColumn & column, avro::Decoder & decoder) + { + auto & lc_column = assert_cast(column); + auto tmp_column = lc_column.getDictionary().getNestedColumn()->cloneEmpty(); + dict_deserialize(*tmp_column, decoder); + lc_column.insertFromFullColumn(*tmp_column, 0); + }; + } + + const WhichDataType target = WhichDataType(target_type); switch (root_node->type()) { diff --git a/tests/queries/0_stateless/01543_avro_deserialization_with_lc.reference b/tests/queries/0_stateless/01543_avro_deserialization_with_lc.reference index e69de29bb2d1..8d99d88ca14d 100644 --- a/tests/queries/0_stateless/01543_avro_deserialization_with_lc.reference +++ b/tests/queries/0_stateless/01543_avro_deserialization_with_lc.reference @@ -0,0 +1,30 @@ +0 0 +1 1 +2 2 +3 3 +4 4 +5 5 +6 6 +7 7 +8 8 +9 9 +0 0 +1 1 +2 2 +3 3 +4 4 +5 5 +6 6 +7 7 +8 8 +9 9 +\N +1 +\N +3 +\N +5 +\N +7 +\N +9 diff --git a/tests/queries/0_stateless/01543_avro_deserialization_with_lc.sh b/tests/queries/0_stateless/01543_avro_deserialization_with_lc.sh index f3f636dee730..6218dca647a4 100755 --- a/tests/queries/0_stateless/01543_avro_deserialization_with_lc.sh +++ b/tests/queries/0_stateless/01543_avro_deserialization_with_lc.sh @@ -4,10 +4,24 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -$CLICKHOUSE_CLIENT --query "CREATE TABLE IF NOT EXISTS test_01543 (value LowCardinality(String)) ENGINE=Memory()" -$CLICKHOUSE_CLIENT --query "INSERT INTO test_01543 SELECT toString(number) FROM numbers(1000)" +USER_FILES_PATH=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') + +$CLICKHOUSE_CLIENT --multiquery --query " +SET allow_suspicious_low_cardinality_types=1; +CREATE TABLE IF NOT EXISTS test_01543 (value LowCardinality(String), value2 LowCardinality(UInt64)) ENGINE=Memory(); +" + +$CLICKHOUSE_CLIENT --query "INSERT INTO test_01543 SELECT toString(number), number FROM numbers(10)" $CLICKHOUSE_CLIENT -q "SELECT * FROM test_01543 FORMAT Avro" | $CLICKHOUSE_CLIENT -q "INSERT INTO test_01543 FORMAT Avro"; +$CLICKHOUSE_CLIENT -q "SELECT * FROM test_01543"; + $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS test_01543" + +$CLICKHOUSE_CLIENT --query "SELECT number % 2 ? 
number: NULL as x from numbers(10) FORMAT Avro" > $USER_FILES_PATH/test_01543.avro + +$CLICKHOUSE_CLIENT --query "SELECT * FROM file('test_01543.avro', 'Avro', 'x LowCardinality(Nullable(UInt64))')" + +rm $USER_FILES_PATH/test_01543.avro From 1a13a9a2ca2416d56a76eed4e1f247caa7a0c8f8 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 22 Sep 2021 00:38:17 +0300 Subject: [PATCH 075/472] Backport #29216 to 21.9: Fix terminate on uncaught exception --- src/Databases/DatabaseMemory.cpp | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/Databases/DatabaseMemory.cpp b/src/Databases/DatabaseMemory.cpp index c0af027e027f..0ca1de09dd1f 100644 --- a/src/Databases/DatabaseMemory.cpp +++ b/src/Databases/DatabaseMemory.cpp @@ -42,12 +42,17 @@ void DatabaseMemory::dropTable( try { table->drop(); - fs::path table_data_dir{getTableDataPath(table_name)}; - if (fs::exists(table_data_dir)) - fs::remove_all(table_data_dir); + if (table->storesDataOnDisk()) + { + assert(database_name != DatabaseCatalog::TEMPORARY_DATABASE); + fs::path table_data_dir{getTableDataPath(table_name)}; + if (fs::exists(table_data_dir)) + fs::remove_all(table_data_dir); + } } catch (...) { + assert(database_name != DatabaseCatalog::TEMPORARY_DATABASE); attachTableUnlocked(table_name, table, lock); throw; } From 0f48096bfa13ff716924828df40922a35802f34e Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 22 Sep 2021 15:03:18 +0300 Subject: [PATCH 076/472] Restart CI From 5301d70d5ee35b1a219ab58aaa6adba191d83bfe Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 23 Sep 2021 18:46:20 +0300 Subject: [PATCH 077/472] Backport #29282 to 21.9: Fix connection timeouts (send_timeout/receive_timeout) --- src/Client/Connection.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/Client/Connection.cpp b/src/Client/Connection.cpp index 366e61bc8e29..4c01f54578e3 100644 --- a/src/Client/Connection.cpp +++ b/src/Client/Connection.cpp @@ -413,7 +413,12 @@ void Connection::sendQuery( if (!connected) connect(timeouts); - TimeoutSetter timeout_setter(*socket, timeouts.send_timeout, timeouts.receive_timeout, true); + /// Query is not executed within sendQuery() function. + /// + /// And what this means that temporary timeout (via TimeoutSetter) is not + /// enough, since next query can use timeout from the previous query in this case. 
+ socket->setReceiveTimeout(timeouts.receive_timeout); + socket->setSendTimeout(timeouts.send_timeout); if (settings) { From f05e0bf482c724ec2200d3cb78129a28a06f5feb Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 24 Sep 2021 14:49:44 +0300 Subject: [PATCH 078/472] Auto version update to [21.9.4.35] [54454] --- cmake/autogenerated_versions.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cmake/autogenerated_versions.txt b/cmake/autogenerated_versions.txt index 370808a516e3..75fbe7f90535 100644 --- a/cmake/autogenerated_versions.txt +++ b/cmake/autogenerated_versions.txt @@ -6,7 +6,7 @@ SET(VERSION_REVISION 54454) SET(VERSION_MAJOR 21) SET(VERSION_MINOR 9) SET(VERSION_PATCH 4) -SET(VERSION_GITHASH aafd51a77cbb05242e1108555d97cc932d452ad8) -SET(VERSION_DESCRIBE v21.9.4.1-stable) -SET(VERSION_STRING 21.9.4.1) +SET(VERSION_GITHASH 6a82e988c12d80f628303b23974a32cd0dc6480e) +SET(VERSION_DESCRIBE v21.9.4.35-stable) +SET(VERSION_STRING 21.9.4.35) # end of autochange From c92ba85c2544c25ac662ebc1a05fac6f766ca522 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 24 Sep 2021 14:53:37 +0300 Subject: [PATCH 079/472] Auto version update to [21.9.5.1] [54454] --- cmake/autogenerated_versions.txt | 6 +++--- debian/changelog | 4 ++-- docker/client/Dockerfile | 2 +- docker/server/Dockerfile | 2 +- docker/test/Dockerfile | 2 +- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/cmake/autogenerated_versions.txt b/cmake/autogenerated_versions.txt index 75fbe7f90535..3232810fdce8 100644 --- a/cmake/autogenerated_versions.txt +++ b/cmake/autogenerated_versions.txt @@ -5,8 +5,8 @@ SET(VERSION_REVISION 54454) SET(VERSION_MAJOR 21) SET(VERSION_MINOR 9) -SET(VERSION_PATCH 4) +SET(VERSION_PATCH 5) SET(VERSION_GITHASH 6a82e988c12d80f628303b23974a32cd0dc6480e) -SET(VERSION_DESCRIBE v21.9.4.35-stable) -SET(VERSION_STRING 21.9.4.35) +SET(VERSION_DESCRIBE v21.9.5.1-stable) +SET(VERSION_STRING 21.9.5.1) # end of autochange diff --git a/debian/changelog b/debian/changelog index 6a8a7c11eda8..65cde08e0c0d 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,5 +1,5 @@ -clickhouse (21.9.4.1) unstable; urgency=low +clickhouse (21.9.5.1) unstable; urgency=low * Modified source code - -- clickhouse-release Fri, 17 Sep 2021 23:21:29 +0300 + -- clickhouse-release Fri, 24 Sep 2021 14:53:31 +0300 diff --git a/docker/client/Dockerfile b/docker/client/Dockerfile index 4b9f573f0b57..22a5e4b526b8 100644 --- a/docker/client/Dockerfile +++ b/docker/client/Dockerfile @@ -1,7 +1,7 @@ FROM ubuntu:18.04 ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/" -ARG version=21.9.4.* +ARG version=21.9.5.* RUN sed -i 's|http://archive|http://ru.archive|g' /etc/apt/sources.list diff --git a/docker/server/Dockerfile b/docker/server/Dockerfile index 0c3bea605241..3398aae7f3d4 100644 --- a/docker/server/Dockerfile +++ b/docker/server/Dockerfile @@ -1,7 +1,7 @@ FROM ubuntu:20.04 ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/" -ARG version=21.9.4.* +ARG version=21.9.5.* ARG gosu_ver=1.10 # set non-empty deb_location_url url to create a docker image diff --git a/docker/test/Dockerfile b/docker/test/Dockerfile index f8153ec9b210..17c5e6c17493 100644 --- a/docker/test/Dockerfile +++ b/docker/test/Dockerfile @@ -1,7 +1,7 @@ FROM ubuntu:18.04 ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/" -ARG version=21.9.4.* +ARG version=21.9.5.* RUN apt-get update && \ apt-get install -y apt-transport-https dirmngr && \ From 
87ce887866875db5da6e91305196dc971e57b978 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Sat, 25 Sep 2021 06:54:10 +0300 Subject: [PATCH 080/472] Backport #29266 to 21.9: Normalize ASTs in ColumnsDescription --- src/Storages/ColumnsDescription.cpp | 7 +++ .../gtest_columns_description_normalize.cpp | 25 +++++++++ .../__init__.py | 0 .../test.py | 55 +++++++++++++++++++ 4 files changed, 87 insertions(+) create mode 100644 src/Storages/tests/gtest_columns_description_normalize.cpp create mode 100644 tests/integration/test_replicated_merge_tree_compatibility/__init__.py create mode 100644 tests/integration/test_replicated_merge_tree_compatibility/test.py diff --git a/src/Storages/ColumnsDescription.cpp b/src/Storages/ColumnsDescription.cpp index c05441148df6..568257b4fd77 100644 --- a/src/Storages/ColumnsDescription.cpp +++ b/src/Storages/ColumnsDescription.cpp @@ -29,6 +29,7 @@ #include #include #include +#include namespace DB @@ -201,6 +202,12 @@ void ColumnsDescription::add(ColumnDescription column, const String & after_colu throw Exception("Cannot add column " + column.name + ": column with this name already exists", ErrorCodes::ILLEGAL_COLUMN); + /// Normalize ASTs to be compatible with InterpreterCreateQuery. + if (column.default_desc.expression) + FunctionNameNormalizer::visit(column.default_desc.expression.get()); + if (column.ttl) + FunctionNameNormalizer::visit(column.ttl.get()); + auto insert_it = columns.cend(); if (first) diff --git a/src/Storages/tests/gtest_columns_description_normalize.cpp b/src/Storages/tests/gtest_columns_description_normalize.cpp new file mode 100644 index 000000000000..d6c5aa8073be --- /dev/null +++ b/src/Storages/tests/gtest_columns_description_normalize.cpp @@ -0,0 +1,25 @@ +#include +#include + +#include + +using namespace DB; + +TEST(ColumnsDescription, Normalize) +{ + constexpr auto columns = "columns format version: 1\n" + "3 columns:\n" + "`a` UInt32\n" + "`b` String\tDEFAULT\tIf(a = 0, 'true', 'false')\n" + "`c` String\tDEFAULT\tcAsT(a, 'String')\n"; + + constexpr auto columns_normalized = "columns format version: 1\n" + "3 columns:\n" + "`a` UInt32\n" + "`b` String\tDEFAULT\tif(a = 0, 'true', 'false')\n" + "`c` String\tDEFAULT\tcast(a, 'String')\n"; + + tryRegisterFunctions(); + + ASSERT_EQ(ColumnsDescription::parse(columns), ColumnsDescription::parse(columns_normalized)); +} diff --git a/tests/integration/test_replicated_merge_tree_compatibility/__init__.py b/tests/integration/test_replicated_merge_tree_compatibility/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/integration/test_replicated_merge_tree_compatibility/test.py b/tests/integration/test_replicated_merge_tree_compatibility/test.py new file mode 100644 index 000000000000..b56aa5706c92 --- /dev/null +++ b/tests/integration/test_replicated_merge_tree_compatibility/test.py @@ -0,0 +1,55 @@ +import pytest +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) +node1 = cluster.add_instance('node1', with_zookeeper=True, image='yandex/clickhouse-server', tag='20.12.4.5', stay_alive=True, with_installed_binary=True) +node2 = cluster.add_instance('node2', with_zookeeper=True, image='yandex/clickhouse-server', tag='20.12.4.5', stay_alive=True, with_installed_binary=True) + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + + yield cluster + + except Exception as ex: + print(ex) + + finally: + cluster.shutdown() + +def test_replicated_merge_tree_defaults_compatibility(started_cluster): + # 
This test checks, that result of parsing list of columns with defaults + # from 'CREATE/ATTACH' is compatible with parsing from zookeeper metadata on different versions. + # We create table and write 'columns' node in zookeeper with old version, than restart with new version + # drop and try recreate one replica. During startup of table structure is checked between 'CREATE' query and zookeeper. + + create_query = ''' + CREATE TABLE test.table + ( + a UInt32, + b String DEFAULT If(a = 0, 'true', 'false'), + c String DEFAULT Cast(a, 'String') + ) + ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/table', '{replica}') + ORDER BY a + ''' + + for node in (node1, node2): + node.query("CREATE DATABASE test ENGINE = Ordinary") + node.query(create_query.format(replica=node.name)) + + node1.query("DETACH TABLE test.table") + node2.query("SYSTEM DROP REPLICA 'node1' FROM TABLE test.table") + node1.exec_in_container(["bash", "-c", "rm /var/lib/clickhouse/metadata/test/table.sql"]) + node1.exec_in_container(["bash", "-c", "rm -r /var/lib/clickhouse/data/test/table"]) + + zk = cluster.get_kazoo_client('zoo1') + exists_replica_1 = zk.exists("/clickhouse/tables/test/table/replicas/node1") + assert exists_replica_1 == None + + node1.restart_with_latest_version() + node2.restart_with_latest_version() + + node1.query(create_query.format(replica=1)) + node1.query("EXISTS TABLE test.table") == "1\n" From 66f5d2c0b6eb7bd4fb2737248d23f14b40c9749c Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Sun, 26 Sep 2021 04:56:56 +0300 Subject: [PATCH 081/472] Backport #29348 to 21.9: Remove window function 'nth_value' --- src/Processors/Transforms/WindowTransform.cpp | 73 ------------------ .../01591_window_functions.reference | 74 +++++-------------- .../0_stateless/01591_window_functions.sql | 46 ++++-------- 3 files changed, 31 insertions(+), 162 deletions(-) diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp index 1b8406682ea8..dbfd2c68777a 100644 --- a/src/Processors/Transforms/WindowTransform.cpp +++ b/src/Processors/Transforms/WindowTransform.cpp @@ -1662,73 +1662,7 @@ struct WindowFunctionLagLeadInFrame final : public WindowFunction } }; -struct WindowFunctionNthValue final : public WindowFunction -{ - WindowFunctionNthValue(const std::string & name_, - const DataTypes & argument_types_, const Array & parameters_) - : WindowFunction(name_, argument_types_, parameters_) - { - if (!parameters.empty()) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Function {} cannot be parameterized", name_); - } - - if (argument_types.size() != 2) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Function '{}' accepts 2 arguments, {} given", - name_, argument_types.size()); - } - } - - DataTypePtr getReturnType() const override - { return argument_types[0]; } - - bool allocatesMemoryInArena() const override { return false; } - - void windowInsertResultInto(const WindowTransform * transform, - size_t function_index) override - { - const auto & current_block = transform->blockAt(transform->current_row); - IColumn & to = *(current_block.output_columns[function_index]); - const auto & workspace = transform->workspaces[function_index]; - - int64_t offset = (*current_block.input_columns[ - workspace.argument_column_indices[1]])[ - transform->current_row.row].get() - 1; - - if (offset < 0) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "The offset for function {} must be non-negative, {} given", - getName(), offset); - } - if (offset > INT_MAX) - { - throw 
Exception(ErrorCodes::BAD_ARGUMENTS, - "The offset for function {} must be less than {}, {} given", - getName(), INT_MAX, offset); - } - - const auto [target_row, offset_left] = transform->moveRowNumber(transform->frame_start, offset); - if (offset_left != 0 - || target_row < transform->frame_start - || transform->frame_end <= target_row) - { - // Offset is outside the frame. - to.insertDefault(); - } - else - { - // Offset is inside the frame. - to.insertFrom(*transform->blockAt(target_row).input_columns[ - workspace.argument_column_indices[0]], - target_row.row); - } - } -}; void registerWindowFunctions(AggregateFunctionFactory & factory) { @@ -1793,13 +1727,6 @@ void registerWindowFunctions(AggregateFunctionFactory & factory) return std::make_shared>( name, argument_types, parameters); }, properties}); - - factory.registerFunction("nth_value", {[](const std::string & name, - const DataTypes & argument_types, const Array & parameters, const Settings *) - { - return std::make_shared( - name, argument_types, parameters); - }, properties}); } } diff --git a/tests/queries/0_stateless/01591_window_functions.reference b/tests/queries/0_stateless/01591_window_functions.reference index 26e9e500c3c4..f54c10ee8b9e 100644 --- a/tests/queries/0_stateless/01591_window_functions.reference +++ b/tests/queries/0_stateless/01591_window_functions.reference @@ -1094,62 +1094,24 @@ order by number 7 6 8 8 7 9 9 8 9 --- nth_value without specific frame range given -select - number, - nth_value(number, 1) over w as firstValue, - nth_value(number, 2) over w as secondValue, - nth_value(number, 3) over w as thirdValue, - nth_value(number, 4) over w as fourthValue -from numbers(10) -window w as (order by number) -order by number -; -0 0 0 0 0 -1 0 1 0 0 -2 0 1 2 0 -3 0 1 2 3 -4 0 1 2 3 -5 0 1 2 3 -6 0 1 2 3 -7 0 1 2 3 -8 0 1 2 3 -9 0 1 2 3 --- nth_value with frame range specified -select - number, - nth_value(number, 1) over w as firstValue, - nth_value(number, 2) over w as secondValue, - nth_value(number, 3) over w as thirdValue, - nth_value(number, 4) over w as fourthValue -from numbers(10) -window w as (order by number range between 1 preceding and 1 following) -order by number -; -0 0 1 0 0 -1 0 1 2 0 -2 1 2 3 0 -3 2 3 4 0 -4 3 4 5 0 -5 4 5 6 0 -6 5 6 7 0 -7 6 7 8 0 -8 7 8 9 0 -9 8 9 0 0 --- to make nth_value return null for out-of-frame rows, cast the argument to --- Nullable; otherwise, it returns default values. -SELECT - number, - nth_value(toNullable(number), 1) OVER w as firstValue, - nth_value(toNullable(number), 3) OVER w as thridValue -FROM numbers(5) -WINDOW w AS (ORDER BY number ASC) -; -0 0 \N -1 0 \N -2 0 2 -3 0 2 -4 0 2 +-- lagInFrame UBsan +SELECT lagInFrame(1, -1) OVER (); -- { serverError BAD_ARGUMENTS } +SELECT lagInFrame(1, 0) OVER (); +1 +SELECT lagInFrame(1, /* INT64_MAX+1 */ 0x7fffffffffffffff+1) OVER (); -- { serverError BAD_ARGUMENTS } +SELECT lagInFrame(1, /* INT64_MAX */ 0x7fffffffffffffff) OVER (); +0 +SELECT lagInFrame(1, 1) OVER (); +0 +-- leadInFrame UBsan +SELECT leadInFrame(1, -1) OVER (); -- { serverError BAD_ARGUMENTS } +SELECT leadInFrame(1, 0) OVER (); +1 +SELECT leadInFrame(1, /* INT64_MAX+1 */ 0x7fffffffffffffff+1) OVER (); -- { serverError BAD_ARGUMENTS } +SELECT leadInFrame(1, /* INT64_MAX */ 0x7fffffffffffffff) OVER (); +0 +SELECT leadInFrame(1, 1) OVER (); +0 -- In this case, we had a problem with PartialSortingTransform returning zero-row -- chunks for input chunks w/o columns. 
select count() over () from numbers(4) where number < 2; diff --git a/tests/queries/0_stateless/01591_window_functions.sql b/tests/queries/0_stateless/01591_window_functions.sql index 3075c1ddb462..aa9bd9795e71 100644 --- a/tests/queries/0_stateless/01591_window_functions.sql +++ b/tests/queries/0_stateless/01591_window_functions.sql @@ -401,40 +401,20 @@ window w as (order by number range between 1 preceding and 1 following) order by number ; --- nth_value without specific frame range given -select - number, - nth_value(number, 1) over w as firstValue, - nth_value(number, 2) over w as secondValue, - nth_value(number, 3) over w as thirdValue, - nth_value(number, 4) over w as fourthValue -from numbers(10) -window w as (order by number) -order by number -; +-- lagInFrame UBsan +SELECT lagInFrame(1, -1) OVER (); -- { serverError BAD_ARGUMENTS } +SELECT lagInFrame(1, 0) OVER (); +SELECT lagInFrame(1, /* INT64_MAX+1 */ 0x7fffffffffffffff+1) OVER (); -- { serverError BAD_ARGUMENTS } +SELECT lagInFrame(1, /* INT64_MAX */ 0x7fffffffffffffff) OVER (); +SELECT lagInFrame(1, 1) OVER (); + +-- leadInFrame UBsan +SELECT leadInFrame(1, -1) OVER (); -- { serverError BAD_ARGUMENTS } +SELECT leadInFrame(1, 0) OVER (); +SELECT leadInFrame(1, /* INT64_MAX+1 */ 0x7fffffffffffffff+1) OVER (); -- { serverError BAD_ARGUMENTS } +SELECT leadInFrame(1, /* INT64_MAX */ 0x7fffffffffffffff) OVER (); +SELECT leadInFrame(1, 1) OVER (); --- nth_value with frame range specified -select - number, - nth_value(number, 1) over w as firstValue, - nth_value(number, 2) over w as secondValue, - nth_value(number, 3) over w as thirdValue, - nth_value(number, 4) over w as fourthValue -from numbers(10) -window w as (order by number range between 1 preceding and 1 following) -order by number -; - --- to make nth_value return null for out-of-frame rows, cast the argument to --- Nullable; otherwise, it returns default values. -SELECT - number, - nth_value(toNullable(number), 1) OVER w as firstValue, - nth_value(toNullable(number), 3) OVER w as thridValue -FROM numbers(5) -WINDOW w AS (ORDER BY number ASC) -; - -- In this case, we had a problem with PartialSortingTransform returning zero-row -- chunks for input chunks w/o columns. 
select count() over () from numbers(4) where number < 2; From 2771ca97e82262fcaa0bf3f55b08debbb8b226db Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Sun, 26 Sep 2021 20:25:55 +0300 Subject: [PATCH 082/472] Delete 01591_window_functions.sql --- .../0_stateless/01591_window_functions.sql | 476 ------------------ 1 file changed, 476 deletions(-) delete mode 100644 tests/queries/0_stateless/01591_window_functions.sql diff --git a/tests/queries/0_stateless/01591_window_functions.sql b/tests/queries/0_stateless/01591_window_functions.sql deleted file mode 100644 index aa9bd9795e71..000000000000 --- a/tests/queries/0_stateless/01591_window_functions.sql +++ /dev/null @@ -1,476 +0,0 @@ --- { echo } - --- just something basic -select number, count() over (partition by intDiv(number, 3) order by number rows unbounded preceding) from numbers(10); - --- proper calculation across blocks -select number, max(number) over (partition by intDiv(number, 3) order by number desc rows unbounded preceding) from numbers(10) settings max_block_size = 2; - --- not a window function -select number, abs(number) over (partition by toString(intDiv(number, 3)) rows unbounded preceding) from numbers(10); -- { serverError 63 } - --- no partition by -select number, avg(number) over (order by number rows unbounded preceding) from numbers(10); - --- no order by -select number, quantileExact(number) over (partition by intDiv(number, 3) rows unbounded preceding) from numbers(10); - --- can add an alias after window spec -select number, quantileExact(number) over (partition by intDiv(number, 3) rows unbounded preceding) q from numbers(10); - --- can't reference it yet -- the window functions are calculated at the --- last stage of select, after all other functions. -select q * 10, quantileExact(number) over (partition by intDiv(number, 3) rows unbounded preceding) q from numbers(10); -- { serverError 47 } - --- must work in WHERE if you wrap it in a subquery -select * from (select count(*) over (rows unbounded preceding) c from numbers(3)) where c > 0; - --- should work in ORDER BY -select number, max(number) over (partition by intDiv(number, 3) order by number desc rows unbounded preceding) m from numbers(10) order by m desc, number; - --- also works in ORDER BY if you wrap it in a subquery -select * from (select count(*) over (rows unbounded preceding) c from numbers(3)) order by c; - --- Example with window function only in ORDER BY. Here we make a rank of all --- numbers sorted descending, and then sort by this rank descending, and must get --- the ascending order. -select * from (select * from numbers(5) order by rand()) order by count() over (order by number desc rows unbounded preceding) desc; - --- Aggregate functions as window function arguments. This query is semantically --- the same as the above one, only we replace `number` with --- `any(number) group by number` and so on. 
-select * from (select * from numbers(5) order by rand()) group by number order by sum(any(number + 1)) over (order by min(number) desc rows unbounded preceding) desc; --- some more simple cases w/aggregate functions -select sum(any(number)) over (rows unbounded preceding) from numbers(1); -select sum(any(number) + 1) over (rows unbounded preceding) from numbers(1); -select sum(any(number + 1)) over (rows unbounded preceding) from numbers(1); - --- different windows --- an explain test would also be helpful, but it's too immature now and I don't --- want to change reference all the time -select number, max(number) over (partition by intDiv(number, 3) order by number desc rows unbounded preceding), count(number) over (partition by intDiv(number, 5) order by number rows unbounded preceding) as m from numbers(31) order by number settings max_block_size = 2; - --- two functions over the same window --- an explain test would also be helpful, but it's too immature now and I don't --- want to change reference all the time -select number, max(number) over (partition by intDiv(number, 3) order by number desc rows unbounded preceding), count(number) over (partition by intDiv(number, 3) order by number desc rows unbounded preceding) as m from numbers(7) order by number settings max_block_size = 2; - --- check that we can work with constant columns -select median(x) over (partition by x) from (select 1 x); - --- an empty window definition is valid as well -select groupArray(number) over (rows unbounded preceding) from numbers(3); -select groupArray(number) over () from numbers(3); - --- This one tests we properly process the window function arguments. --- Seen errors like 'column `1` not found' from count(1). -select count(1) over (rows unbounded preceding), max(number + 1) over () from numbers(3); - --- Should work in DISTINCT -select distinct sum(0) over (rows unbounded preceding) from numbers(2); -select distinct any(number) over (rows unbounded preceding) from numbers(2); - --- Various kinds of aliases are properly substituted into various parts of window --- function definition. -with number + 1 as x select intDiv(number, 3) as y, sum(x + y) over (partition by y order by x rows unbounded preceding) from numbers(7); - --- WINDOW clause -select 1 window w1 as (); - -select sum(number) over w1, sum(number) over w2 -from numbers(10) -window - w1 as (rows unbounded preceding), - w2 as (partition by intDiv(number, 3) rows unbounded preceding) -; - --- FIXME both functions should use the same window, but they don't. Add an --- EXPLAIN test for this. -select - sum(number) over w1, - sum(number) over (partition by intDiv(number, 3) rows unbounded preceding) -from numbers(10) -window - w1 as (partition by intDiv(number, 3) rows unbounded preceding) -; - --- RANGE frame --- It's the default -select sum(number) over () from numbers(3); - --- Try some mutually prime sizes of partition, group and block, for the number --- of rows that is their least common multiple + 1, so that we see all the --- interesting corner cases. 
-select number, intDiv(number, 3) p, mod(number, 2) o, count(number) over w as c -from numbers(31) -window w as (partition by p order by o range unbounded preceding) -order by number -settings max_block_size = 5 -; - -select number, intDiv(number, 5) p, mod(number, 3) o, count(number) over w as c -from numbers(31) -window w as (partition by p order by o range unbounded preceding) -order by number -settings max_block_size = 2 -; - -select number, intDiv(number, 5) p, mod(number, 2) o, count(number) over w as c -from numbers(31) -window w as (partition by p order by o range unbounded preceding) -order by number -settings max_block_size = 3 -; - -select number, intDiv(number, 3) p, mod(number, 5) o, count(number) over w as c -from numbers(31) -window w as (partition by p order by o range unbounded preceding) -order by number -settings max_block_size = 2 -; - -select number, intDiv(number, 2) p, mod(number, 5) o, count(number) over w as c -from numbers(31) -window w as (partition by p order by o range unbounded preceding) -order by number -settings max_block_size = 3 -; - -select number, intDiv(number, 2) p, mod(number, 3) o, count(number) over w as c -from numbers(31) -window w as (partition by p order by o range unbounded preceding) -order by number -settings max_block_size = 5 -; - --- A case where the partition end is in the current block, and the frame end --- is triggered by the partition end. -select min(number) over (partition by p) from (select number, intDiv(number, 3) p from numbers(10)); - --- UNBOUNDED FOLLOWING frame end -select - min(number) over wa, min(number) over wo, - max(number) over wa, max(number) over wo -from - (select number, intDiv(number, 3) p, mod(number, 5) o - from numbers(31)) -window - wa as (partition by p order by o - range between unbounded preceding and unbounded following), - wo as (partition by p order by o - rows between unbounded preceding and unbounded following) -settings max_block_size = 2; - --- ROWS offset frame start -select number, p, - count(*) over (partition by p order by number - rows between 1 preceding and unbounded following), - count(*) over (partition by p order by number - rows between current row and unbounded following), - count(*) over (partition by p order by number - rows between 1 following and unbounded following) -from (select number, intDiv(number, 5) p from numbers(31)) -order by p, number -settings max_block_size = 2; - --- ROWS offset frame start and end -select number, p, - count(*) over (partition by p order by number - rows between 2 preceding and 2 following) -from (select number, intDiv(number, 7) p from numbers(71)) -order by p, number -settings max_block_size = 2; - -SELECT count(*) OVER (ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) FROM numbers(4); - --- frame boundaries that runs into the partition end -select - count() over (partition by intDiv(number, 3) - rows between 100 following and unbounded following), - count() over (partition by intDiv(number, 3) - rows between current row and 100 following) -from numbers(10); - --- seen a use-after-free under MSan in this query once -SELECT number, max(number) OVER (PARTITION BY intDiv(number, 7) ORDER BY number ASC NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) FROM numbers(1024) SETTINGS max_block_size = 2 FORMAT Null; - --- a corner case -select count() over (); - --- RANGE CURRENT ROW frame start -select number, p, o, - count(*) over (partition by p order by o - range between current row and unbounded following) -from (select number, intDiv(number, 5) p, 
mod(number, 3) o - from numbers(31)) -order by p, o, number -settings max_block_size = 2; - -select - count(*) over (rows between current row and current row), - count(*) over (range between current row and current row) -from numbers(3); - --- RANGE OFFSET --- a basic RANGE OFFSET frame -select x, min(x) over w, max(x) over w, count(x) over w from ( - select toUInt8(number) x from numbers(11)) -window w as (order by x asc range between 1 preceding and 2 following) -order by x; - --- overflow conditions -select x, min(x) over w, max(x) over w, count(x) over w -from ( - select toUInt8(if(mod(number, 2), - toInt64(255 - intDiv(number, 2)), - toInt64(intDiv(number, 2)))) x - from numbers(10) -) -window w as (order by x range between 1 preceding and 2 following) -order by x; - -select x, min(x) over w, max(x) over w, count(x) over w -from ( - select toInt8(multiIf( - mod(number, 3) == 0, toInt64(intDiv(number, 3)), - mod(number, 3) == 1, toInt64(127 - intDiv(number, 3)), - toInt64(-128 + intDiv(number, 3)))) x - from numbers(15) -) -window w as (order by x range between 1 preceding and 2 following) -order by x; - --- We need large offsets to trigger overflow to positive direction, or --- else the frame end runs into partition end w/o overflow and doesn't move --- after that. The frame from this query is equivalent to the entire partition. -select x, min(x) over w, max(x) over w, count(x) over w -from ( - select toUInt8(if(mod(number, 2), - toInt64(255 - intDiv(number, 2)), - toInt64(intDiv(number, 2)))) x - from numbers(10) -) -window w as (order by x range between 255 preceding and 255 following) -order by x; - --- RANGE OFFSET ORDER BY DESC -select x, min(x) over w, max(x) over w, count(x) over w from ( - select toUInt8(number) x from numbers(11)) t -window w as (order by x desc range between 1 preceding and 2 following) -order by x -settings max_block_size = 1; - -select x, min(x) over w, max(x) over w, count(x) over w from ( - select toUInt8(number) x from numbers(11)) t -window w as (order by x desc range between 1 preceding and unbounded following) -order by x -settings max_block_size = 2; - -select x, min(x) over w, max(x) over w, count(x) over w from ( - select toUInt8(number) x from numbers(11)) t -window w as (order by x desc range between unbounded preceding and 2 following) -order by x -settings max_block_size = 3; - -select x, min(x) over w, max(x) over w, count(x) over w from ( - select toUInt8(number) x from numbers(11)) t -window w as (order by x desc range between unbounded preceding and 2 preceding) -order by x -settings max_block_size = 4; - - --- Check that we put windows in such an order that we can reuse the sort. --- First, check that at least the result is correct when we have many windows --- with different sort order. -select - number, - count(*) over (partition by p order by number), - count(*) over (partition by p order by number, o), - count(*) over (), - count(*) over (order by number), - count(*) over (order by o), - count(*) over (order by o, number), - count(*) over (order by number, o), - count(*) over (partition by p order by o, number), - count(*) over (partition by p), - count(*) over (partition by p order by o), - count(*) over (partition by p, o order by number) -from - (select number, intDiv(number, 3) p, mod(number, 5) o - from numbers(16)) t -order by number -; - --- The EXPLAIN for the above query would be difficult to understand, so check some --- simple cases instead. 
-explain select - count(*) over (partition by p), - count(*) over (), - count(*) over (partition by p order by o) -from - (select number, intDiv(number, 3) p, mod(number, 5) o - from numbers(16)) t -; - -explain select - count(*) over (order by o, number), - count(*) over (order by number) -from - (select number, intDiv(number, 3) p, mod(number, 5) o - from numbers(16)) t -; - --- A test case for the sort comparator found by fuzzer. -SELECT - max(number) OVER (ORDER BY number DESC NULLS FIRST), - max(number) OVER (ORDER BY number ASC NULLS FIRST) -FROM numbers(2) -; - --- optimize_read_in_order conflicts with sorting for window functions, check that --- it is disabled. -drop table if exists window_mt; -create table window_mt engine MergeTree order by number - as select number, mod(number, 3) p from numbers(100); - -select number, count(*) over (partition by p) - from window_mt order by number limit 10 settings optimize_read_in_order = 0; - -select number, count(*) over (partition by p) - from window_mt order by number limit 10 settings optimize_read_in_order = 1; - -drop table window_mt; - --- some true window functions -- rank and friends -select number, p, o, - count(*) over w, - rank() over w, - dense_rank() over w, - row_number() over w -from (select number, intDiv(number, 5) p, mod(number, 3) o - from numbers(31) order by o, number) t -window w as (partition by p order by o) -order by p, o, number -settings max_block_size = 2; - --- our replacement for lag/lead -select - anyOrNull(number) - over (order by number rows between 1 preceding and 1 preceding), - anyOrNull(number) - over (order by number rows between 1 following and 1 following) -from numbers(5); - --- variants of lag/lead that respect the frame -select number, p, pp, - lagInFrame(number) over w as lag1, - lagInFrame(number, number - pp) over w as lag2, - lagInFrame(number, number - pp, number * 11) over w as lag, - leadInFrame(number, number - pp, number * 11) over w as lead -from (select number, intDiv(number, 5) p, p * 5 pp from numbers(16)) -window w as (partition by p order by number - rows between unbounded preceding and unbounded following) -order by number -settings max_block_size = 3; -; - --- careful with auto-application of Null combinator -select lagInFrame(toNullable(1)) over (); -select lagInFrameOrNull(1) over (); -- { serverError 36 } --- this is the same as `select max(Null::Nullable(Nothing))` -select intDiv(1, NULL) x, toTypeName(x), max(x) over (); --- to make lagInFrame return null for out-of-frame rows, cast the argument to --- Nullable; otherwise, it returns default values. 
-SELECT - number, - lagInFrame(toNullable(number), 1) OVER w, - lagInFrame(toNullable(number), 2) OVER w, - lagInFrame(number, 1) OVER w, - lagInFrame(number, 2) OVER w -FROM numbers(4) -WINDOW w AS (ORDER BY number ASC) -; - --- case-insensitive SQL-standard synonyms for any and anyLast -select - number, - fIrSt_VaLue(number) over w, - lAsT_vAlUe(number) over w -from numbers(10) -window w as (order by number range between 1 preceding and 1 following) -order by number -; - --- lagInFrame UBsan -SELECT lagInFrame(1, -1) OVER (); -- { serverError BAD_ARGUMENTS } -SELECT lagInFrame(1, 0) OVER (); -SELECT lagInFrame(1, /* INT64_MAX+1 */ 0x7fffffffffffffff+1) OVER (); -- { serverError BAD_ARGUMENTS } -SELECT lagInFrame(1, /* INT64_MAX */ 0x7fffffffffffffff) OVER (); -SELECT lagInFrame(1, 1) OVER (); - --- leadInFrame UBsan -SELECT leadInFrame(1, -1) OVER (); -- { serverError BAD_ARGUMENTS } -SELECT leadInFrame(1, 0) OVER (); -SELECT leadInFrame(1, /* INT64_MAX+1 */ 0x7fffffffffffffff+1) OVER (); -- { serverError BAD_ARGUMENTS } -SELECT leadInFrame(1, /* INT64_MAX */ 0x7fffffffffffffff) OVER (); -SELECT leadInFrame(1, 1) OVER (); - --- In this case, we had a problem with PartialSortingTransform returning zero-row --- chunks for input chunks w/o columns. -select count() over () from numbers(4) where number < 2; - --- floating point RANGE frame -select - count(*) over (order by toFloat32(number) range 5. preceding), - count(*) over (order by toFloat64(number) range 5. preceding), - count(*) over (order by toFloat32(number) range between current row and 5. following), - count(*) over (order by toFloat64(number) range between current row and 5. following) -from numbers(7) -; - --- negative offsets should not be allowed -select count() over (order by toInt64(number) range between -1 preceding and unbounded following) from numbers(1); -- { serverError 36 } -select count() over (order by toInt64(number) range between -1 following and unbounded following) from numbers(1); -- { serverError 36 } -select count() over (order by toInt64(number) range between unbounded preceding and -1 preceding) from numbers(1); -- { serverError 36 } -select count() over (order by toInt64(number) range between unbounded preceding and -1 following) from numbers(1); -- { serverError 36 } - --- a test with aggregate function that allocates memory in arena -select sum(a[length(a)]) -from ( - select groupArray(number) over (partition by modulo(number, 11) - order by modulo(number, 1111), number) a - from numbers_mt(10000) -) settings max_block_size = 7; - --- -INT_MIN row offset that can lead to problems with negation, found when fuzzing --- under UBSan. Should be limited to at most INT_MAX. -select count() over (rows between 2147483648 preceding and 2147493648 following) from numbers(2); -- { serverError 36 } - --- Somehow in this case WindowTransform gets empty input chunks not marked as --- input end, and then two (!) empty input chunks marked as input end. Whatever. -select count() over () from (select 1 a) l inner join (select 2 a) r using a; --- This case works as expected, one empty input chunk marked as input end. -select count() over () where null; - --- Inheriting another window. 
-select number, count() over (w1 rows unbounded preceding) from numbers(10) -window - w0 as (partition by intDiv(number, 5) as p), - w1 as (w0 order by mod(number, 3) as o) -order by p, o, number -; - --- can't redefine PARTITION BY -select count() over (w partition by number) from numbers(1) window w as (partition by intDiv(number, 5)); -- { serverError 36 } - --- can't redefine existing ORDER BY -select count() over (w order by number) from numbers(1) window w as (partition by intDiv(number, 5) order by mod(number, 3)); -- { serverError 36 } - --- parent window can't have frame -select count() over (w range unbounded preceding) from numbers(1) window w as (partition by intDiv(number, 5) order by mod(number, 3) rows unbounded preceding); -- { serverError 36 } - --- looks weird but probably should work -- this is a window that inherits and changes nothing -select count() over (w) from numbers(1) window w as (); - --- nonexistent parent window -select count() over (w2 rows unbounded preceding); -- { serverError 36 } From 8fed0de11c92edfff32a7e1d9da8a64031f6b39e Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Sun, 26 Sep 2021 20:26:16 +0300 Subject: [PATCH 083/472] Delete 01591_window_functions.reference --- .../01591_window_functions.reference | 1183 ----------------- 1 file changed, 1183 deletions(-) delete mode 100644 tests/queries/0_stateless/01591_window_functions.reference diff --git a/tests/queries/0_stateless/01591_window_functions.reference b/tests/queries/0_stateless/01591_window_functions.reference deleted file mode 100644 index f54c10ee8b9e..000000000000 --- a/tests/queries/0_stateless/01591_window_functions.reference +++ /dev/null @@ -1,1183 +0,0 @@ --- { echo } - --- just something basic -select number, count() over (partition by intDiv(number, 3) order by number rows unbounded preceding) from numbers(10); -0 1 -1 2 -2 3 -3 1 -4 2 -5 3 -6 1 -7 2 -8 3 -9 1 --- proper calculation across blocks -select number, max(number) over (partition by intDiv(number, 3) order by number desc rows unbounded preceding) from numbers(10) settings max_block_size = 2; -2 2 -1 2 -0 2 -5 5 -4 5 -3 5 -8 8 -7 8 -6 8 -9 9 --- not a window function -select number, abs(number) over (partition by toString(intDiv(number, 3)) rows unbounded preceding) from numbers(10); -- { serverError 63 } --- no partition by -select number, avg(number) over (order by number rows unbounded preceding) from numbers(10); -0 0 -1 0.5 -2 1 -3 1.5 -4 2 -5 2.5 -6 3 -7 3.5 -8 4 -9 4.5 --- no order by -select number, quantileExact(number) over (partition by intDiv(number, 3) rows unbounded preceding) from numbers(10); -0 0 -1 1 -2 1 -3 3 -4 4 -5 4 -6 6 -7 7 -8 7 -9 9 --- can add an alias after window spec -select number, quantileExact(number) over (partition by intDiv(number, 3) rows unbounded preceding) q from numbers(10); -0 0 -1 1 -2 1 -3 3 -4 4 -5 4 -6 6 -7 7 -8 7 -9 9 --- can't reference it yet -- the window functions are calculated at the --- last stage of select, after all other functions. 
-select q * 10, quantileExact(number) over (partition by intDiv(number, 3) rows unbounded preceding) q from numbers(10); -- { serverError 47 } --- must work in WHERE if you wrap it in a subquery -select * from (select count(*) over (rows unbounded preceding) c from numbers(3)) where c > 0; -1 -2 -3 --- should work in ORDER BY -select number, max(number) over (partition by intDiv(number, 3) order by number desc rows unbounded preceding) m from numbers(10) order by m desc, number; -9 9 -6 8 -7 8 -8 8 -3 5 -4 5 -5 5 -0 2 -1 2 -2 2 --- also works in ORDER BY if you wrap it in a subquery -select * from (select count(*) over (rows unbounded preceding) c from numbers(3)) order by c; -1 -2 -3 --- Example with window function only in ORDER BY. Here we make a rank of all --- numbers sorted descending, and then sort by this rank descending, and must get --- the ascending order. -select * from (select * from numbers(5) order by rand()) order by count() over (order by number desc rows unbounded preceding) desc; -0 -1 -2 -3 -4 --- Aggregate functions as window function arguments. This query is semantically --- the same as the above one, only we replace `number` with --- `any(number) group by number` and so on. -select * from (select * from numbers(5) order by rand()) group by number order by sum(any(number + 1)) over (order by min(number) desc rows unbounded preceding) desc; -0 -1 -2 -3 -4 --- some more simple cases w/aggregate functions -select sum(any(number)) over (rows unbounded preceding) from numbers(1); -0 -select sum(any(number) + 1) over (rows unbounded preceding) from numbers(1); -1 -select sum(any(number + 1)) over (rows unbounded preceding) from numbers(1); -1 --- different windows --- an explain test would also be helpful, but it's too immature now and I don't --- want to change reference all the time -select number, max(number) over (partition by intDiv(number, 3) order by number desc rows unbounded preceding), count(number) over (partition by intDiv(number, 5) order by number rows unbounded preceding) as m from numbers(31) order by number settings max_block_size = 2; -0 2 1 -1 2 2 -2 2 3 -3 5 4 -4 5 5 -5 5 1 -6 8 2 -7 8 3 -8 8 4 -9 11 5 -10 11 1 -11 11 2 -12 14 3 -13 14 4 -14 14 5 -15 17 1 -16 17 2 -17 17 3 -18 20 4 -19 20 5 -20 20 1 -21 23 2 -22 23 3 -23 23 4 -24 26 5 -25 26 1 -26 26 2 -27 29 3 -28 29 4 -29 29 5 -30 30 1 --- two functions over the same window --- an explain test would also be helpful, but it's too immature now and I don't --- want to change reference all the time -select number, max(number) over (partition by intDiv(number, 3) order by number desc rows unbounded preceding), count(number) over (partition by intDiv(number, 3) order by number desc rows unbounded preceding) as m from numbers(7) order by number settings max_block_size = 2; -0 2 3 -1 2 2 -2 2 1 -3 5 3 -4 5 2 -5 5 1 -6 6 1 --- check that we can work with constant columns -select median(x) over (partition by x) from (select 1 x); -1 --- an empty window definition is valid as well -select groupArray(number) over (rows unbounded preceding) from numbers(3); -[0] -[0,1] -[0,1,2] -select groupArray(number) over () from numbers(3); -[0,1,2] -[0,1,2] -[0,1,2] --- This one tests we properly process the window function arguments. --- Seen errors like 'column `1` not found' from count(1). 
-select count(1) over (rows unbounded preceding), max(number + 1) over () from numbers(3); -1 3 --- Should work in DISTINCT -select distinct sum(0) over (rows unbounded preceding) from numbers(2); -0 -select distinct any(number) over (rows unbounded preceding) from numbers(2); -0 --- Various kinds of aliases are properly substituted into various parts of window --- function definition. -with number + 1 as x select intDiv(number, 3) as y, sum(x + y) over (partition by y order by x rows unbounded preceding) from numbers(7); -0 1 -0 3 -0 6 -1 5 -1 11 -1 18 -2 9 --- WINDOW clause -select 1 window w1 as (); -1 -select sum(number) over w1, sum(number) over w2 -from numbers(10) -window - w1 as (rows unbounded preceding), - w2 as (partition by intDiv(number, 3) rows unbounded preceding) -; -0 0 -1 1 -3 3 -6 3 -10 7 -15 12 -21 6 -28 13 -36 21 -45 9 --- FIXME both functions should use the same window, but they don't. Add an --- EXPLAIN test for this. -select - sum(number) over w1, - sum(number) over (partition by intDiv(number, 3) rows unbounded preceding) -from numbers(10) -window - w1 as (partition by intDiv(number, 3) rows unbounded preceding) -; -0 0 -1 1 -3 3 -3 3 -7 7 -12 12 -6 6 -13 13 -21 21 -9 9 --- RANGE frame --- It's the default -select sum(number) over () from numbers(3); -3 -3 -3 --- Try some mutually prime sizes of partition, group and block, for the number --- of rows that is their least common multiple + 1, so that we see all the --- interesting corner cases. -select number, intDiv(number, 3) p, mod(number, 2) o, count(number) over w as c -from numbers(31) -window w as (partition by p order by o range unbounded preceding) -order by number -settings max_block_size = 5 -; -0 0 0 2 -1 0 1 3 -2 0 0 2 -3 1 1 3 -4 1 0 1 -5 1 1 3 -6 2 0 2 -7 2 1 3 -8 2 0 2 -9 3 1 3 -10 3 0 1 -11 3 1 3 -12 4 0 2 -13 4 1 3 -14 4 0 2 -15 5 1 3 -16 5 0 1 -17 5 1 3 -18 6 0 2 -19 6 1 3 -20 6 0 2 -21 7 1 3 -22 7 0 1 -23 7 1 3 -24 8 0 2 -25 8 1 3 -26 8 0 2 -27 9 1 3 -28 9 0 1 -29 9 1 3 -30 10 0 1 -select number, intDiv(number, 5) p, mod(number, 3) o, count(number) over w as c -from numbers(31) -window w as (partition by p order by o range unbounded preceding) -order by number -settings max_block_size = 2 -; -0 0 0 2 -1 0 1 4 -2 0 2 5 -3 0 0 2 -4 0 1 4 -5 1 2 5 -6 1 0 2 -7 1 1 3 -8 1 2 5 -9 1 0 2 -10 2 1 3 -11 2 2 5 -12 2 0 1 -13 2 1 3 -14 2 2 5 -15 3 0 2 -16 3 1 4 -17 3 2 5 -18 3 0 2 -19 3 1 4 -20 4 2 5 -21 4 0 2 -22 4 1 3 -23 4 2 5 -24 4 0 2 -25 5 1 3 -26 5 2 5 -27 5 0 1 -28 5 1 3 -29 5 2 5 -30 6 0 1 -select number, intDiv(number, 5) p, mod(number, 2) o, count(number) over w as c -from numbers(31) -window w as (partition by p order by o range unbounded preceding) -order by number -settings max_block_size = 3 -; -0 0 0 3 -1 0 1 5 -2 0 0 3 -3 0 1 5 -4 0 0 3 -5 1 1 5 -6 1 0 2 -7 1 1 5 -8 1 0 2 -9 1 1 5 -10 2 0 3 -11 2 1 5 -12 2 0 3 -13 2 1 5 -14 2 0 3 -15 3 1 5 -16 3 0 2 -17 3 1 5 -18 3 0 2 -19 3 1 5 -20 4 0 3 -21 4 1 5 -22 4 0 3 -23 4 1 5 -24 4 0 3 -25 5 1 5 -26 5 0 2 -27 5 1 5 -28 5 0 2 -29 5 1 5 -30 6 0 1 -select number, intDiv(number, 3) p, mod(number, 5) o, count(number) over w as c -from numbers(31) -window w as (partition by p order by o range unbounded preceding) -order by number -settings max_block_size = 2 -; -0 0 0 1 -1 0 1 2 -2 0 2 3 -3 1 3 2 -4 1 4 3 -5 1 0 1 -6 2 1 1 -7 2 2 2 -8 2 3 3 -9 3 4 3 -10 3 0 1 -11 3 1 2 -12 4 2 1 -13 4 3 2 -14 4 4 3 -15 5 0 1 -16 5 1 2 -17 5 2 3 -18 6 3 2 -19 6 4 3 -20 6 0 1 -21 7 1 1 -22 7 2 2 -23 7 3 3 -24 8 4 3 -25 8 0 1 -26 8 1 2 -27 9 2 1 -28 9 3 2 -29 9 4 3 -30 10 0 1 
-select number, intDiv(number, 2) p, mod(number, 5) o, count(number) over w as c -from numbers(31) -window w as (partition by p order by o range unbounded preceding) -order by number -settings max_block_size = 3 -; -0 0 0 1 -1 0 1 2 -2 1 2 1 -3 1 3 2 -4 2 4 2 -5 2 0 1 -6 3 1 1 -7 3 2 2 -8 4 3 1 -9 4 4 2 -10 5 0 1 -11 5 1 2 -12 6 2 1 -13 6 3 2 -14 7 4 2 -15 7 0 1 -16 8 1 1 -17 8 2 2 -18 9 3 1 -19 9 4 2 -20 10 0 1 -21 10 1 2 -22 11 2 1 -23 11 3 2 -24 12 4 2 -25 12 0 1 -26 13 1 1 -27 13 2 2 -28 14 3 1 -29 14 4 2 -30 15 0 1 -select number, intDiv(number, 2) p, mod(number, 3) o, count(number) over w as c -from numbers(31) -window w as (partition by p order by o range unbounded preceding) -order by number -settings max_block_size = 5 -; -0 0 0 1 -1 0 1 2 -2 1 2 2 -3 1 0 1 -4 2 1 1 -5 2 2 2 -6 3 0 1 -7 3 1 2 -8 4 2 2 -9 4 0 1 -10 5 1 1 -11 5 2 2 -12 6 0 1 -13 6 1 2 -14 7 2 2 -15 7 0 1 -16 8 1 1 -17 8 2 2 -18 9 0 1 -19 9 1 2 -20 10 2 2 -21 10 0 1 -22 11 1 1 -23 11 2 2 -24 12 0 1 -25 12 1 2 -26 13 2 2 -27 13 0 1 -28 14 1 1 -29 14 2 2 -30 15 0 1 --- A case where the partition end is in the current block, and the frame end --- is triggered by the partition end. -select min(number) over (partition by p) from (select number, intDiv(number, 3) p from numbers(10)); -0 -0 -0 -3 -3 -3 -6 -6 -6 -9 --- UNBOUNDED FOLLOWING frame end -select - min(number) over wa, min(number) over wo, - max(number) over wa, max(number) over wo -from - (select number, intDiv(number, 3) p, mod(number, 5) o - from numbers(31)) -window - wa as (partition by p order by o - range between unbounded preceding and unbounded following), - wo as (partition by p order by o - rows between unbounded preceding and unbounded following) -settings max_block_size = 2; -0 0 2 2 -0 0 2 2 -0 0 2 2 -3 3 5 5 -3 3 5 5 -3 3 5 5 -6 6 8 8 -6 6 8 8 -6 6 8 8 -9 9 11 11 -9 9 11 11 -9 9 11 11 -12 12 14 14 -12 12 14 14 -12 12 14 14 -15 15 17 17 -15 15 17 17 -15 15 17 17 -18 18 20 20 -18 18 20 20 -18 18 20 20 -21 21 23 23 -21 21 23 23 -21 21 23 23 -24 24 26 26 -24 24 26 26 -24 24 26 26 -27 27 29 29 -27 27 29 29 -27 27 29 29 -30 30 30 30 --- ROWS offset frame start -select number, p, - count(*) over (partition by p order by number - rows between 1 preceding and unbounded following), - count(*) over (partition by p order by number - rows between current row and unbounded following), - count(*) over (partition by p order by number - rows between 1 following and unbounded following) -from (select number, intDiv(number, 5) p from numbers(31)) -order by p, number -settings max_block_size = 2; -0 0 5 5 4 -1 0 5 4 3 -2 0 4 3 2 -3 0 3 2 1 -4 0 2 1 0 -5 1 5 5 4 -6 1 5 4 3 -7 1 4 3 2 -8 1 3 2 1 -9 1 2 1 0 -10 2 5 5 4 -11 2 5 4 3 -12 2 4 3 2 -13 2 3 2 1 -14 2 2 1 0 -15 3 5 5 4 -16 3 5 4 3 -17 3 4 3 2 -18 3 3 2 1 -19 3 2 1 0 -20 4 5 5 4 -21 4 5 4 3 -22 4 4 3 2 -23 4 3 2 1 -24 4 2 1 0 -25 5 5 5 4 -26 5 5 4 3 -27 5 4 3 2 -28 5 3 2 1 -29 5 2 1 0 -30 6 1 1 0 --- ROWS offset frame start and end -select number, p, - count(*) over (partition by p order by number - rows between 2 preceding and 2 following) -from (select number, intDiv(number, 7) p from numbers(71)) -order by p, number -settings max_block_size = 2; -0 0 3 -1 0 4 -2 0 5 -3 0 5 -4 0 5 -5 0 4 -6 0 3 -7 1 3 -8 1 4 -9 1 5 -10 1 5 -11 1 5 -12 1 4 -13 1 3 -14 2 3 -15 2 4 -16 2 5 -17 2 5 -18 2 5 -19 2 4 -20 2 3 -21 3 3 -22 3 4 -23 3 5 -24 3 5 -25 3 5 -26 3 4 -27 3 3 -28 4 3 -29 4 4 -30 4 5 -31 4 5 -32 4 5 -33 4 4 -34 4 3 -35 5 3 -36 5 4 -37 5 5 -38 5 5 -39 5 5 -40 5 4 -41 5 3 -42 6 3 -43 6 4 -44 6 5 -45 6 5 -46 6 5 -47 6 4 
-48 6 3 -49 7 3 -50 7 4 -51 7 5 -52 7 5 -53 7 5 -54 7 4 -55 7 3 -56 8 3 -57 8 4 -58 8 5 -59 8 5 -60 8 5 -61 8 4 -62 8 3 -63 9 3 -64 9 4 -65 9 5 -66 9 5 -67 9 5 -68 9 4 -69 9 3 -70 10 1 -SELECT count(*) OVER (ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) FROM numbers(4); -1 -2 -3 -3 --- frame boundaries that runs into the partition end -select - count() over (partition by intDiv(number, 3) - rows between 100 following and unbounded following), - count() over (partition by intDiv(number, 3) - rows between current row and 100 following) -from numbers(10); -0 3 -0 2 -0 1 -0 3 -0 2 -0 1 -0 3 -0 2 -0 1 -0 1 --- seen a use-after-free under MSan in this query once -SELECT number, max(number) OVER (PARTITION BY intDiv(number, 7) ORDER BY number ASC NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) FROM numbers(1024) SETTINGS max_block_size = 2 FORMAT Null; --- a corner case -select count() over (); -1 --- RANGE CURRENT ROW frame start -select number, p, o, - count(*) over (partition by p order by o - range between current row and unbounded following) -from (select number, intDiv(number, 5) p, mod(number, 3) o - from numbers(31)) -order by p, o, number -settings max_block_size = 2; -0 0 0 5 -3 0 0 5 -1 0 1 3 -4 0 1 3 -2 0 2 1 -6 1 0 5 -9 1 0 5 -7 1 1 3 -5 1 2 2 -8 1 2 2 -12 2 0 5 -10 2 1 4 -13 2 1 4 -11 2 2 2 -14 2 2 2 -15 3 0 5 -18 3 0 5 -16 3 1 3 -19 3 1 3 -17 3 2 1 -21 4 0 5 -24 4 0 5 -22 4 1 3 -20 4 2 2 -23 4 2 2 -27 5 0 5 -25 5 1 4 -28 5 1 4 -26 5 2 2 -29 5 2 2 -30 6 0 1 -select - count(*) over (rows between current row and current row), - count(*) over (range between current row and current row) -from numbers(3); -1 3 -1 3 -1 3 --- RANGE OFFSET --- a basic RANGE OFFSET frame -select x, min(x) over w, max(x) over w, count(x) over w from ( - select toUInt8(number) x from numbers(11)) -window w as (order by x asc range between 1 preceding and 2 following) -order by x; -0 0 2 3 -1 0 3 4 -2 1 4 4 -3 2 5 4 -4 3 6 4 -5 4 7 4 -6 5 8 4 -7 6 9 4 -8 7 10 4 -9 8 10 3 -10 9 10 2 --- overflow conditions -select x, min(x) over w, max(x) over w, count(x) over w -from ( - select toUInt8(if(mod(number, 2), - toInt64(255 - intDiv(number, 2)), - toInt64(intDiv(number, 2)))) x - from numbers(10) -) -window w as (order by x range between 1 preceding and 2 following) -order by x; -0 0 2 3 -1 0 3 4 -2 1 4 4 -3 2 4 3 -4 3 4 2 -251 251 253 3 -252 251 254 4 -253 252 255 4 -254 253 255 3 -255 254 255 2 -select x, min(x) over w, max(x) over w, count(x) over w -from ( - select toInt8(multiIf( - mod(number, 3) == 0, toInt64(intDiv(number, 3)), - mod(number, 3) == 1, toInt64(127 - intDiv(number, 3)), - toInt64(-128 + intDiv(number, 3)))) x - from numbers(15) -) -window w as (order by x range between 1 preceding and 2 following) -order by x; --128 -128 -126 3 --127 -128 -125 4 --126 -127 -124 4 --125 -126 -124 3 --124 -125 -124 2 -0 0 2 3 -1 0 3 4 -2 1 4 4 -3 2 4 3 -4 3 4 2 -123 123 125 3 -124 123 126 4 -125 124 127 4 -126 125 127 3 -127 126 127 2 --- We need large offsets to trigger overflow to positive direction, or --- else the frame end runs into partition end w/o overflow and doesn't move --- after that. The frame from this query is equivalent to the entire partition. 
-select x, min(x) over w, max(x) over w, count(x) over w -from ( - select toUInt8(if(mod(number, 2), - toInt64(255 - intDiv(number, 2)), - toInt64(intDiv(number, 2)))) x - from numbers(10) -) -window w as (order by x range between 255 preceding and 255 following) -order by x; -0 0 255 10 -1 0 255 10 -2 0 255 10 -3 0 255 10 -4 0 255 10 -251 0 255 10 -252 0 255 10 -253 0 255 10 -254 0 255 10 -255 0 255 10 --- RANGE OFFSET ORDER BY DESC -select x, min(x) over w, max(x) over w, count(x) over w from ( - select toUInt8(number) x from numbers(11)) t -window w as (order by x desc range between 1 preceding and 2 following) -order by x -settings max_block_size = 1; -0 0 1 2 -1 0 2 3 -2 0 3 4 -3 1 4 4 -4 2 5 4 -5 3 6 4 -6 4 7 4 -7 5 8 4 -8 6 9 4 -9 7 10 4 -10 8 10 3 -select x, min(x) over w, max(x) over w, count(x) over w from ( - select toUInt8(number) x from numbers(11)) t -window w as (order by x desc range between 1 preceding and unbounded following) -order by x -settings max_block_size = 2; -0 0 1 2 -1 0 2 3 -2 0 3 4 -3 0 4 5 -4 0 5 6 -5 0 6 7 -6 0 7 8 -7 0 8 9 -8 0 9 10 -9 0 10 11 -10 0 10 11 -select x, min(x) over w, max(x) over w, count(x) over w from ( - select toUInt8(number) x from numbers(11)) t -window w as (order by x desc range between unbounded preceding and 2 following) -order by x -settings max_block_size = 3; -0 0 10 11 -1 0 10 11 -2 0 10 11 -3 1 10 10 -4 2 10 9 -5 3 10 8 -6 4 10 7 -7 5 10 6 -8 6 10 5 -9 7 10 4 -10 8 10 3 -select x, min(x) over w, max(x) over w, count(x) over w from ( - select toUInt8(number) x from numbers(11)) t -window w as (order by x desc range between unbounded preceding and 2 preceding) -order by x -settings max_block_size = 4; -0 2 10 9 -1 3 10 8 -2 4 10 7 -3 5 10 6 -4 6 10 5 -5 7 10 4 -6 8 10 3 -7 9 10 2 -8 10 10 1 -9 0 0 0 -10 0 0 0 --- Check that we put windows in such an order that we can reuse the sort. --- First, check that at least the result is correct when we have many windows --- with different sort order. -select - number, - count(*) over (partition by p order by number), - count(*) over (partition by p order by number, o), - count(*) over (), - count(*) over (order by number), - count(*) over (order by o), - count(*) over (order by o, number), - count(*) over (order by number, o), - count(*) over (partition by p order by o, number), - count(*) over (partition by p), - count(*) over (partition by p order by o), - count(*) over (partition by p, o order by number) -from - (select number, intDiv(number, 3) p, mod(number, 5) o - from numbers(16)) t -order by number -; -0 1 1 16 1 4 1 1 1 3 1 1 -1 2 2 16 2 7 5 2 2 3 2 1 -2 3 3 16 3 10 8 3 3 3 3 1 -3 1 1 16 4 13 11 4 2 3 2 1 -4 2 2 16 5 16 14 5 3 3 3 1 -5 3 3 16 6 4 2 6 1 3 1 1 -6 1 1 16 7 7 6 7 1 3 1 1 -7 2 2 16 8 10 9 8 2 3 2 1 -8 3 3 16 9 13 12 9 3 3 3 1 -9 1 1 16 10 16 15 10 3 3 3 1 -10 2 2 16 11 4 3 11 1 3 1 1 -11 3 3 16 12 7 7 12 2 3 2 1 -12 1 1 16 13 10 10 13 1 3 1 1 -13 2 2 16 14 13 13 14 2 3 2 1 -14 3 3 16 15 16 16 15 3 3 3 1 -15 1 1 16 16 4 4 16 1 1 1 1 --- The EXPLAIN for the above query would be difficult to understand, so check some --- simple cases instead. 
-explain select - count(*) over (partition by p), - count(*) over (), - count(*) over (partition by p order by o) -from - (select number, intDiv(number, 3) p, mod(number, 5) o - from numbers(16)) t -; -Expression ((Projection + Before ORDER BY)) - Window (Window step for window \'\') - Window (Window step for window \'PARTITION BY p\') - Window (Window step for window \'PARTITION BY p ORDER BY o ASC\') - MergingSorted (Merge sorted streams for window \'PARTITION BY p ORDER BY o ASC\') - MergeSorting (Merge sorted blocks for window \'PARTITION BY p ORDER BY o ASC\') - PartialSorting (Sort each block for window \'PARTITION BY p ORDER BY o ASC\') - Expression ((Before window functions + (Projection + Before ORDER BY))) - SettingQuotaAndLimits (Set limits and quota after reading from storage) - ReadFromStorage (SystemNumbers) -explain select - count(*) over (order by o, number), - count(*) over (order by number) -from - (select number, intDiv(number, 3) p, mod(number, 5) o - from numbers(16)) t -; -Expression ((Projection + Before ORDER BY)) - Window (Window step for window \'ORDER BY o ASC, number ASC\') - MergingSorted (Merge sorted streams for window \'ORDER BY o ASC, number ASC\') - MergeSorting (Merge sorted blocks for window \'ORDER BY o ASC, number ASC\') - PartialSorting (Sort each block for window \'ORDER BY o ASC, number ASC\') - Window (Window step for window \'ORDER BY number ASC\') - MergingSorted (Merge sorted streams for window \'ORDER BY number ASC\') - MergeSorting (Merge sorted blocks for window \'ORDER BY number ASC\') - PartialSorting (Sort each block for window \'ORDER BY number ASC\') - Expression ((Before window functions + (Projection + Before ORDER BY))) - SettingQuotaAndLimits (Set limits and quota after reading from storage) - ReadFromStorage (SystemNumbers) --- A test case for the sort comparator found by fuzzer. -SELECT - max(number) OVER (ORDER BY number DESC NULLS FIRST), - max(number) OVER (ORDER BY number ASC NULLS FIRST) -FROM numbers(2) -; -1 0 -1 1 --- optimize_read_in_order conflicts with sorting for window functions, check that --- it is disabled. 
-drop table if exists window_mt; -create table window_mt engine MergeTree order by number - as select number, mod(number, 3) p from numbers(100); -select number, count(*) over (partition by p) - from window_mt order by number limit 10 settings optimize_read_in_order = 0; -0 34 -1 33 -2 33 -3 34 -4 33 -5 33 -6 34 -7 33 -8 33 -9 34 -select number, count(*) over (partition by p) - from window_mt order by number limit 10 settings optimize_read_in_order = 1; -0 34 -1 33 -2 33 -3 34 -4 33 -5 33 -6 34 -7 33 -8 33 -9 34 -drop table window_mt; --- some true window functions -- rank and friends -select number, p, o, - count(*) over w, - rank() over w, - dense_rank() over w, - row_number() over w -from (select number, intDiv(number, 5) p, mod(number, 3) o - from numbers(31) order by o, number) t -window w as (partition by p order by o) -order by p, o, number -settings max_block_size = 2; -0 0 0 2 1 1 1 -3 0 0 2 1 1 2 -1 0 1 4 3 2 3 -4 0 1 4 3 2 4 -2 0 2 5 5 3 5 -6 1 0 2 1 1 1 -9 1 0 2 1 1 2 -7 1 1 3 3 2 3 -5 1 2 5 4 3 4 -8 1 2 5 4 3 5 -12 2 0 1 1 1 1 -10 2 1 3 2 2 2 -13 2 1 3 2 2 3 -11 2 2 5 4 3 4 -14 2 2 5 4 3 5 -15 3 0 2 1 1 2 -18 3 0 2 1 1 1 -16 3 1 4 3 2 3 -19 3 1 4 3 2 4 -17 3 2 5 5 3 5 -21 4 0 2 1 1 1 -24 4 0 2 1 1 2 -22 4 1 3 3 2 3 -20 4 2 5 4 3 5 -23 4 2 5 4 3 4 -27 5 0 1 1 1 1 -25 5 1 3 2 2 2 -28 5 1 3 2 2 3 -26 5 2 5 4 3 4 -29 5 2 5 4 3 5 -30 6 0 1 1 1 1 --- our replacement for lag/lead -select - anyOrNull(number) - over (order by number rows between 1 preceding and 1 preceding), - anyOrNull(number) - over (order by number rows between 1 following and 1 following) -from numbers(5); -\N 1 -0 2 -1 3 -2 4 -3 \N --- variants of lag/lead that respect the frame -select number, p, pp, - lagInFrame(number) over w as lag1, - lagInFrame(number, number - pp) over w as lag2, - lagInFrame(number, number - pp, number * 11) over w as lag, - leadInFrame(number, number - pp, number * 11) over w as lead -from (select number, intDiv(number, 5) p, p * 5 pp from numbers(16)) -window w as (partition by p order by number - rows between unbounded preceding and unbounded following) -order by number -settings max_block_size = 3; -; -0 0 0 0 0 0 0 -1 0 0 0 0 0 2 -2 0 0 1 0 0 4 -3 0 0 2 0 0 33 -4 0 0 3 0 0 44 -5 1 5 0 5 5 5 -6 1 5 5 5 5 7 -7 1 5 6 5 5 9 -8 1 5 7 5 5 88 -9 1 5 8 5 5 99 -10 2 10 0 10 10 10 -11 2 10 10 10 10 12 -12 2 10 11 10 10 14 -13 2 10 12 10 10 143 -14 2 10 13 10 10 154 -15 3 15 0 15 15 15 --- careful with auto-application of Null combinator -select lagInFrame(toNullable(1)) over (); -\N -select lagInFrameOrNull(1) over (); -- { serverError 36 } --- this is the same as `select max(Null::Nullable(Nothing))` -select intDiv(1, NULL) x, toTypeName(x), max(x) over (); -\N Nullable(Nothing) \N --- to make lagInFrame return null for out-of-frame rows, cast the argument to --- Nullable; otherwise, it returns default values. 
-SELECT - number, - lagInFrame(toNullable(number), 1) OVER w, - lagInFrame(toNullable(number), 2) OVER w, - lagInFrame(number, 1) OVER w, - lagInFrame(number, 2) OVER w -FROM numbers(4) -WINDOW w AS (ORDER BY number ASC) -; -0 \N \N 0 0 -1 0 \N 0 0 -2 1 0 1 0 -3 2 1 2 1 --- case-insensitive SQL-standard synonyms for any and anyLast -select - number, - fIrSt_VaLue(number) over w, - lAsT_vAlUe(number) over w -from numbers(10) -window w as (order by number range between 1 preceding and 1 following) -order by number -; -0 0 1 -1 0 2 -2 1 3 -3 2 4 -4 3 5 -5 4 6 -6 5 7 -7 6 8 -8 7 9 -9 8 9 --- lagInFrame UBsan -SELECT lagInFrame(1, -1) OVER (); -- { serverError BAD_ARGUMENTS } -SELECT lagInFrame(1, 0) OVER (); -1 -SELECT lagInFrame(1, /* INT64_MAX+1 */ 0x7fffffffffffffff+1) OVER (); -- { serverError BAD_ARGUMENTS } -SELECT lagInFrame(1, /* INT64_MAX */ 0x7fffffffffffffff) OVER (); -0 -SELECT lagInFrame(1, 1) OVER (); -0 --- leadInFrame UBsan -SELECT leadInFrame(1, -1) OVER (); -- { serverError BAD_ARGUMENTS } -SELECT leadInFrame(1, 0) OVER (); -1 -SELECT leadInFrame(1, /* INT64_MAX+1 */ 0x7fffffffffffffff+1) OVER (); -- { serverError BAD_ARGUMENTS } -SELECT leadInFrame(1, /* INT64_MAX */ 0x7fffffffffffffff) OVER (); -0 -SELECT leadInFrame(1, 1) OVER (); -0 --- In this case, we had a problem with PartialSortingTransform returning zero-row --- chunks for input chunks w/o columns. -select count() over () from numbers(4) where number < 2; -2 -2 --- floating point RANGE frame -select - count(*) over (order by toFloat32(number) range 5. preceding), - count(*) over (order by toFloat64(number) range 5. preceding), - count(*) over (order by toFloat32(number) range between current row and 5. following), - count(*) over (order by toFloat64(number) range between current row and 5. following) -from numbers(7) -; -1 1 6 6 -2 2 6 6 -3 3 5 5 -4 4 4 4 -5 5 3 3 -6 6 2 2 -6 6 1 1 --- negative offsets should not be allowed -select count() over (order by toInt64(number) range between -1 preceding and unbounded following) from numbers(1); -- { serverError 36 } -select count() over (order by toInt64(number) range between -1 following and unbounded following) from numbers(1); -- { serverError 36 } -select count() over (order by toInt64(number) range between unbounded preceding and -1 preceding) from numbers(1); -- { serverError 36 } -select count() over (order by toInt64(number) range between unbounded preceding and -1 following) from numbers(1); -- { serverError 36 } --- a test with aggregate function that allocates memory in arena -select sum(a[length(a)]) -from ( - select groupArray(number) over (partition by modulo(number, 11) - order by modulo(number, 1111), number) a - from numbers_mt(10000) -) settings max_block_size = 7; -49995000 --- -INT_MIN row offset that can lead to problems with negation, found when fuzzing --- under UBSan. Should be limited to at most INT_MAX. -select count() over (rows between 2147483648 preceding and 2147493648 following) from numbers(2); -- { serverError 36 } --- Somehow in this case WindowTransform gets empty input chunks not marked as --- input end, and then two (!) empty input chunks marked as input end. Whatever. -select count() over () from (select 1 a) l inner join (select 2 a) r using a; --- This case works as expected, one empty input chunk marked as input end. -select count() over () where null; --- Inheriting another window. 
-select number, count() over (w1 rows unbounded preceding) from numbers(10) -window - w0 as (partition by intDiv(number, 5) as p), - w1 as (w0 order by mod(number, 3) as o) -order by p, o, number -; -0 1 -3 2 -1 3 -4 4 -2 5 -6 1 -9 2 -7 3 -5 4 -8 5 --- can't redefine PARTITION BY -select count() over (w partition by number) from numbers(1) window w as (partition by intDiv(number, 5)); -- { serverError 36 } --- can't redefine existing ORDER BY -select count() over (w order by number) from numbers(1) window w as (partition by intDiv(number, 5) order by mod(number, 3)); -- { serverError 36 } --- parent window can't have frame -select count() over (w range unbounded preceding) from numbers(1) window w as (partition by intDiv(number, 5) order by mod(number, 3) rows unbounded preceding); -- { serverError 36 } --- looks weird but probably should work -- this is a window that inherits and changes nothing -select count() over (w) from numbers(1) window w as (); -1 --- nonexistent parent window -select count() over (w2 rows unbounded preceding); -- { serverError 36 } From d4fd7aaef94c8dbaf283bd3f58ff64bf8f969f11 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Mon, 27 Sep 2021 07:01:38 +0300 Subject: [PATCH 084/472] Backport #29229 to 21.9: Send UNKNOWN_DATABASE to the client (via TCP) --- src/Server/TCPHandler.cpp | 29 +++++++++---------- .../02030_client_unknown_database.reference | 0 .../02030_client_unknown_database.sh | 9 ++++++ 3 files changed, 23 insertions(+), 15 deletions(-) create mode 100644 tests/queries/0_stateless/02030_client_unknown_database.reference create mode 100755 tests/queries/0_stateless/02030_client_unknown_database.sh diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index b2db65e22bcf..812c081a6462 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -136,6 +136,20 @@ void TCPHandler::runImpl() try { receiveHello(); + sendHello(); + + if (!is_interserver_mode) /// In interserver mode queries are executed without a session context. + { + session->makeSessionContext(); + + /// If session created, then settings in session context has been updated. + /// So it's better to update the connection settings for flexibility. + extractConnectionSettingsFromContext(session->sessionContext()); + + /// When connecting, the default database could be specified. + if (!default_database.empty()) + session->sessionContext()->setCurrentDatabase(default_database); + } } catch (const Exception & e) /// Typical for an incorrect username, password, or address. { @@ -161,21 +175,6 @@ void TCPHandler::runImpl() throw; } - sendHello(); - - if (!is_interserver_mode) /// In interserver mode queries are executed without a session context. - { - session->makeSessionContext(); - - /// If session created, then settings in session context has been updated. - /// So it's better to update the connection settings for flexibility. - extractConnectionSettingsFromContext(session->sessionContext()); - - /// When connecting, the default database could be specified. - if (!default_database.empty()) - session->sessionContext()->setCurrentDatabase(default_database); - } - while (true) { /// We are waiting for a packet from the client. Thus, every `poll_interval` seconds check whether we need to shut down. 
diff --git a/tests/queries/0_stateless/02030_client_unknown_database.reference b/tests/queries/0_stateless/02030_client_unknown_database.reference new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/queries/0_stateless/02030_client_unknown_database.sh b/tests/queries/0_stateless/02030_client_unknown_database.sh new file mode 100755 index 000000000000..28bd4895a23f --- /dev/null +++ b/tests/queries/0_stateless/02030_client_unknown_database.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash + +CLICKHOUSE_DATABASE=no_such_database_could_exist + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -q "SELECT 1" |& grep -q UNKNOWN_DATABASE From 8509140629e6a26f9e0f99270dd896a7935df973 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Mon, 27 Sep 2021 19:09:54 +0300 Subject: [PATCH 085/472] Backport #29388 to 21.9: Fix ReplicatedAccessStorage shutdown without startup --- src/Access/ReplicatedAccessStorage.cpp | 9 ++-- .../tests/gtest_replicated_access_storage.cpp | 46 +++++++++++++++++++ 2 files changed, 52 insertions(+), 3 deletions(-) create mode 100644 src/Access/tests/gtest_replicated_access_storage.cpp diff --git a/src/Access/ReplicatedAccessStorage.cpp b/src/Access/ReplicatedAccessStorage.cpp index f91b7c8fb06e..7c71e264cd0b 100644 --- a/src/Access/ReplicatedAccessStorage.cpp +++ b/src/Access/ReplicatedAccessStorage.cpp @@ -63,9 +63,12 @@ void ReplicatedAccessStorage::shutdown() bool prev_stop_flag = stop_flag.exchange(true); if (!prev_stop_flag) { - /// Notify the worker thread to stop waiting for new queue items - refresh_queue.push(UUIDHelpers::Nil); - worker_thread.join(); + if (worker_thread.joinable()) + { + /// Notify the worker thread to stop waiting for new queue items + refresh_queue.push(UUIDHelpers::Nil); + worker_thread.join(); + } } } diff --git a/src/Access/tests/gtest_replicated_access_storage.cpp b/src/Access/tests/gtest_replicated_access_storage.cpp new file mode 100644 index 000000000000..f2052e91749d --- /dev/null +++ b/src/Access/tests/gtest_replicated_access_storage.cpp @@ -0,0 +1,46 @@ +#include +#include + +using namespace DB; + +namespace DB +{ +namespace ErrorCodes +{ + extern const int NO_ZOOKEEPER; +} +} + + +TEST(ReplicatedAccessStorage, ShutdownWithoutStartup) +{ + auto get_zk = []() + { + return std::shared_ptr(); + }; + + auto storage = ReplicatedAccessStorage("replicated", "/clickhouse/access", get_zk); + storage.shutdown(); +} + + +TEST(ReplicatedAccessStorage, ShutdownWithFailedStartup) +{ + auto get_zk = []() + { + return std::shared_ptr(); + }; + + auto storage = ReplicatedAccessStorage("replicated", "/clickhouse/access", get_zk); + try + { + storage.startup(); + } + catch (Exception & e) + { + if (e.code() != ErrorCodes::NO_ZOOKEEPER) + throw; + } + storage.shutdown(); +} + From 48502d375a6b02183324d992123eacc7d10acd1f Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Mon, 27 Sep 2021 19:11:25 +0300 Subject: [PATCH 086/472] Backport #28817 to 21.9: Fix rare replicas diverge --- .../ReplicatedMergeTreePartCheckThread.cpp | 14 +++- .../MergeTree/ReplicatedMergeTreeQueue.cpp | 61 +++++++++++++-- .../MergeTree/ReplicatedMergeTreeQueue.h | 4 +- .../ReplicatedMergeTreeRestartingThread.cpp | 3 + src/Storages/StorageReplicatedMergeTree.cpp | 42 ++++------- .../test_lost_part_during_startup/__init__.py | 1 + .../test_lost_part_during_startup/test.py | 75 +++++++++++++++++++ 7 files changed, 163 insertions(+), 37 deletions(-) create mode 100644 
tests/integration/test_lost_part_during_startup/__init__.py create mode 100644 tests/integration/test_lost_part_during_startup/test.py diff --git a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp index 0efa83237ca4..7df7d45ff3c6 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp @@ -111,6 +111,18 @@ ReplicatedMergeTreePartCheckThread::MissingPartSearchResult ReplicatedMergeTreeP bool found_part_with_the_same_max_block = false; Strings replicas = zookeeper->getChildren(storage.zookeeper_path + "/replicas"); + /// Move our replica to the end of replicas + for (auto it = replicas.begin(); it != replicas.end(); ++it) + { + String replica_path = storage.zookeeper_path + "/replicas/" + *it; + if (replica_path == storage.replica_path) + { + std::iter_swap(it, replicas.rbegin()); + break; + } + } + + /// Check all replicas and our replica must be this last one for (const String & replica : replicas) { String replica_path = storage.zookeeper_path + "/replicas/" + replica; @@ -146,7 +158,7 @@ ReplicatedMergeTreePartCheckThread::MissingPartSearchResult ReplicatedMergeTreeP if (found_part_with_the_same_min_block && found_part_with_the_same_max_block) { /// FIXME It may never appear - LOG_WARNING(log, "Found parts with the same min block and with the same max block as the missing part {}. Hoping that it will eventually appear as a result of a merge.", part_name); + LOG_WARNING(log, "Found parts with the same min block and with the same max block as the missing part {} on replica {}. Hoping that it will eventually appear as a result of a merge.", part_name, replica); return MissingPartSearchResult::FoundAndDontNeedFetch; } } diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index 2c2542b6eb34..edc2ab964af9 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -55,14 +55,31 @@ void ReplicatedMergeTreeQueue::clear() mutation_pointer.clear(); } -void ReplicatedMergeTreeQueue::initialize(const MergeTreeData::DataParts & parts) +void ReplicatedMergeTreeQueue::initialize(zkutil::ZooKeeperPtr zookeeper) { std::lock_guard lock(state_mutex); - for (const auto & part : parts) + + LOG_TRACE(log, "Initializing parts in queue"); + + /// Get current parts state from zookeeper + Strings parts = zookeeper->getChildren(replica_path + "/parts"); + for (const auto & part_name : parts) { - current_parts.add(part->name, nullptr); - virtual_parts.add(part->name, nullptr); + LOG_TEST(log, "Adding part {} to current and virtual parts", part_name); + current_parts.add(part_name, nullptr); + virtual_parts.add(part_name, nullptr); } + + /// Drop parts can negatively affect virtual parts. So when we load parts + /// from zookeeper we can break invariant with virtual parts. To fix this we + /// have it here. 
+ for (const LogEntryPtr & entry : queue) + { + if (entry->isDropPart(format_version)) + virtual_parts.removePartAndCoveredParts(*entry->getDropRange(format_version)); + } + + LOG_TRACE(log, "Queue initialized"); } bool ReplicatedMergeTreeQueue::isVirtualPart(const MergeTreeData::DataPartPtr & data_part) const @@ -163,7 +180,11 @@ void ReplicatedMergeTreeQueue::insertUnlocked( const LogEntryPtr & entry, std::optional & min_unprocessed_insert_time_changed, std::lock_guard & state_lock) { - for (const String & virtual_part_name : entry->getVirtualPartNames(format_version)) + auto entry_virtual_parts = entry->getVirtualPartNames(format_version); + + LOG_TEST(log, "Insert entry {} to queue with type {} with virtual parts [{}]", entry->znode_name, entry->typeToString(), fmt::join(entry_virtual_parts, ", ")); + + for (const String & virtual_part_name : entry_virtual_parts) { virtual_parts.add(virtual_part_name, nullptr); /// Don't add drop range parts to mutations @@ -227,6 +248,11 @@ void ReplicatedMergeTreeQueue::updateStateOnQueueEntryRemoval( std::optional & max_processed_insert_time_changed, std::unique_lock & state_lock) { + + auto entry_virtual_parts = entry->getVirtualPartNames(format_version); + LOG_TEST(log, "Removing {} entry {} from queue with type {} with virtual parts [{}]", + is_successful ? "successful" : "unsuccessful", + entry->znode_name, entry->typeToString(), fmt::join(entry_virtual_parts, ", ")); /// Update insert times. if (entry->type == LogEntry::GET_PART || entry->type == LogEntry::ATTACH_PART) { @@ -254,6 +280,7 @@ void ReplicatedMergeTreeQueue::updateStateOnQueueEntryRemoval( { if (!entry->actual_new_part_name.empty()) { + LOG_TEST(log, "Entry {} has actual new part name {}, removing it from mutations", entry->znode_name, entry->actual_new_part_name); /// We don't add bigger fetched part to current_parts because we /// have an invariant `virtual_parts` = `current_parts` + `queue`. /// @@ -264,7 +291,9 @@ void ReplicatedMergeTreeQueue::updateStateOnQueueEntryRemoval( removeCoveredPartsFromMutations(entry->actual_new_part_name, /*remove_part = */ false, /*remove_covered_parts = */ true); } - for (const String & virtual_part_name : entry->getVirtualPartNames(format_version)) + LOG_TEST(log, "Adding parts [{}] to current parts", fmt::join(entry_virtual_parts, ", ")); + + for (const String & virtual_part_name : entry_virtual_parts) { current_parts.add(virtual_part_name, nullptr); @@ -275,14 +304,21 @@ void ReplicatedMergeTreeQueue::updateStateOnQueueEntryRemoval( if (auto drop_range_part_name = entry->getDropRange(format_version)) { + MergeTreePartInfo drop_range_info = MergeTreePartInfo::fromPartName(*drop_range_part_name, format_version); /// DROP PART doesn't have virtual parts so remove from current /// parts all covered parts. 
if (entry->isDropPart(format_version)) + { + LOG_TEST(log, "Removing drop part from current and virtual parts {}", *drop_range_part_name); current_parts.removePartAndCoveredParts(*drop_range_part_name); + } else + { + LOG_TEST(log, "Removing drop range from current and virtual parts {}", *drop_range_part_name); current_parts.remove(*drop_range_part_name); + } virtual_parts.remove(*drop_range_part_name); @@ -307,7 +343,9 @@ void ReplicatedMergeTreeQueue::updateStateOnQueueEntryRemoval( drop_ranges.removeDropRange(entry); } - for (const String & virtual_part_name : entry->getVirtualPartNames(format_version)) + LOG_TEST(log, "Removing unsuccessful entry {} virtual parts [{}]", entry->znode_name, fmt::join(entry_virtual_parts, ", ")); + + for (const String & virtual_part_name : entry_virtual_parts) { /// This part will never appear, so remove it from virtual parts virtual_parts.remove(virtual_part_name); @@ -324,6 +362,9 @@ void ReplicatedMergeTreeQueue::updateStateOnQueueEntryRemoval( void ReplicatedMergeTreeQueue::removeCoveredPartsFromMutations(const String & part_name, bool remove_part, bool remove_covered_parts) { auto part_info = MergeTreePartInfo::fromPartName(part_name, format_version); + + LOG_TEST(log, "Removing part {} from mutations (remove_part: {}, remove_covered_parts: {})", part_name, remove_part, remove_covered_parts); + auto in_partition = mutations_by_partition.find(part_info.partition_id); if (in_partition == mutations_by_partition.end()) return; @@ -361,11 +402,17 @@ void ReplicatedMergeTreeQueue::removeCoveredPartsFromMutations(const String & pa void ReplicatedMergeTreeQueue::addPartToMutations(const String & part_name) { + + LOG_TEST(log, "Adding part {} to mutations", part_name); + auto part_info = MergeTreePartInfo::fromPartName(part_name, format_version); /// Do not add special virtual parts to parts_to_do if (part_info.isFakeDropRangePart()) + { + LOG_TEST(log, "Part {} is fake drop range part, will not add it to mutations", part_name); return; + } auto in_partition = mutations_by_partition.find(part_info.partition_id); if (in_partition == mutations_by_partition.end()) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h index 57e1e6586651..37abd0a16685 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h @@ -277,8 +277,8 @@ class ReplicatedMergeTreeQueue /// Clears queue state void clear(); - /// Put a set of (already existing) parts in virtual_parts. - void initialize(const MergeTreeData::DataParts & parts); + /// Get set of parts from zookeeper + void initialize(zkutil::ZooKeeperPtr zookeeper); /** Inserts an action to the end of the queue. * To restore broken parts during operation. diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp index a7bb56f19559..4a4a266fdf15 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp @@ -172,11 +172,14 @@ bool ReplicatedMergeTreeRestartingThread::tryStartup() storage.cloneReplicaIfNeeded(zookeeper); + + storage.queue.initialize(zookeeper); storage.queue.load(zookeeper); /// pullLogsToQueue() after we mark replica 'is_active' (and after we repair if it was lost); /// because cleanup_thread doesn't delete log_pointer of active replicas. 
storage.queue.pullLogsToQueue(zookeeper, {}, ReplicatedMergeTreeQueue::LOAD); + storage.queue.removeCurrentPartsFromMutations(); storage.last_queue_update_finish_time.store(time(nullptr)); diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index f3ac36b6660e..01d2ed75fdd3 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -1226,34 +1226,24 @@ void StorageReplicatedMergeTree::checkParts(bool skip_sanity_checks) Coordination::Requests ops; - String has_replica = findReplicaHavingPart(part_name, true); - if (!has_replica.empty()) + LOG_ERROR(log, "Removing locally missing part from ZooKeeper and queueing a fetch: {}", part_name); + time_t part_create_time = 0; + Coordination::ExistsResponse exists_resp = exists_futures[i].get(); + if (exists_resp.error == Coordination::Error::ZOK) { - LOG_ERROR(log, "Removing locally missing part from ZooKeeper and queueing a fetch: {}", part_name); - time_t part_create_time = 0; - Coordination::ExistsResponse exists_resp = exists_futures[i].get(); - if (exists_resp.error == Coordination::Error::ZOK) - { - part_create_time = exists_resp.stat.ctime / 1000; - removePartFromZooKeeper(part_name, ops, exists_resp.stat.numChildren > 0); - } - LogEntry log_entry; - log_entry.type = LogEntry::GET_PART; - log_entry.source_replica = ""; - log_entry.new_part_name = part_name; - log_entry.create_time = part_create_time; - - /// We assume that this occurs before the queue is loaded (queue.initialize). - ops.emplace_back(zkutil::makeCreateRequest( - fs::path(replica_path) / "queue/queue-", log_entry.toString(), zkutil::CreateMode::PersistentSequential)); - enqueue_futures.emplace_back(zookeeper->asyncMulti(ops)); - } - else - { - LOG_ERROR(log, "Not found active replica having part {}", part_name); - enqueuePartForCheck(part_name); + part_create_time = exists_resp.stat.ctime / 1000; + removePartFromZooKeeper(part_name, ops, exists_resp.stat.numChildren > 0); } + LogEntry log_entry; + log_entry.type = LogEntry::GET_PART; + log_entry.source_replica = ""; + log_entry.new_part_name = part_name; + log_entry.create_time = part_create_time; + /// We assume that this occurs before the queue is loaded (queue.initialize). 
+ ops.emplace_back(zkutil::makeCreateRequest( + fs::path(replica_path) / "queue/queue-", log_entry.toString(), zkutil::CreateMode::PersistentSequential)); + enqueue_futures.emplace_back(zookeeper->asyncMulti(ops)); } for (auto & future : enqueue_futures) @@ -4318,8 +4308,6 @@ void StorageReplicatedMergeTree::startup() try { - queue.initialize(getDataParts()); - InterserverIOEndpointPtr data_parts_exchange_ptr = std::make_shared(*this); [[maybe_unused]] auto prev_ptr = std::atomic_exchange(&data_parts_exchange_endpoint, data_parts_exchange_ptr); assert(prev_ptr == nullptr); diff --git a/tests/integration/test_lost_part_during_startup/__init__.py b/tests/integration/test_lost_part_during_startup/__init__.py new file mode 100644 index 000000000000..e5a0d9b4834e --- /dev/null +++ b/tests/integration/test_lost_part_during_startup/__init__.py @@ -0,0 +1 @@ +#!/usr/bin/env python3 diff --git a/tests/integration/test_lost_part_during_startup/test.py b/tests/integration/test_lost_part_during_startup/test.py new file mode 100644 index 000000000000..f9d24682354c --- /dev/null +++ b/tests/integration/test_lost_part_during_startup/test.py @@ -0,0 +1,75 @@ +#!/usr/bin/env python3 +import time + +import pytest +from helpers.cluster import ClickHouseCluster +from helpers.network import PartitionManager + +cluster = ClickHouseCluster(__file__) +node1 = cluster.add_instance('node1', with_zookeeper=True, stay_alive=True) +node2 = cluster.add_instance('node2', with_zookeeper=True, stay_alive=True) + + +@pytest.fixture(scope="module") +def start_cluster(): + try: + cluster.start() + yield cluster + + except Exception as ex: + print(ex) + + finally: + cluster.shutdown() + +def remove_part_from_disk(node, table, part_name): + part_path = node.query( + "SELECT path FROM system.parts WHERE table = '{}' and name = '{}'".format(table, part_name)).strip() + if not part_path: + raise Exception("Part " + part_name + "doesn't exist") + node.exec_in_container(['bash', '-c', 'rm -r {p}/*'.format(p=part_path)], privileged=True) + + +def test_lost_part_during_startup(start_cluster): + for i, node in enumerate([node1, node2]): + node.query(f"CREATE TABLE test_lost (value UInt64) Engine = ReplicatedMergeTree('/clickhouse/test_lost', '{i + 1}') ORDER BY tuple()") + + for i in range(4): + node2.query(f"INSERT INTO test_lost VALUES({i})") + + node2.query("OPTIMIZE TABLE test_lost FINAL") + node1.query("SYSTEM SYNC REPLICA test_lost") + + assert node2.query("SELECT sum(value) FROM test_lost") == str(sum(i for i in range(4))) + '\n' + assert node1.query("SELECT sum(value) FROM test_lost") == str(sum(i for i in range(4))) + '\n' + + + remove_part_from_disk(node2, "test_lost", "all_0_3_1") + remove_part_from_disk(node2, "test_lost", "all_1_1_0") + remove_part_from_disk(node2, "test_lost", "all_2_2_0") + + node2.stop_clickhouse() + node1.stop_clickhouse() + node2.start_clickhouse() + + for i in range(10): + try: + node2.query("INSERT INTO test_lost VALUES(7)") + node2.query("INSERT INTO test_lost VALUES(8)") + node2.query("INSERT INTO test_lost VALUES(9)") + node2.query("INSERT INTO test_lost VALUES(10)") + node2.query("INSERT INTO test_lost VALUES(11)") + node2.query("INSERT INTO test_lost VALUES(12)") + + node2.query("OPTIMIZE TABLE test_lost FINAL") + break + except Exception as ex: + print("Exception", ex) + time.sleep(0.5) + + node1.start_clickhouse() + node2.query("SYSTEM SYNC REPLICA test_lost") + node1.query("SYSTEM SYNC REPLICA test_lost") + + assert node2.query("SELECT sum(value) FROM test_lost") == str(sum(i for i in 
range(4)) + sum(i for i in range(7, 13))) + '\n' + assert node1.query("SELECT sum(value) FROM test_lost") == str(sum(i for i in range(4)) + sum(i for i in range(7, 13))) + '\n' From ac8ff0125cadcd231c9a1576a884e4ce58bfc049 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Mon, 27 Sep 2021 21:10:08 +0300 Subject: [PATCH 087/472] Backport #29276 to 21.9: Fix !hasPendingData() failed assertion in hdfs read buffer --- src/Storages/HDFS/ReadBufferFromHDFS.cpp | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/src/Storages/HDFS/ReadBufferFromHDFS.cpp b/src/Storages/HDFS/ReadBufferFromHDFS.cpp index d241bd07294b..96d67ad0e087 100644 --- a/src/Storages/HDFS/ReadBufferFromHDFS.cpp +++ b/src/Storages/HDFS/ReadBufferFromHDFS.cpp @@ -85,10 +85,15 @@ struct ReadBufferFromHDFS::ReadBufferFromHDFSImpl : public BufferWithOwnMemoryposition() = impl->buffer().begin() + offset(); auto result = impl->next(); if (result) - { - working_buffer = internal_buffer = impl->buffer(); - pos = working_buffer.begin(); - } - else - return false; - return true; + BufferBase::set(impl->buffer().begin(), impl->buffer().size(), impl->offset); /// use the buffer returned by `impl` + + return result; } From 250b711f4fe11d66712d554ca6ae0518aea1359a Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 28 Sep 2021 17:17:46 +0300 Subject: [PATCH 088/472] Backport #29454 to 21.9: Fix logical error in functions greatest/least --- src/Functions/LeastGreatestGeneric.h | 2 ++ .../0_stateless/01822_short_circuit.sql | 1 + ...short_circuit_least_greatest_bug.reference | 20 +++++++++++++++++++ ...02032_short_circuit_least_greatest_bug.sql | 2 ++ 4 files changed, 25 insertions(+) create mode 100644 tests/queries/0_stateless/02032_short_circuit_least_greatest_bug.reference create mode 100644 tests/queries/0_stateless/02032_short_circuit_least_greatest_bug.sql diff --git a/src/Functions/LeastGreatestGeneric.h b/src/Functions/LeastGreatestGeneric.h index a8bab0efd541..df44ff87762f 100644 --- a/src/Functions/LeastGreatestGeneric.h +++ b/src/Functions/LeastGreatestGeneric.h @@ -107,6 +107,8 @@ class LeastGreatestOverloadResolver : public IFunctionOverloadResolver FunctionBasePtr buildImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & return_type) const override { DataTypes argument_types; + for (const auto & argument : arguments) + argument_types.push_back(argument.type); /// More efficient specialization for two numeric arguments. 
if (arguments.size() == 2 && isNumber(arguments[0].type) && isNumber(arguments[1].type)) diff --git a/tests/queries/0_stateless/01822_short_circuit.sql b/tests/queries/0_stateless/01822_short_circuit.sql index 1f0e04cb4b5b..48fff04921b2 100644 --- a/tests/queries/0_stateless/01822_short_circuit.sql +++ b/tests/queries/0_stateless/01822_short_circuit.sql @@ -153,3 +153,4 @@ select number % 2 and toLowCardinality(number) from numbers(5); select number % 2 or toLowCardinality(number) from numbers(5); select if(toLowCardinality(number) % 2, number, number + 1) from numbers(10); select multiIf(toLowCardinality(number) % 2, number, number + 1) from numbers(10); + diff --git a/tests/queries/0_stateless/02032_short_circuit_least_greatest_bug.reference b/tests/queries/0_stateless/02032_short_circuit_least_greatest_bug.reference new file mode 100644 index 000000000000..57c88cc489e0 --- /dev/null +++ b/tests/queries/0_stateless/02032_short_circuit_least_greatest_bug.reference @@ -0,0 +1,20 @@ +0 +1 +1 +1 +1 +1 +0 +1 +1 +1 +0 +1 +0 +0 +0 +1 +0 +1 +0 +0 diff --git a/tests/queries/0_stateless/02032_short_circuit_least_greatest_bug.sql b/tests/queries/0_stateless/02032_short_circuit_least_greatest_bug.sql new file mode 100644 index 000000000000..e7dca0bde91a --- /dev/null +++ b/tests/queries/0_stateless/02032_short_circuit_least_greatest_bug.sql @@ -0,0 +1,2 @@ +select 1 and greatest(number % 2, number % 3) from numbers(10); +select 1 and least(number % 2, number % 3) from numbers(10); From 95d005697dc2f61126ba13bfcfa4fafb75b0b79b Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 29 Sep 2021 17:25:02 +0300 Subject: [PATCH 089/472] Backport #29475 to 21.9: Remove filter column from HAVING when it is not needed. --- src/Interpreters/ExpressionAnalyzer.cpp | 38 ++++++++++++------ src/Interpreters/ExpressionAnalyzer.h | 9 ++++- src/Interpreters/InterpreterSelectQuery.cpp | 13 +++--- src/Interpreters/InterpreterSelectQuery.h | 4 +- src/Processors/QueryPlan/TotalsHavingStep.cpp | 10 ++++- src/Processors/QueryPlan/TotalsHavingStep.h | 2 + .../Transforms/TotalsHavingTransform.cpp | 31 +++++++++++--- .../Transforms/TotalsHavingTransform.h | 4 +- .../2025_having_filter_column.reference | 0 .../0_stateless/2025_having_filter_column.sql | 40 +++++++++++++++++++ 10 files changed, 121 insertions(+), 30 deletions(-) create mode 100644 tests/queries/0_stateless/2025_having_filter_column.reference create mode 100644 tests/queries/0_stateless/2025_having_filter_column.sql diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index c8a5ed6c56af..40eb0cd79800 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -1452,18 +1452,15 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( const Settings & settings = context->getSettingsRef(); const ConstStoragePtr & storage = query_analyzer.storage(); - bool finalized = false; - size_t where_step_num = 0; + ssize_t prewhere_step_num = -1; + ssize_t where_step_num = -1; + ssize_t having_step_num = -1; auto finalize_chain = [&](ExpressionActionsChain & chain) { chain.finalize(); - if (!finalized) - { - finalize(chain, where_step_num, query); - finalized = true; - } + finalize(chain, prewhere_step_num, where_step_num, having_step_num, query); chain.clear(); }; @@ -1494,6 +1491,8 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( if (auto actions = query_analyzer.appendPrewhere(chain, !first_stage, additional_required_columns_after_prewhere)) { + /// Prewhere is always the first 
one. + prewhere_step_num = 0; prewhere_info = std::make_shared(actions, query.prewhere()->getColumnName()); if (allowEarlyConstantFolding(*prewhere_info->prewhere_actions, settings)) @@ -1563,6 +1562,7 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( if (query_analyzer.appendHaving(chain, only_types || !second_stage)) { + having_step_num = chain.steps.size() - 1; before_having = chain.getLastActions(); chain.addStep(); } @@ -1663,13 +1663,16 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( checkActions(); } -void ExpressionAnalysisResult::finalize(const ExpressionActionsChain & chain, size_t where_step_num, const ASTSelectQuery & query) +void ExpressionAnalysisResult::finalize( + const ExpressionActionsChain & chain, + ssize_t & prewhere_step_num, + ssize_t & where_step_num, + ssize_t & having_step_num, + const ASTSelectQuery & query) { - size_t next_step_i = 0; - - if (hasPrewhere()) + if (prewhere_step_num >= 0) { - const ExpressionActionsChain::Step & step = *chain.steps.at(next_step_i++); + const ExpressionActionsChain::Step & step = *chain.steps.at(prewhere_step_num); prewhere_info->prewhere_actions->projectInput(false); NameSet columns_to_remove; @@ -1682,12 +1685,21 @@ void ExpressionAnalysisResult::finalize(const ExpressionActionsChain & chain, si } columns_to_remove_after_prewhere = std::move(columns_to_remove); + prewhere_step_num = -1; } - if (hasWhere()) + if (where_step_num >= 0) { where_column_name = query.where()->getColumnName(); remove_where_filter = chain.steps.at(where_step_num)->required_output.find(where_column_name)->second; + where_step_num = -1; + } + + if (having_step_num >= 0) + { + having_column_name = query.having()->getColumnName(); + remove_having_filter = chain.steps.at(having_step_num)->required_output.find(having_column_name)->second; + having_step_num = -1; } } diff --git a/src/Interpreters/ExpressionAnalyzer.h b/src/Interpreters/ExpressionAnalyzer.h index 2d0041bd96be..b5a5731a2d3f 100644 --- a/src/Interpreters/ExpressionAnalyzer.h +++ b/src/Interpreters/ExpressionAnalyzer.h @@ -226,6 +226,8 @@ struct ExpressionAnalysisResult ActionsDAGPtr before_where; ActionsDAGPtr before_aggregation; ActionsDAGPtr before_having; + String having_column_name; + bool remove_having_filter = false; ActionsDAGPtr before_window; ActionsDAGPtr before_order_by; ActionsDAGPtr before_limit_by; @@ -271,7 +273,12 @@ struct ExpressionAnalysisResult void removeExtraColumns() const; void checkActions() const; - void finalize(const ExpressionActionsChain & chain, size_t where_step_num, const ASTSelectQuery & query); + void finalize( + const ExpressionActionsChain & chain, + ssize_t & prewhere_step_num, + ssize_t & where_step_num, + ssize_t & having_step_num, + const ASTSelectQuery & query); }; /// SelectQuery specific ExpressionAnalyzer part. 
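For context only, and not part of the backported patch: on a server built with this change, the effect can be sketched with EXPLAIN actions = 1. When the HAVING column is not referenced by the SELECT list, the TotalsHaving step should now report its filter column as removed instead of keeping it in the output header. The query below is an illustrative sketch under that assumption.

-- Illustrative sketch, not taken from the patch's tests: the HAVING column
-- greater(count(), 0) is not selected, so it can be dropped after filtering.
EXPLAIN actions = 1
SELECT number % 3 AS k
FROM numbers(10)
GROUP BY k WITH TOTALS
HAVING count() > 0;
-- With this change applied, the TotalsHaving step is expected to print
-- something like: Filter column: greater(count(), 0) (removed)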
diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 297e41c95461..f5a9c1f9fefa 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -1250,7 +1250,7 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, const BlockInpu { bool final = !query.group_by_with_rollup && !query.group_by_with_cube; executeTotalsAndHaving( - query_plan, expressions.hasHaving(), expressions.before_having, aggregate_overflow_row, final); + query_plan, expressions.hasHaving(), expressions.before_having, expressions.remove_having_filter, aggregate_overflow_row, final); } if (query.group_by_with_rollup) @@ -1264,11 +1264,11 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, const BlockInpu throw Exception( "WITH TOTALS and WITH ROLLUP or CUBE are not supported together in presence of HAVING", ErrorCodes::NOT_IMPLEMENTED); - executeHaving(query_plan, expressions.before_having); + executeHaving(query_plan, expressions.before_having, expressions.remove_having_filter); } } else if (expressions.hasHaving()) - executeHaving(query_plan, expressions.before_having); + executeHaving(query_plan, expressions.before_having, expressions.remove_having_filter); } else if (query.group_by_with_totals || query.group_by_with_rollup || query.group_by_with_cube) throw Exception("WITH TOTALS, ROLLUP or CUBE are not supported without aggregation", ErrorCodes::NOT_IMPLEMENTED); @@ -2135,10 +2135,10 @@ void InterpreterSelectQuery::executeMergeAggregated(QueryPlan & query_plan, bool } -void InterpreterSelectQuery::executeHaving(QueryPlan & query_plan, const ActionsDAGPtr & expression) +void InterpreterSelectQuery::executeHaving(QueryPlan & query_plan, const ActionsDAGPtr & expression, bool remove_filter) { auto having_step - = std::make_unique(query_plan.getCurrentDataStream(), expression, getSelectQuery().having()->getColumnName(), false); + = std::make_unique(query_plan.getCurrentDataStream(), expression, getSelectQuery().having()->getColumnName(), remove_filter); having_step->setStepDescription("HAVING"); query_plan.addStep(std::move(having_step)); @@ -2146,7 +2146,7 @@ void InterpreterSelectQuery::executeHaving(QueryPlan & query_plan, const Actions void InterpreterSelectQuery::executeTotalsAndHaving( - QueryPlan & query_plan, bool has_having, const ActionsDAGPtr & expression, bool overflow_row, bool final) + QueryPlan & query_plan, bool has_having, const ActionsDAGPtr & expression, bool remove_filter, bool overflow_row, bool final) { const Settings & settings = context->getSettingsRef(); @@ -2155,6 +2155,7 @@ void InterpreterSelectQuery::executeTotalsAndHaving( overflow_row, expression, has_having ? 
getSelectQuery().having()->getColumnName() : "", + remove_filter, settings.totals_mode, settings.totals_auto_threshold, final); diff --git a/src/Interpreters/InterpreterSelectQuery.h b/src/Interpreters/InterpreterSelectQuery.h index aec3b0b8bd38..99c95a8d6248 100644 --- a/src/Interpreters/InterpreterSelectQuery.h +++ b/src/Interpreters/InterpreterSelectQuery.h @@ -131,8 +131,8 @@ class InterpreterSelectQuery : public IInterpreterUnionOrSelectQuery void executeAggregation( QueryPlan & query_plan, const ActionsDAGPtr & expression, bool overflow_row, bool final, InputOrderInfoPtr group_by_info); void executeMergeAggregated(QueryPlan & query_plan, bool overflow_row, bool final); - void executeTotalsAndHaving(QueryPlan & query_plan, bool has_having, const ActionsDAGPtr & expression, bool overflow_row, bool final); - void executeHaving(QueryPlan & query_plan, const ActionsDAGPtr & expression); + void executeTotalsAndHaving(QueryPlan & query_plan, bool has_having, const ActionsDAGPtr & expression, bool remove_filter, bool overflow_row, bool final); + void executeHaving(QueryPlan & query_plan, const ActionsDAGPtr & expression, bool remove_filter); static void executeExpression(QueryPlan & query_plan, const ActionsDAGPtr & expression, const std::string & description); /// FIXME should go through ActionsDAG to behave as a proper function void executeWindow(QueryPlan & query_plan); diff --git a/src/Processors/QueryPlan/TotalsHavingStep.cpp b/src/Processors/QueryPlan/TotalsHavingStep.cpp index db82538d5a0f..4cac12639a89 100644 --- a/src/Processors/QueryPlan/TotalsHavingStep.cpp +++ b/src/Processors/QueryPlan/TotalsHavingStep.cpp @@ -30,6 +30,7 @@ TotalsHavingStep::TotalsHavingStep( bool overflow_row_, const ActionsDAGPtr & actions_dag_, const std::string & filter_column_, + bool remove_filter_, TotalsMode totals_mode_, double auto_include_threshold_, bool final_) @@ -38,11 +39,14 @@ TotalsHavingStep::TotalsHavingStep( TotalsHavingTransform::transformHeader( input_stream_.header, actions_dag_.get(), + filter_column_, + remove_filter_, final_), getTraits(!filter_column_.empty())) , overflow_row(overflow_row_) , actions_dag(actions_dag_) , filter_column_name(filter_column_) + , remove_filter(remove_filter_) , totals_mode(totals_mode_) , auto_include_threshold(auto_include_threshold_) , final(final_) @@ -58,6 +62,7 @@ void TotalsHavingStep::transformPipeline(QueryPipeline & pipeline, const BuildQu overflow_row, expression_actions, filter_column_name, + remove_filter, totals_mode, auto_include_threshold, final); @@ -85,7 +90,10 @@ static String totalsModeToString(TotalsMode totals_mode, double auto_include_thr void TotalsHavingStep::describeActions(FormatSettings & settings) const { String prefix(settings.offset, ' '); - settings.out << prefix << "Filter column: " << filter_column_name << '\n'; + settings.out << prefix << "Filter column: " << filter_column_name; + if (remove_filter) + settings.out << " (removed)"; + settings.out << '\n'; settings.out << prefix << "Mode: " << totalsModeToString(totals_mode, auto_include_threshold) << '\n'; if (actions_dag) diff --git a/src/Processors/QueryPlan/TotalsHavingStep.h b/src/Processors/QueryPlan/TotalsHavingStep.h index bc053c96970b..1ad98a70a01f 100644 --- a/src/Processors/QueryPlan/TotalsHavingStep.h +++ b/src/Processors/QueryPlan/TotalsHavingStep.h @@ -18,6 +18,7 @@ class TotalsHavingStep : public ITransformingStep bool overflow_row_, const ActionsDAGPtr & actions_dag_, const std::string & filter_column_, + bool remove_filter_, TotalsMode totals_mode_, double 
auto_include_threshold_, bool final_); @@ -35,6 +36,7 @@ class TotalsHavingStep : public ITransformingStep bool overflow_row; ActionsDAGPtr actions_dag; String filter_column_name; + bool remove_filter; TotalsMode totals_mode; double auto_include_threshold; bool final; diff --git a/src/Processors/Transforms/TotalsHavingTransform.cpp b/src/Processors/Transforms/TotalsHavingTransform.cpp index 9724d332f15e..c475b87e08f6 100644 --- a/src/Processors/Transforms/TotalsHavingTransform.cpp +++ b/src/Processors/Transforms/TotalsHavingTransform.cpp @@ -28,13 +28,22 @@ void finalizeChunk(Chunk & chunk) chunk.setColumns(std::move(columns), num_rows); } -Block TotalsHavingTransform::transformHeader(Block block, const ActionsDAG * expression, bool final) +Block TotalsHavingTransform::transformHeader( + Block block, + const ActionsDAG * expression, + const std::string & filter_column_name, + bool remove_filter, + bool final) { if (final) finalizeBlock(block); if (expression) + { block = expression->updateHeader(std::move(block)); + if (remove_filter) + block.erase(filter_column_name); + } return block; } @@ -44,20 +53,19 @@ TotalsHavingTransform::TotalsHavingTransform( bool overflow_row_, const ExpressionActionsPtr & expression_, const std::string & filter_column_, + bool remove_filter_, TotalsMode totals_mode_, double auto_include_threshold_, bool final_) - : ISimpleTransform(header, transformHeader(header, expression_ ? &expression_->getActionsDAG() : nullptr, final_), true) + : ISimpleTransform(header, transformHeader(header, expression_ ? &expression_->getActionsDAG() : nullptr, filter_column_, remove_filter_, final_), true) , overflow_row(overflow_row_) , expression(expression_) , filter_column_name(filter_column_) + , remove_filter(remove_filter_) , totals_mode(totals_mode_) , auto_include_threshold(auto_include_threshold_) , final(final_) { - if (!filter_column_name.empty()) - filter_column_pos = outputs.front().getHeader().getPositionByName(filter_column_name); - finalized_header = getInputPort().getHeader(); finalizeBlock(finalized_header); @@ -67,10 +75,17 @@ TotalsHavingTransform::TotalsHavingTransform( auto totals_header = finalized_header; size_t num_rows = totals_header.rows(); expression->execute(totals_header, num_rows); + filter_column_pos = totals_header.getPositionByName(filter_column_name); + if (remove_filter) + totals_header.erase(filter_column_name); outputs.emplace_back(totals_header, this); } else + { + if (!filter_column_name.empty()) + filter_column_pos = finalized_header.getPositionByName(filter_column_name); outputs.emplace_back(finalized_header, this); + } /// Initialize current totals with initial state. 
current_totals.reserve(header.columns()); @@ -167,9 +182,11 @@ void TotalsHavingTransform::transform(Chunk & chunk) } expression->execute(finalized_block, num_rows); + ColumnPtr filter_column_ptr = finalized_block.getByPosition(filter_column_pos).column; + if (remove_filter) + finalized_block.erase(filter_column_name); auto columns = finalized_block.getColumns(); - ColumnPtr filter_column_ptr = columns[filter_column_pos]; ConstantFilterDescription const_filter_description(*filter_column_ptr); if (const_filter_description.always_true) @@ -270,6 +287,8 @@ void TotalsHavingTransform::prepareTotals() size_t num_rows = totals.getNumRows(); auto block = finalized_header.cloneWithColumns(totals.detachColumns()); expression->execute(block, num_rows); + if (remove_filter) + block.erase(filter_column_name); /// Note: after expression totals may have several rows if `arrayJoin` was used in expression. totals = Chunk(block.getColumns(), num_rows); } diff --git a/src/Processors/Transforms/TotalsHavingTransform.h b/src/Processors/Transforms/TotalsHavingTransform.h index d42543d311ae..03635054c653 100644 --- a/src/Processors/Transforms/TotalsHavingTransform.h +++ b/src/Processors/Transforms/TotalsHavingTransform.h @@ -28,6 +28,7 @@ class TotalsHavingTransform : public ISimpleTransform bool overflow_row_, const ExpressionActionsPtr & expression_, const std::string & filter_column_, + bool remove_filter_, TotalsMode totals_mode_, double auto_include_threshold_, bool final_); @@ -39,7 +40,7 @@ class TotalsHavingTransform : public ISimpleTransform Status prepare() override; void work() override; - static Block transformHeader(Block block, const ActionsDAG * expression, bool final); + static Block transformHeader(Block block, const ActionsDAG * expression, const std::string & filter_column_name, bool remove_filter, bool final); protected: void transform(Chunk & chunk) override; @@ -55,6 +56,7 @@ class TotalsHavingTransform : public ISimpleTransform bool overflow_row; ExpressionActionsPtr expression; String filter_column_name; + bool remove_filter; TotalsMode totals_mode; double auto_include_threshold; bool final; diff --git a/tests/queries/0_stateless/2025_having_filter_column.reference b/tests/queries/0_stateless/2025_having_filter_column.reference new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/queries/0_stateless/2025_having_filter_column.sql b/tests/queries/0_stateless/2025_having_filter_column.sql new file mode 100644 index 000000000000..aab419adc160 --- /dev/null +++ b/tests/queries/0_stateless/2025_having_filter_column.sql @@ -0,0 +1,40 @@ +drop table if exists test; + +-- #29010 +CREATE TABLE test +( + d DateTime, + a String, + b UInt64 +) +ENGINE = MergeTree +PARTITION BY toDate(d) +ORDER BY d; + +SELECT * +FROM ( + SELECT + a, + max((d, b)).2 AS value + FROM test + GROUP BY rollup(a) +) +WHERE a <> ''; + +-- the same query, but after syntax optimization +SELECT + a, + value +FROM +( + SELECT + a, + max((d, b)).2 AS value + FROM test + GROUP BY a + WITH ROLLUP + HAVING a != '' +) +WHERE a != ''; + +drop table if exists test; From cc9f1709e73ca89d9e04cbf0b9c5e5dea67bee40 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 30 Sep 2021 15:45:22 +0300 Subject: [PATCH 090/472] Update ReplicatedMergeTreeQueue.cpp --- .../MergeTree/ReplicatedMergeTreeQueue.cpp | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index edc2ab964af9..a3cc446d9b3b 100644 --- 
a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -65,7 +65,6 @@ void ReplicatedMergeTreeQueue::initialize(zkutil::ZooKeeperPtr zookeeper) Strings parts = zookeeper->getChildren(replica_path + "/parts"); for (const auto & part_name : parts) { - LOG_TEST(log, "Adding part {} to current and virtual parts", part_name); current_parts.add(part_name, nullptr); virtual_parts.add(part_name, nullptr); } @@ -182,8 +181,6 @@ void ReplicatedMergeTreeQueue::insertUnlocked( { auto entry_virtual_parts = entry->getVirtualPartNames(format_version); - LOG_TEST(log, "Insert entry {} to queue with type {} with virtual parts [{}]", entry->znode_name, entry->typeToString(), fmt::join(entry_virtual_parts, ", ")); - for (const String & virtual_part_name : entry_virtual_parts) { virtual_parts.add(virtual_part_name, nullptr); @@ -250,9 +247,6 @@ void ReplicatedMergeTreeQueue::updateStateOnQueueEntryRemoval( { auto entry_virtual_parts = entry->getVirtualPartNames(format_version); - LOG_TEST(log, "Removing {} entry {} from queue with type {} with virtual parts [{}]", - is_successful ? "successful" : "unsuccessful", - entry->znode_name, entry->typeToString(), fmt::join(entry_virtual_parts, ", ")); /// Update insert times. if (entry->type == LogEntry::GET_PART || entry->type == LogEntry::ATTACH_PART) { @@ -280,7 +274,6 @@ void ReplicatedMergeTreeQueue::updateStateOnQueueEntryRemoval( { if (!entry->actual_new_part_name.empty()) { - LOG_TEST(log, "Entry {} has actual new part name {}, removing it from mutations", entry->znode_name, entry->actual_new_part_name); /// We don't add bigger fetched part to current_parts because we /// have an invariant `virtual_parts` = `current_parts` + `queue`. /// @@ -291,8 +284,6 @@ void ReplicatedMergeTreeQueue::updateStateOnQueueEntryRemoval( removeCoveredPartsFromMutations(entry->actual_new_part_name, /*remove_part = */ false, /*remove_covered_parts = */ true); } - LOG_TEST(log, "Adding parts [{}] to current parts", fmt::join(entry_virtual_parts, ", ")); - for (const String & virtual_part_name : entry_virtual_parts) { current_parts.add(virtual_part_name, nullptr); @@ -311,12 +302,10 @@ void ReplicatedMergeTreeQueue::updateStateOnQueueEntryRemoval( /// parts all covered parts. 
if (entry->isDropPart(format_version)) { - LOG_TEST(log, "Removing drop part from current and virtual parts {}", *drop_range_part_name); current_parts.removePartAndCoveredParts(*drop_range_part_name); } else { - LOG_TEST(log, "Removing drop range from current and virtual parts {}", *drop_range_part_name); current_parts.remove(*drop_range_part_name); } @@ -343,8 +332,6 @@ void ReplicatedMergeTreeQueue::updateStateOnQueueEntryRemoval( drop_ranges.removeDropRange(entry); } - LOG_TEST(log, "Removing unsuccessful entry {} virtual parts [{}]", entry->znode_name, fmt::join(entry_virtual_parts, ", ")); - for (const String & virtual_part_name : entry_virtual_parts) { /// This part will never appear, so remove it from virtual parts @@ -363,8 +350,6 @@ void ReplicatedMergeTreeQueue::removeCoveredPartsFromMutations(const String & pa { auto part_info = MergeTreePartInfo::fromPartName(part_name, format_version); - LOG_TEST(log, "Removing part {} from mutations (remove_part: {}, remove_covered_parts: {})", part_name, remove_part, remove_covered_parts); - auto in_partition = mutations_by_partition.find(part_info.partition_id); if (in_partition == mutations_by_partition.end()) return; @@ -402,15 +387,11 @@ void ReplicatedMergeTreeQueue::removeCoveredPartsFromMutations(const String & pa void ReplicatedMergeTreeQueue::addPartToMutations(const String & part_name) { - - LOG_TEST(log, "Adding part {} to mutations", part_name); - auto part_info = MergeTreePartInfo::fromPartName(part_name, format_version); /// Do not add special virtual parts to parts_to_do if (part_info.isFakeDropRangePart()) { - LOG_TEST(log, "Part {} is fake drop range part, will not add it to mutations", part_name); return; } From 136f1682acc82138e2d9127d967057bcad35de24 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 30 Sep 2021 17:36:45 +0300 Subject: [PATCH 091/472] Backport #29531 to 21.9: Fix pathStartsWith --- src/Common/filesystemHelpers.cpp | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/src/Common/filesystemHelpers.cpp b/src/Common/filesystemHelpers.cpp index 9c3db0f3e306..89214ad496e9 100644 --- a/src/Common/filesystemHelpers.cpp +++ b/src/Common/filesystemHelpers.cpp @@ -113,13 +113,9 @@ String getFilesystemName([[maybe_unused]] const String & mount_point) bool pathStartsWith(const std::filesystem::path & path, const std::filesystem::path & prefix_path) { - auto absolute_path = std::filesystem::weakly_canonical(path); - auto absolute_prefix_path = std::filesystem::weakly_canonical(prefix_path); - - auto [_, prefix_path_mismatch_it] = std::mismatch(absolute_path.begin(), absolute_path.end(), absolute_prefix_path.begin(), absolute_prefix_path.end()); - - bool path_starts_with_prefix_path = (prefix_path_mismatch_it == absolute_prefix_path.end()); - return path_starts_with_prefix_path; + String absolute_path = std::filesystem::weakly_canonical(path); + String absolute_prefix_path = std::filesystem::weakly_canonical(prefix_path); + return absolute_path.starts_with(absolute_prefix_path); } bool symlinkStartsWith(const std::filesystem::path & path, const std::filesystem::path & prefix_path) @@ -129,15 +125,11 @@ bool symlinkStartsWith(const std::filesystem::path & path, const std::filesystem /// `.` and `..` and extra `/`. Path is not canonized because otherwise path will /// not be a path of a symlink itself. - auto absolute_path = std::filesystem::absolute(path); - absolute_path = absolute_path.lexically_normal(); /// Normalize path. 
- auto absolute_prefix_path = std::filesystem::absolute(prefix_path); - absolute_prefix_path = absolute_prefix_path.lexically_normal(); /// Normalize path. - - auto [_, prefix_path_mismatch_it] = std::mismatch(absolute_path.begin(), absolute_path.end(), absolute_prefix_path.begin(), absolute_prefix_path.end()); - - bool path_starts_with_prefix_path = (prefix_path_mismatch_it == absolute_prefix_path.end()); - return path_starts_with_prefix_path; + String absolute_path = std::filesystem::absolute(path); + absolute_path = fs::path(absolute_path).lexically_normal(); /// Normalize path. + String absolute_prefix_path = std::filesystem::absolute(prefix_path); + absolute_prefix_path = fs::path(absolute_prefix_path).lexically_normal(); /// Normalize path. + return absolute_path.starts_with(absolute_prefix_path); } bool pathStartsWith(const String & path, const String & prefix_path) From 5199a7b28b65ff7efa6839fe92a1bc4662cbebdc Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 30 Sep 2021 23:35:35 +0300 Subject: [PATCH 092/472] Backport #29518 to 21.9: Fix possible odbc invalid cursor state --- programs/odbc-bridge/ODBCConnectionFactory.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/programs/odbc-bridge/ODBCConnectionFactory.h b/programs/odbc-bridge/ODBCConnectionFactory.h index 773b65eb27de..ca403eca6623 100644 --- a/programs/odbc-bridge/ODBCConnectionFactory.h +++ b/programs/odbc-bridge/ODBCConnectionFactory.h @@ -81,8 +81,12 @@ T execute(nanodbc::ConnectionHolderPtr connection_holder, std::functionupdateConnection(); return query_func(connection_holder->get()); From ef13313f374b9b924d0c17d01554756e3d694054 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 1 Oct 2021 17:39:29 +0300 Subject: [PATCH 093/472] Backport #29573 to 21.9: Fix nullpointer dereference in AddDefaultDatabaseVisitor --- src/Interpreters/AddDefaultDatabaseVisitor.h | 7 ++++++- .../0_stateless/02041_test_fuzzy_alter.reference | 1 + .../queries/0_stateless/02041_test_fuzzy_alter.sql | 13 +++++++++++++ 3 files changed, 20 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/02041_test_fuzzy_alter.reference create mode 100644 tests/queries/0_stateless/02041_test_fuzzy_alter.sql diff --git a/src/Interpreters/AddDefaultDatabaseVisitor.h b/src/Interpreters/AddDefaultDatabaseVisitor.h index fe3edc009577..4da8df64f25a 100644 --- a/src/Interpreters/AddDefaultDatabaseVisitor.h +++ b/src/Interpreters/AddDefaultDatabaseVisitor.h @@ -136,7 +136,12 @@ class AddDefaultDatabaseVisitor /// XXX: for some unknown reason this place assumes that argument can't be an alias, /// like in the similar code in `MarkTableIdentifierVisitor`. if (auto * identifier = child->children[i]->as()) - child->children[i] = identifier->createTable(); + { + /// If identifier is broken then we can do nothing and get an exception + auto maybe_table_identifier = identifier->createTable(); + if (maybe_table_identifier) + child->children[i] = maybe_table_identifier; + } /// Second argument of the "in" function (or similar) may be a table name or a subselect. /// Rewrite the table name or descend into subselect. 
diff --git a/tests/queries/0_stateless/02041_test_fuzzy_alter.reference b/tests/queries/0_stateless/02041_test_fuzzy_alter.reference new file mode 100644 index 000000000000..d00491fd7e5b --- /dev/null +++ b/tests/queries/0_stateless/02041_test_fuzzy_alter.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/02041_test_fuzzy_alter.sql b/tests/queries/0_stateless/02041_test_fuzzy_alter.sql new file mode 100644 index 000000000000..a330defc3164 --- /dev/null +++ b/tests/queries/0_stateless/02041_test_fuzzy_alter.sql @@ -0,0 +1,13 @@ +DROP TABLE IF EXISTS alter_table; + +CREATE TABLE alter_table (a UInt8, b Int16) +ENGINE = MergeTree +ORDER BY a; + +ALTER TABLE alter_table + MODIFY COLUMN `b` DateTime DEFAULT now(([NULL, NULL, NULL, [-2147483648], [NULL, NULL, NULL, NULL, NULL, NULL, NULL]] AND (1048576 AND NULL) AND (NULL AND 1048575 AND NULL AND -2147483649) AND NULL) IN (test_01103.t1_distr.id)); --{serverError 47} + +SELECT 1; + + +DROP TABLE IF EXISTS alter_table; From 99e1f8d8214b6a197f9e049f00735c2772c421d2 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 1 Oct 2021 19:46:58 +0300 Subject: [PATCH 094/472] Backport #28195 to 21.9: Maybe fix livelock in ZooKeeper client --- src/Common/ConcurrentBoundedQueue.h | 126 +++++++++++++++---------- src/Common/ZooKeeper/ZooKeeperImpl.cpp | 28 +++--- src/Common/ZooKeeper/ZooKeeperImpl.h | 6 +- 3 files changed, 90 insertions(+), 70 deletions(-) diff --git a/src/Common/ConcurrentBoundedQueue.h b/src/Common/ConcurrentBoundedQueue.h index cb29efc33496..bc9d55ff8f5f 100644 --- a/src/Common/ConcurrentBoundedQueue.h +++ b/src/Common/ConcurrentBoundedQueue.h @@ -2,11 +2,21 @@ #include #include +#include #include #include #include +#include + +namespace DB +{ +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} +} /** A very simple thread-safe queue of limited size. * If you try to pop an item from an empty queue, the thread is blocked until the queue becomes nonempty. @@ -17,9 +27,41 @@ class ConcurrentBoundedQueue { private: std::queue queue; - Poco::FastMutex mutex; + mutable Poco::FastMutex mutex; Poco::Semaphore fill_count; Poco::Semaphore empty_count; + std::atomic_bool closed = false; + + template + bool tryEmplaceImpl(Args &&... args) + { + bool emplaced = true; + + { + Poco::ScopedLock lock(mutex); + if (closed) + emplaced = false; + else + queue.emplace(std::forward(args)...); + } + + if (emplaced) + fill_count.set(); + else + empty_count.set(); + + return emplaced; + } + + void popImpl(T & x) + { + { + Poco::ScopedLock lock(mutex); + detail::moveOrCopyIfThrow(std::move(queue.front()), x); + queue.pop(); + } + empty_count.set(); + } public: explicit ConcurrentBoundedQueue(size_t max_fill) @@ -30,91 +72,75 @@ class ConcurrentBoundedQueue void push(const T & x) { empty_count.wait(); - { - Poco::ScopedLock lock(mutex); - queue.push(x); - } - fill_count.set(); + if (!tryEmplaceImpl(x)) + throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "tryPush/tryEmplace must be used with close()"); } template void emplace(Args &&... 
args) { empty_count.wait(); - { - Poco::ScopedLock lock(mutex); - queue.emplace(std::forward(args)...); - } - fill_count.set(); + if (!tryEmplaceImpl(std::forward(args)...)) + throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "tryPush/tryEmplace must be used with close()"); } void pop(T & x) { fill_count.wait(); - { - Poco::ScopedLock lock(mutex); - detail::moveOrCopyIfThrow(std::move(queue.front()), x); - queue.pop(); - } - empty_count.set(); + popImpl(x); } bool tryPush(const T & x, UInt64 milliseconds = 0) { - if (empty_count.tryWait(milliseconds)) - { - { - Poco::ScopedLock lock(mutex); - queue.push(x); - } - fill_count.set(); - return true; - } - return false; + if (!empty_count.tryWait(milliseconds)) + return false; + + return tryEmplaceImpl(x); } template bool tryEmplace(UInt64 milliseconds, Args &&... args) { - if (empty_count.tryWait(milliseconds)) - { - { - Poco::ScopedLock lock(mutex); - queue.emplace(std::forward(args)...); - } - fill_count.set(); - return true; - } - return false; + if (!empty_count.tryWait(milliseconds)) + return false; + + return tryEmplaceImpl(std::forward(args)...); } bool tryPop(T & x, UInt64 milliseconds = 0) { - if (fill_count.tryWait(milliseconds)) - { - { - Poco::ScopedLock lock(mutex); - detail::moveOrCopyIfThrow(std::move(queue.front()), x); - queue.pop(); - } - empty_count.set(); - return true; - } - return false; + if (!fill_count.tryWait(milliseconds)) + return false; + + popImpl(x); + return true; } - size_t size() + size_t size() const { Poco::ScopedLock lock(mutex); return queue.size(); } - size_t empty() + size_t empty() const { Poco::ScopedLock lock(mutex); return queue.empty(); } + /// Forbids to push new elements to queue. + /// Returns false if queue was not closed before call, returns true if queue was already closed. + bool close() + { + Poco::ScopedLock lock(mutex); + return closed.exchange(true); + } + + bool isClosed() const + { + return closed.load(); + } + void clear() { while (fill_count.tryWait(0)) diff --git a/src/Common/ZooKeeper/ZooKeeperImpl.cpp b/src/Common/ZooKeeper/ZooKeeperImpl.cpp index 5f15a3b8b75f..a883d4d7b765 100644 --- a/src/Common/ZooKeeper/ZooKeeperImpl.cpp +++ b/src/Common/ZooKeeper/ZooKeeperImpl.cpp @@ -539,7 +539,7 @@ void ZooKeeper::sendThread() try { - while (!expired) + while (!requests_queue.isClosed()) { auto prev_bytes_sent = out->count(); @@ -571,7 +571,7 @@ void ZooKeeper::sendThread() info.request->has_watch = true; } - if (expired) + if (requests_queue.isClosed()) { break; } @@ -616,7 +616,7 @@ void ZooKeeper::receiveThread() try { Int64 waited = 0; - while (!expired) + while (!requests_queue.isClosed()) { auto prev_bytes_received = in->count(); @@ -639,7 +639,7 @@ void ZooKeeper::receiveThread() if (in->poll(max_wait)) { - if (expired) + if (requests_queue.isClosed()) break; receiveEvent(); @@ -839,12 +839,10 @@ void ZooKeeper::finalize(bool error_send, bool error_receive) auto expire_session_if_not_expired = [&] { - std::lock_guard lock(push_request_mutex); - if (!expired) - { - expired = true; + /// No new requests will appear in queue after close() + bool was_already_closed = requests_queue.close(); + if (!was_already_closed) active_session_metric_increment.destroy(); - } }; try @@ -1017,17 +1015,15 @@ void ZooKeeper::pushRequest(RequestInfo && info) } } - /// We must serialize 'pushRequest' and 'finalize' (from sendThread, receiveThread) calls - /// to avoid forgotten operations in the queue when session is expired. 
- /// Invariant: when expired, no new operations will be pushed to the queue in 'pushRequest' - /// and the queue will be drained in 'finalize'. - std::lock_guard lock(push_request_mutex); - - if (expired) + if (requests_queue.isClosed()) throw Exception("Session expired", Error::ZSESSIONEXPIRED); if (!requests_queue.tryPush(std::move(info), operation_timeout.totalMilliseconds())) + { + if (requests_queue.isClosed()) + throw Exception("Session expired", Error::ZSESSIONEXPIRED); throw Exception("Cannot push request to queue within operation timeout", Error::ZOPERATIONTIMEOUT); + } } catch (...) { diff --git a/src/Common/ZooKeeper/ZooKeeperImpl.h b/src/Common/ZooKeeper/ZooKeeperImpl.h index 8f0f64ceafab..9dd71bc554a8 100644 --- a/src/Common/ZooKeeper/ZooKeeperImpl.h +++ b/src/Common/ZooKeeper/ZooKeeperImpl.h @@ -121,7 +121,7 @@ class ZooKeeper final : public IKeeper /// If expired, you can only destroy the object. All other methods will throw exception. - bool isExpired() const override { return expired; } + bool isExpired() const override { return requests_queue.isClosed(); } /// Useful to check owner of ephemeral node. int64_t getSessionID() const override { return session_id; } @@ -205,11 +205,9 @@ class ZooKeeper final : public IKeeper int64_t session_id = 0; std::atomic next_xid {1}; - std::atomic expired {false}; /// Mark session finalization start. Used to avoid simultaneous /// finalization from different threads. One-shot flag. std::atomic finalization_started {false}; - std::mutex push_request_mutex; using clock = std::chrono::steady_clock; @@ -223,7 +221,7 @@ class ZooKeeper final : public IKeeper using RequestsQueue = ConcurrentBoundedQueue; - RequestsQueue requests_queue{1}; + RequestsQueue requests_queue{1024}; void pushRequest(RequestInfo && info); using Operations = std::map; From 3f5193cc6223059bb58e6e0275f02337ca94ccd1 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Sat, 2 Oct 2021 03:35:46 +0300 Subject: [PATCH 095/472] Backport #29574 to 21.9: ExpressionJIT fix short-circuit with alias --- src/Interpreters/ExpressionJIT.cpp | 14 +++++++++++++- .../0_stateless/02036_jit_short_circuit.reference | 1 + .../0_stateless/02036_jit_short_circuit.sql | 12 ++++++++++++ 3 files changed, 26 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/02036_jit_short_circuit.reference create mode 100644 tests/queries/0_stateless/02036_jit_short_circuit.sql diff --git a/src/Interpreters/ExpressionJIT.cpp b/src/Interpreters/ExpressionJIT.cpp index 9005b24b044e..e9a0b29aaa4b 100644 --- a/src/Interpreters/ExpressionJIT.cpp +++ b/src/Interpreters/ExpressionJIT.cpp @@ -322,6 +322,16 @@ static bool isCompilableConstant(const ActionsDAG::Node & node) return node.column && isColumnConst(*node.column) && canBeNativeType(*node.result_type); } +static const ActionsDAG::Node * removeAliasIfNecessary(const ActionsDAG::Node * node) +{ + const ActionsDAG::Node * node_no_alias = node; + + while (node_no_alias->type == ActionsDAG::ActionType::ALIAS) + node_no_alias = node_no_alias->children[0]; + + return node_no_alias; +} + static bool isCompilableFunction(const ActionsDAG::Node & node, const std::unordered_set & lazy_executed_nodes) { if (node.type != ActionsDAG::ActionType::FUNCTION) @@ -334,7 +344,9 @@ static bool isCompilableFunction(const ActionsDAG::Node & node, const std::unord { for (const auto & child : node.children) { - if (lazy_executed_nodes.contains(child)) + const ActionsDAG::Node * child_no_alias = removeAliasIfNecessary(child); + + if 
(lazy_executed_nodes.contains(child_no_alias)) return false; } } diff --git a/tests/queries/0_stateless/02036_jit_short_circuit.reference b/tests/queries/0_stateless/02036_jit_short_circuit.reference new file mode 100644 index 000000000000..573541ac9702 --- /dev/null +++ b/tests/queries/0_stateless/02036_jit_short_circuit.reference @@ -0,0 +1 @@ +0 diff --git a/tests/queries/0_stateless/02036_jit_short_circuit.sql b/tests/queries/0_stateless/02036_jit_short_circuit.sql new file mode 100644 index 000000000000..18faf701a845 --- /dev/null +++ b/tests/queries/0_stateless/02036_jit_short_circuit.sql @@ -0,0 +1,12 @@ +SET compile_expressions = 1; +SET min_count_to_compile_expression = 0; +SET short_circuit_function_evaluation='enable'; + +DROP TABLE IF EXISTS test_table; +CREATE TABLE test_table (message String) ENGINE=TinyLog; + +INSERT INTO test_table VALUES ('Test'); + +SELECT if(action = 'bonus', sport_amount, 0) * 100 FROM (SELECT message AS action, cast(message, 'Float64') AS sport_amount FROM test_table); + +DROP TABLE test_table; From 95c616aa166c8af631de2f2b3f7afd4699970f74 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Mon, 4 Oct 2021 13:56:51 +0300 Subject: [PATCH 096/472] Backport #29553 to 21.9: Fix null deference for GROUP BY WITH TOTALS HAVING (when the column from HAVING wasn't selected) --- src/Processors/Transforms/TotalsHavingTransform.cpp | 3 ++- .../02039_group_by_with_totals_having.reference | 9 +++++++++ .../0_stateless/02039_group_by_with_totals_having.sql | 3 +++ 3 files changed, 14 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/02039_group_by_with_totals_having.reference create mode 100644 tests/queries/0_stateless/02039_group_by_with_totals_having.sql diff --git a/src/Processors/Transforms/TotalsHavingTransform.cpp b/src/Processors/Transforms/TotalsHavingTransform.cpp index 9724d332f15e..b3e41722d1bf 100644 --- a/src/Processors/Transforms/TotalsHavingTransform.cpp +++ b/src/Processors/Transforms/TotalsHavingTransform.cpp @@ -3,6 +3,7 @@ #include #include +#include #include #include @@ -207,7 +208,7 @@ void TotalsHavingTransform::transform(Chunk & chunk) } } - num_rows = columns.front()->size(); + num_rows = columns.empty() ? countBytesInFilter(*filter_description.data) : columns.front()->size(); chunk.setColumns(std::move(columns), num_rows); } diff --git a/tests/queries/0_stateless/02039_group_by_with_totals_having.reference b/tests/queries/0_stateless/02039_group_by_with_totals_having.reference new file mode 100644 index 000000000000..581470ced835 --- /dev/null +++ b/tests/queries/0_stateless/02039_group_by_with_totals_having.reference @@ -0,0 +1,9 @@ +-- { echo } +SELECT 'x' FROM numbers(2) GROUP BY number WITH TOTALS HAVING count(number)>0; +x +x + +x +SELECT 'x' FROM numbers(2) GROUP BY number WITH TOTALS HAVING count(number)<0; + +x diff --git a/tests/queries/0_stateless/02039_group_by_with_totals_having.sql b/tests/queries/0_stateless/02039_group_by_with_totals_having.sql new file mode 100644 index 000000000000..28aa34a90d2a --- /dev/null +++ b/tests/queries/0_stateless/02039_group_by_with_totals_having.sql @@ -0,0 +1,3 @@ +-- { echo } +SELECT 'x' FROM numbers(2) GROUP BY number WITH TOTALS HAVING count(number)>0; +SELECT 'x' FROM numbers(2) GROUP BY number WITH TOTALS HAVING count(number)<0; From af40bd0180db2318599b7cf667f61470da3e1f92 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Mon, 4 Oct 2021 21:59:51 +0300 Subject: [PATCH 097/472] Backport #29625 to 21.9: Fix missing condition in pushed down predicate. 
--- src/Interpreters/ActionsDAG.cpp | 47 +++++++++++++++---- .../01763_filter_push_down_bugs.reference | 1 + .../01763_filter_push_down_bugs.sql | 4 +- 3 files changed, 42 insertions(+), 10 deletions(-) diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp index fc8f5801ba29..6375210fd7cb 100644 --- a/src/Interpreters/ActionsDAG.cpp +++ b/src/Interpreters/ActionsDAG.cpp @@ -1573,11 +1573,37 @@ ConjunctionNodes getConjunctionNodes(ActionsDAG::Node * predicate, std::unordere std::unordered_set allowed; std::unordered_set rejected; + /// Parts of predicate in case predicate is conjunction (or just predicate itself). + std::unordered_set predicates; + { + std::stack stack; + std::unordered_set visited_nodes; + stack.push(predicate); + visited_nodes.insert(predicate); + while (!stack.empty()) + { + const auto * node = stack.top(); + stack.pop(); + bool is_conjunction = node->type == ActionsDAG::ActionType::FUNCTION && node->function_base->getName() == "and"; + if (is_conjunction) + { + for (const auto & child : node->children) + { + if (visited_nodes.count(child) == 0) + { + visited_nodes.insert(child); + stack.push(child); + } + } + } + else + predicates.insert(node); + } + } + struct Frame { const ActionsDAG::Node * node = nullptr; - /// Node is a part of predicate (predicate itself, or some part of AND) - bool is_predicate = false; size_t next_child_to_visit = 0; size_t num_allowed_children = 0; }; @@ -1585,14 +1611,11 @@ ConjunctionNodes getConjunctionNodes(ActionsDAG::Node * predicate, std::unordere std::stack stack; std::unordered_set visited_nodes; - stack.push(Frame{.node = predicate, .is_predicate = true}); + stack.push(Frame{.node = predicate}); visited_nodes.insert(predicate); while (!stack.empty()) { auto & cur = stack.top(); - bool is_conjunction = cur.is_predicate - && cur.node->type == ActionsDAG::ActionType::FUNCTION - && cur.node->function_base->getName() == "and"; /// At first, visit all children. while (cur.next_child_to_visit < cur.node->children.size()) @@ -1602,7 +1625,7 @@ ConjunctionNodes getConjunctionNodes(ActionsDAG::Node * predicate, std::unordere if (visited_nodes.count(child) == 0) { visited_nodes.insert(child); - stack.push({.node = child, .is_predicate = is_conjunction}); + stack.push({.node = child}); break; } @@ -1619,8 +1642,7 @@ ConjunctionNodes getConjunctionNodes(ActionsDAG::Node * predicate, std::unordere allowed_nodes.emplace(cur.node); } - /// Add parts of AND to result. Do not add function AND. - if (cur.is_predicate && ! 
is_conjunction) + if (predicates.count(cur.node)) { if (allowed_nodes.count(cur.node)) { @@ -1639,6 +1661,13 @@ ConjunctionNodes getConjunctionNodes(ActionsDAG::Node * predicate, std::unordere } } + // std::cerr << "Allowed " << conjunction.allowed.size() << std::endl; + // for (const auto & node : conjunction.allowed) + // std::cerr << node->result_name << std::endl; + // std::cerr << "Rejected " << conjunction.rejected.size() << std::endl; + // for (const auto & node : conjunction.rejected) + // std::cerr << node->result_name << std::endl; + return conjunction; } diff --git a/tests/queries/0_stateless/01763_filter_push_down_bugs.reference b/tests/queries/0_stateless/01763_filter_push_down_bugs.reference index 66ea84a07c11..6917117b3e22 100644 --- a/tests/queries/0_stateless/01763_filter_push_down_bugs.reference +++ b/tests/queries/0_stateless/01763_filter_push_down_bugs.reference @@ -4,3 +4,4 @@ [[1]] 2 String1_0 String2_0 String3_0 String4_0 1 String1_0 String2_0 String3_0 String4_0 1 +1 [0,1,2] diff --git a/tests/queries/0_stateless/01763_filter_push_down_bugs.sql b/tests/queries/0_stateless/01763_filter_push_down_bugs.sql index 5000eb388786..b13282e6dca9 100644 --- a/tests/queries/0_stateless/01763_filter_push_down_bugs.sql +++ b/tests/queries/0_stateless/01763_filter_push_down_bugs.sql @@ -9,7 +9,7 @@ CREATE TABLE Test ENGINE = MergeTree() PRIMARY KEY (String1,String2) ORDER BY (String1,String2) -AS +AS SELECT 'String1_' || toString(number) as String1, 'String2_' || toString(number) as String2, @@ -35,3 +35,5 @@ FROM WHERE String4 ='String4_0'; DROP TABLE IF EXISTS Test; + +select x, y from (select [0, 1, 2] as y, 1 as a, 2 as b) array join y as x where a = 1 and b = 2 and (x = 1 or x != 1) and x = 1; From 635334057a05a8fa918d8c397d4dea1ca4e5a3ba Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 5 Oct 2021 16:06:41 +0300 Subject: [PATCH 098/472] Backport #28637 to 21.9: Fix materialized column as sharding key --- src/Storages/Distributed/DistributedSink.cpp | 67 ++++++---- src/Storages/Distributed/DistributedSink.h | 10 +- src/Storages/StorageDistributed.cpp | 30 +---- .../__init__.py | 0 .../configs/test_cluster.xml | 18 +++ .../test.py | 116 ++++++++++++++++++ 6 files changed, 190 insertions(+), 51 deletions(-) create mode 100644 tests/integration/test_sharding_key_from_default_column/__init__.py create mode 100644 tests/integration/test_sharding_key_from_default_column/configs/test_cluster.xml create mode 100644 tests/integration/test_sharding_key_from_default_column/test.py diff --git a/src/Storages/Distributed/DistributedSink.cpp b/src/Storages/Distributed/DistributedSink.cpp index ec3f82d914c6..de328e74837e 100644 --- a/src/Storages/Distributed/DistributedSink.cpp +++ b/src/Storages/Distributed/DistributedSink.cpp @@ -3,6 +3,8 @@ #include #include +#include +#include #include #include @@ -86,26 +88,40 @@ static void writeBlockConvert(const BlockOutputStreamPtr & out, const Block & bl } +static ASTPtr createInsertToRemoteTableQuery(const std::string & database, const std::string & table, const Names & column_names) +{ + auto query = std::make_shared(); + query->table_id = StorageID(database, table); + auto columns = std::make_shared(); + query->columns = columns; + query->children.push_back(columns); + for (const auto & column_name : column_names) + columns->children.push_back(std::make_shared(column_name)); + return query; +} + + DistributedSink::DistributedSink( ContextPtr context_, StorageDistributed & storage_, const StorageMetadataPtr & metadata_snapshot_, - const 
ASTPtr & query_ast_, const ClusterPtr & cluster_, bool insert_sync_, UInt64 insert_timeout_, - StorageID main_table_) + StorageID main_table_, + const Names & columns_to_send_) : SinkToStorage(metadata_snapshot_->getSampleBlock()) , context(Context::createCopy(context_)) , storage(storage_) , metadata_snapshot(metadata_snapshot_) - , query_ast(query_ast_) - , query_string(queryToString(query_ast_)) + , query_ast(createInsertToRemoteTableQuery(main_table_.database_name, main_table_.table_name, columns_to_send_)) + , query_string(queryToString(query_ast)) , cluster(cluster_) , insert_sync(insert_sync_) , allow_materialized(context->getSettingsRef().insert_allow_materialized_columns) , insert_timeout(insert_timeout_) , main_table(main_table_) + , columns_to_send(columns_to_send_.begin(), columns_to_send_.end()) , log(&Poco::Logger::get("DistributedBlockOutputStream")) { const auto & settings = context->getSettingsRef(); @@ -126,27 +142,25 @@ void DistributedSink::consume(Chunk chunk) auto ordinary_block = getPort().getHeader().cloneWithColumns(chunk.detachColumns()); - if (!allow_materialized) - { - /* They are added by the AddingDefaultBlockOutputStream, and we will get - * different number of columns eventually */ - for (const auto & col : metadata_snapshot->getColumns().getMaterialized()) - { - if (ordinary_block.has(col.name)) - { - ordinary_block.erase(col.name); - LOG_DEBUG(log, "{}: column {} will be removed, because it is MATERIALIZED", - storage.getStorageID().getNameForLogs(), col.name); - } - } - } - if (insert_sync) writeSync(ordinary_block); else writeAsync(ordinary_block); } + +Block DistributedSink::removeSuperfluousColumns(Block block) const +{ + for (size_t i = block.columns(); i;) + { + --i; + if (!columns_to_send.contains(block.getByPosition(i).name)) + block.erase(i); + } + return block; +} + + void DistributedSink::writeAsync(const Block & block) { if (random_shard_insert) @@ -399,6 +413,8 @@ void DistributedSink::writeSync(const Block & block) { const Settings & settings = context->getSettingsRef(); const auto & shards_info = cluster->getShardsInfo(); + Block block_to_send = removeSuperfluousColumns(block); + size_t start = 0; size_t end = shards_info.size(); @@ -411,7 +427,7 @@ void DistributedSink::writeSync(const Block & block) if (!pool) { /// Deferred initialization. Only for sync insertion. - initWritingJobs(block, start, end); + initWritingJobs(block_to_send, start, end); size_t jobs_count = random_shard_insert ? 1 : (remote_jobs_count + local_jobs_count); size_t max_threads = std::min(settings.max_distributed_connections, jobs_count); @@ -456,7 +472,7 @@ void DistributedSink::writeSync(const Block & block) finished_jobs_count = 0; for (size_t shard_index : collections::range(start, end)) for (JobReplica & job : per_shard_jobs[shard_index].replicas_jobs) - pool->scheduleOrThrowOnError(runWritingJob(job, block, num_shards)); + pool->scheduleOrThrowOnError(runWritingJob(job, block_to_send, num_shards)); } catch (...) 
{ @@ -581,12 +597,13 @@ void DistributedSink::writeAsyncImpl(const Block & block, size_t shard_id) { const auto & shard_info = cluster->getShardsInfo()[shard_id]; const auto & settings = context->getSettingsRef(); + Block block_to_send = removeSuperfluousColumns(block); if (shard_info.hasInternalReplication()) { if (shard_info.isLocal() && settings.prefer_localhost_replica) /// Prefer insert into current instance directly - writeToLocal(block, shard_info.getLocalNodeCount()); + writeToLocal(block_to_send, shard_info.getLocalNodeCount()); else { const auto & path = shard_info.insertPathForInternalReplication( @@ -594,13 +611,13 @@ void DistributedSink::writeAsyncImpl(const Block & block, size_t shard_id) settings.use_compact_format_in_distributed_parts_names); if (path.empty()) throw Exception("Directory name for async inserts is empty", ErrorCodes::LOGICAL_ERROR); - writeToShard(block, {path}); + writeToShard(block_to_send, {path}); } } else { if (shard_info.isLocal() && settings.prefer_localhost_replica) - writeToLocal(block, shard_info.getLocalNodeCount()); + writeToLocal(block_to_send, shard_info.getLocalNodeCount()); std::vector dir_names; for (const auto & address : cluster->getShardsAddresses()[shard_id]) @@ -608,7 +625,7 @@ void DistributedSink::writeAsyncImpl(const Block & block, size_t shard_id) dir_names.push_back(address.toFullString(settings.use_compact_format_in_distributed_parts_names)); if (!dir_names.empty()) - writeToShard(block, dir_names); + writeToShard(block_to_send, dir_names); } } diff --git a/src/Storages/Distributed/DistributedSink.h b/src/Storages/Distributed/DistributedSink.h index af04f8c8aacc..a57f93b70a05 100644 --- a/src/Storages/Distributed/DistributedSink.h +++ b/src/Storages/Distributed/DistributedSink.h @@ -41,11 +41,11 @@ class DistributedSink : public SinkToStorage ContextPtr context_, StorageDistributed & storage_, const StorageMetadataPtr & metadata_snapshot_, - const ASTPtr & query_ast_, const ClusterPtr & cluster_, bool insert_sync_, UInt64 insert_timeout_, - StorageID main_table_); + StorageID main_table_, + const Names & columns_to_send_); String getName() const override { return "DistributedSink"; } void consume(Chunk chunk) override; @@ -63,6 +63,9 @@ class DistributedSink : public SinkToStorage void writeAsyncImpl(const Block & block, size_t shard_id = 0); + /// Removes columns which should not be sent to shards. + Block removeSuperfluousColumns(Block block) const; + /// Increments finished_writings_count after each repeat. void writeToLocal(const Block & block, size_t repeats); @@ -82,7 +85,9 @@ class DistributedSink : public SinkToStorage /// Returns the number of blocks was written for each cluster node. Uses during exception handling. std::string getCurrentStateDescription(); + /// Context used for writing to remote tables. 
ContextMutablePtr context; + StorageDistributed & storage; StorageMetadataPtr metadata_snapshot; ASTPtr query_ast; @@ -100,6 +105,7 @@ class DistributedSink : public SinkToStorage /// Sync-related stuff UInt64 insert_timeout; // in seconds StorageID main_table; + NameSet columns_to_send; Stopwatch watch; Stopwatch watch_current_block; std::optional pool; diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 1ad80f8aea66..fc005c8d2d64 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -158,23 +158,6 @@ ASTPtr rewriteSelectQuery(const ASTPtr & query, const std::string & database, co return modified_query_ast; } -/// The columns list in the original INSERT query is incorrect because inserted blocks are transformed -/// to the form of the sample block of the Distributed table. So we rewrite it and add all columns from -/// the sample block instead. -ASTPtr createInsertToRemoteTableQuery(const std::string & database, const std::string & table, const Block & sample_block) -{ - auto query = std::make_shared(); - query->table_id = StorageID(database, table); - - auto columns = std::make_shared(); - query->columns = columns; - query->children.push_back(columns); - for (const auto & col : sample_block) - columns->children.push_back(std::make_shared(col.name)); - - return query; -} - /// Calculate maximum number in file names in directory and all subdirectories. /// To ensure global order of data blocks yet to be sent across server restarts. UInt64 getMaximumFileNumber(const std::string & dir_path) @@ -681,17 +664,16 @@ SinkToStoragePtr StorageDistributed::write(const ASTPtr &, const StorageMetadata bool insert_sync = settings.insert_distributed_sync || settings.insert_shard_id || owned_cluster; auto timeout = settings.insert_distributed_timeout; - Block sample_block; - if (!settings.insert_allow_materialized_columns) - sample_block = metadata_snapshot->getSampleBlockNonMaterialized(); + Names columns_to_send; + if (settings.insert_allow_materialized_columns) + columns_to_send = metadata_snapshot->getSampleBlock().getNames(); else - sample_block = metadata_snapshot->getSampleBlock(); + columns_to_send = metadata_snapshot->getSampleBlockNonMaterialized().getNames(); /// DistributedBlockOutputStream will not own cluster, but will own ConnectionPools of the cluster return std::make_shared( - local_context, *this, metadata_snapshot, - createInsertToRemoteTableQuery(remote_database, remote_table, sample_block), - cluster, insert_sync, timeout, StorageID{remote_database, remote_table}); + local_context, *this, metadata_snapshot, cluster, insert_sync, timeout, + StorageID{remote_database, remote_table}, columns_to_send); } diff --git a/tests/integration/test_sharding_key_from_default_column/__init__.py b/tests/integration/test_sharding_key_from_default_column/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/integration/test_sharding_key_from_default_column/configs/test_cluster.xml b/tests/integration/test_sharding_key_from_default_column/configs/test_cluster.xml new file mode 100644 index 000000000000..0437e047fadc --- /dev/null +++ b/tests/integration/test_sharding_key_from_default_column/configs/test_cluster.xml @@ -0,0 +1,18 @@ + + + + + + node1 + 9000 + + + + + node2 + 9000 + + + + + diff --git a/tests/integration/test_sharding_key_from_default_column/test.py b/tests/integration/test_sharding_key_from_default_column/test.py new file mode 100644 index 000000000000..1717a1ee14a5 --- 
/dev/null +++ b/tests/integration/test_sharding_key_from_default_column/test.py @@ -0,0 +1,116 @@ +import pytest +import itertools +from helpers.cluster import ClickHouseCluster +from helpers.test_tools import TSV + +cluster = ClickHouseCluster(__file__) + +node1 = cluster.add_instance('node1', main_configs=['configs/test_cluster.xml'], with_zookeeper=True) +node2 = cluster.add_instance('node2', main_configs=['configs/test_cluster.xml'], with_zookeeper=True) + + +@pytest.fixture(scope="module", autouse=True) +def start_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +@pytest.fixture(autouse=True) +def cleanup_after_test(): + try: + yield + finally: + node1.query("DROP TABLE IF EXISTS dist ON CLUSTER 'test_cluster'") + node1.query("DROP TABLE IF EXISTS local ON CLUSTER 'test_cluster'") + + +# A default column is used in the sharding key expression. +def test_default_column(): + node1.query("CREATE TABLE dist ON CLUSTER 'test_cluster' (x Int32, y Int32 DEFAULT x + 100, z Int32 DEFAULT x + y) ENGINE = Distributed('test_cluster', currentDatabase(), local, y)") + node1.query("CREATE TABLE local ON CLUSTER 'test_cluster' (x Int32, y Int32 DEFAULT x + 200, z Int32 DEFAULT x - y) ENGINE = MergeTree() ORDER BY y") + + for insert_sync in [0, 1]: + settings = {'insert_distributed_sync': insert_sync} + + # INSERT INTO TABLE dist (x) + node1.query("TRUNCATE TABLE local ON CLUSTER 'test_cluster'") + node1.query("INSERT INTO TABLE dist (x) VALUES (1), (2), (3), (4)", settings=settings) + node1.query("SYSTEM FLUSH DISTRIBUTED dist") + assert node1.query("SELECT x, y, z FROM local") == TSV([[2, 102, 104], [4, 104, 108]]) + assert node2.query("SELECT x, y, z FROM local") == TSV([[1, 101, 102], [3, 103, 106]]) + assert node1.query("SELECT x, y, z FROM dist") == TSV([[2, 102, 104], [4, 104, 108], [1, 101, 102], [3, 103, 106]]) + + # INSERT INTO TABLE dist (x, y) + node1.query("TRUNCATE TABLE local ON CLUSTER 'test_cluster'") + node1.query("INSERT INTO TABLE dist (x, y) VALUES (1, 11), (2, 22), (3, 33)", settings=settings) + node1.query("SYSTEM FLUSH DISTRIBUTED dist") + assert node1.query("SELECT x, y, z FROM local") == TSV([[2, 22, 24]]) + assert node2.query("SELECT x, y, z FROM local") == TSV([[1, 11, 12], [3, 33, 36]]) + assert node1.query("SELECT x, y, z FROM dist") == TSV([[2, 22, 24], [1, 11, 12], [3, 33, 36]]) + + +# A materialized column is used in the sharding key expression and `insert_allow_materialized_columns` set to 1. 
+def test_materialized_column_allow_insert_materialized(): + node1.query("CREATE TABLE dist ON CLUSTER 'test_cluster' (x Int32, y Int32 MATERIALIZED x + 100, z Int32 MATERIALIZED x + y) ENGINE = Distributed('test_cluster', currentDatabase(), local, y)") + node1.query("CREATE TABLE local ON CLUSTER 'test_cluster' (x Int32, y Int32 MATERIALIZED x + 200, z Int32 MATERIALIZED x - y) ENGINE = MergeTree() ORDER BY y") + + for insert_sync in [0, 1]: + settings = {'insert_distributed_sync': insert_sync, 'insert_allow_materialized_columns': 1} + + # INSERT INTO TABLE dist (x) + node1.query("TRUNCATE TABLE local ON CLUSTER 'test_cluster'") + node1.query("INSERT INTO TABLE dist (x) VALUES (1), (2), (3), (4)", settings=settings) + node1.query("SYSTEM FLUSH DISTRIBUTED dist") + assert node1.query("SELECT x, y, z FROM local") == TSV([[2, 102, 104], [4, 104, 108]]) + assert node2.query("SELECT x, y, z FROM local") == TSV([[1, 101, 102], [3, 103, 106]]) + assert node1.query("SELECT x, y, z FROM dist") == TSV([[2, 102, 104], [4, 104, 108], [1, 101, 102], [3, 103, 106]]) + + # INSERT INTO TABLE dist (x, y) + node1.query("TRUNCATE TABLE local ON CLUSTER 'test_cluster'") + node1.query("INSERT INTO TABLE dist (x, y) VALUES (1, 11), (2, 22), (3, 33)", settings=settings) + node1.query("SYSTEM FLUSH DISTRIBUTED dist") + assert node1.query("SELECT x, y, z FROM local") == TSV([[2, 22, 24]]) + assert node2.query("SELECT x, y, z FROM local") == TSV([[1, 11, 12], [3, 33, 36]]) + assert node1.query("SELECT x, y, z FROM dist") == TSV([[2, 22, 24], [1, 11, 12], [3, 33, 36]]) + + +# A materialized column is used in the sharding key expression and `insert_allow_materialized_columns` set to 0. +def test_materialized_column_disallow_insert_materialized(): + node1.query("CREATE TABLE dist ON CLUSTER 'test_cluster' (x Int32, y Int32 MATERIALIZED x + 100, z Int32 MATERIALIZED x + y) ENGINE = Distributed('test_cluster', currentDatabase(), local, y)") + node1.query("CREATE TABLE local ON CLUSTER 'test_cluster' (x Int32, y Int32 MATERIALIZED x + 200, z Int32 MATERIALIZED x - y) ENGINE = MergeTree() ORDER BY y") + + for insert_sync in [0, 1]: + settings = {'insert_distributed_sync': insert_sync, 'insert_allow_materialized_columns': 0} + + # INSERT INTO TABLE dist (x) + node1.query("TRUNCATE TABLE local ON CLUSTER 'test_cluster'") + node1.query("INSERT INTO TABLE dist (x) VALUES (1), (2), (3), (4)", settings=settings) + node1.query("SYSTEM FLUSH DISTRIBUTED dist") + assert node1.query("SELECT x, y, z FROM local") == TSV([[2, 202, -200], [4, 204, -200]]) + assert node2.query("SELECT x, y, z FROM local") == TSV([[1, 201, -200], [3, 203, -200]]) + assert node1.query("SELECT x, y, z FROM dist") == TSV([[2, 202, -200], [4, 204, -200], [1, 201, -200], [3, 203, -200]]) + + # INSERT INTO TABLE dist (x, y) + node1.query("TRUNCATE TABLE local ON CLUSTER 'test_cluster'") + expected_error = "Cannot insert column y, because it is MATERIALIZED column" + assert expected_error in node1.query_and_get_error("INSERT INTO TABLE dist (x, y) VALUES (1, 11), (2, 22), (3, 33)", settings=settings) + + +# Almost the same as the previous test `test_materialized_column_disallow_insert_materialized`, but the sharding key has different values. 
+def test_materialized_column_disallow_insert_materialized_different_shards(): + node1.query("CREATE TABLE dist ON CLUSTER 'test_cluster' (x Int32, y Int32 MATERIALIZED x + 101, z Int32 MATERIALIZED x + y) ENGINE = Distributed('test_cluster', currentDatabase(), local, y)") + node1.query("CREATE TABLE local ON CLUSTER 'test_cluster' (x Int32, y Int32 MATERIALIZED x + 200, z Int32 MATERIALIZED x - y) ENGINE = MergeTree() ORDER BY y") + + for insert_sync in [0, 1]: + settings = {'insert_distributed_sync': insert_sync, 'insert_allow_materialized_columns': 0} + + # INSERT INTO TABLE dist (x) + node1.query("TRUNCATE TABLE local ON CLUSTER 'test_cluster'") + node1.query("INSERT INTO TABLE dist (x) VALUES (1), (2), (3), (4)", settings=settings) + node1.query("SYSTEM FLUSH DISTRIBUTED dist") + assert node1.query("SELECT x, y, z FROM local") == TSV([[1, 201, -200], [3, 203, -200]]) + assert node2.query("SELECT x, y, z FROM local") == TSV([[2, 202, -200], [4, 204, -200]]) + assert node1.query("SELECT x, y, z FROM dist") == TSV([[1, 201, -200], [3, 203, -200], [2, 202, -200], [4, 204, -200]]) From 0406363b45396bb8f96b4373c1a09f63fa956be0 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 6 Oct 2021 22:14:49 +0300 Subject: [PATCH 099/472] Backport #29060 to 21.9: Do not allow to reuse previous credentials in case of inter-server secret --- src/Server/TCPHandler.cpp | 21 ++++- tests/integration/helpers/client.py | 38 ++++++-- tests/integration/helpers/cluster.py | 24 ++++- .../configs/remote_servers.xml | 2 + .../configs/users.xml | 13 +++ .../test.py | 91 +++++++++++++++---- 6 files changed, 153 insertions(+), 36 deletions(-) diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 812c081a6462..02bdd9721d4a 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -1161,6 +1161,17 @@ void TCPHandler::receiveQuery() state.is_empty = false; readStringBinary(state.query_id, *in); + /// In interserer mode, + /// initial_user can be empty in case of Distributed INSERT via Buffer/Kafka, + /// (i.e. when the INSERT is done with the global context w/o user), + /// so it is better to reset session to avoid using old user. + if (is_interserver_mode) + { + ClientInfo original_session_client_info = session->getClientInfo(); + session = std::make_unique(server.context(), ClientInfo::Interface::TCP_INTERSERVER); + session->getClientInfo() = original_session_client_info; + } + /// Read client info. ClientInfo client_info = session->getClientInfo(); if (client_tcp_protocol_version >= DBMS_MIN_REVISION_WITH_CLIENT_INFO) @@ -1208,11 +1219,13 @@ void TCPHandler::receiveQuery() throw NetException("Hash mismatch", ErrorCodes::UNEXPECTED_PACKET_FROM_CLIENT); /// TODO: change error code? - /// initial_user can be empty in case of Distributed INSERT via Buffer/Kafka, - /// i.e. when the INSERT is done with the global context (w/o user). 
- if (!client_info.initial_user.empty()) + if (client_info.initial_user.empty()) + { + LOG_DEBUG(log, "User (no user, interserver mode)"); + } + else { - LOG_DEBUG(log, "User (initial): {}", client_info.initial_user); + LOG_DEBUG(log, "User (initial, interserver mode): {}", client_info.initial_user); session->authenticate(AlwaysAllowCredentials{client_info.initial_user}, client_info.initial_address); } #else diff --git a/tests/integration/helpers/client.py b/tests/integration/helpers/client.py index ceebf3c23bf3..b0e764bf174f 100644 --- a/tests/integration/helpers/client.py +++ b/tests/integration/helpers/client.py @@ -16,13 +16,34 @@ def __init__(self, host, port=9000, command='/usr/bin/clickhouse-client'): self.command += ['--host', self.host, '--port', str(self.port), '--stacktrace'] - def query(self, sql, stdin=None, timeout=None, settings=None, user=None, password=None, database=None, - ignore_error=False): - return self.get_query_request(sql, stdin=stdin, timeout=timeout, settings=settings, user=user, - password=password, database=database, ignore_error=ignore_error).get_answer() - - def get_query_request(self, sql, stdin=None, timeout=None, settings=None, user=None, password=None, database=None, - ignore_error=False): + def query(self, sql, + stdin=None, + timeout=None, + settings=None, + user=None, + password=None, + database=None, + ignore_error=False, + query_id=None): + return self.get_query_request(sql, + stdin=stdin, + timeout=timeout, + settings=settings, + user=user, + password=password, + database=database, + ignore_error=ignore_error, + query_id=query_id).get_answer() + + def get_query_request(self, sql, + stdin=None, + timeout=None, + settings=None, + user=None, + password=None, + database=None, + ignore_error=False, + query_id=None): command = self.command[:] if stdin is None: @@ -44,6 +65,9 @@ def get_query_request(self, sql, stdin=None, timeout=None, settings=None, user=N if database is not None: command += ['--database', database] + if query_id is not None: + command += ['--query_id', query_id] + return CommandRequest(command, stdin, timeout, ignore_error) def query_and_get_error(self, sql, stdin=None, timeout=None, settings=None, user=None, password=None, diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 6fe01b5df034..3316c94abca0 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -1846,11 +1846,25 @@ def is_built_with_memory_sanitizer(self): return self.is_built_with_sanitizer('memory') # Connects to the instance via clickhouse-client, sends a query (1st argument) and returns the answer - def query(self, sql, stdin=None, timeout=None, settings=None, user=None, password=None, database=None, - ignore_error=False): - logging.debug(f"Executing query {sql} on {self.name}") - return self.client.query(sql, stdin=stdin, timeout=timeout, settings=settings, user=user, password=password, - database=database, ignore_error=ignore_error) + def query(self, sql, + stdin=None, + timeout=None, + settings=None, + user=None, + password=None, + database=None, + ignore_error=False, + query_id=None): + logging.debug("Executing query %s on %s", sql, self.name) + return self.client.query(sql, + stdin=stdin, + timeout=timeout, + settings=settings, + user=user, + password=password, + database=database, + ignore_error=ignore_error, + query_id=query_id) def query_with_retry(self, sql, stdin=None, timeout=None, settings=None, user=None, password=None, database=None, ignore_error=False, diff --git 
a/tests/integration/test_distributed_inter_server_secret/configs/remote_servers.xml b/tests/integration/test_distributed_inter_server_secret/configs/remote_servers.xml index 0ff521ac8002..f0f7007b567a 100644 --- a/tests/integration/test_distributed_inter_server_secret/configs/remote_servers.xml +++ b/tests/integration/test_distributed_inter_server_secret/configs/remote_servers.xml @@ -20,6 +20,8 @@ n2 9000 + + 1 diff --git a/tests/integration/test_distributed_inter_server_secret/configs/users.xml b/tests/integration/test_distributed_inter_server_secret/configs/users.xml index 1b012bfea9c4..2ab79bd24e99 100644 --- a/tests/integration/test_distributed_inter_server_secret/configs/users.xml +++ b/tests/integration/test_distributed_inter_server_secret/configs/users.xml @@ -3,6 +3,10 @@ + + + 1 + @@ -32,6 +36,15 @@ default default + + + + + ::/0 + + ro + default + diff --git a/tests/integration/test_distributed_inter_server_secret/test.py b/tests/integration/test_distributed_inter_server_secret/test.py index 0e77fc6e1627..73d338ba8706 100644 --- a/tests/integration/test_distributed_inter_server_secret/test.py +++ b/tests/integration/test_distributed_inter_server_secret/test.py @@ -3,6 +3,8 @@ # pylint: disable=line-too-long import pytest +import uuid +import time from helpers.client import QueryRuntimeException from helpers.cluster import ClickHouseCluster @@ -27,8 +29,10 @@ def make_instance(name, cfg): def bootstrap(): for n in list(cluster.instances.values()): n.query('DROP TABLE IF EXISTS data') + n.query('DROP TABLE IF EXISTS data_from_buffer') n.query('DROP TABLE IF EXISTS dist') n.query('CREATE TABLE data (key Int) Engine=Memory()') + n.query('CREATE TABLE data_from_buffer (key Int) Engine=Memory()') n.query(""" CREATE TABLE dist_insecure AS data Engine=Distributed(insecure, currentDatabase(), data, key) @@ -38,20 +42,24 @@ def bootstrap(): Engine=Distributed(secure, currentDatabase(), data, key) """) n.query(""" + CREATE TABLE dist_secure_from_buffer AS data_from_buffer + Engine=Distributed(secure, currentDatabase(), data_from_buffer, key) + """) + n.query(""" CREATE TABLE dist_secure_disagree AS data Engine=Distributed(secure_disagree, currentDatabase(), data, key) """) n.query(""" - CREATE TABLE dist_secure_buffer AS dist_secure - Engine=Buffer(currentDatabase(), dist_secure, + CREATE TABLE dist_secure_buffer AS dist_secure_from_buffer + Engine=Buffer(currentDatabase(), dist_secure_from_buffer, /* settings for manual flush only */ - 1, /* num_layers */ - 10e6, /* min_time, placeholder */ - 10e6, /* max_time, placeholder */ - 0, /* min_rows */ - 10e6, /* max_rows */ - 0, /* min_bytes */ - 80e6 /* max_bytes */ + 1, /* num_layers */ + 0, /* min_time, placeholder */ + 0, /* max_time, placeholder */ + 0, /* min_rows */ + 0, /* max_rows */ + 0, /* min_bytes */ + 0 /* max_bytes */ ) """) @@ -129,17 +137,62 @@ def test_secure_insert_sync(): # # Buffer() flush happens with global context, that does not have user # And so Context::user/ClientInfo::current_user/ClientInfo::initial_user will be empty +# +# This is the regression test for the subsequent query that it +# will not use user from the previous query. 
+# +# The test a little bit complex, but I will try to explain: +# - first, we need to execute query with the readonly user (regualar SELECT), +# and then we will execute INSERT, and if the bug is there, then INSERT will +# use the user from SELECT and will fail (since you cannot do INSERT with +# readonly=1/2) +# +# - the trick with generating random priority (via sed) is to avoid reusing +# connection from n1 to n2 from another test (and we cannot simply use +# another pool after ConnectionPoolFactory had been added [1]. +# +# [1]: https://github.com/ClickHouse/ClickHouse/pull/26318 +# +# We need at least one change in one of fields of the node/shard definition, +# and this "priorirty" for us in this test. +# +# - after we will ensure that connection is really established from the context +# of SELECT query, and that the connection will not be established from the +# context of the INSERT query (but actually it is a no-op since the INSERT +# will be done in background, due to insert_distributed_sync=false by +# default) +# +# - if the bug is there, then FLUSH DISTRIBUTED will fail, because it will go +# from n1 to n2 using previous user. +# +# I hope that this will clarify something for the reader. def test_secure_insert_buffer_async(): - n1.query("TRUNCATE TABLE data") - n1.query('INSERT INTO dist_secure_buffer SELECT * FROM numbers(2)') - n1.query('SYSTEM FLUSH DISTRIBUTED ON CLUSTER secure dist_secure') - # no Buffer flush happened - assert int(n1.query('SELECT count() FROM dist_secure')) == 0 + # Change cluster definition so that the SELECT will always creates new connection + priority = int(time.time()) + n1.exec_in_container(['bash', '-c', f'sed -i "s#.*#{priority}#" /etc/clickhouse-server/config.d/remote_servers.xml']) + n1.query('SYSTEM RELOAD CONFIG') + # ensure that SELECT creates new connection (we need separate table for + # this, so that separate distributed pool will be used) + query_id = uuid.uuid4().hex + n1.query('SELECT * FROM dist_secure_from_buffer', user='ro', query_id=query_id) + assert n1.contains_in_log('{' + query_id + '} Connection (n2:9000): Connecting.') + + query_id = uuid.uuid4().hex + n1.query('INSERT INTO dist_secure_buffer SELECT * FROM numbers(2)', query_id=query_id) + # ensure that INSERT does not creates new connection, so that it will use + # previous connection that was instantiated with "ro" user (using + # interserver secret) + assert not n1.contains_in_log('{' + query_id + '} Connection (n2:9000): Connecting.') + + # And before the bug was fixed this query will fail with the following error: + # + # Code: 164. DB::Exception: Received from 172.16.2.5:9000. DB::Exception: There was an error on [n1:9000]: Code: 164. DB::Exception: Received from n2:9000. DB::Exception: ro: Cannot execute query in readonly mode. 
(READONLY) + n1.query('SYSTEM FLUSH DISTRIBUTED ON CLUSTER secure dist_secure_from_buffer') n1.query('OPTIMIZE TABLE dist_secure_buffer') - # manual flush - n1.query('SYSTEM FLUSH DISTRIBUTED ON CLUSTER secure dist_secure') - assert int(n1.query('SELECT count() FROM dist_secure')) == 2 - n1.query('TRUNCATE TABLE data ON CLUSTER secure') + n1.query('SYSTEM FLUSH DISTRIBUTED ON CLUSTER secure dist_secure_from_buffer') + + assert int(n1.query('SELECT count() FROM dist_secure_from_buffer')) == 2 + n1.query('TRUNCATE TABLE data_from_buffer ON CLUSTER secure') def test_secure_disagree(): with pytest.raises(QueryRuntimeException, match='.*Hash mismatch.*'): @@ -209,5 +262,3 @@ def test_per_user_protocol_settings_secure_cluster(user, password): 'max_untracked_memory': 0, }) assert int(get_query_setting_on_shard(n1, id_, 'max_memory_usage_for_user')) == int(1e9) - -# TODO: check user for INSERT From 430493896e4541c212aca99558d1c1af9c389d08 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 7 Oct 2021 20:21:58 +0300 Subject: [PATCH 100/472] Backport #29782 to 21.9: Fix concurrent access to LowCardinality during GROUP BY (leads to SIGSEGV) --- src/Columns/ReverseIndex.h | 12 ++++++---- ...ow_cardinality_parallel_group_by.reference | 0 ...02046_low_cardinality_parallel_group_by.sh | 23 +++++++++++++++++++ 3 files changed, 30 insertions(+), 5 deletions(-) create mode 100644 tests/queries/0_stateless/02046_low_cardinality_parallel_group_by.reference create mode 100755 tests/queries/0_stateless/02046_low_cardinality_parallel_group_by.sh diff --git a/src/Columns/ReverseIndex.h b/src/Columns/ReverseIndex.h index 1db42fcd6a1d..3890ce7872a9 100644 --- a/src/Columns/ReverseIndex.h +++ b/src/Columns/ReverseIndex.h @@ -317,7 +317,7 @@ class ReverseIndex { public: ReverseIndex(UInt64 num_prefix_rows_to_skip_, UInt64 base_index_) - : num_prefix_rows_to_skip(num_prefix_rows_to_skip_), base_index(base_index_), saved_hash_ptr(nullptr) {} + : num_prefix_rows_to_skip(num_prefix_rows_to_skip_), base_index(base_index_), external_saved_hash_ptr(nullptr) {} void setColumn(ColumnType * column_); @@ -352,14 +352,14 @@ class ReverseIndex if (!use_saved_hash) return nullptr; - UInt64 * ptr = saved_hash_ptr.load(); + UInt64 * ptr = external_saved_hash_ptr.load(); if (!ptr) { auto hash = calcHashes(); ptr = &hash->getData()[0]; UInt64 * expected = nullptr; - if (saved_hash_ptr.compare_exchange_strong(expected, ptr)) - saved_hash = std::move(hash); + if (external_saved_hash_ptr.compare_exchange_strong(expected, ptr)) + external_saved_hash = std::move(hash); else ptr = expected; } @@ -379,7 +379,9 @@ class ReverseIndex /// Lazy initialized. 
std::unique_ptr index; mutable ColumnUInt64::MutablePtr saved_hash; - mutable std::atomic saved_hash_ptr; + /// For usage during GROUP BY + mutable ColumnUInt64::MutablePtr external_saved_hash; + mutable std::atomic external_saved_hash_ptr; void buildIndex(); diff --git a/tests/queries/0_stateless/02046_low_cardinality_parallel_group_by.reference b/tests/queries/0_stateless/02046_low_cardinality_parallel_group_by.reference new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/queries/0_stateless/02046_low_cardinality_parallel_group_by.sh b/tests/queries/0_stateless/02046_low_cardinality_parallel_group_by.sh new file mode 100755 index 000000000000..c2ae622e6a8e --- /dev/null +++ b/tests/queries/0_stateless/02046_low_cardinality_parallel_group_by.sh @@ -0,0 +1,23 @@ +#!/usr/bin/env bash +# Tags: long + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +# This is the regression test for parallel usage of LowCardinality column +# via Buffer engine. +# +# See also: +# - https://github.com/ClickHouse/ClickHouse/issues/24158 +# - https://github.com/ClickHouse/ClickHouse/pull/3138 + +$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS low_card_buffer_test" +$CLICKHOUSE_CLIENT -q "CREATE TABLE low_card_buffer_test (test_text LowCardinality(String)) ENGINE=Buffer('', '', 16, 60, 360, 100, 1000, 10000, 100000)" + +$CLICKHOUSE_BENCHMARK -d 0 -i 1000 -c 5 <<<"SELECT count() FROM low_card_buffer_test GROUP BY test_text format Null" 2>/dev/null & +$CLICKHOUSE_BENCHMARK -d 0 -i 1000 -c 2 <<<"INSERT INTO low_card_buffer_test values('TEST1')" 2>/dev/null & +wait + +# server is alive +$CLICKHOUSE_CLIENT -q "SELECT 1 FORMAT Null" From 7d403fd7c96a113f4f80cce72ede5f822721e2e0 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 8 Oct 2021 00:21:12 +0300 Subject: [PATCH 101/472] Backport #29811 to 21.9: Fix overflow in Stopwatch --- src/Common/Stopwatch.cpp | 19 -------- src/Common/Stopwatch.h | 100 ++++++++++----------------------------- src/Common/Throttler.cpp | 2 +- 3 files changed, 27 insertions(+), 94 deletions(-) delete mode 100644 src/Common/Stopwatch.cpp diff --git a/src/Common/Stopwatch.cpp b/src/Common/Stopwatch.cpp deleted file mode 100644 index b17e343f1afb..000000000000 --- a/src/Common/Stopwatch.cpp +++ /dev/null @@ -1,19 +0,0 @@ -#include -#include "Stopwatch.h" - -StopwatchRUsage::Timestamp StopwatchRUsage::Timestamp::current() -{ - StopwatchRUsage::Timestamp res; - - ::rusage rusage {}; -#if !defined(__APPLE__) -#if defined(OS_SUNOS) - ::getrusage(RUSAGE_LWP, &rusage); -#else - ::getrusage(RUSAGE_THREAD, &rusage); -#endif // OS_SUNOS -#endif // __APPLE__ - res.user_ns = rusage.ru_utime.tv_sec * 1000000000UL + rusage.ru_utime.tv_usec * 1000UL; - res.sys_ns = rusage.ru_stime.tv_sec * 1000000000UL + rusage.ru_stime.tv_usec * 1000UL; - return res; -} diff --git a/src/Common/Stopwatch.h b/src/Common/Stopwatch.h index a7f5e76d5be3..d2c5b1104c43 100644 --- a/src/Common/Stopwatch.h +++ b/src/Common/Stopwatch.h @@ -1,8 +1,10 @@ #pragma once +#include #include #include +#include #include @@ -13,6 +15,20 @@ inline UInt64 clock_gettime_ns(clockid_t clock_type = CLOCK_MONOTONIC) return UInt64(ts.tv_sec * 1000000000LL + ts.tv_nsec); } +/// Sometimes monotonic clock may not be monotonic (due to bug in kernel?). +/// It may cause some operations to fail with "Timeout exceeded: elapsed 18446744073.709553 seconds". +/// Takes previously returned value and returns it again if time stepped back for some reason. 
+inline UInt64 clock_gettime_ns_adjusted(UInt64 prev_time, clockid_t clock_type = CLOCK_MONOTONIC) +{ + UInt64 current_time = clock_gettime_ns(clock_type); + if (likely(prev_time <= current_time)) + return current_time; + + /// Something probably went completely wrong if time stepped back for more than 1 second. + assert(prev_time - current_time <= 1000000000ULL); + return prev_time; +} + /** Differs from Poco::Stopwatch only by using 'clock_gettime' instead of 'gettimeofday', * returns nanoseconds instead of microseconds, and also by other minor differencies. */ @@ -40,7 +56,7 @@ class Stopwatch clockid_t clock_type; bool is_running = false; - UInt64 nanoseconds() const { return clock_gettime_ns(clock_type); } + UInt64 nanoseconds() const { return clock_gettime_ns_adjusted(start_ns, clock_type); } }; @@ -49,8 +65,12 @@ class AtomicStopwatch public: AtomicStopwatch(clockid_t clock_type_ = CLOCK_MONOTONIC) : clock_type(clock_type_) { restart(); } - void restart() { start_ns = nanoseconds(); } - UInt64 elapsed() const { return nanoseconds() - start_ns; } + void restart() { start_ns = nanoseconds(0); } + UInt64 elapsed() const + { + UInt64 current_start_ns = start_ns; + return nanoseconds(current_start_ns) - current_start_ns; + } UInt64 elapsedMilliseconds() const { return elapsed() / 1000000UL; } double elapsedSeconds() const { return static_cast(elapsed()) / 1000000000ULL; } @@ -61,8 +81,8 @@ class AtomicStopwatch bool compareAndRestart(double seconds) { UInt64 threshold = static_cast(seconds * 1000000000.0); - UInt64 current_ns = nanoseconds(); UInt64 current_start_ns = start_ns; + UInt64 current_ns = nanoseconds(current_start_ns); while (true) { @@ -105,8 +125,8 @@ class AtomicStopwatch Lock compareAndRestartDeferred(double seconds) { UInt64 threshold = UInt64(seconds * 1000000000.0); - UInt64 current_ns = nanoseconds(); UInt64 current_start_ns = start_ns; + UInt64 current_ns = nanoseconds(current_start_ns); while (true) { @@ -127,74 +147,6 @@ class AtomicStopwatch clockid_t clock_type; /// Most significant bit is a lock. When it is set, compareAndRestartDeferred method will return false. - UInt64 nanoseconds() const { return clock_gettime_ns(clock_type) & 0x7FFFFFFFFFFFFFFFULL; } + UInt64 nanoseconds(UInt64 prev_time) const { return clock_gettime_ns_adjusted(prev_time, clock_type) & 0x7FFFFFFFFFFFFFFFULL; } }; - -/// Like ordinary StopWatch, but uses getrusage() system call -struct StopwatchRUsage -{ - StopwatchRUsage() = default; - - void start() { start_ts = Timestamp::current(); is_running = true; } - void stop() { stop_ts = Timestamp::current(); is_running = false; } - void reset() { start_ts = Timestamp(); stop_ts = Timestamp(); is_running = false; } - void restart() { start(); } - - UInt64 elapsed(bool count_user = true, bool count_sys = true) const - { - return elapsedNanoseconds(count_user, count_sys); - } - - UInt64 elapsedNanoseconds(bool count_user = true, bool count_sys = true) const - { - return (is_running ? 
Timestamp::current() : stop_ts).nanoseconds(count_user, count_sys) - start_ts.nanoseconds(count_user, count_sys); - } - - UInt64 elapsedMicroseconds(bool count_user = true, bool count_sys = true) const - { - return elapsedNanoseconds(count_user, count_sys) / 1000UL; - } - - UInt64 elapsedMilliseconds(bool count_user = true, bool count_sys = true) const - { - return elapsedNanoseconds(count_user, count_sys) / 1000000UL; - } - - double elapsedSeconds(bool count_user = true, bool count_sys = true) const - { - return static_cast(elapsedNanoseconds(count_user, count_sys)) / 1000000000.0; - } - -private: - - struct Timestamp - { - UInt64 user_ns = 0; - UInt64 sys_ns = 0; - - static Timestamp current(); - - UInt64 nanoseconds(bool count_user = true, bool count_sys = true) const - { - return (count_user ? user_ns : 0) + (count_sys ? sys_ns : 0); - } - }; - - Timestamp start_ts; - Timestamp stop_ts; - bool is_running = false; -}; - - -template -class StopwatchGuard : public TStopwatch -{ -public: - explicit StopwatchGuard(UInt64 & elapsed_ns_) : elapsed_ns(elapsed_ns_) {} - - ~StopwatchGuard() { elapsed_ns += TStopwatch::elapsedNanoseconds(); } - -private: - UInt64 & elapsed_ns; -}; diff --git a/src/Common/Throttler.cpp b/src/Common/Throttler.cpp index 3462abfeb54d..fd434922ac29 100644 --- a/src/Common/Throttler.cpp +++ b/src/Common/Throttler.cpp @@ -35,7 +35,7 @@ void Throttler::add(size_t amount) { std::lock_guard lock(mutex); - auto now = clock_gettime_ns(); + auto now = clock_gettime_ns_adjusted(prev_ns); /// If prev_ns is equal to zero (first `add` call) we known nothing about speed /// and don't track anything. if (max_speed && prev_ns != 0) From 1d07b1a8d6e0870a6569ea814a99e42e1d0f2e45 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 8 Oct 2021 04:21:54 +0300 Subject: [PATCH 102/472] Backport #29857 to 21.9: Fix system tables recreation check (fails to detect changes in enum values) --- src/Interpreters/SystemLog.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/SystemLog.h b/src/Interpreters/SystemLog.h index a332245439bc..3abc5a7e42da 100644 --- a/src/Interpreters/SystemLog.h +++ b/src/Interpreters/SystemLog.h @@ -521,7 +521,7 @@ void SystemLog::prepareTable() auto alias_columns = LogElement::getNamesAndAliases(); auto current_query = InterpreterCreateQuery::formatColumns(ordinary_columns, alias_columns); - if (old_query->getTreeHash() != current_query->getTreeHash()) + if (serializeAST(*old_query) != serializeAST(*current_query)) { /// Rename the existing table. int suffix = 0; From c47e3e31b9e990328a19da08cdd660a95faeb388 Mon Sep 17 00:00:00 2001 From: tavplubix Date: Fri, 8 Oct 2021 14:32:40 +0300 Subject: [PATCH 103/472] Update Stopwatch.h --- src/Common/Stopwatch.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/Stopwatch.h b/src/Common/Stopwatch.h index d2c5b1104c43..d2a0d6bc3f6c 100644 --- a/src/Common/Stopwatch.h +++ b/src/Common/Stopwatch.h @@ -1,6 +1,6 @@ #pragma once -#include +#include #include #include From 127276ab5ef5ee8d0f80831a945fe55cb272d85d Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Fri, 8 Oct 2021 02:51:48 +0800 Subject: [PATCH 104/472] Fix vertical projection merges. 
(#29765) (cherry picked from commit a44906672b3139d6c14de1700d51d0c33cfedc20) --- src/Storages/MergeTree/MergeList.h | 2 + .../MergeTree/MergeTreeDataMergerMutator.cpp | 99 ++++++++++--------- .../MergeTree/MergeTreeDataMergerMutator.h | 12 +-- src/Storages/StorageMergeTree.cpp | 4 +- src/Storages/StorageReplicatedMergeTree.cpp | 4 +- ...01710_projection_vertical_merges.reference | 0 .../01710_projection_vertical_merges.sql | 19 ++++ 7 files changed, 81 insertions(+), 59 deletions(-) create mode 100644 tests/queries/0_stateless/01710_projection_vertical_merges.reference create mode 100644 tests/queries/0_stateless/01710_projection_vertical_merges.sql diff --git a/src/Storages/MergeTree/MergeList.h b/src/Storages/MergeTree/MergeList.h index 9762e616e7b9..669425f5b56d 100644 --- a/src/Storages/MergeTree/MergeList.h +++ b/src/Storages/MergeTree/MergeList.h @@ -100,6 +100,8 @@ struct MergeListElement : boost::noncopyable MergeInfo getInfo() const; ~MergeListElement(); + + MergeListElement & ref() { return *this; } }; using MergeListEntry = BackgroundProcessListEntry; diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index 7296e173ed10..dfe1aacefbd2 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -623,21 +623,21 @@ class MergeProgressCallback { public: MergeProgressCallback( - MergeList::Entry & merge_entry_, UInt64 & watch_prev_elapsed_, MergeStageProgress & stage_) - : merge_entry(merge_entry_) + MergeListElement & merge_list_element_, UInt64 & watch_prev_elapsed_, MergeStageProgress & stage_) + : merge_list_element(merge_list_element_) , watch_prev_elapsed(watch_prev_elapsed_) , stage(stage_) { updateWatch(); } - MergeList::Entry & merge_entry; + MergeListElement & merge_list_element; UInt64 & watch_prev_elapsed; MergeStageProgress & stage; void updateWatch() { - UInt64 watch_curr_elapsed = merge_entry->watch.elapsed(); + UInt64 watch_curr_elapsed = merge_list_element.watch.elapsed(); ProfileEvents::increment(ProfileEvents::MergesTimeMilliseconds, (watch_curr_elapsed - watch_prev_elapsed) / 1000000); watch_prev_elapsed = watch_curr_elapsed; } @@ -652,15 +652,15 @@ class MergeProgressCallback } updateWatch(); - merge_entry->bytes_read_uncompressed += value.read_bytes; + merge_list_element.bytes_read_uncompressed += value.read_bytes; if (stage.is_first) - merge_entry->rows_read += value.read_rows; + merge_list_element.rows_read += value.read_rows; stage.total_rows += value.total_rows_to_read; stage.rows_read += value.read_rows; if (stage.total_rows > 0) { - merge_entry->progress.store( + merge_list_element.progress.store( stage.initial_progress + stage.weight * stage.rows_read / stage.total_rows, std::memory_order_relaxed); } @@ -678,7 +678,7 @@ static bool needSyncPart(size_t input_rows, size_t input_bytes, const MergeTreeS MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTemporaryPart( const FutureMergedMutatedPart & future_part, const StorageMetadataPtr & metadata_snapshot, - MergeList::Entry & merge_entry, + MergeListElement & merge_list_element, TableLockHolder & holder, time_t time_of_merge, ContextPtr context, @@ -775,11 +775,11 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor need_remove_expired_values = false; } - size_t sum_input_rows_upper_bound = merge_entry->total_rows_count; - size_t sum_compressed_bytes_upper_bound = merge_entry->total_size_bytes_compressed; + size_t 
sum_input_rows_upper_bound = merge_list_element.total_rows_count; + size_t sum_compressed_bytes_upper_bound = merge_list_element.total_size_bytes_compressed; MergeAlgorithm chosen_merge_algorithm = chooseMergeAlgorithm( parts, sum_input_rows_upper_bound, gathering_columns, deduplicate, need_remove_expired_values, merging_params); - merge_entry->merge_algorithm.store(chosen_merge_algorithm, std::memory_order_relaxed); + merge_list_element.merge_algorithm.store(chosen_merge_algorithm, std::memory_order_relaxed); LOG_DEBUG(log, "Selected MergeAlgorithm: {}", toString(chosen_merge_algorithm)); @@ -788,7 +788,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor /// (which is locked in shared mode when input streams are created) and when inserting new data /// the order is reverse. This annoys TSan even though one lock is locked in shared mode and thus /// deadlock is impossible. - auto compression_codec = data.getCompressionCodecForPart(merge_entry->total_size_bytes_compressed, new_data_part->ttl_infos, time_of_merge); + auto compression_codec = data.getCompressionCodecForPart(merge_list_element.total_size_bytes_compressed, new_data_part->ttl_infos, time_of_merge); auto tmp_disk = context->getTemporaryVolume()->getDisk(); std::unique_ptr rows_sources_file; @@ -855,7 +855,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor data, metadata_snapshot, part, merging_column_names, read_with_direct_io, true); input->setProgressCallback( - MergeProgressCallback(merge_entry, watch_prev_elapsed, horizontal_stage_progress)); + MergeProgressCallback(merge_list_element, watch_prev_elapsed, horizontal_stage_progress)); Pipe pipe(std::move(input)); @@ -974,7 +974,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor { return merges_blocker.isCancelled() || (need_remove_expired_values && ttl_merges_blocker.isCancelled()) - || merge_entry->is_cancelled.load(std::memory_order_relaxed); + || merge_list_element.is_cancelled.load(std::memory_order_relaxed); }; Block block; @@ -984,17 +984,17 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor to.write(block); - merge_entry->rows_written = merged_stream->getProfileInfo().rows; - merge_entry->bytes_written_uncompressed = merged_stream->getProfileInfo().bytes; + merge_list_element.rows_written = merged_stream->getProfileInfo().rows; + merge_list_element.bytes_written_uncompressed = merged_stream->getProfileInfo().bytes; /// Reservation updates is not performed yet, during the merge it may lead to higher free space requirements if (space_reservation && sum_input_rows_upper_bound) { - /// The same progress from merge_entry could be used for both algorithms (it should be more accurate) + /// The same progress from merge_list_element could be used for both algorithms (it should be more accurate) /// But now we are using inaccurate row-based estimation in Horizontal case for backward compatibility Float64 progress = (chosen_merge_algorithm == MergeAlgorithm::Horizontal) ? std::min(1., 1. * rows_written / sum_input_rows_upper_bound) - : std::min(1., merge_entry->progress.load(std::memory_order_relaxed)); + : std::min(1., merge_list_element.progress.load(std::memory_order_relaxed)); space_reservation->update(static_cast((1. 
- progress) * initial_reservation)); } @@ -1015,9 +1015,9 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor /// Gather ordinary columns if (chosen_merge_algorithm == MergeAlgorithm::Vertical) { - size_t sum_input_rows_exact = merge_entry->rows_read; - merge_entry->columns_written = merging_column_names.size(); - merge_entry->progress.store(column_sizes->keyColumnsWeight(), std::memory_order_relaxed); + size_t sum_input_rows_exact = merge_list_element.rows_read; + merge_list_element.columns_written = merging_column_names.size(); + merge_list_element.progress.store(column_sizes->keyColumnsWeight(), std::memory_order_relaxed); BlockInputStreams column_part_streams(parts.size()); @@ -1046,7 +1046,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor { const String & column_name = it_name_and_type->name; Names column_names{column_name}; - Float64 progress_before = merge_entry->progress.load(std::memory_order_relaxed); + Float64 progress_before = merge_list_element.progress.load(std::memory_order_relaxed); MergeStageProgress column_progress(progress_before, column_sizes->columnWeight(column_name)); for (size_t part_num = 0; part_num < parts.size(); ++part_num) @@ -1055,7 +1055,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor data, metadata_snapshot, parts[part_num], column_names, read_with_direct_io, true); column_part_source->setProgressCallback( - MergeProgressCallback(merge_entry, watch_prev_elapsed, column_progress)); + MergeProgressCallback(merge_list_element, watch_prev_elapsed, column_progress)); QueryPipeline column_part_pipeline; column_part_pipeline.init(Pipe(std::move(column_part_source))); @@ -1104,9 +1104,9 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor /// NOTE: 'progress' is modified by single thread, but it may be concurrently read from MergeListElement::getInfo() (StorageSystemMerges). - merge_entry->columns_written += 1; - merge_entry->bytes_written_uncompressed += column_gathered_stream.getProfileInfo().bytes; - merge_entry->progress.store(progress_before + column_sizes->columnWeight(column_name), std::memory_order_relaxed); + merge_list_element.columns_written += 1; + merge_list_element.bytes_written_uncompressed += column_gathered_stream.getProfileInfo().bytes; + merge_list_element.progress.store(progress_before + column_sizes->columnWeight(column_name), std::memory_order_relaxed); } } @@ -1115,16 +1115,16 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor /// Print overall profiling info. 
NOTE: it may duplicates previous messages { - double elapsed_seconds = merge_entry->watch.elapsedSeconds(); + double elapsed_seconds = merge_list_element.watch.elapsedSeconds(); LOG_DEBUG(log, "Merge sorted {} rows, containing {} columns ({} merged, {} gathered) in {} sec., {} rows/sec., {}/sec.", - merge_entry->rows_read, + merge_list_element.rows_read, all_column_names.size(), merging_column_names.size(), gathering_column_names.size(), elapsed_seconds, - merge_entry->rows_read / elapsed_seconds, - ReadableSize(merge_entry->bytes_read_uncompressed / elapsed_seconds)); + merge_list_element.rows_read / elapsed_seconds, + ReadableSize(merge_list_element.bytes_read_uncompressed / elapsed_seconds)); } for (const auto & projection : metadata_snapshot->getProjections()) @@ -1160,11 +1160,11 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor if (projection.type == ProjectionDescription::Type::Aggregate) projection_merging_params.mode = MergeTreeData::MergingParams::Aggregating; - // TODO Should we use a new merge_entry for projection? + MergeListElement projection_merge_list_element(merge_list_element.table_id, projection_future_part); auto merged_projection_part = mergePartsToTemporaryPart( projection_future_part, projection.metadata, - merge_entry, + projection_merge_list_element, holder, time_of_merge, context, @@ -1189,13 +1189,13 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mutatePartToTempor const FutureMergedMutatedPart & future_part, const StorageMetadataPtr & metadata_snapshot, const MutationCommands & commands, - MergeListEntry & merge_entry, + MergeListElement & merge_list_element, time_t time_of_mutation, ContextPtr context, const ReservationPtr & space_reservation, TableLockHolder & holder) { - checkOperationIsNotCanceled(merge_entry); + checkOperationIsNotCanceled(merge_list_element); if (future_part.parts.size() != 1) throw Exception("Trying to mutate " + toString(future_part.parts.size()) + " parts, not one. 
" @@ -1259,7 +1259,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mutatePartToTempor mutation_kind = interpreter->getMutationKind(); in = interpreter->execute(); updated_header = interpreter->getUpdatedHeader(); - in->setProgressCallback(MergeProgressCallback(merge_entry, watch_prev_elapsed, stage_progress)); + in->setProgressCallback(MergeProgressCallback(merge_list_element, watch_prev_elapsed, stage_progress)); } auto single_disk_volume = std::make_shared("volume_" + future_part.name, space_reservation->getDisk(), 0); @@ -1316,7 +1316,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mutatePartToTempor in, time_of_mutation, compression_codec, - merge_entry, + merge_list_element, need_remove_expired_values, need_sync, space_reservation, @@ -1395,7 +1395,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mutatePartToTempor } } - merge_entry->columns_written = storage_columns.size() - updated_header.columns(); + merge_list_element.columns_written = storage_columns.size() - updated_header.columns(); new_data_part->checksums = source_part->checksums; @@ -1413,7 +1413,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mutatePartToTempor in, time_of_mutation, compression_codec, - merge_entry, + merge_list_element, need_remove_expired_values, need_sync, space_reservation, @@ -2009,7 +2009,7 @@ void MergeTreeDataMergerMutator::writeWithProjections( BlockInputStreamPtr mutating_stream, IMergedBlockOutputStream & out, time_t time_of_mutation, - MergeListEntry & merge_entry, + MergeListElement & merge_list_element, const ReservationPtr & space_reservation, TableLockHolder & holder, ContextPtr context, @@ -2032,7 +2032,7 @@ void MergeTreeDataMergerMutator::writeWithProjections( projection_squashes.emplace_back(settings.min_insert_block_size_rows, settings.min_insert_block_size_bytes); } - while (checkOperationIsNotCanceled(merge_entry) && (block = mutating_stream->read())) + while (checkOperationIsNotCanceled(merge_list_element) && (block = mutating_stream->read())) { if (minmax_idx) minmax_idx->update(block, data.getMinMaxColumnsNames(metadata_snapshot->getPartitionKey())); @@ -2048,8 +2048,8 @@ void MergeTreeDataMergerMutator::writeWithProjections( data, log, projection_block, projection, new_data_part.get(), ++block_num)); } - merge_entry->rows_written += block.rows(); - merge_entry->bytes_written_uncompressed += block.bytes(); + merge_list_element.rows_written += block.rows(); + merge_list_element.bytes_written_uncompressed += block.bytes(); } // Write the last block @@ -2134,10 +2134,11 @@ void MergeTreeDataMergerMutator::writeWithProjections( projection_merging_params.mode = MergeTreeData::MergingParams::Aggregating; LOG_DEBUG(log, "Merged {} parts in level {} to {}", selected_parts.size(), current_level, projection_future_part.name); + MergeListElement projection_merge_list_element(merge_list_element.table_id, projection_future_part); next_level_parts.push_back(mergePartsToTemporaryPart( projection_future_part, projection.metadata, - merge_entry, + projection_merge_list_element, holder, time_of_mutation, context, @@ -2162,7 +2163,7 @@ void MergeTreeDataMergerMutator::mutateAllPartColumns( BlockInputStreamPtr mutating_stream, time_t time_of_mutation, const CompressionCodecPtr & compression_codec, - MergeListEntry & merge_entry, + MergeListElement & merge_list_element, bool need_remove_expired_values, bool need_sync, const ReservationPtr & space_reservation, @@ -2198,7 +2199,7 @@ void 
MergeTreeDataMergerMutator::mutateAllPartColumns( mutating_stream, out, time_of_mutation, - merge_entry, + merge_list_element, space_reservation, holder, context, @@ -2219,7 +2220,7 @@ void MergeTreeDataMergerMutator::mutateSomePartColumns( BlockInputStreamPtr mutating_stream, time_t time_of_mutation, const CompressionCodecPtr & compression_codec, - MergeListEntry & merge_entry, + MergeListElement & merge_list_element, bool need_remove_expired_values, bool need_sync, const ReservationPtr & space_reservation, @@ -2255,7 +2256,7 @@ void MergeTreeDataMergerMutator::mutateSomePartColumns( mutating_stream, out, time_of_mutation, - merge_entry, + merge_list_element, space_reservation, holder, context); @@ -2324,9 +2325,9 @@ void MergeTreeDataMergerMutator::finalizeMutatedPart( new_data_part->storage.lockSharedData(*new_data_part); } -bool MergeTreeDataMergerMutator::checkOperationIsNotCanceled(const MergeListEntry & merge_entry) const +bool MergeTreeDataMergerMutator::checkOperationIsNotCanceled(const MergeListElement & merge_list_element) const { - if (merges_blocker.isCancelled() || merge_entry->is_cancelled) + if (merges_blocker.isCancelled() || merge_list_element.is_cancelled) throw Exception("Cancelled mutating parts", ErrorCodes::ABORTED); return true; diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h index ca7376d8f3e7..292b8ac9f32c 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h @@ -123,7 +123,7 @@ class MergeTreeDataMergerMutator MergeTreeData::MutableDataPartPtr mergePartsToTemporaryPart( const FutureMergedMutatedPart & future_part, const StorageMetadataPtr & metadata_snapshot, - MergeListEntry & merge_entry, + MergeListElement & merge_list_element, TableLockHolder & table_lock_holder, time_t time_of_merge, ContextPtr context, @@ -139,7 +139,7 @@ class MergeTreeDataMergerMutator const FutureMergedMutatedPart & future_part, const StorageMetadataPtr & metadata_snapshot, const MutationCommands & commands, - MergeListEntry & merge_entry, + MergeListElement & merge_list_element, time_t time_of_mutation, ContextPtr context, const ReservationPtr & space_reservation, @@ -226,7 +226,7 @@ class MergeTreeDataMergerMutator BlockInputStreamPtr mutating_stream, IMergedBlockOutputStream & out, time_t time_of_mutation, - MergeListEntry & merge_entry, + MergeListElement & merge_list_element, const ReservationPtr & space_reservation, TableLockHolder & holder, ContextPtr context, @@ -241,7 +241,7 @@ class MergeTreeDataMergerMutator BlockInputStreamPtr mutating_stream, time_t time_of_mutation, const CompressionCodecPtr & compression_codec, - MergeListEntry & merge_entry, + MergeListElement & merge_list_element, bool need_remove_expired_values, bool need_sync, const ReservationPtr & space_reservation, @@ -259,7 +259,7 @@ class MergeTreeDataMergerMutator BlockInputStreamPtr mutating_stream, time_t time_of_mutation, const CompressionCodecPtr & compression_codec, - MergeListEntry & merge_entry, + MergeListElement & merge_list_element, bool need_remove_expired_values, bool need_sync, const ReservationPtr & space_reservation, @@ -291,7 +291,7 @@ public : bool need_remove_expired_values, const MergeTreeData::MergingParams & merging_params) const; - bool checkOperationIsNotCanceled(const MergeListEntry & merge_entry) const; + bool checkOperationIsNotCanceled(const MergeListElement & merge_list_element) const; private: diff --git a/src/Storages/StorageMergeTree.cpp 
b/src/Storages/StorageMergeTree.cpp index 32c2c76dd106..b097c996107b 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -863,7 +863,7 @@ bool StorageMergeTree::mergeSelectedParts( new_part = merger_mutator.mergePartsToTemporaryPart( future_part, metadata_snapshot, - *(merge_list_entry), + (*merge_list_entry)->ref(), table_lock_holder, time(nullptr), getContext(), @@ -1022,7 +1022,7 @@ bool StorageMergeTree::mutateSelectedPart(const StorageMetadataPtr & metadata_sn try { new_part = merger_mutator.mutatePartToTemporaryPart( - future_part, metadata_snapshot, merge_mutate_entry.commands, *(merge_list_entry), + future_part, metadata_snapshot, merge_mutate_entry.commands, (*merge_list_entry)->ref(), time(nullptr), getContext(), merge_mutate_entry.tagger->reserved_space, table_lock_holder); renameTempPartAndReplace(new_part); diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index e17653414420..021260a7dffc 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -1776,7 +1776,7 @@ bool StorageReplicatedMergeTree::tryExecuteMerge(const LogEntry & entry) part = merger_mutator.mergePartsToTemporaryPart( future_merged_part, metadata_snapshot, - *merge_entry, + (*merge_entry)->ref(), table_lock, entry.create_time, getContext(), @@ -1916,7 +1916,7 @@ bool StorageReplicatedMergeTree::tryExecutePartMutation(const StorageReplicatedM try { new_part = merger_mutator.mutatePartToTemporaryPart( - future_mutated_part, metadata_snapshot, commands, *merge_entry, + future_mutated_part, metadata_snapshot, commands, (*merge_entry)->ref(), entry.create_time, getContext(), reserved_space, table_lock); renameTempPartAndReplace(new_part, nullptr, &transaction); diff --git a/tests/queries/0_stateless/01710_projection_vertical_merges.reference b/tests/queries/0_stateless/01710_projection_vertical_merges.reference new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/queries/0_stateless/01710_projection_vertical_merges.sql b/tests/queries/0_stateless/01710_projection_vertical_merges.sql new file mode 100644 index 000000000000..d54fef7e71df --- /dev/null +++ b/tests/queries/0_stateless/01710_projection_vertical_merges.sql @@ -0,0 +1,19 @@ +-- Tags: long, no-parallel + +drop table if exists t; + +create table t (c1 Int64, c2 String, c3 DateTime, c4 Int8, c5 String, c6 String, c7 String, c8 String, c9 String, c10 String, c11 String, c12 String, c13 Int8, c14 Int64, c15 String, c16 String, c17 String, c18 Int64, c19 Int64, c20 Int64) engine MergeTree order by c18; + +insert into t (c1, c18) select number, -number from numbers(2000000); + +alter table t add projection p_norm (select * order by c1); + +optimize table t final; + +alter table t materialize projection p_norm settings mutations_sync = 1; + +set allow_experimental_projection_optimization = 1, max_rows_to_read = 3; + +select c18 from t where c1 < 0; + +drop table t; From d86a8e9f056004ba4eeb3a00ebf82186ab05de85 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 8 Oct 2021 16:27:48 +0300 Subject: [PATCH 105/472] Backport #29828 to 21.9: Postgresql partitioned table support for replica idenitity index --- src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp b/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp index 1b77947264e3..0682d64e150b 100644 --- 
a/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp +++ b/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp @@ -249,7 +249,7 @@ PostgreSQLTableStructure fetchPostgreSQLTableStructure( "and i.oid = ix.indexrelid " "and a.attrelid = t.oid " "and a.attnum = ANY(ix.indkey) " - "and t.relkind = 'r' " /// simple tables + "and t.relkind in ('r', 'p') " /// simple tables "and t.relname = {} " /// Connection is already done to a needed database, only table name is needed. "and ix.indisreplident = 't' " /// index is is replica identity index "ORDER BY a.attname", /// column names From c28668786ad8ccd8f9dca4d267bf5cae2a0e02cc Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Sat, 9 Oct 2021 04:28:29 +0300 Subject: [PATCH 106/472] Backport #29790 to 21.9: Fix bad cast in ParserCreateQuery --- src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp | 2 +- src/Parsers/ParserCreateQuery.cpp | 4 ++-- src/Storages/MergeTree/MergeTreeIndexSet.cpp | 2 +- tests/queries/0_stateless/01188_attach_table_from_path.sql | 1 + 4 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp b/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp index 90917a0fd7e7..e142bdb7a27b 100644 --- a/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp +++ b/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp @@ -120,7 +120,7 @@ static NamesAndTypesList getColumnsList(const ASTExpressionList * columns_defini auto * literal = child->as(); new_child->arguments = std::make_shared(); - new_child->arguments->children.push_back(std::make_shared(literal->value.get())); + new_child->arguments->children.push_back(std::make_shared(literal->value.safeGet())); new_child->arguments->children.push_back(std::make_shared(Int16(++i))); child = new_child; } diff --git a/src/Parsers/ParserCreateQuery.cpp b/src/Parsers/ParserCreateQuery.cpp index d4525883e363..1da1bfba491b 100644 --- a/src/Parsers/ParserCreateQuery.cpp +++ b/src/Parsers/ParserCreateQuery.cpp @@ -481,7 +481,7 @@ bool ParserCreateTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe if (attach && s_from.ignore(pos, expected)) { - ParserLiteral from_path_p; + ParserStringLiteral from_path_p; if (!from_path_p.parse(pos, from_path, expected)) return false; } @@ -896,7 +896,7 @@ bool ParserCreateViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec if (ParserKeyword{"TO INNER UUID"}.ignore(pos, expected)) { - ParserLiteral literal_p; + ParserStringLiteral literal_p; if (!literal_p.parse(pos, to_inner_uuid, expected)) return false; } diff --git a/src/Storages/MergeTree/MergeTreeIndexSet.cpp b/src/Storages/MergeTree/MergeTreeIndexSet.cpp index 024b87c9a3e3..60b9ddae3296 100644 --- a/src/Storages/MergeTree/MergeTreeIndexSet.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexSet.cpp @@ -461,7 +461,7 @@ bool MergeTreeIndexConditionSet::checkASTUseless(const ASTPtr & node, bool atomi [this](const auto & arg) { return checkASTUseless(arg, true); }); } else if (const auto * literal = node->as()) - return !atomic && literal->value.get(); + return !atomic && literal->value.safeGet(); else if (const auto * identifier = node->as()) return key_columns.find(identifier->getColumnName()) == std::end(key_columns); else diff --git a/tests/queries/0_stateless/01188_attach_table_from_path.sql b/tests/queries/0_stateless/01188_attach_table_from_path.sql index d72daa78f67a..afcd588bdf7d 100644 --- a/tests/queries/0_stateless/01188_attach_table_from_path.sql +++ b/tests/queries/0_stateless/01188_attach_table_from_path.sql @@ 
-5,6 +5,7 @@ drop table if exists mt; attach table test from 'some/path' (n UInt8) engine=Memory; -- { serverError 48 } attach table test from '/etc/passwd' (s String) engine=File(TSVRaw); -- { serverError 481 } attach table test from '../../../../../../../../../etc/passwd' (s String) engine=File(TSVRaw); -- { serverError 481 } +attach table test from 42 (s String) engine=File(TSVRaw); -- { clientError 62 } insert into table function file('01188_attach/file/data.TSV', 'TSV', 's String, n UInt8') values ('file', 42); attach table file from '01188_attach/file' (s String, n UInt8) engine=File(TSV); From a6eb9494865ec235ce234d4f37aa18c5da51415f Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Sun, 10 Oct 2021 06:35:58 +0300 Subject: [PATCH 107/472] Backport #29925 to 21.9: Update CCTZ --- contrib/cctz | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/cctz b/contrib/cctz index c0f1bcb97fd2..9edd0861d832 160000 --- a/contrib/cctz +++ b/contrib/cctz @@ -1 +1 @@ -Subproject commit c0f1bcb97fd2782f7c3f972fadd5aad5affac4b8 +Subproject commit 9edd0861d8328b2ae77e8fb5f4d7dcd1cf33b42b From 1e652069322863a6089364beac25df52be3ee0fe Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Mon, 11 Oct 2021 02:39:53 +0300 Subject: [PATCH 108/472] Backport #29951 to 21.9: Fix shutdown of AccessControlManager --- programs/server/Server.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 3e1804d6fe6b..9dd51b596664 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -1036,6 +1036,10 @@ if (ThreadFuzzer::instance().isEffective()) server.start(); SCOPE_EXIT({ + /// Stop reloading of the main config. This must be done before `global_context->shutdown()` because + /// otherwise the reloading may pass a changed config to some destroyed parts of ContextSharedPart. + main_config_reloader.reset(); + /** Ask to cancel background jobs all table engines, * and also query_log. * It is important to do early, not in destructor of Context, because @@ -1076,9 +1080,6 @@ if (ThreadFuzzer::instance().isEffective()) /// Wait server pool to avoid use-after-free of destroyed context in the handlers server_pool.joinAll(); - // Uses a raw pointer to global context for getting ZooKeeper. - main_config_reloader.reset(); - /** Explicitly destroy Context. It is more convenient than in destructor of Server, because logger is still available. * At this moment, no one could own shared part of Context. 
*/ From 44c2612b12d49936adcc296774e8d09766bf54df Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 12 Oct 2021 10:51:43 +0300 Subject: [PATCH 109/472] Backport #29837 to 21.9: MaterializedMySQL: Update GTID set at end of transaction --- src/Core/MySQL/MySQLReplication.cpp | 32 ++++++++++++++++--- src/Core/MySQL/MySQLReplication.h | 4 +++ .../materialize_with_ddl.py | 30 +++++++++++++++++ .../test_materialized_mysql_database/test.py | 5 +++ 4 files changed, 67 insertions(+), 4 deletions(-) diff --git a/src/Core/MySQL/MySQLReplication.cpp b/src/Core/MySQL/MySQLReplication.cpp index 81015507d8dc..b0c4c4953a7e 100644 --- a/src/Core/MySQL/MySQLReplication.cpp +++ b/src/Core/MySQL/MySQLReplication.cpp @@ -105,12 +105,16 @@ namespace MySQLReplication if (query.starts_with("BEGIN") || query.starts_with("COMMIT")) { typ = QUERY_EVENT_MULTI_TXN_FLAG; + if (!query.starts_with("COMMIT")) + transaction_complete = false; } else if (query.starts_with("XA")) { if (query.starts_with("XA ROLLBACK")) throw ReplicationError("ParseQueryEvent: Unsupported query event:" + query, ErrorCodes::LOGICAL_ERROR); typ = QUERY_EVENT_XA; + if (!query.starts_with("XA COMMIT")) + transaction_complete = false; } else if (query.starts_with("SAVEPOINT")) { @@ -711,9 +715,26 @@ namespace MySQLReplication { switch (event->header.type) { - case FORMAT_DESCRIPTION_EVENT: - case QUERY_EVENT: + case FORMAT_DESCRIPTION_EVENT: { + binlog_pos = event->header.log_pos; + break; + } + case QUERY_EVENT: { + auto query = std::static_pointer_cast(event); + if (query->transaction_complete && pending_gtid) + { + gtid_sets.update(*pending_gtid); + pending_gtid.reset(); + } + binlog_pos = event->header.log_pos; + break; + } case XID_EVENT: { + if (pending_gtid) + { + gtid_sets.update(*pending_gtid); + pending_gtid.reset(); + } binlog_pos = event->header.log_pos; break; } @@ -724,9 +745,11 @@ namespace MySQLReplication break; } case GTID_EVENT: { + if (pending_gtid) + gtid_sets.update(*pending_gtid); auto gtid_event = std::static_pointer_cast(event); binlog_pos = event->header.log_pos; - gtid_sets.update(gtid_event->gtid); + pending_gtid = gtid_event->gtid; break; } default: @@ -792,6 +815,7 @@ namespace MySQLReplication { event = std::make_shared(std::move(event_header)); event->parseEvent(event_payload); + position.update(event); auto query = std::static_pointer_cast(event); switch (query->typ) @@ -803,7 +827,7 @@ namespace MySQLReplication break; } default: - position.update(event); + break; } break; } diff --git a/src/Core/MySQL/MySQLReplication.h b/src/Core/MySQL/MySQLReplication.h index cd24979caaaa..8036d40b98ca 100644 --- a/src/Core/MySQL/MySQLReplication.h +++ b/src/Core/MySQL/MySQLReplication.h @@ -383,6 +383,7 @@ namespace MySQLReplication String schema; String query; QueryType typ = QUERY_EVENT_DDL; + bool transaction_complete = true; QueryEvent(EventHeader && header_) : EventBase(std::move(header_)), thread_id(0), exec_time(0), schema_len(0), error_code(0), status_len(0) @@ -536,6 +537,9 @@ namespace MySQLReplication void update(BinlogEventPtr event); void update(UInt64 binlog_pos_, const String & binlog_name_, const String & gtid_sets_); void dump(WriteBuffer & out) const; + + private: + std::optional pending_gtid; }; class IFlavor : public MySQLProtocol::IMySQLReadPacket diff --git a/tests/integration/test_materialized_mysql_database/materialize_with_ddl.py b/tests/integration/test_materialized_mysql_database/materialize_with_ddl.py index 23fa9894a842..5f6daea24ac1 100644 --- 
a/tests/integration/test_materialized_mysql_database/materialize_with_ddl.py +++ b/tests/integration/test_materialized_mysql_database/materialize_with_ddl.py @@ -980,3 +980,33 @@ def mysql_settings_test(clickhouse_node, mysql_node, service_name): clickhouse_node.query("DROP DATABASE test_database") mysql_node.query("DROP DATABASE test_database") +def materialized_mysql_large_transaction(clickhouse_node, mysql_node, service_name): + mysql_node.query("DROP DATABASE IF EXISTS largetransaction") + clickhouse_node.query("DROP DATABASE IF EXISTS largetransaction") + mysql_node.query("CREATE DATABASE largetransaction") + + mysql_node.query("CREATE TABLE largetransaction.test_table (" + "`key` INT NOT NULL PRIMARY KEY AUTO_INCREMENT, " + "`value` INT NOT NULL) ENGINE = InnoDB;") + num_rows = 200000 + rows_per_insert = 5000 + values = ",".join(["(1)" for _ in range(rows_per_insert)]) + for i in range(num_rows//rows_per_insert): + mysql_node.query(f"INSERT INTO largetransaction.test_table (`value`) VALUES {values};") + + + clickhouse_node.query("CREATE DATABASE largetransaction ENGINE = MaterializedMySQL('{}:3306', 'largetransaction', 'root', 'clickhouse')".format(service_name)) + check_query(clickhouse_node, "SELECT COUNT() FROM largetransaction.test_table", f"{num_rows}\n") + + mysql_node.query("UPDATE largetransaction.test_table SET value = 2;") + + # Attempt to restart clickhouse after it has started processing + # the transaction, but before it has completed it. + while int(clickhouse_node.query("SELECT COUNT() FROM largetransaction.test_table WHERE value = 2")) == 0: + time.sleep(0.2) + clickhouse_node.restart_clickhouse() + + check_query(clickhouse_node, "SELECT COUNT() FROM largetransaction.test_table WHERE value = 2", f"{num_rows}\n") + + clickhouse_node.query("DROP DATABASE largetransaction") + mysql_node.query("DROP DATABASE largetransaction") diff --git a/tests/integration/test_materialized_mysql_database/test.py b/tests/integration/test_materialized_mysql_database/test.py index 18cb5b3b87c9..feade1b60a0b 100644 --- a/tests/integration/test_materialized_mysql_database/test.py +++ b/tests/integration/test_materialized_mysql_database/test.py @@ -237,3 +237,8 @@ def test_materialize_with_enum(started_cluster, started_mysql_8_0, started_mysql def test_mysql_settings(started_cluster, started_mysql_8_0, started_mysql_5_7, clickhouse_node): materialize_with_ddl.mysql_settings_test(clickhouse_node, started_mysql_5_7, "mysql57") materialize_with_ddl.mysql_settings_test(clickhouse_node, started_mysql_8_0, "mysql80") + +@pytest.mark.parametrize(('clickhouse_node'), [pytest.param(node_db_ordinary, id="ordinary"), pytest.param(node_db_atomic, id="atomic")]) +def test_large_transaction(started_cluster, started_mysql_8_0, started_mysql_5_7, clickhouse_node): + materialize_with_ddl.materialized_mysql_large_transaction(clickhouse_node, started_mysql_8_0, "mysql80") + materialize_with_ddl.materialized_mysql_large_transaction(clickhouse_node, started_mysql_5_7, "mysql57") From 4c1411faa2688605670899418d5167f6048a6dd2 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Tue, 12 Oct 2021 14:42:08 +0300 Subject: [PATCH 110/472] Fix compilation --- src/Server/TCPHandler.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 02bdd9721d4a..338131f37338 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -1168,7 +1168,7 @@ void TCPHandler::receiveQuery() if (is_interserver_mode) { ClientInfo 
original_session_client_info = session->getClientInfo(); - session = std::make_unique(server.context(), ClientInfo::Interface::TCP_INTERSERVER); + session = std::make_unique(server.context(), ClientInfo::Interface::TCP); session->getClientInfo() = original_session_client_info; } From 9a860481ea109a9c2d67849a097983299eb2a791 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 12 Oct 2021 17:00:19 +0300 Subject: [PATCH 111/472] Backport #29954 to 21.9: gRPC: Fix releasing query ID and session ID at the end of query processing --- src/Interpreters/Session.cpp | 9 +++++++++ src/Interpreters/Session.h | 3 +++ src/Server/GRPCServer.cpp | 17 ++++++++++++++++- tests/integration/test_grpc_protocol/test.py | 2 +- 4 files changed, 29 insertions(+), 2 deletions(-) diff --git a/src/Interpreters/Session.cpp b/src/Interpreters/Session.cpp index c0e08395effa..acb8ad66a425 100644 --- a/src/Interpreters/Session.cpp +++ b/src/Interpreters/Session.cpp @@ -424,4 +424,13 @@ ContextMutablePtr Session::makeQueryContextImpl(const ClientInfo * client_info_t return query_context; } + +void Session::releaseSessionID() +{ + if (!named_session) + return; + named_session->release(); + named_session = nullptr; +} + } diff --git a/src/Interpreters/Session.h b/src/Interpreters/Session.h index d104e2500996..d5872813fdda 100644 --- a/src/Interpreters/Session.h +++ b/src/Interpreters/Session.h @@ -65,6 +65,9 @@ class Session ContextMutablePtr makeQueryContext(const ClientInfo & query_client_info) const; ContextMutablePtr makeQueryContext(ClientInfo && query_client_info) const; + /// Releases the currently used session ID so it becomes available for reuse by another session. + void releaseSessionID(); + private: ContextMutablePtr makeQueryContextImpl(const ClientInfo * client_info_to_copy, ClientInfo * client_info_to_move) const; diff --git a/src/Server/GRPCServer.cpp b/src/Server/GRPCServer.cpp index f0c6e208323d..a0d1a40104e0 100644 --- a/src/Server/GRPCServer.cpp +++ b/src/Server/GRPCServer.cpp @@ -540,6 +540,7 @@ namespace void finishQuery(); void onException(const Exception & exception); void onFatalError(); + void releaseQueryIDAndSessionID(); void close(); void readQueryInfo(); @@ -1169,6 +1170,7 @@ namespace addProgressToResult(); query_scope->logPeakMemoryUsage(); addLogsToResult(); + releaseQueryIDAndSessionID(); sendResult(); close(); @@ -1199,6 +1201,8 @@ namespace LOG_WARNING(log, "Couldn't send logs to client"); } + releaseQueryIDAndSessionID(); + try { sendException(exception); @@ -1218,7 +1222,7 @@ namespace { try { - finalize = true; + result.mutable_exception()->set_name("FatalError"); addLogsToResult(); sendResult(); } @@ -1228,6 +1232,17 @@ namespace } } + void Call::releaseQueryIDAndSessionID() + { + /// releaseQueryIDAndSessionID() should be called before sending the final result to the client + /// because the client may decide to send another query with the same query ID or session ID + /// immediately after it receives our final result, and it's prohibited to have + /// two queries executed at the same time with the same query ID or session ID. 
+ io.process_list_entry.reset(); + if (session) + session->releaseSessionID(); + } + void Call::close() { responder.reset(); diff --git a/tests/integration/test_grpc_protocol/test.py b/tests/integration/test_grpc_protocol/test.py index b0c1f8067b63..c6842b164a66 100644 --- a/tests/integration/test_grpc_protocol/test.py +++ b/tests/integration/test_grpc_protocol/test.py @@ -211,7 +211,7 @@ def test_errors_handling(): assert "Table default.t already exists" in e.display_text def test_authentication(): - query("CREATE USER john IDENTIFIED BY 'qwe123'") + query("CREATE USER OR REPLACE john IDENTIFIED BY 'qwe123'") assert query("SELECT currentUser()", user_name="john", password="qwe123") == "john\n" def test_logs(): From a6064b4437452a5d770f0df62512eb4d93757460 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 12 Oct 2021 18:54:46 +0300 Subject: [PATCH 112/472] Backport #29879 to 21.9: Fix potential leak of query_id_holder --- src/Processors/QueryPlan/QueryIdHolder.cpp | 1 + src/Processors/QueryPlan/QueryIdHolder.h | 5 +- .../QueryPlan/ReadFromMergeTree.cpp | 2 +- src/Storages/MergeTree/MergeTreeData.cpp | 19 ++++-- src/Storages/MergeTree/MergeTreeData.h | 7 ++- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 63 ++++++++++--------- .../MergeTree/MergeTreeDataSelectExecutor.h | 2 +- .../01666_merge_tree_max_query_limit.sh | 30 +++++---- 8 files changed, 76 insertions(+), 53 deletions(-) diff --git a/src/Processors/QueryPlan/QueryIdHolder.cpp b/src/Processors/QueryPlan/QueryIdHolder.cpp index 87f6f892cd1d..6ff238e017c4 100644 --- a/src/Processors/QueryPlan/QueryIdHolder.cpp +++ b/src/Processors/QueryPlan/QueryIdHolder.cpp @@ -3,6 +3,7 @@ namespace DB { + QueryIdHolder::QueryIdHolder(const String & query_id_, const MergeTreeData & data_) : query_id(query_id_), data(data_) { } diff --git a/src/Processors/QueryPlan/QueryIdHolder.h b/src/Processors/QueryPlan/QueryIdHolder.h index ed8f9ec1d6bb..1e1ee1af0a19 100644 --- a/src/Processors/QueryPlan/QueryIdHolder.h +++ b/src/Processors/QueryPlan/QueryIdHolder.h @@ -2,13 +2,16 @@ #include +#include + namespace DB { + class MergeTreeData; /// Holds the current query id and do something meaningful in destructor. /// Currently it's used for cleaning query id in the MergeTreeData query set. 
-struct QueryIdHolder +struct QueryIdHolder : private boost::noncopyable { QueryIdHolder(const std::string & query_id_, const MergeTreeData & data_); diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index dc3e863b8413..ba61e1c9138d 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -949,7 +949,7 @@ void ReadFromMergeTree::initializePipeline(QueryPipeline & pipeline, const Build ProfileEvents::increment(ProfileEvents::SelectedRanges, result.selected_ranges); ProfileEvents::increment(ProfileEvents::SelectedMarks, result.selected_marks); - auto query_id_holder = MergeTreeDataSelectExecutor::checkLimits(data, result.parts_with_ranges, context); + auto query_id_holder = MergeTreeDataSelectExecutor::checkLimits(data, result, context); if (result.parts_with_ranges.empty()) { diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 90bb0b593cf6..8628bc18c7f0 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -4977,26 +4977,33 @@ void MergeTreeData::setDataVolume(size_t bytes, size_t rows, size_t parts) total_active_size_parts.store(parts, std::memory_order_release); } -void MergeTreeData::insertQueryIdOrThrow(const String & query_id, size_t max_queries) const +bool MergeTreeData::insertQueryIdOrThrow(const String & query_id, size_t max_queries) const { std::lock_guard lock(query_id_set_mutex); + return insertQueryIdOrThrowNoLock(query_id, max_queries, lock); +} + +bool MergeTreeData::insertQueryIdOrThrowNoLock(const String & query_id, size_t max_queries, const std::lock_guard &) const +{ if (query_id_set.find(query_id) != query_id_set.end()) - return; + return false; if (query_id_set.size() >= max_queries) throw Exception( ErrorCodes::TOO_MANY_SIMULTANEOUS_QUERIES, "Too many simultaneous queries for table {}. Maximum is: {}", log_name, max_queries); query_id_set.insert(query_id); + return true; } void MergeTreeData::removeQueryId(const String & query_id) const { std::lock_guard lock(query_id_set_mutex); + removeQueryIdNoLock(query_id, lock); +} + +void MergeTreeData::removeQueryIdNoLock(const String & query_id, const std::lock_guard &) const +{ if (query_id_set.find(query_id) == query_id_set.end()) - { - /// Do not throw exception, because this method is used in destructor. LOG_WARNING(log, "We have query_id removed but it's not recorded. This is a bug"); - assert(false); - } else query_id_set.erase(query_id); } diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 2871c845ac8c..2ce558641034 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -773,11 +773,16 @@ class MergeTreeData : public IStorage, public WithMutableContext /// section from config.xml. CompressionCodecPtr getCompressionCodecForPart(size_t part_size_compressed, const IMergeTreeDataPart::TTLInfos & ttl_infos, time_t current_time) const; + std::lock_guard getQueryIdSetLock() const { return std::lock_guard(query_id_set_mutex); } + /// Record current query id where querying the table. Throw if there are already `max_queries` queries accessing the same table. - void insertQueryIdOrThrow(const String & query_id, size_t max_queries) const; + /// Returns false if the `query_id` already exists in the running set, otherwise return true. 
+ bool insertQueryIdOrThrow(const String & query_id, size_t max_queries) const; + bool insertQueryIdOrThrowNoLock(const String & query_id, size_t max_queries, const std::lock_guard &) const; /// Remove current query id after query finished. void removeQueryId(const String & query_id) const; + void removeQueryIdNoLock(const String & query_id, const std::lock_guard &) const; /// Return the partition expression types as a Tuple type. Return DataTypeUInt8 if partition expression is empty. DataTypePtr getPartitionValueType() const; diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index d57ccf645af9..2085a66004fa 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -986,47 +986,48 @@ RangesInDataParts MergeTreeDataSelectExecutor::filterPartsByPrimaryKeyAndSkipInd std::shared_ptr MergeTreeDataSelectExecutor::checkLimits( const MergeTreeData & data, - const RangesInDataParts & parts_with_ranges, + const ReadFromMergeTree::AnalysisResult & result, const ContextPtr & context) { const auto & settings = context->getSettingsRef(); - // Check limitations. query_id is used as the quota RAII's resource key. - String query_id; + const auto data_settings = data.getSettings(); + auto max_partitions_to_read + = settings.max_partitions_to_read.changed ? settings.max_partitions_to_read : data_settings->max_partitions_to_read; + if (max_partitions_to_read > 0) { - const auto data_settings = data.getSettings(); - auto max_partitions_to_read - = settings.max_partitions_to_read.changed ? settings.max_partitions_to_read : data_settings->max_partitions_to_read; - if (max_partitions_to_read > 0) - { - std::set partitions; - for (const auto & part_with_ranges : parts_with_ranges) - partitions.insert(part_with_ranges.data_part->info.partition_id); - if (partitions.size() > size_t(max_partitions_to_read)) - throw Exception( - ErrorCodes::TOO_MANY_PARTITIONS, - "Too many partitions to read. Current {}, max {}", - partitions.size(), - max_partitions_to_read); - } + std::set partitions; + for (const auto & part_with_ranges : result.parts_with_ranges) + partitions.insert(part_with_ranges.data_part->info.partition_id); + if (partitions.size() > size_t(max_partitions_to_read)) + throw Exception( + ErrorCodes::TOO_MANY_PARTITIONS, + "Too many partitions to read. Current {}, max {}", + partitions.size(), + max_partitions_to_read); + } - if (data_settings->max_concurrent_queries > 0 && data_settings->min_marks_to_honor_max_concurrent_queries > 0) + if (data_settings->max_concurrent_queries > 0 && data_settings->min_marks_to_honor_max_concurrent_queries > 0 + && result.selected_marks >= data_settings->min_marks_to_honor_max_concurrent_queries) + { + auto query_id = context->getCurrentQueryId(); + if (!query_id.empty()) { - size_t sum_marks = 0; - for (const auto & part : parts_with_ranges) - sum_marks += part.getMarksCount(); - - if (sum_marks >= data_settings->min_marks_to_honor_max_concurrent_queries) + auto lock = data.getQueryIdSetLock(); + if (data.insertQueryIdOrThrowNoLock(query_id, data_settings->max_concurrent_queries, lock)) { - query_id = context->getCurrentQueryId(); - if (!query_id.empty()) - data.insertQueryIdOrThrow(query_id, data_settings->max_concurrent_queries); + try + { + return std::make_shared(query_id, data); + } + catch (...) + { + /// If we fail to construct the holder, remove query_id explicitly to avoid leak. 
+ data.removeQueryIdNoLock(query_id, lock); + throw; + } } } } - - if (!query_id.empty()) - return std::make_shared(query_id, data); - return nullptr; } diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h index 92c4382dc90c..3cc5033c9f18 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h @@ -197,7 +197,7 @@ class MergeTreeDataSelectExecutor /// Also, return QueryIdHolder. If not null, we should keep it until query finishes. static std::shared_ptr checkLimits( const MergeTreeData & data, - const RangesInDataParts & parts_with_ranges, + const ReadFromMergeTree::AnalysisResult & result, const ContextPtr & context); }; diff --git a/tests/queries/0_stateless/01666_merge_tree_max_query_limit.sh b/tests/queries/0_stateless/01666_merge_tree_max_query_limit.sh index c5fbb35a9cde..e04c95150094 100755 --- a/tests/queries/0_stateless/01666_merge_tree_max_query_limit.sh +++ b/tests/queries/0_stateless/01666_merge_tree_max_query_limit.sh @@ -4,8 +4,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -function wait_for_query_to_start() -{ +function wait_for_query_to_start() { while [[ $($CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL" -d "SELECT sum(read_rows) FROM system.processes WHERE query_id = '$1'") == 0 ]]; do sleep 0.1; done } @@ -21,14 +20,14 @@ insert into simple select number, number + 100 from numbers(5000); query_id="long_running_query-$CLICKHOUSE_DATABASE" echo "Spin up a long running query" -${CLICKHOUSE_CLIENT} --query "select sleepEachRow(0.1) from simple settings max_block_size = 1 format Null" --query_id "$query_id" > /dev/null 2>&1 & +${CLICKHOUSE_CLIENT} --query "select sleepEachRow(0.1) from simple settings max_block_size = 1 format Null" --query_id "$query_id" >/dev/null 2>&1 & wait_for_query_to_start "$query_id" # query which reads marks >= min_marks_to_honor_max_concurrent_queries is throttled echo "Check if another query with some marks to read is throttled" -${CLICKHOUSE_CLIENT} --query "select * from simple" 2> /dev/null; +${CLICKHOUSE_CLIENT} --query "select * from simple" 2>/dev/null CODE=$? -[ "$CODE" -ne "202" ] && echo "Expected error code: 202 but got: $CODE" && exit 1; +[ "$CODE" -ne "202" ] && echo "Expected error code: 202 but got: $CODE" && exit 1 echo "yes" # query which reads marks less than min_marks_to_honor_max_concurrent_queries is allowed @@ -41,9 +40,9 @@ ${CLICKHOUSE_CLIENT} --query "alter table simple modify setting min_marks_to_hon # Now smaller queries are also throttled echo "Check if another query with less marks to read is throttled" -${CLICKHOUSE_CLIENT} --query "select * from simple where i = 0" 2> /dev/null; +${CLICKHOUSE_CLIENT} --query "select * from simple where i = 0" 2>/dev/null CODE=$? -[ "$CODE" -ne "202" ] && echo "Expected error code: 202 but got: $CODE" && exit 1; +[ "$CODE" -ne "202" ] && echo "Expected error code: 202 but got: $CODE" && exit 1 echo "yes" echo "Modify max_concurrent_queries to 2" @@ -58,14 +57,21 @@ ${CLICKHOUSE_CLIENT} --query "alter table simple modify setting max_concurrent_q # Now queries are throttled again echo "Check if another query with less marks to read is throttled" -${CLICKHOUSE_CLIENT} --query "select * from simple where i = 0" 2> /dev/null; +${CLICKHOUSE_CLIENT} --query "select * from simple where i = 0" 2>/dev/null CODE=$? 
-[ "$CODE" -ne "202" ] && echo "Expected error code: 202 but got: $CODE" && exit 1; +[ "$CODE" -ne "202" ] && echo "Expected error code: 202 but got: $CODE" && exit 1 echo "yes" ${CLICKHOUSE_CLIENT} --query "KILL QUERY WHERE query_id = '$query_id' SYNC FORMAT Null" wait -${CLICKHOUSE_CLIENT} --multiline --multiquery --query " -drop table simple -" +# Check correctness of multiple subqueries +query_id=max_concurrent_queries_$RANDOM +${CLICKHOUSE_CLIENT} --query_id "$query_id" --query "select i from simple where j in (select i from simple where i < 10)" + +# We have to search the server's error log because the following warning message +# is generated during pipeline destruction and thus is not sent to the client. +${CLICKHOUSE_CLIENT} --query "system flush logs" +if [[ $(${CLICKHOUSE_CLIENT} --query "select count() > 0 from system.text_log where query_id = '$query_id' and level = 'Warning' and message like '%We have query_id removed but it\'s not recorded. This is a bug%' format TSVRaw") == 1 ]]; then echo "We have query_id removed but it's not recorded. This is a bug." >&2; exit 1; fi + +${CLICKHOUSE_CLIENT} --query "drop table simple" From 7e0c8d5d80652bded8d4338f4ad31e6adb5eccdb Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 12 Oct 2021 22:57:06 +0300 Subject: [PATCH 113/472] Backport #30016 to 21.9: Fix crash of sample by tuple() --- src/Storages/MergeTree/MergeTreeData.cpp | 4 ++++ tests/queries/0_stateless/02096_sample_by_tuple.reference | 0 tests/queries/0_stateless/02096_sample_by_tuple.sql | 7 +++++++ 3 files changed, 11 insertions(+) create mode 100644 tests/queries/0_stateless/02096_sample_by_tuple.reference create mode 100644 tests/queries/0_stateless/02096_sample_by_tuple.sql diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 90bb0b593cf6..9d36567faba6 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -125,10 +125,14 @@ namespace ErrorCodes extern const int ALTER_OF_COLUMN_IS_FORBIDDEN; extern const int SUPPORT_IS_DISABLED; extern const int TOO_MANY_SIMULTANEOUS_QUERIES; + extern const int INCORRECT_QUERY; } static void checkSampleExpression(const StorageInMemoryMetadata & metadata, bool allow_sampling_expression_not_in_primary_key, bool check_sample_column_is_correct) { + if (metadata.sampling_key.column_names.empty()) + throw Exception("There are no columns in sampling expression", ErrorCodes::INCORRECT_QUERY); + const auto & pk_sample_block = metadata.getPrimaryKey().sample_block; if (!pk_sample_block.has(metadata.sampling_key.column_names[0]) && !allow_sampling_expression_not_in_primary_key) throw Exception("Sampling expression must be present in the primary key", ErrorCodes::BAD_ARGUMENTS); diff --git a/tests/queries/0_stateless/02096_sample_by_tuple.reference b/tests/queries/0_stateless/02096_sample_by_tuple.reference new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/queries/0_stateless/02096_sample_by_tuple.sql b/tests/queries/0_stateless/02096_sample_by_tuple.sql new file mode 100644 index 000000000000..4996c9b83849 --- /dev/null +++ b/tests/queries/0_stateless/02096_sample_by_tuple.sql @@ -0,0 +1,7 @@ +DROP TABLE IF EXISTS t; + +CREATE TABLE t (n UInt8) ENGINE=MergeTree ORDER BY n SAMPLE BY tuple(); -- { serverError 80 } + +CREATE TABLE t (n UInt8) ENGINE=MergeTree ORDER BY tuple(); + +ALTER TABLE t MODIFY SAMPLE BY tuple(); -- { serverError 80 } From f6204b42e5cb6adcadb17e9e8bd44a2decdc241c Mon Sep 17 00:00:00 2001 From: 
robot-clickhouse Date: Wed, 13 Oct 2021 17:03:46 +0300 Subject: [PATCH 114/472] Backport #30054 to 21.9: Remove metadata leftovers on drop database --- src/Core/Settings.h | 2 ++ src/Databases/DatabaseFactory.cpp | 1 + src/Databases/DatabaseOnDisk.cpp | 25 +++++++++++++++++-- src/Interpreters/DatabaseCatalog.cpp | 10 +++++--- src/Interpreters/DatabaseCatalog.h | 2 +- src/Interpreters/InterpreterCreateQuery.cpp | 2 +- src/Interpreters/InterpreterDropQuery.cpp | 2 +- src/Interpreters/loadMetadata.cpp | 1 + .../test_restart_server/__init__.py | 0 tests/integration/test_restart_server/test.py | 22 ++++++++++++++++ .../0_stateless/01601_detach_permanently.sql | 4 +-- 11 files changed, 60 insertions(+), 11 deletions(-) create mode 100755 tests/integration/test_restart_server/__init__.py create mode 100755 tests/integration/test_restart_server/test.py diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 5b4c3d2d2d3a..cf06e62e5951 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -494,6 +494,8 @@ class IColumn; M(UInt64, function_range_max_elements_in_block, 500000000, "Maximum number of values generated by function 'range' per block of data (sum of array sizes for every row in a block, see also 'max_block_size' and 'min_insert_block_size_rows'). It is a safety threshold.", 0) \ M(ShortCircuitFunctionEvaluation, short_circuit_function_evaluation, ShortCircuitFunctionEvaluation::ENABLE, "Setting for short-circuit function evaluation configuration. Possible values: 'enable' - use short-circuit function evaluation for functions that are suitable for it, 'disable' - disable short-circuit function evaluation, 'force_enable' - use short-circuit function evaluation for all functions.", 0) \ \ + M(Bool, force_remove_data_recursively_on_drop, false, "Recursively remove data on DROP query. Avoids 'Directory not empty' error, but may silently remove detached data", 0) \ + \ /** Experimental functions */ \ M(Bool, allow_experimental_funnel_functions, false, "Enable experimental functions for funnel analysis.", 0) \ M(Bool, allow_experimental_nlp_functions, false, "Enable experimental functions for natural language processing.", 0) \ diff --git a/src/Databases/DatabaseFactory.cpp b/src/Databases/DatabaseFactory.cpp index 75a3b9c9e1ec..57b588d40626 100644 --- a/src/Databases/DatabaseFactory.cpp +++ b/src/Databases/DatabaseFactory.cpp @@ -69,6 +69,7 @@ DatabasePtr DatabaseFactory::get(const ASTCreateQuery & create, const String & m /// Before 20.7 it's possible that .sql metadata file does not exist for some old database. /// In this case Ordinary database is created on server startup if the corresponding metadata directory exists. /// So we should remove metadata directory if database creation failed. 
+ /// TODO remove this code created = fs::create_directory(metadata_path); DatabasePtr impl = getImpl(create, metadata_path, context); diff --git a/src/Databases/DatabaseOnDisk.cpp b/src/Databases/DatabaseOnDisk.cpp index 620e560b64cd..6facbbb03d94 100644 --- a/src/Databases/DatabaseOnDisk.cpp +++ b/src/Databases/DatabaseOnDisk.cpp @@ -38,6 +38,7 @@ namespace ErrorCodes extern const int SYNTAX_ERROR; extern const int TABLE_ALREADY_EXISTS; extern const int EMPTY_LIST_OF_COLUMNS_PASSED; + extern const int DATABASE_NOT_EMPTY; } @@ -528,8 +529,28 @@ ASTPtr DatabaseOnDisk::getCreateDatabaseQuery() const void DatabaseOnDisk::drop(ContextPtr local_context) { assert(tables.empty()); - fs::remove(local_context->getPath() + getDataPath()); - fs::remove(getMetadataPath()); + if (local_context->getSettingsRef().force_remove_data_recursively_on_drop) + { + fs::remove_all(local_context->getPath() + getDataPath()); + fs::remove_all(getMetadataPath()); + } + else + { + try + { + fs::remove(local_context->getPath() + getDataPath()); + fs::remove(getMetadataPath()); + } + catch (const fs::filesystem_error & e) + { + if (e.code() != std::errc::directory_not_empty) + throw Exception(Exception::CreateFromSTDTag{}, e); + throw Exception(ErrorCodes::DATABASE_NOT_EMPTY, "Cannot drop: {}. " + "Probably database contain some detached tables or metadata leftovers from Ordinary engine. " + "If you want to remove all data anyway, try to attach database back and drop it again " + "with enabled force_remove_data_recursively_on_drop setting", e.what()); + } + } } String DatabaseOnDisk::getObjectMetadataPath(const String & object_name) const diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index 0cf85fdde681..852ed0f588ac 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -326,7 +326,7 @@ void DatabaseCatalog::attachDatabase(const String & database_name, const Databas } -DatabasePtr DatabaseCatalog::detachDatabase(const String & database_name, bool drop, bool check_empty) +DatabasePtr DatabaseCatalog::detachDatabase(ContextPtr local_context, const String & database_name, bool drop, bool check_empty) { if (database_name == TEMPORARY_DATABASE) throw Exception("Cannot detach database with temporary tables.", ErrorCodes::DATABASE_ACCESS_DENIED); @@ -362,12 +362,14 @@ DatabasePtr DatabaseCatalog::detachDatabase(const String & database_name, bool d if (drop) { /// Delete the database. 
- db->drop(getContext()); + db->drop(local_context); /// Old ClickHouse versions did not store database.sql files + /// Remove metadata dir (if exists) to avoid recreation of .sql file on server startup + fs::path database_metadata_dir = fs::path(getContext()->getPath()) / "metadata" / escapeForFileName(database_name); + fs::remove(database_metadata_dir); fs::path database_metadata_file = fs::path(getContext()->getPath()) / "metadata" / (escapeForFileName(database_name) + ".sql"); - if (fs::exists(database_metadata_file)) - fs::remove(database_metadata_file); + fs::remove(database_metadata_file); } return db; diff --git a/src/Interpreters/DatabaseCatalog.h b/src/Interpreters/DatabaseCatalog.h index 071b80690df2..62e9de9102ea 100644 --- a/src/Interpreters/DatabaseCatalog.h +++ b/src/Interpreters/DatabaseCatalog.h @@ -145,7 +145,7 @@ class DatabaseCatalog : boost::noncopyable, WithMutableContext DatabasePtr getSystemDatabase() const; void attachDatabase(const String & database_name, const DatabasePtr & database); - DatabasePtr detachDatabase(const String & database_name, bool drop = false, bool check_empty = true); + DatabasePtr detachDatabase(ContextPtr local_context, const String & database_name, bool drop = false, bool check_empty = true); void updateDatabaseName(const String & old_name, const String & new_name); /// database_name must be not empty diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index a1313a84c36f..c1fc9b389cee 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -282,7 +282,7 @@ BlockIO InterpreterCreateQuery::createDatabase(ASTCreateQuery & create) assert(removed); } if (added) - DatabaseCatalog::instance().detachDatabase(database_name, false, false); + DatabaseCatalog::instance().detachDatabase(getContext(), database_name, false, false); throw; } diff --git a/src/Interpreters/InterpreterDropQuery.cpp b/src/Interpreters/InterpreterDropQuery.cpp index 0e15c6be27cf..a03e66ff199f 100644 --- a/src/Interpreters/InterpreterDropQuery.cpp +++ b/src/Interpreters/InterpreterDropQuery.cpp @@ -359,7 +359,7 @@ BlockIO InterpreterDropQuery::executeToDatabaseImpl(const ASTDropQuery & query, database->assertCanBeDetached(true); /// DETACH or DROP database itself - DatabaseCatalog::instance().detachDatabase(database_name, drop, database->shouldBeEmptyOnDetach()); + DatabaseCatalog::instance().detachDatabase(getContext(), database_name, drop, database->shouldBeEmptyOnDetach()); } } diff --git a/src/Interpreters/loadMetadata.cpp b/src/Interpreters/loadMetadata.cpp index 43f9727c3553..62de208bc3a5 100644 --- a/src/Interpreters/loadMetadata.cpp +++ b/src/Interpreters/loadMetadata.cpp @@ -64,6 +64,7 @@ static void loadDatabase( } else if (fs::exists(fs::path(database_path))) { + /// TODO Remove this code (it's required for compatibility with versions older than 20.7) /// Database exists, but .sql file is absent. It's old-style Ordinary database (e.g. 
system or default) database_attach_query = "ATTACH DATABASE " + backQuoteIfNeed(database) + " ENGINE = Ordinary"; } diff --git a/tests/integration/test_restart_server/__init__.py b/tests/integration/test_restart_server/__init__.py new file mode 100755 index 000000000000..e69de29bb2d1 diff --git a/tests/integration/test_restart_server/test.py b/tests/integration/test_restart_server/test.py new file mode 100755 index 000000000000..47797f7c4a5e --- /dev/null +++ b/tests/integration/test_restart_server/test.py @@ -0,0 +1,22 @@ +import pytest +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) +node = cluster.add_instance('node', stay_alive=True) + +@pytest.fixture(scope="module") +def start_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +def test_drop_memory_database(start_cluster): + node.query("CREATE DATABASE test ENGINE Memory") + node.query("CREATE TABLE test.test_table(a String) ENGINE Memory") + node.query("DROP DATABASE test") + node.restart_clickhouse(kill=True) + assert node.query("SHOW DATABASES LIKE 'test'").strip() == "" + diff --git a/tests/queries/0_stateless/01601_detach_permanently.sql b/tests/queries/0_stateless/01601_detach_permanently.sql index 3af8ed573ef1..9acfad6bf3dc 100644 --- a/tests/queries/0_stateless/01601_detach_permanently.sql +++ b/tests/queries/0_stateless/01601_detach_permanently.sql @@ -129,7 +129,7 @@ SELECT 'And detach permanently again to check how database drop will behave'; DETACH table test1601_detach_permanently_ordinary.test_name_reuse PERMANENTLY; SELECT 'DROP database - Directory not empty error, but database detached'; -DROP DATABASE test1601_detach_permanently_ordinary; -- { serverError 1001 } +DROP DATABASE test1601_detach_permanently_ordinary; -- { serverError 219 } ATTACH DATABASE test1601_detach_permanently_ordinary; @@ -203,7 +203,7 @@ SELECT 'And detach permanently again to check how database drop will behave'; DETACH table test1601_detach_permanently_lazy.test_name_reuse PERMANENTLY; SELECT 'DROP database - Directory not empty error, but database deteched'; -DROP DATABASE test1601_detach_permanently_lazy; -- { serverError 1001 } +DROP DATABASE test1601_detach_permanently_lazy; -- { serverError 219 } ATTACH DATABASE test1601_detach_permanently_lazy; From f93045f633d3453ed70ed805f88a1d348d02009c Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 13 Oct 2021 17:07:00 +0300 Subject: [PATCH 115/472] Backport #29762 to 21.9: May be fix s3 tests --- .../test/integration/runner/compose/docker_compose_minio.yml | 4 +++- tests/integration/test_merge_tree_s3/test.py | 1 - 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/docker/test/integration/runner/compose/docker_compose_minio.yml b/docker/test/integration/runner/compose/docker_compose_minio.yml index 96a5f8bdc31e..33c656e83348 100644 --- a/docker/test/integration/runner/compose/docker_compose_minio.yml +++ b/docker/test/integration/runner/compose/docker_compose_minio.yml @@ -2,7 +2,9 @@ version: '2.3' services: minio1: - image: minio/minio + # Newer version of minio results in such errors: + # "AWSErrorMarshaller: Encountered AWSError 'InternalError': We encountered an internal error, please try again" + image: minio/minio:RELEASE.2021-09-23T04-46-24Z volumes: - data1-1:/data1 - ${MINIO_CERTS_DIR:-}:/certs diff --git a/tests/integration/test_merge_tree_s3/test.py b/tests/integration/test_merge_tree_s3/test.py index 41f59e0cbb4b..72577f8c7007 100644 --- a/tests/integration/test_merge_tree_s3/test.py +++ 
b/tests/integration/test_merge_tree_s3/test.py @@ -28,7 +28,6 @@ def join(self, timeout=None): if self.exception: raise self.exception - SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) CONFIG_PATH = os.path.join(SCRIPT_DIR, './{}/node/configs/config.d/storage_conf.xml'.format(get_instances_dir())) From 4538293b639e8e913e4fc096f26761733e3f6e83 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 13 Oct 2021 19:00:28 +0300 Subject: [PATCH 116/472] Backport #30086 to 21.9: Add test with GLOBAL IN and totals. --- .../queries/0_stateless/02096_totals_global_in_bug.reference | 4 ++++ tests/queries/0_stateless/02096_totals_global_in_bug.sql | 2 ++ 2 files changed, 6 insertions(+) create mode 100644 tests/queries/0_stateless/02096_totals_global_in_bug.reference create mode 100644 tests/queries/0_stateless/02096_totals_global_in_bug.sql diff --git a/tests/queries/0_stateless/02096_totals_global_in_bug.reference b/tests/queries/0_stateless/02096_totals_global_in_bug.reference new file mode 100644 index 000000000000..a536e1a53291 --- /dev/null +++ b/tests/queries/0_stateless/02096_totals_global_in_bug.reference @@ -0,0 +1,4 @@ +0 +2 + +2 diff --git a/tests/queries/0_stateless/02096_totals_global_in_bug.sql b/tests/queries/0_stateless/02096_totals_global_in_bug.sql new file mode 100644 index 000000000000..ac4f2b9d2bae --- /dev/null +++ b/tests/queries/0_stateless/02096_totals_global_in_bug.sql @@ -0,0 +1,2 @@ +select sum(number) from remote('127.0.0.{2,3}', numbers(2)) where number global in (select sum(number) from numbers(2) group by number with totals) group by number with totals + From 02a97cd8fc5a0e72020206877cda152ffa2305e6 Mon Sep 17 00:00:00 2001 From: Vladimir C Date: Tue, 12 Oct 2021 12:34:04 +0300 Subject: [PATCH 117/472] Merge pull request #29544 from Algunenano/join_deadlock --- src/Functions/FunctionJoinGet.cpp | 6 +- src/Functions/FunctionJoinGet.h | 16 +++-- src/Interpreters/ExpressionAnalyzer.cpp | 6 +- src/Interpreters/HashJoin.cpp | 2 +- src/Interpreters/HashJoin.h | 7 +- src/Storages/IStorage.h | 1 + src/Storages/StorageJoin.cpp | 54 ++++++++------ src/Storages/StorageJoin.h | 13 ++-- src/Storages/StorageSet.cpp | 21 +++--- src/Storages/StorageSet.h | 8 +-- .../02033_join_engine_deadlock_long.reference | 0 .../02033_join_engine_deadlock_long.sh | 71 +++++++++++++++++++ 12 files changed, 149 insertions(+), 56 deletions(-) create mode 100644 tests/queries/0_stateless/02033_join_engine_deadlock_long.reference create mode 100755 tests/queries/0_stateless/02033_join_engine_deadlock_long.sh diff --git a/src/Functions/FunctionJoinGet.cpp b/src/Functions/FunctionJoinGet.cpp index ee1736074373..61133c6c3d86 100644 --- a/src/Functions/FunctionJoinGet.cpp +++ b/src/Functions/FunctionJoinGet.cpp @@ -25,14 +25,14 @@ ColumnPtr ExecutableFunctionJoinGet::executeImpl(const ColumnsWithTypeA auto key = arguments[i]; keys.emplace_back(std::move(key)); } - return storage_join->joinGet(keys, result_columns).column; + return storage_join->joinGet(keys, result_columns, getContext()).column; } template ExecutableFunctionPtr FunctionJoinGet::prepare(const ColumnsWithTypeAndName &) const { Block result_columns {{return_type->createColumn(), return_type, attr_name}}; - return std::make_unique>(table_lock, storage_join, result_columns); + return std::make_unique>(getContext(), table_lock, storage_join, result_columns); } static std::pair, String> @@ -100,7 +100,7 @@ FunctionBasePtr JoinGetOverloadResolver::buildImpl(const ColumnsWithTyp auto return_type = 
storage_join->joinGetCheckAndGetReturnType(data_types, attr_name, or_null); auto table_lock = storage_join->lockForShare(getContext()->getInitialQueryId(), getContext()->getSettingsRef().lock_acquire_timeout); - return std::make_unique>(table_lock, storage_join, attr_name, argument_types, return_type); + return std::make_unique>(getContext(), table_lock, storage_join, attr_name, argument_types, return_type); } void registerFunctionJoinGet(FunctionFactory & factory) diff --git a/src/Functions/FunctionJoinGet.h b/src/Functions/FunctionJoinGet.h index 3ddab51e2d97..2dd0cb9fdea5 100644 --- a/src/Functions/FunctionJoinGet.h +++ b/src/Functions/FunctionJoinGet.h @@ -14,13 +14,15 @@ class StorageJoin; using StorageJoinPtr = std::shared_ptr; template -class ExecutableFunctionJoinGet final : public IExecutableFunction +class ExecutableFunctionJoinGet final : public IExecutableFunction, WithContext { public: - ExecutableFunctionJoinGet(TableLockHolder table_lock_, + ExecutableFunctionJoinGet(ContextPtr context_, + TableLockHolder table_lock_, StorageJoinPtr storage_join_, const DB::Block & result_columns_) - : table_lock(std::move(table_lock_)) + : WithContext(context_) + , table_lock(std::move(table_lock_)) , storage_join(std::move(storage_join_)) , result_columns(result_columns_) {} @@ -42,15 +44,17 @@ class ExecutableFunctionJoinGet final : public IExecutableFunction }; template -class FunctionJoinGet final : public IFunctionBase +class FunctionJoinGet final : public IFunctionBase, WithContext { public: static constexpr auto name = or_null ? "joinGetOrNull" : "joinGet"; - FunctionJoinGet(TableLockHolder table_lock_, + FunctionJoinGet(ContextPtr context_, + TableLockHolder table_lock_, StorageJoinPtr storage_join_, String attr_name_, DataTypes argument_types_, DataTypePtr return_type_) - : table_lock(std::move(table_lock_)) + : WithContext(context_) + , table_lock(std::move(table_lock_)) , storage_join(storage_join_) , attr_name(std::move(attr_name_)) , argument_types(std::move(argument_types_)) diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 40eb0cd79800..e8e44c4995e5 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -811,11 +811,11 @@ JoinPtr SelectQueryExpressionAnalyzer::appendJoin(ExpressionActionsChain & chain return table_join; } -static JoinPtr tryGetStorageJoin(std::shared_ptr analyzed_join) +static JoinPtr tryGetStorageJoin(ContextPtr context, std::shared_ptr analyzed_join) { if (auto * table = analyzed_join->joined_storage.get()) if (auto * storage_join = dynamic_cast(table)) - return storage_join->getJoinLocked(analyzed_join); + return storage_join->getJoinLocked(analyzed_join, context); return {}; } @@ -880,7 +880,7 @@ JoinPtr SelectQueryExpressionAnalyzer::makeTableJoin( throw Exception(ErrorCodes::LOGICAL_ERROR, "Table join was already created for query"); /// Use StorageJoin if any. 
- JoinPtr join = tryGetStorageJoin(syntax->analyzed_join); + JoinPtr join = tryGetStorageJoin(getContext(), syntax->analyzed_join); if (!join) { diff --git a/src/Interpreters/HashJoin.cpp b/src/Interpreters/HashJoin.cpp index e0aad706966f..ef816eae436c 100644 --- a/src/Interpreters/HashJoin.cpp +++ b/src/Interpreters/HashJoin.cpp @@ -666,7 +666,7 @@ bool HashJoin::addJoinedBlock(const Block & source_block, bool check_limits) size_t total_bytes = 0; { - if (storage_join_lock.mutex()) + if (storage_join_lock) throw DB::Exception("addJoinedBlock called when HashJoin locked to prevent updates", ErrorCodes::LOGICAL_ERROR); diff --git a/src/Interpreters/HashJoin.h b/src/Interpreters/HashJoin.h index 65e3f5dbabe5..3722350a683e 100644 --- a/src/Interpreters/HashJoin.h +++ b/src/Interpreters/HashJoin.h @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -322,9 +323,9 @@ class HashJoin : public IJoin /// We keep correspondence between used_flags and hash table internal buffer. /// Hash table cannot be modified during HashJoin lifetime and must be protected with lock. - void setLock(std::shared_mutex & rwlock) + void setLock(RWLockImpl::LockHolder rwlock_holder) { - storage_join_lock = std::shared_lock(rwlock); + storage_join_lock = rwlock_holder; } void reuseJoinedData(const HashJoin & join); @@ -387,7 +388,7 @@ class HashJoin : public IJoin /// Should be set via setLock to protect hash table from modification from StorageJoin /// If set HashJoin instance is not available for modification (addJoinedBlock) - std::shared_lock storage_join_lock; + RWLockImpl::LockHolder storage_join_lock = nullptr; void init(Type type_); diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index 85bfbfb1f848..48c305bf36d7 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -203,6 +203,7 @@ class IStorage : public std::enable_shared_from_this, public TypePromo /// without locks. MultiVersionStorageMetadataPtr metadata; +protected: RWLockImpl::LockHolder tryLockTimed( const RWLock & rwlock, RWLockImpl::Type type, const String & query_id, const std::chrono::milliseconds & acquire_timeout) const; diff --git a/src/Storages/StorageJoin.cpp b/src/Storages/StorageJoin.cpp index 5c5b12c7475e..b71502a38d80 100644 --- a/src/Storages/StorageJoin.cpp +++ b/src/Storages/StorageJoin.cpp @@ -1,13 +1,13 @@ #include #include #include +#include #include #include #include #include #include #include -#include #include #include #include @@ -67,17 +67,24 @@ StorageJoin::StorageJoin( restore(); } +RWLockImpl::LockHolder StorageJoin::tryLockTimedWithContext(const RWLock & lock, RWLockImpl::Type type, ContextPtr context) const +{ + const String query_id = context ? context->getInitialQueryId() : RWLockImpl::NO_QUERY; + const std::chrono::milliseconds acquire_timeout + = context ? 
context->getSettingsRef().lock_acquire_timeout : std::chrono::seconds(DBMS_DEFAULT_LOCK_ACQUIRE_TIMEOUT_SEC); + return tryLockTimed(lock, type, query_id, acquire_timeout); +} + SinkToStoragePtr StorageJoin::write(const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, ContextPtr context) { std::lock_guard mutate_lock(mutate_mutex); return StorageSetOrJoinBase::write(query, metadata_snapshot, context); } -void StorageJoin::truncate( - const ASTPtr &, const StorageMetadataPtr & metadata_snapshot, ContextPtr, TableExclusiveLockHolder&) +void StorageJoin::truncate(const ASTPtr &, const StorageMetadataPtr & metadata_snapshot, ContextPtr context, TableExclusiveLockHolder &) { std::lock_guard mutate_lock(mutate_mutex); - std::unique_lock lock(rwlock); + TableLockHolder holder = tryLockTimedWithContext(rwlock, RWLockImpl::Write, context); disk->removeRecursive(path); disk->createDirectories(path); @@ -128,7 +135,7 @@ void StorageJoin::mutate(const MutationCommands & commands, ContextPtr context) } /// Now acquire exclusive lock and modify storage. - std::unique_lock lock(rwlock); + TableLockHolder holder = tryLockTimedWithContext(rwlock, RWLockImpl::Write, context); join = std::move(new_data); increment = 1; @@ -152,7 +159,7 @@ void StorageJoin::mutate(const MutationCommands & commands, ContextPtr context) } } -HashJoinPtr StorageJoin::getJoinLocked(std::shared_ptr analyzed_join) const +HashJoinPtr StorageJoin::getJoinLocked(std::shared_ptr analyzed_join, ContextPtr context) const { auto metadata_snapshot = getInMemoryMetadataPtr(); if (!analyzed_join->sameStrictnessAndKind(strictness, kind)) @@ -171,34 +178,36 @@ HashJoinPtr StorageJoin::getJoinLocked(std::shared_ptr analyzed_join) analyzed_join->setRightKeys(key_names); HashJoinPtr join_clone = std::make_shared(analyzed_join, metadata_snapshot->getSampleBlock().sortColumns()); - join_clone->setLock(rwlock); + + RWLockImpl::LockHolder holder = tryLockTimedWithContext(rwlock, RWLockImpl::Read, context); + join_clone->setLock(holder); join_clone->reuseJoinedData(*join); return join_clone; } -void StorageJoin::insertBlock(const Block & block) +void StorageJoin::insertBlock(const Block & block, ContextPtr context) { - std::unique_lock lock(rwlock); + TableLockHolder holder = tryLockTimedWithContext(rwlock, RWLockImpl::Write, context); join->addJoinedBlock(block, true); } -size_t StorageJoin::getSize() const +size_t StorageJoin::getSize(ContextPtr context) const { - std::shared_lock lock(rwlock); + TableLockHolder holder = tryLockTimedWithContext(rwlock, RWLockImpl::Read, context); return join->getTotalRowCount(); } -std::optional StorageJoin::totalRows(const Settings &) const +std::optional StorageJoin::totalRows(const Settings &settings) const { - std::shared_lock lock(rwlock); + TableLockHolder holder = tryLockTimed(rwlock, RWLockImpl::Read, RWLockImpl::NO_QUERY, settings.lock_acquire_timeout); return join->getTotalRowCount(); } -std::optional StorageJoin::totalBytes(const Settings &) const +std::optional StorageJoin::totalBytes(const Settings &settings) const { - std::shared_lock lock(rwlock); + TableLockHolder holder = tryLockTimed(rwlock, RWLockImpl::Read, RWLockImpl::NO_QUERY, settings.lock_acquire_timeout); return join->getTotalByteCount(); } @@ -207,9 +216,9 @@ DataTypePtr StorageJoin::joinGetCheckAndGetReturnType(const DataTypes & data_typ return join->joinGetCheckAndGetReturnType(data_types, column_name, or_null); } -ColumnWithTypeAndName StorageJoin::joinGet(const Block & block, const Block & block_with_columns_to_add) const 
+ColumnWithTypeAndName StorageJoin::joinGet(const Block & block, const Block & block_with_columns_to_add, ContextPtr context) const { - std::shared_lock lock(rwlock); + TableLockHolder holder = tryLockTimedWithContext(rwlock, RWLockImpl::Read, context); return join->joinGet(block, block_with_columns_to_add); } @@ -370,10 +379,10 @@ size_t rawSize(const StringRef & t) class JoinSource : public SourceWithProgress { public: - JoinSource(HashJoinPtr join_, std::shared_mutex & rwlock, UInt64 max_block_size_, Block sample_block_) + JoinSource(HashJoinPtr join_, TableLockHolder lock_holder_, UInt64 max_block_size_, Block sample_block_) : SourceWithProgress(sample_block_) , join(join_) - , lock(rwlock) + , lock_holder(lock_holder_) , max_block_size(max_block_size_) , sample_block(std::move(sample_block_)) { @@ -418,7 +427,7 @@ class JoinSource : public SourceWithProgress private: HashJoinPtr join; - std::shared_lock lock; + TableLockHolder lock_holder; UInt64 max_block_size; Block sample_block; @@ -568,7 +577,7 @@ Pipe StorageJoin::read( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, SelectQueryInfo & /*query_info*/, - ContextPtr /*context*/, + ContextPtr context, QueryProcessingStage::Enum /*processed_stage*/, size_t max_block_size, unsigned /*num_streams*/) @@ -576,7 +585,8 @@ Pipe StorageJoin::read( metadata_snapshot->check(column_names, getVirtuals(), getStorageID()); Block source_sample_block = metadata_snapshot->getSampleBlockForColumns(column_names, getVirtuals(), getStorageID()); - return Pipe(std::make_shared(join, rwlock, max_block_size, source_sample_block)); + RWLockImpl::LockHolder holder = tryLockTimedWithContext(rwlock, RWLockImpl::Read, context); + return Pipe(std::make_shared(join, std::move(holder), max_block_size, source_sample_block)); } } diff --git a/src/Storages/StorageJoin.h b/src/Storages/StorageJoin.h index 6a08773ecc86..cdc475319998 100644 --- a/src/Storages/StorageJoin.h +++ b/src/Storages/StorageJoin.h @@ -2,7 +2,9 @@ #include +#include #include +#include #include #include @@ -35,7 +37,7 @@ class StorageJoin final : public shared_ptr_helper, public StorageS /// Return instance of HashJoin holding lock that protects from insertions to StorageJoin. /// HashJoin relies on structure of hash table that's why we need to return it with locked mutex. - HashJoinPtr getJoinLocked(std::shared_ptr analyzed_join) const; + HashJoinPtr getJoinLocked(std::shared_ptr analyzed_join, ContextPtr context) const; /// Get result type for function "joinGet(OrNull)" DataTypePtr joinGetCheckAndGetReturnType(const DataTypes & data_types, const String & column_name, bool or_null) const; @@ -43,7 +45,7 @@ class StorageJoin final : public shared_ptr_helper, public StorageS /// Execute function "joinGet(OrNull)" on data block. /// Takes rwlock for read to prevent parallel StorageJoin updates during processing data block /// (but not during processing whole query, it's safe for joinGet that doesn't involve `used_flags` from HashJoin) - ColumnWithTypeAndName joinGet(const Block & block, const Block & block_with_columns_to_add) const; + ColumnWithTypeAndName joinGet(const Block & block, const Block & block_with_columns_to_add, ContextPtr context) const; SinkToStoragePtr write(const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, ContextPtr context) override; @@ -73,12 +75,13 @@ class StorageJoin final : public shared_ptr_helper, public StorageS /// Protect state for concurrent use in insertFromBlock and joinBlock. 
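/// (Rationale sketch, assuming RWLockImpl semantics: unlike std::shared_mutex, acquisition goes
/// through tryLockTimedWithContext() and honours the query's lock_acquire_timeout setting, e.g.
///     RWLockImpl::LockHolder holder = tryLockTimedWithContext(rwlock, RWLockImpl::Read, context);
/// so a conflicting acquisition fails after the timeout instead of blocking indefinitely, which is
/// what the 02033_join_engine_deadlock_long test exercises.)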
/// Lock is stored in HashJoin instance during query and blocks concurrent insertions. - mutable std::shared_mutex rwlock; + mutable RWLock rwlock = RWLockImpl::create(); mutable std::mutex mutate_mutex; - void insertBlock(const Block & block) override; + void insertBlock(const Block & block, ContextPtr context) override; void finishInsert() override {} - size_t getSize() const override; + size_t getSize(ContextPtr context) const override; + RWLockImpl::LockHolder tryLockTimedWithContext(const RWLock & lock, RWLockImpl::Type type, ContextPtr context) const; protected: StorageJoin( diff --git a/src/Storages/StorageSet.cpp b/src/Storages/StorageSet.cpp index c16b60af45fb..f245e1fcf17e 100644 --- a/src/Storages/StorageSet.cpp +++ b/src/Storages/StorageSet.cpp @@ -31,11 +31,11 @@ namespace ErrorCodes } -class SetOrJoinSink : public SinkToStorage +class SetOrJoinSink : public SinkToStorage, WithContext { public: SetOrJoinSink( - StorageSetOrJoinBase & table_, const StorageMetadataPtr & metadata_snapshot_, + ContextPtr ctx, StorageSetOrJoinBase & table_, const StorageMetadataPtr & metadata_snapshot_, const String & backup_path_, const String & backup_tmp_path_, const String & backup_file_name_, bool persistent_); @@ -57,6 +57,7 @@ class SetOrJoinSink : public SinkToStorage SetOrJoinSink::SetOrJoinSink( + ContextPtr ctx, StorageSetOrJoinBase & table_, const StorageMetadataPtr & metadata_snapshot_, const String & backup_path_, @@ -64,6 +65,7 @@ SetOrJoinSink::SetOrJoinSink( const String & backup_file_name_, bool persistent_) : SinkToStorage(metadata_snapshot_->getSampleBlock()) + , WithContext(ctx) , table(table_) , metadata_snapshot(metadata_snapshot_) , backup_path(backup_path_) @@ -81,7 +83,7 @@ void SetOrJoinSink::consume(Chunk chunk) /// Sort columns in the block. This is necessary, since Set and Join count on the same column order in different blocks. 
Block sorted_block = getPort().getHeader().cloneWithColumns(chunk.detachColumns()).sortColumns(); - table.insertBlock(sorted_block); + table.insertBlock(sorted_block, getContext()); if (persistent) backup_stream.write(sorted_block); } @@ -101,10 +103,10 @@ void SetOrJoinSink::onFinish() } -SinkToStoragePtr StorageSetOrJoinBase::write(const ASTPtr & /*query*/, const StorageMetadataPtr & metadata_snapshot, ContextPtr /*context*/) +SinkToStoragePtr StorageSetOrJoinBase::write(const ASTPtr & /*query*/, const StorageMetadataPtr & metadata_snapshot, ContextPtr context) { UInt64 id = ++increment; - return std::make_shared(*this, metadata_snapshot, path, path + "tmp/", toString(id) + ".bin", persistent); + return std::make_shared(context, *this, metadata_snapshot, path, path + "tmp/", toString(id) + ".bin", persistent); } @@ -152,10 +154,10 @@ StorageSet::StorageSet( } -void StorageSet::insertBlock(const Block & block) { set->insertFromBlock(block.getColumnsWithTypeAndName()); } +void StorageSet::insertBlock(const Block & block, ContextPtr) { set->insertFromBlock(block.getColumnsWithTypeAndName()); } void StorageSet::finishInsert() { set->finishInsert(); } -size_t StorageSet::getSize() const { return set->getTotalRowCount(); } +size_t StorageSet::getSize(ContextPtr) const { return set->getTotalRowCount(); } std::optional StorageSet::totalRows(const Settings &) const { return set->getTotalRowCount(); } std::optional StorageSet::totalBytes(const Settings &) const { return set->getTotalByteCount(); } @@ -207,6 +209,7 @@ void StorageSetOrJoinBase::restore() void StorageSetOrJoinBase::restoreFromFile(const String & file_path) { + ContextPtr ctx = nullptr; auto backup_buf = disk->readFile(file_path); CompressedReadBuffer compressed_backup_buf(*backup_buf); NativeBlockInputStream backup_stream(compressed_backup_buf, 0); @@ -214,14 +217,14 @@ void StorageSetOrJoinBase::restoreFromFile(const String & file_path) backup_stream.readPrefix(); while (Block block = backup_stream.read()) - insertBlock(block); + insertBlock(block, ctx); finishInsert(); backup_stream.readSuffix(); /// TODO Add speed, compressed bytes, data volume in memory, compression ratio ... Generalize all statistics logging in project. LOG_INFO(&Poco::Logger::get("StorageSetOrJoinBase"), "Loaded from backup file {}. {} rows, {}. State has {} unique rows.", - file_path, backup_stream.getProfileInfo().rows, ReadableSize(backup_stream.getProfileInfo().bytes), getSize()); + file_path, backup_stream.getProfileInfo().rows, ReadableSize(backup_stream.getProfileInfo().bytes), getSize(ctx)); } diff --git a/src/Storages/StorageSet.h b/src/Storages/StorageSet.h index 1166557ec8e1..1b78676b6c55 100644 --- a/src/Storages/StorageSet.h +++ b/src/Storages/StorageSet.h @@ -51,10 +51,10 @@ class StorageSetOrJoinBase : public IStorage void restoreFromFile(const String & file_path); /// Insert the block into the state. - virtual void insertBlock(const Block & block) = 0; + virtual void insertBlock(const Block & block, ContextPtr context) = 0; /// Call after all blocks were inserted. 
virtual void finishInsert() = 0; - virtual size_t getSize() const = 0; + virtual size_t getSize(ContextPtr context) const = 0; }; @@ -81,9 +81,9 @@ friend struct shared_ptr_helper; private: SetPtr set; - void insertBlock(const Block & block) override; + void insertBlock(const Block & block, ContextPtr) override; void finishInsert() override; - size_t getSize() const override; + size_t getSize(ContextPtr) const override; protected: StorageSet( diff --git a/tests/queries/0_stateless/02033_join_engine_deadlock_long.reference b/tests/queries/0_stateless/02033_join_engine_deadlock_long.reference new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/queries/0_stateless/02033_join_engine_deadlock_long.sh b/tests/queries/0_stateless/02033_join_engine_deadlock_long.sh new file mode 100755 index 000000000000..2a887cbbcae6 --- /dev/null +++ b/tests/queries/0_stateless/02033_join_engine_deadlock_long.sh @@ -0,0 +1,71 @@ +#!/usr/bin/env bash +# Tags: long, deadlock + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +create_table () { + $CLICKHOUSE_CLIENT --query " + CREATE TABLE join_block_test + ( + id String, + num Int64 + ) + ENGINE = Join(ANY, LEFT, id) + " +} + +drop_table () { + # Force a sync drop to free the memory before ending the test + # Otherwise things get interesting if you run the test many times before the database is finally dropped + $CLICKHOUSE_CLIENT --query " + DROP TABLE join_block_test SYNC + " +} + +populate_table_bg () { + ( + $CLICKHOUSE_CLIENT --query " + INSERT INTO join_block_test + SELECT toString(number) as id, number * number as num + FROM system.numbers LIMIT 3000000 + " >/dev/null + ) & +} + +read_table_bg () { + ( + $CLICKHOUSE_CLIENT --query " + SELECT * + FROM + ( + SELECT toString(number) AS user_id + FROM system.numbers LIMIT 10000 OFFSET 20000 + ) AS t1 + LEFT JOIN + ( + SELECT + * + FROM join_block_test AS i1 + ANY LEFT JOIN + ( + SELECT * + FROM join_block_test + ) AS i2 ON i1.id = toString(i2.num) + ) AS t2 ON t1.user_id = t2.id + " >/dev/null + ) & +} + +create_table +for _ in {1..5}; +do + populate_table_bg + sleep 0.05 + read_table_bg + sleep 0.05 +done + +wait +drop_table From 7be91eed8c64679402729e1f3d41065b135b70b0 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 15 Oct 2021 05:10:39 +0300 Subject: [PATCH 118/472] Backport #29946 to 21.9: Fix data-race between LogSink::writeMarks() and LogSource in StorageLog --- src/Storages/StorageLog.cpp | 8 +++++++- src/Storages/StorageLog.h | 2 ++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/src/Storages/StorageLog.cpp b/src/Storages/StorageLog.cpp index 0e156f24cc29..8796317fadb3 100644 --- a/src/Storages/StorageLog.cpp +++ b/src/Storages/StorageLog.cpp @@ -185,7 +185,10 @@ void LogSource::readData(const NameAndTypePair & name_and_type, ColumnPtr & colu UInt64 offset = 0; if (!stream_for_prefix && mark_number) + { + std::lock_guard marks_lock(file_it->second.marks_mutex); offset = file_it->second.marks[mark_number].offset; + } auto & data_file_path = file_it->second.data_file_path; auto it = streams.try_emplace(stream_name, storage.disk, data_file_path, offset, max_read_buffer_size).first; @@ -459,7 +462,10 @@ void LogSink::writeMarks(MarksForColumns && marks) writeIntBinary(mark.second.offset, *marks_stream); size_t column_index = mark.first; - storage.files[storage.column_names_by_idx[column_index]].marks.push_back(mark.second); + + auto & file = 
storage.files[storage.column_names_by_idx[column_index]]; + std::lock_guard marks_lock(file.marks_mutex); + file.marks.push_back(mark.second); } } diff --git a/src/Storages/StorageLog.h b/src/Storages/StorageLog.h index 116bdc315208..e1955ccb0a7d 100644 --- a/src/Storages/StorageLog.h +++ b/src/Storages/StorageLog.h @@ -81,6 +81,8 @@ class StorageLog final : public shared_ptr_helper, public IStorage size_t column_index; String data_file_path; + + std::mutex marks_mutex; Marks marks; }; using Files = std::map; /// file name -> column data From b976da904b8eacc75746edebae133a1d1b0b307e Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 15 Oct 2021 05:12:01 +0300 Subject: [PATCH 119/472] Backport #29959 to 21.9: Fix possible data-race between FileChecker and StorageLog/StorageStripeLog --- src/Storages/StorageLog.cpp | 7 ++++--- src/Storages/StorageStripeLog.cpp | 5 +---- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/src/Storages/StorageLog.cpp b/src/Storages/StorageLog.cpp index 0e156f24cc29..08faf7119c69 100644 --- a/src/Storages/StorageLog.cpp +++ b/src/Storages/StorageLog.cpp @@ -24,6 +24,7 @@ #include #include "StorageLogSettings.h" #include +#include #include #include @@ -122,9 +123,6 @@ Chunk LogSource::generate() if (rows_read == rows_limit) return {}; - if (storage.file_checker.empty()) - return {}; - /// How many rows to read for the next block. size_t max_rows_to_read = std::min(block_size, rows_limit - rows_read); std::unordered_map caches; @@ -670,6 +668,9 @@ Pipe StorageLog::read( if (!lock) throw Exception("Lock timeout exceeded", ErrorCodes::TIMEOUT_EXCEEDED); + if (file_checker.empty()) + return Pipe(std::make_shared(metadata_snapshot->getSampleBlockForColumns(column_names, getVirtuals(), getStorageID()))); + Pipes pipes; const Marks & marks = getMarksWithRealRowCount(metadata_snapshot); diff --git a/src/Storages/StorageStripeLog.cpp b/src/Storages/StorageStripeLog.cpp index 6bf91a145ede..a712ca39d9fd 100644 --- a/src/Storages/StorageStripeLog.cpp +++ b/src/Storages/StorageStripeLog.cpp @@ -98,9 +98,6 @@ class StripeLogSource final : public SourceWithProgress protected: Chunk generate() override { - if (storage.file_checker.empty()) - return {}; - Block res; start(); @@ -340,7 +337,7 @@ Pipe StorageStripeLog::read( Pipes pipes; String index_file = table_path + "index.mrk"; - if (!disk->exists(index_file)) + if (file_checker.empty() || !disk->exists(index_file)) { return Pipe(std::make_shared(metadata_snapshot->getSampleBlockForColumns(column_names, getVirtuals(), getStorageID()))); } From b45e73afa70135a2537088cb5bfcea6a93429ba2 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 15 Oct 2021 15:17:10 +0300 Subject: [PATCH 120/472] Backport #30189 to 21.9: Fix INSERT SELECT incorrectly fills MATERIALIZED column based of Nullable column --- src/Interpreters/inplaceBlockConversions.cpp | 20 ++++++++++++++++++- ...02053_INSERT_SELECT_MATERIALIZED.reference | 2 ++ .../02053_INSERT_SELECT_MATERIALIZED.sql | 6 ++++++ 3 files changed, 27 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/02053_INSERT_SELECT_MATERIALIZED.reference create mode 100644 tests/queries/0_stateless/02053_INSERT_SELECT_MATERIALIZED.sql diff --git a/src/Interpreters/inplaceBlockConversions.cpp b/src/Interpreters/inplaceBlockConversions.cpp index 4ba924821d71..2841abe757e2 100644 --- a/src/Interpreters/inplaceBlockConversions.cpp +++ b/src/Interpreters/inplaceBlockConversions.cpp @@ -61,7 +61,25 @@ void addDefaultRequiredExpressionsRecursively( 
added_columns.emplace(required_column_name); for (const auto & next_required_column_name : required_columns_names) - addDefaultRequiredExpressionsRecursively(block, next_required_column_name, required_column_type, columns, default_expr_list_accum, added_columns, null_as_default); + { + /// Required columns of the default expression should not be converted to NULL, + /// since this map value to default and MATERIALIZED values will not work. + /// + /// Consider the following structure: + /// - A Nullable(Int64) + /// - X Int64 materialized coalesce(A, -1) + /// + /// With recursive_null_as_default=true you will get: + /// + /// _CAST(coalesce(A, -1), 'Int64') AS X, NULL AS A + /// + /// And this will ignore default expression. + bool recursive_null_as_default = false; + addDefaultRequiredExpressionsRecursively(block, + next_required_column_name, required_column_type, + columns, default_expr_list_accum, added_columns, + recursive_null_as_default); + } } else if (columns.has(required_column_name)) { diff --git a/tests/queries/0_stateless/02053_INSERT_SELECT_MATERIALIZED.reference b/tests/queries/0_stateless/02053_INSERT_SELECT_MATERIALIZED.reference new file mode 100644 index 000000000000..5154881396a7 --- /dev/null +++ b/tests/queries/0_stateless/02053_INSERT_SELECT_MATERIALIZED.reference @@ -0,0 +1,2 @@ +1 42 42 +1 42 42 diff --git a/tests/queries/0_stateless/02053_INSERT_SELECT_MATERIALIZED.sql b/tests/queries/0_stateless/02053_INSERT_SELECT_MATERIALIZED.sql new file mode 100644 index 000000000000..e9ea0c9f016d --- /dev/null +++ b/tests/queries/0_stateless/02053_INSERT_SELECT_MATERIALIZED.sql @@ -0,0 +1,6 @@ +-- Test from https://github.com/ClickHouse/ClickHouse/issues/29729 +create table data_02053 (id Int64, A Nullable(Int64), X Int64 materialized coalesce(A, -1)) engine=MergeTree order by id; +insert into data_02053 values (1, 42); +-- Due to insert_null_as_default A became Null and X became -1 +insert into data_02053 select 1, 42; +select *, X from data_02053 order by id; From 7e8e158799c538ab700d3915312347f0ae547652 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Sat, 16 Oct 2021 03:19:28 +0300 Subject: [PATCH 121/472] Backport #30238 to 21.9: FlatDictionary, HashedDictionary fix bytes_allocated calculation for nullable attributes --- src/Dictionaries/FlatDictionary.cpp | 5 +++++ src/Dictionaries/HashedDictionary.cpp | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/src/Dictionaries/FlatDictionary.cpp b/src/Dictionaries/FlatDictionary.cpp index 26667db1081b..11dca6635ce8 100644 --- a/src/Dictionaries/FlatDictionary.cpp +++ b/src/Dictionaries/FlatDictionary.cpp @@ -405,6 +405,11 @@ void FlatDictionary::calculateBytesAllocated() }; callOnDictionaryAttributeType(attribute.type, type_call); + + bytes_allocated += sizeof(attribute.is_nullable_set); + + if (attribute.is_nullable_set.has_value()) + bytes_allocated = attribute.is_nullable_set->getBufferSizeInBytes(); } if (update_field_loaded_block) diff --git a/src/Dictionaries/HashedDictionary.cpp b/src/Dictionaries/HashedDictionary.cpp index d462631fba88..d91142785563 100644 --- a/src/Dictionaries/HashedDictionary.cpp +++ b/src/Dictionaries/HashedDictionary.cpp @@ -627,6 +627,11 @@ void HashedDictionary::calculateBytesAllocated() if (attributes[i].string_arena) bytes_allocated += attributes[i].string_arena->size(); + + bytes_allocated += sizeof(attributes[i].is_nullable_set); + + if (attributes[i].is_nullable_set.has_value()) + bytes_allocated = attributes[i].is_nullable_set->getBufferSizeInBytes(); } bytes_allocated += 
complex_key_arena.size(); From 2e738a6f66b6feb2cfecd48f8e391e658ad5bc55 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Sat, 16 Oct 2021 07:19:05 +0300 Subject: [PATCH 122/472] Backport #29998 to 21.9: Update BoringSSL --- .gitmodules | 1 + contrib/boringssl | 2 +- contrib/boringssl-cmake/CMakeLists.txt | 49 ++++++++++++++++++-------- 3 files changed, 36 insertions(+), 16 deletions(-) diff --git a/.gitmodules b/.gitmodules index 37b22527eb46..6885b7927803 100644 --- a/.gitmodules +++ b/.gitmodules @@ -213,6 +213,7 @@ [submodule "contrib/boringssl"] path = contrib/boringssl url = https://github.com/ClickHouse-Extras/boringssl.git + branch = MergeWithUpstream [submodule "contrib/NuRaft"] path = contrib/NuRaft url = https://github.com/ClickHouse-Extras/NuRaft.git diff --git a/contrib/boringssl b/contrib/boringssl index a6a2e2ab3e44..c1e01a441d6d 160000 --- a/contrib/boringssl +++ b/contrib/boringssl @@ -1 +1 @@ -Subproject commit a6a2e2ab3e44d97ce98e51c558e989f211de7eb3 +Subproject commit c1e01a441d6db234f4f12e63a7657d1f9e6db9c1 diff --git a/contrib/boringssl-cmake/CMakeLists.txt b/contrib/boringssl-cmake/CMakeLists.txt index 9d8c6ca60836..c827775e511f 100644 --- a/contrib/boringssl-cmake/CMakeLists.txt +++ b/contrib/boringssl-cmake/CMakeLists.txt @@ -4,7 +4,7 @@ # This file is created by generate_build_files.py and edited accordingly. -cmake_minimum_required(VERSION 3.0) +cmake_minimum_required(VERSION 3.5) project(BoringSSL LANGUAGES C CXX) @@ -20,12 +20,7 @@ if(CMAKE_COMPILER_IS_GNUCXX OR CLANG) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++") endif() - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fvisibility=hidden -fno-common") - if((CMAKE_C_COMPILER_VERSION VERSION_GREATER "4.8.99") OR CLANG) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=c11") - else() - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=c99") - endif() + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fno-common -std=c11") endif() # pthread_rwlock_t requires a feature flag. @@ -55,7 +50,7 @@ add_definitions(-DBORINGSSL_IMPLEMENTATION) # builds. if(NOT OPENSSL_NO_ASM AND CMAKE_OSX_ARCHITECTURES) list(LENGTH CMAKE_OSX_ARCHITECTURES NUM_ARCHES) - if(NOT ${NUM_ARCHES} EQUAL 1) + if(NOT NUM_ARCHES EQUAL 1) message(FATAL_ERROR "Universal binaries not supported.") endif() list(GET CMAKE_OSX_ARCHITECTURES 0 CMAKE_SYSTEM_PROCESSOR) @@ -78,7 +73,13 @@ elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "AMD64") elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "x86") set(ARCH "x86") elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "i386") - set(ARCH "x86") + # cmake uses `uname -p` to set the system processor, but Solaris + # systems support multiple architectures. 
+ if((${CMAKE_SYSTEM_NAME} STREQUAL "SunOS") AND CMAKE_SIZEOF_VOID_P EQUAL 8) + set(ARCH "x86_64") + else() + set(ARCH "x86") + endif() elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "i686") set(ARCH "x86") elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "aarch64") @@ -289,6 +290,21 @@ set( mac-x86_64/crypto/test/trampoline-x86_64.S ) +set( + CRYPTO_win_aarch64_SOURCES + + win-aarch64/crypto/chacha/chacha-armv8.S + win-aarch64/crypto/fipsmodule/aesv8-armx64.S + win-aarch64/crypto/fipsmodule/armv8-mont.S + win-aarch64/crypto/fipsmodule/ghash-neon-armv8.S + win-aarch64/crypto/fipsmodule/ghashv8-armx64.S + win-aarch64/crypto/fipsmodule/sha1-armv8.S + win-aarch64/crypto/fipsmodule/sha256-armv8.S + win-aarch64/crypto/fipsmodule/sha512-armv8.S + win-aarch64/crypto/fipsmodule/vpaes-armv8.S + win-aarch64/crypto/test/trampoline-armv8.S +) + set( CRYPTO_win_x86_SOURCES @@ -331,9 +347,9 @@ set( win-x86_64/crypto/test/trampoline-x86_64.asm ) -if(APPLE AND ${ARCH} STREQUAL "aarch64") +if(APPLE AND ARCH STREQUAL "aarch64") set(CRYPTO_ARCH_SOURCES ${CRYPTO_ios_aarch64_SOURCES}) -elseif(APPLE AND ${ARCH} STREQUAL "arm") +elseif(APPLE AND ARCH STREQUAL "arm") set(CRYPTO_ARCH_SOURCES ${CRYPTO_ios_arm_SOURCES}) elseif(APPLE) set(CRYPTO_ARCH_SOURCES ${CRYPTO_mac_${ARCH}_SOURCES}) @@ -360,6 +376,7 @@ add_library( "${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_object.c" "${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_octet.c" "${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_print.c" + "${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_strex.c" "${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_strnid.c" "${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_time.c" "${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_type.c" @@ -389,6 +406,7 @@ add_library( "${BORINGSSL_SOURCE_DIR}/crypto/bio/printf.c" "${BORINGSSL_SOURCE_DIR}/crypto/bio/socket.c" "${BORINGSSL_SOURCE_DIR}/crypto/bio/socket_helper.c" + "${BORINGSSL_SOURCE_DIR}/crypto/blake2/blake2.c" "${BORINGSSL_SOURCE_DIR}/crypto/bn_extra/bn_asn1.c" "${BORINGSSL_SOURCE_DIR}/crypto/bn_extra/convert.c" "${BORINGSSL_SOURCE_DIR}/crypto/buf/buf.c" @@ -413,6 +431,7 @@ add_library( "${BORINGSSL_SOURCE_DIR}/crypto/conf/conf.c" "${BORINGSSL_SOURCE_DIR}/crypto/cpu-aarch64-fuchsia.c" "${BORINGSSL_SOURCE_DIR}/crypto/cpu-aarch64-linux.c" + "${BORINGSSL_SOURCE_DIR}/crypto/cpu-aarch64-win.c" "${BORINGSSL_SOURCE_DIR}/crypto/cpu-arm-linux.c" "${BORINGSSL_SOURCE_DIR}/crypto/cpu-arm.c" "${BORINGSSL_SOURCE_DIR}/crypto/cpu-intel.c" @@ -452,7 +471,6 @@ add_library( "${BORINGSSL_SOURCE_DIR}/crypto/ex_data.c" "${BORINGSSL_SOURCE_DIR}/crypto/fipsmodule/bcm.c" "${BORINGSSL_SOURCE_DIR}/crypto/fipsmodule/fips_shared_support.c" - "${BORINGSSL_SOURCE_DIR}/crypto/fipsmodule/is_fips.c" "${BORINGSSL_SOURCE_DIR}/crypto/hkdf/hkdf.c" "${BORINGSSL_SOURCE_DIR}/crypto/hpke/hpke.c" "${BORINGSSL_SOURCE_DIR}/crypto/hrss/hrss.c" @@ -499,13 +517,13 @@ add_library( "${BORINGSSL_SOURCE_DIR}/crypto/trust_token/voprf.c" "${BORINGSSL_SOURCE_DIR}/crypto/x509/a_digest.c" "${BORINGSSL_SOURCE_DIR}/crypto/x509/a_sign.c" - "${BORINGSSL_SOURCE_DIR}/crypto/x509/a_strex.c" "${BORINGSSL_SOURCE_DIR}/crypto/x509/a_verify.c" "${BORINGSSL_SOURCE_DIR}/crypto/x509/algorithm.c" "${BORINGSSL_SOURCE_DIR}/crypto/x509/asn1_gen.c" "${BORINGSSL_SOURCE_DIR}/crypto/x509/by_dir.c" "${BORINGSSL_SOURCE_DIR}/crypto/x509/by_file.c" "${BORINGSSL_SOURCE_DIR}/crypto/x509/i2d_pr.c" + "${BORINGSSL_SOURCE_DIR}/crypto/x509/name_print.c" "${BORINGSSL_SOURCE_DIR}/crypto/x509/rsa_pss.c" "${BORINGSSL_SOURCE_DIR}/crypto/x509/t_crl.c" "${BORINGSSL_SOURCE_DIR}/crypto/x509/t_req.c" @@ -519,7 +537,6 @@ add_library( 
"${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_ext.c" "${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_lu.c" "${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_obj.c" - "${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_r2x.c" "${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_req.c" "${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_set.c" "${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_trs.c" @@ -589,6 +606,8 @@ add_library( "${BORINGSSL_SOURCE_DIR}/ssl/d1_srtp.cc" "${BORINGSSL_SOURCE_DIR}/ssl/dtls_method.cc" "${BORINGSSL_SOURCE_DIR}/ssl/dtls_record.cc" + "${BORINGSSL_SOURCE_DIR}/ssl/encrypted_client_hello.cc" + "${BORINGSSL_SOURCE_DIR}/ssl/extensions.cc" "${BORINGSSL_SOURCE_DIR}/ssl/handoff.cc" "${BORINGSSL_SOURCE_DIR}/ssl/handshake.cc" "${BORINGSSL_SOURCE_DIR}/ssl/handshake_client.cc" @@ -611,7 +630,6 @@ add_library( "${BORINGSSL_SOURCE_DIR}/ssl/ssl_versions.cc" "${BORINGSSL_SOURCE_DIR}/ssl/ssl_x509.cc" "${BORINGSSL_SOURCE_DIR}/ssl/t1_enc.cc" - "${BORINGSSL_SOURCE_DIR}/ssl/t1_lib.cc" "${BORINGSSL_SOURCE_DIR}/ssl/tls13_both.cc" "${BORINGSSL_SOURCE_DIR}/ssl/tls13_client.cc" "${BORINGSSL_SOURCE_DIR}/ssl/tls13_enc.cc" @@ -633,6 +651,7 @@ add_executable( "${BORINGSSL_SOURCE_DIR}/tool/digest.cc" "${BORINGSSL_SOURCE_DIR}/tool/fd.cc" "${BORINGSSL_SOURCE_DIR}/tool/file.cc" + "${BORINGSSL_SOURCE_DIR}/tool/generate_ech.cc" "${BORINGSSL_SOURCE_DIR}/tool/generate_ed25519.cc" "${BORINGSSL_SOURCE_DIR}/tool/genrsa.cc" "${BORINGSSL_SOURCE_DIR}/tool/pkcs12.cc" From 56ddc8a6a29cde3a0ed73dc48dceb2d8756d5f6b Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Sat, 16 Oct 2021 15:22:13 +0300 Subject: [PATCH 123/472] Backport #30246 to 21.9: ComplexKeyHashedDictionary fix config parsing --- src/Dictionaries/HashedDictionary.cpp | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/src/Dictionaries/HashedDictionary.cpp b/src/Dictionaries/HashedDictionary.cpp index d462631fba88..33da27c641f5 100644 --- a/src/Dictionaries/HashedDictionary.cpp +++ b/src/Dictionaries/HashedDictionary.cpp @@ -732,8 +732,18 @@ void registerDictionaryHashed(DictionaryFactory & factory) const DictionaryLifetime dict_lifetime{config, config_prefix + ".lifetime"}; const bool require_nonempty = config.getBool(config_prefix + ".require_nonempty", false); - const std::string & layout_prefix = sparse ? ".layout.sparse_hashed" : ".layout.hashed"; - const bool preallocate = config.getBool(config_prefix + layout_prefix + ".preallocate", false); + std::string dictionary_layout_name; + + if (dictionary_key_type == DictionaryKeyType::Simple) + dictionary_layout_name = "hashed"; + else + dictionary_layout_name = "complex_key_hashed"; + + if (sparse) + dictionary_layout_name = "sparse_" + dictionary_layout_name; + + const std::string dictionary_layout_prefix = ".layout." 
+ dictionary_layout_name; + const bool preallocate = config.getBool(config_prefix + dictionary_layout_prefix + ".preallocate", false); HashedDictionaryStorageConfiguration configuration{preallocate, require_nonempty, dict_lifetime}; From 6ff0a59c831755250395d8fe14669918409e56eb Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Sun, 17 Oct 2021 09:24:57 +0300 Subject: [PATCH 124/472] Backport #30243 to 21.9: Fix crash with shortcircuit and LowCardinality in multiIf --- src/Functions/multiIf.cpp | 22 ++++--- ...owcardinality_shortcircuit_crash.reference | 60 +++++++++++++++++++ ...2049_lowcardinality_shortcircuit_crash.sql | 45 ++++++++++++++ 3 files changed, 115 insertions(+), 12 deletions(-) create mode 100644 tests/queries/0_stateless/02049_lowcardinality_shortcircuit_crash.reference create mode 100644 tests/queries/0_stateless/02049_lowcardinality_shortcircuit_crash.sql diff --git a/src/Functions/multiIf.cpp b/src/Functions/multiIf.cpp index 1122d4892c61..3e5242d5f9b3 100644 --- a/src/Functions/multiIf.cpp +++ b/src/Functions/multiIf.cpp @@ -124,8 +124,8 @@ class FunctionMultiIf final : public FunctionIfBase */ struct Instruction { - const IColumn * condition = nullptr; - const IColumn * source = nullptr; + IColumn::Ptr condition = nullptr; + IColumn::Ptr source = nullptr; bool condition_always_true = false; bool condition_is_nullable = false; @@ -160,15 +160,15 @@ class FunctionMultiIf final : public FunctionIfBase } else { - const ColumnWithTypeAndName & cond_col = arguments[i]; + IColumn::Ptr cond_col = arguments[i].column->convertToFullColumnIfLowCardinality(); /// We skip branches that are always false. /// If we encounter a branch that is always true, we can finish. - if (cond_col.column->onlyNull()) + if (cond_col->onlyNull()) continue; - if (const auto * column_const = checkAndGetColumn(*cond_col.column)) + if (const auto * column_const = checkAndGetColumn(*cond_col)) { Field value = column_const->getField(); @@ -181,26 +181,24 @@ class FunctionMultiIf final : public FunctionIfBase } else { - if (isColumnNullable(*cond_col.column)) - instruction.condition_is_nullable = true; - - instruction.condition = cond_col.column.get(); + instruction.condition = cond_col; + instruction.condition_is_nullable = instruction.condition->isNullable(); } - instruction.condition_is_short = cond_col.column->size() < arguments[0].column->size(); + instruction.condition_is_short = cond_col->size() < arguments[0].column->size(); } const ColumnWithTypeAndName & source_col = arguments[source_idx]; instruction.source_is_short = source_col.column->size() < arguments[0].column->size(); if (source_col.type->equals(*return_type)) { - instruction.source = source_col.column.get(); + instruction.source = source_col.column; } else { /// Cast all columns to result type. 
converted_columns_holder.emplace_back(castColumn(source_col, return_type)); - instruction.source = converted_columns_holder.back().get(); + instruction.source = converted_columns_holder.back(); } if (instruction.source && isColumnConst(*instruction.source)) diff --git a/tests/queries/0_stateless/02049_lowcardinality_shortcircuit_crash.reference b/tests/queries/0_stateless/02049_lowcardinality_shortcircuit_crash.reference new file mode 100644 index 000000000000..c84236dce7d2 --- /dev/null +++ b/tests/queries/0_stateless/02049_lowcardinality_shortcircuit_crash.reference @@ -0,0 +1,60 @@ +0 0 +1 1 +2 2 +3 3 +4 40 +5 50 +6 60 +7 70 +8 800 +9 900 +10 1000 +11 1100 +12 12000 +13 13000 +14 14000 +15 15000 +16 160000 +17 170000 +18 180000 +19 190000 +0 0 +1 1 +2 2 +3 3 +4 40 +5 50 +6 60 +7 70 +8 80000 +9 90000 +10 100000 +11 110000 +12 120000 +13 130000 +14 140000 +15 150000 +16 160000 +17 170000 +18 180000 +19 190000 +0 0 +1 1 +2 2 +3 3 +4 40 +5 50 +6 60 +7 70 +8 800 +9 900 +10 1000 +11 1100 +12 12000 +13 13000 +14 14000 +15 15000 +16 160000 +17 170000 +18 180000 +19 190000 diff --git a/tests/queries/0_stateless/02049_lowcardinality_shortcircuit_crash.sql b/tests/queries/0_stateless/02049_lowcardinality_shortcircuit_crash.sql new file mode 100644 index 000000000000..2a837380250b --- /dev/null +++ b/tests/queries/0_stateless/02049_lowcardinality_shortcircuit_crash.sql @@ -0,0 +1,45 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/30231 +SELECT * +FROM ( + SELECT number, + multiIf( + CAST(number < 4, 'UInt8'), toString(number), + CAST(number < 8, 'LowCardinality(UInt8)'), toString(number * 10), + CAST(number < 12, 'Nullable(UInt8)'), toString(number * 100), + CAST(number < 16, 'LowCardinality(Nullable(UInt8))'), toString(number * 1000), + toString(number * 10000)) as m + FROM system.numbers + LIMIT 20 + ) +ORDER BY number +SETTINGS short_circuit_function_evaluation='enable'; + +SELECT * +FROM ( + SELECT number, + multiIf( + CAST(number < 4, 'UInt8'), toString(number), + CAST(number < 8, 'LowCardinality(UInt8)'), toString(number * 10), + CAST(NULL, 'Nullable(UInt8)'), toString(number * 100), + CAST(NULL, 'LowCardinality(Nullable(UInt8))'), toString(number * 1000), + toString(number * 10000)) as m + FROM system.numbers + LIMIT 20 + ) +ORDER BY number +SETTINGS short_circuit_function_evaluation='enable'; + +SELECT * +FROM ( + SELECT number, + multiIf( + CAST(number < 4, 'UInt8'), toString(number), + CAST(number < 8, 'LowCardinality(UInt8)'), toString(number * 10)::LowCardinality(String), + CAST(number < 12, 'Nullable(UInt8)'), toString(number * 100)::Nullable(String), + CAST(number < 16, 'LowCardinality(Nullable(UInt8))'), toString(number * 1000)::LowCardinality(Nullable(String)), + toString(number * 10000)) as m + FROM system.numbers + LIMIT 20 + ) +ORDER BY number +SETTINGS short_circuit_function_evaluation='enable'; From 41926ed7385631706a554feb9959b1c365f702c4 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 18 Oct 2021 11:17:13 +0300 Subject: [PATCH 125/472] Drop totals for global in. 
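GLOBAL IN (and GLOBAL JOIN) materialize the right-hand subquery into an external temporary table that is sent to the remote servers. If that subquery ends with WITH TOTALS, the totals (and extremes) rows would otherwise reach the temporary table's sink and be stored as ordinary rows, polluting the set used for filtering. Dropping totals and extremes from the pipeline before it is connected to the sink avoids this; the 02096_totals_global_in_bug test added earlier in this series covers the case.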
--- src/Interpreters/GlobalSubqueriesVisitor.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Interpreters/GlobalSubqueriesVisitor.h b/src/Interpreters/GlobalSubqueriesVisitor.h index 99197e81f803..e14a7da3ae0f 100644 --- a/src/Interpreters/GlobalSubqueriesVisitor.h +++ b/src/Interpreters/GlobalSubqueriesVisitor.h @@ -158,6 +158,7 @@ class GlobalSubqueriesMatcher auto external_table = external_storage_holder->getTable(); auto table_out = external_table->write({}, external_table->getInMemoryMetadataPtr(), getContext()); auto io = interpreter->execute(); + io.pipeline.dropTotalsAndExtremes(); io.pipeline.resize(1); io.pipeline.setSinks([&](const Block &, Pipe::StreamType) -> ProcessorPtr { From 8f5ab67599b59500dc3463b02c984281a01f7381 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Mon, 18 Oct 2021 17:36:56 +0300 Subject: [PATCH 126/472] Backport #30230 to 21.9: Allow identifiers starting with numbers in multiple joins --- .../JoinToSubqueryTransformVisitor.cpp | 5 ++-- .../01120_join_constants.reference | 2 ++ .../0_stateless/01120_join_constants.sql | 18 +++++++++++++ ...96_join_unusual_identifier_begin.reference | 2 ++ .../02096_join_unusual_identifier_begin.sql | 27 +++++++++++++++++++ 5 files changed, 52 insertions(+), 2 deletions(-) create mode 100644 tests/queries/0_stateless/02096_join_unusual_identifier_begin.reference create mode 100644 tests/queries/0_stateless/02096_join_unusual_identifier_begin.sql diff --git a/src/Interpreters/JoinToSubqueryTransformVisitor.cpp b/src/Interpreters/JoinToSubqueryTransformVisitor.cpp index eabdeaefc049..ae0d267c3b63 100644 --- a/src/Interpreters/JoinToSubqueryTransformVisitor.cpp +++ b/src/Interpreters/JoinToSubqueryTransformVisitor.cpp @@ -18,7 +18,7 @@ #include #include #include - +#include namespace DB { @@ -524,7 +524,8 @@ std::vector normalizeColumnNamesExtractNeeded( size_t count = countTablesWithColumn(tables, short_name); - if (count > 1 || aliases.count(short_name)) + /// isValidIdentifierBegin retuired to be consistent with TableJoin::deduplicateAndQualifyColumnNames + if (count > 1 || aliases.count(short_name) || !isValidIdentifierBegin(short_name.at(0))) { const auto & table = tables[*table_pos]; IdentifierSemantic::setColumnLongName(*ident, table.table); /// table.column -> table_alias.column diff --git a/tests/queries/0_stateless/01120_join_constants.reference b/tests/queries/0_stateless/01120_join_constants.reference index a16427fbdf7e..91838e7a2bb0 100644 --- a/tests/queries/0_stateless/01120_join_constants.reference +++ b/tests/queries/0_stateless/01120_join_constants.reference @@ -1,2 +1,4 @@ 1 hello 1 world world 1 2 hello 0 world 1 +1 321 1 123 123 1 +2 321 0 0 123 1 diff --git a/tests/queries/0_stateless/01120_join_constants.sql b/tests/queries/0_stateless/01120_join_constants.sql index 443559c3ea11..d6d6a1be43b8 100644 --- a/tests/queries/0_stateless/01120_join_constants.sql +++ b/tests/queries/0_stateless/01120_join_constants.sql @@ -15,3 +15,21 @@ LEFT JOIN arrayJoin([1, 3]) AS k, 'world' ) AS t2 ON t1.k = t2.k; + +SELECT + t1.*, + t2.*, + 123, + isConstant('world') +FROM +( + SELECT + arrayJoin([1, 2]) AS k, + 321 +) AS t1 +LEFT JOIN +( + SELECT + arrayJoin([1, 3]) AS k, + 123 +) AS t2 ON t1.k = t2.k; diff --git a/tests/queries/0_stateless/02096_join_unusual_identifier_begin.reference b/tests/queries/0_stateless/02096_join_unusual_identifier_begin.reference new file mode 100644 index 000000000000..e8cc5e526c03 --- /dev/null +++ b/tests/queries/0_stateless/02096_join_unusual_identifier_begin.reference @@ -0,0 
+1,2 @@ +1 1 1 1 1 1 +1 diff --git a/tests/queries/0_stateless/02096_join_unusual_identifier_begin.sql b/tests/queries/0_stateless/02096_join_unusual_identifier_begin.sql new file mode 100644 index 000000000000..fc6be2eff7b0 --- /dev/null +++ b/tests/queries/0_stateless/02096_join_unusual_identifier_begin.sql @@ -0,0 +1,27 @@ +DROP TABLE IF EXISTS t1; +DROP TABLE IF EXISTS t2; +DROP TABLE IF EXISTS t3; + +CREATE TABLE t1 ( `a1` Int64, `1a1` Int64 ) ENGINE = Memory; +INSERT INTO t1 VALUES (1, 1); + +CREATE TABLE t2 ( `b1` Int64, `1b1` Int64 ) ENGINE = Memory; +INSERT INTO t2 VALUES (1, 1); + +CREATE TABLE t3 ( `c1` Int64, `1c1` Int64 ) ENGINE = Memory; +INSERT INTO t3 VALUES (1, 1); + +SELECT + * +FROM t1 AS t1 +INNER JOIN t2 AS t2 ON t1.a1 = t2.b1 +INNER JOIN t3 AS t3 ON t1.a1 = t3.c1; + +SELECT t2.`1b1` FROM t1 JOIN t2 ON a1 = b1; + +-- Without quialification it doesn't work: +-- SELECT `1b1` FROM t1 JOIN t2 ON a1 = b1; + +DROP TABLE IF EXISTS t1; +DROP TABLE IF EXISTS t2; +DROP TABLE IF EXISTS t3; From 3b5916aac47c363a33dc57b34f9974a559731f46 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 19 Oct 2021 01:48:06 +0300 Subject: [PATCH 127/472] Auto version update to [21.9.5.16] [54454] --- cmake/autogenerated_versions.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cmake/autogenerated_versions.txt b/cmake/autogenerated_versions.txt index 3232810fdce8..dadb3f819280 100644 --- a/cmake/autogenerated_versions.txt +++ b/cmake/autogenerated_versions.txt @@ -6,7 +6,7 @@ SET(VERSION_REVISION 54454) SET(VERSION_MAJOR 21) SET(VERSION_MINOR 9) SET(VERSION_PATCH 5) -SET(VERSION_GITHASH 6a82e988c12d80f628303b23974a32cd0dc6480e) -SET(VERSION_DESCRIBE v21.9.5.1-stable) -SET(VERSION_STRING 21.9.5.1) +SET(VERSION_GITHASH f78bedb6c1c54627bc68eab774f1a2413bfc4486) +SET(VERSION_DESCRIBE v21.9.5.16-stable) +SET(VERSION_STRING 21.9.5.16) # end of autochange From 1c69add293da405e3ca73f480235e3e4e9195176 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 19 Oct 2021 01:51:58 +0300 Subject: [PATCH 128/472] Auto version update to [21.9.6.1] [54454] --- cmake/autogenerated_versions.txt | 6 +++--- debian/changelog | 4 ++-- docker/client/Dockerfile | 2 +- docker/server/Dockerfile | 2 +- docker/test/Dockerfile | 2 +- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/cmake/autogenerated_versions.txt b/cmake/autogenerated_versions.txt index dadb3f819280..0768f3e98d25 100644 --- a/cmake/autogenerated_versions.txt +++ b/cmake/autogenerated_versions.txt @@ -5,8 +5,8 @@ SET(VERSION_REVISION 54454) SET(VERSION_MAJOR 21) SET(VERSION_MINOR 9) -SET(VERSION_PATCH 5) +SET(VERSION_PATCH 6) SET(VERSION_GITHASH f78bedb6c1c54627bc68eab774f1a2413bfc4486) -SET(VERSION_DESCRIBE v21.9.5.16-stable) -SET(VERSION_STRING 21.9.5.16) +SET(VERSION_DESCRIBE v21.9.6.1-stable) +SET(VERSION_STRING 21.9.6.1) # end of autochange diff --git a/debian/changelog b/debian/changelog index 65cde08e0c0d..ff435d6b66c9 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,5 +1,5 @@ -clickhouse (21.9.5.1) unstable; urgency=low +clickhouse (21.9.6.1) unstable; urgency=low * Modified source code - -- clickhouse-release Fri, 24 Sep 2021 14:53:31 +0300 + -- clickhouse-release Tue, 19 Oct 2021 01:51:55 +0300 diff --git a/docker/client/Dockerfile b/docker/client/Dockerfile index 22a5e4b526b8..0d82c882d734 100644 --- a/docker/client/Dockerfile +++ b/docker/client/Dockerfile @@ -1,7 +1,7 @@ FROM ubuntu:18.04 ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/" -ARG version=21.9.5.* +ARG 
version=21.9.6.* RUN sed -i 's|http://archive|http://ru.archive|g' /etc/apt/sources.list diff --git a/docker/server/Dockerfile b/docker/server/Dockerfile index 3398aae7f3d4..aa2b9bba8cbd 100644 --- a/docker/server/Dockerfile +++ b/docker/server/Dockerfile @@ -1,7 +1,7 @@ FROM ubuntu:20.04 ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/" -ARG version=21.9.5.* +ARG version=21.9.6.* ARG gosu_ver=1.10 # set non-empty deb_location_url url to create a docker image diff --git a/docker/test/Dockerfile b/docker/test/Dockerfile index 17c5e6c17493..1f51efee0591 100644 --- a/docker/test/Dockerfile +++ b/docker/test/Dockerfile @@ -1,7 +1,7 @@ FROM ubuntu:18.04 ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/" -ARG version=21.9.5.* +ARG version=21.9.6.* RUN apt-get update && \ apt-get install -y apt-transport-https dirmngr && \ From 3d1f277b8c990edf0c0a96e0a6748c36b7be353c Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 19 Oct 2021 17:44:39 +0300 Subject: [PATCH 129/472] Backport #30292 to 21.9: fix replaceRegexpAll bug --- src/Functions/ReplaceRegexpImpl.h | 16 +++++++++++++++- .../02100_replaceRegexpAll_bug.reference | 11 +++++++++++ .../0_stateless/02100_replaceRegexpAll_bug.sql | 14 ++++++++++++++ 3 files changed, 40 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/02100_replaceRegexpAll_bug.reference create mode 100644 tests/queries/0_stateless/02100_replaceRegexpAll_bug.sql diff --git a/src/Functions/ReplaceRegexpImpl.h b/src/Functions/ReplaceRegexpImpl.h index 3e80dd5b3373..678189f85588 100644 --- a/src/Functions/ReplaceRegexpImpl.h +++ b/src/Functions/ReplaceRegexpImpl.h @@ -96,6 +96,9 @@ struct ReplaceRegexpImpl re2_st::StringPiece matches[max_captures]; size_t start_pos = 0; + bool is_first_match = true; + bool is_start_pos_added_one = false; + while (start_pos < static_cast(input.length())) { /// If no more replacements possible for current string @@ -103,6 +106,9 @@ struct ReplaceRegexpImpl if (searcher.Match(input, start_pos, input.length(), re2_st::RE2::Anchor::UNANCHORED, matches, num_captures)) { + if (is_start_pos_added_one) + start_pos -= 1; + const auto & match = matches[0]; size_t bytes_to_copy = (match.data() - input.data()) - start_pos; @@ -112,6 +118,13 @@ struct ReplaceRegexpImpl res_offset += bytes_to_copy; start_pos += bytes_to_copy + match.length(); + /// To avoid infinite loop. + if (is_first_match && match.length() == 0 && !replace_one && input.length() > 1) + { + start_pos += 1; + is_start_pos_added_one = true; + } + /// Do substitution instructions for (const auto & it : instructions) { @@ -129,8 +142,9 @@ struct ReplaceRegexpImpl } } - if (replace_one || match.length() == 0) /// Stop after match of zero length, to avoid infinite loop. 
+ if (replace_one || (!is_first_match && match.length() == 0)) can_finish_current_string = true; + is_first_match = false; } else can_finish_current_string = true; diff --git a/tests/queries/0_stateless/02100_replaceRegexpAll_bug.reference b/tests/queries/0_stateless/02100_replaceRegexpAll_bug.reference new file mode 100644 index 000000000000..993dd9b1cdea --- /dev/null +++ b/tests/queries/0_stateless/02100_replaceRegexpAll_bug.reference @@ -0,0 +1,11 @@ +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 diff --git a/tests/queries/0_stateless/02100_replaceRegexpAll_bug.sql b/tests/queries/0_stateless/02100_replaceRegexpAll_bug.sql new file mode 100644 index 000000000000..32f7f63f6d07 --- /dev/null +++ b/tests/queries/0_stateless/02100_replaceRegexpAll_bug.sql @@ -0,0 +1,14 @@ +SELECT 'aaaabb ' == trim(leading 'b ' FROM 'b aaaabb ') x; +SELECT 'b aaaa' == trim(trailing 'b ' FROM 'b aaaabb ') x; +SELECT 'aaaa' == trim(both 'b ' FROM 'b aaaabb ') x; + +SELECT '1' == replaceRegexpAll(',,1,,', '^[,]*|[,]*$', '') x; +SELECT '1' == replaceRegexpAll(',,1', '^[,]*|[,]*$', '') x; +SELECT '1' == replaceRegexpAll('1,,', '^[,]*|[,]*$', '') x; + +SELECT '1,,' == replaceRegexpOne(',,1,,', '^[,]*|[,]*$', '') x; +SELECT '1' == replaceRegexpOne(',,1', '^[,]*|[,]*$', '') x; +SELECT '1,,' == replaceRegexpOne('1,,', '^[,]*|[,]*$', '') x; + +SELECT '5935,5998,6014' == trim(BOTH ', ' FROM '5935,5998,6014, ') x; +SELECT '5935,5998,6014' == replaceRegexpAll('5935,5998,6014, ', concat('^[', regexpQuoteMeta(', '), ']*|[', regexpQuoteMeta(', '), ']*$'), '') AS x; From 6f3dbeb65beddfe9fedd9341882c4b397b6c033a Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 20 Oct 2021 07:40:50 +0300 Subject: [PATCH 130/472] Backport #30358 to 21.9: Fix ca-bundle.crt in kerberized_hadoop/Dockerfile --- .../test/integration/kerberized_hadoop/Dockerfile | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/docker/test/integration/kerberized_hadoop/Dockerfile b/docker/test/integration/kerberized_hadoop/Dockerfile index 6a2fd96e7a70..5f256e350a6b 100644 --- a/docker/test/integration/kerberized_hadoop/Dockerfile +++ b/docker/test/integration/kerberized_hadoop/Dockerfile @@ -1,16 +1,22 @@ # docker build -t yandex/clickhouse-kerberized-hadoop . 
FROM sequenceiq/hadoop-docker:2.7.0 -RUN sed -i -e 's/^\#baseurl/baseurl/' /etc/yum.repos.d/CentOS-Base.repo -RUN sed -i -e 's/^mirrorlist/#mirrorlist/' /etc/yum.repos.d/CentOS-Base.repo -RUN sed -i -e 's#http://mirror.centos.org/#http://vault.centos.org/#' /etc/yum.repos.d/CentOS-Base.repo + +RUN sed -i -e 's/^\#baseurl/baseurl/' /etc/yum.repos.d/CentOS-Base.repo && \ + sed -i -e 's/^mirrorlist/#mirrorlist/' /etc/yum.repos.d/CentOS-Base.repo && \ + sed -i -e 's#http://mirror.centos.org/#http://vault.centos.org/#' /etc/yum.repos.d/CentOS-Base.repo + +# https://community.letsencrypt.org/t/rhel-centos-6-openssl-client-compatibility-after-dst-root-ca-x3-expiration/161032/81 +RUN sed -i s/xMDkzMDE0MDExNVow/0MDkzMDE4MTQwM1ow/ /etc/pki/tls/certs/ca-bundle.crt + RUN yum clean all && \ rpm --rebuilddb && \ yum -y update && \ yum -y install yum-plugin-ovl && \ yum --quiet -y install krb5-workstation.x86_64 + RUN cd /tmp && \ - curl http://archive.apache.org/dist/commons/daemon/source/commons-daemon-1.0.15-src.tar.gz -o commons-daemon-1.0.15-src.tar.gz && \ + curl http://archive.apache.org/dist/commons/daemon/source/commons-daemon-1.0.15-src.tar.gz -o commons-daemon-1.0.15-src.tar.gz && \ tar xzf commons-daemon-1.0.15-src.tar.gz && \ cd commons-daemon-1.0.15-src/src/native/unix && \ ./configure && \ From 5716c63d2fa38bcfe920d0b6f3306b4b55b45c0b Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 20 Oct 2021 19:42:49 +0300 Subject: [PATCH 131/472] Backport #30177 to 21.9: Support nullable arguments in function `initializeAggregation` --- src/Functions/initializeAggregation.cpp | 1 + .../02097_initializeAggregationNullable.reference | 6 ++++++ .../0_stateless/02097_initializeAggregationNullable.sql | 8 ++++++++ 3 files changed, 15 insertions(+) create mode 100644 tests/queries/0_stateless/02097_initializeAggregationNullable.reference create mode 100644 tests/queries/0_stateless/02097_initializeAggregationNullable.sql diff --git a/src/Functions/initializeAggregation.cpp b/src/Functions/initializeAggregation.cpp index 2f35ef26b1af..3ead6e306fd3 100644 --- a/src/Functions/initializeAggregation.cpp +++ b/src/Functions/initializeAggregation.cpp @@ -40,6 +40,7 @@ class FunctionInitializeAggregation : public IFunction, private WithContext bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } bool useDefaultImplementationForConstants() const override { return true; } + bool useDefaultImplementationForNulls() const override { return false; } ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {0}; } DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override; diff --git a/tests/queries/0_stateless/02097_initializeAggregationNullable.reference b/tests/queries/0_stateless/02097_initializeAggregationNullable.reference new file mode 100644 index 000000000000..6d2e42f2ca6e --- /dev/null +++ b/tests/queries/0_stateless/02097_initializeAggregationNullable.reference @@ -0,0 +1,6 @@ +1 +AggregateFunction(uniqExact, Nullable(String)) +1 +AggregateFunction(uniqExact, Nullable(UInt8)) +1 +1 diff --git a/tests/queries/0_stateless/02097_initializeAggregationNullable.sql b/tests/queries/0_stateless/02097_initializeAggregationNullable.sql new file mode 100644 index 000000000000..aa4e6d475791 --- /dev/null +++ b/tests/queries/0_stateless/02097_initializeAggregationNullable.sql @@ -0,0 +1,8 @@ +SELECT finalizeAggregation(initializeAggregation('uniqExactState', toNullable('foo'))); +SELECT 
toTypeName(initializeAggregation('uniqExactState', toNullable('foo'))); + +SELECT finalizeAggregation(initializeAggregation('uniqExactState', toNullable(123))); +SELECT toTypeName(initializeAggregation('uniqExactState', toNullable(123))); + +SELECT initializeAggregation('uniqExactState', toNullable('foo')) = arrayReduce('uniqExactState', [toNullable('foo')]); +SELECT initializeAggregation('uniqExactState', toNullable(123)) = arrayReduce('uniqExactState', [toNullable(123)]); From c85ee30dabf09047d3af4494d4be7a086dbf9d9a Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 20 Oct 2021 21:43:53 +0300 Subject: [PATCH 132/472] Backport #30309 to 21.9: Fix symlinks in file table function --- src/Common/filesystemHelpers.cpp | 7 ++-- src/Common/filesystemHelpers.h | 4 ++- src/Dictionaries/FileDictionarySource.cpp | 5 +-- src/Dictionaries/LibraryDictionarySource.cpp | 11 ++----- src/Storages/StorageFile.cpp | 10 ++++-- .../02051_symlinks_to_user_files.reference | 1 + .../02051_symlinks_to_user_files.sh | 32 +++++++++++++++++++ 7 files changed, 53 insertions(+), 17 deletions(-) create mode 100644 tests/queries/0_stateless/02051_symlinks_to_user_files.reference create mode 100755 tests/queries/0_stateless/02051_symlinks_to_user_files.sh diff --git a/src/Common/filesystemHelpers.cpp b/src/Common/filesystemHelpers.cpp index 89214ad496e9..f9fe8c97a14a 100644 --- a/src/Common/filesystemHelpers.cpp +++ b/src/Common/filesystemHelpers.cpp @@ -118,7 +118,7 @@ bool pathStartsWith(const std::filesystem::path & path, const std::filesystem::p return absolute_path.starts_with(absolute_prefix_path); } -bool symlinkStartsWith(const std::filesystem::path & path, const std::filesystem::path & prefix_path) +bool fileOrSymlinkPathStartsWith(const std::filesystem::path & path, const std::filesystem::path & prefix_path) { /// Differs from pathStartsWith in how `path` is normalized before comparison. /// Make `path` absolute if it was relative and put it into normalized form: remove @@ -140,13 +140,14 @@ bool pathStartsWith(const String & path, const String & prefix_path) return pathStartsWith(filesystem_path, filesystem_prefix_path); } -bool symlinkStartsWith(const String & path, const String & prefix_path) +bool fileOrSymlinkPathStartsWith(const String & path, const String & prefix_path) { auto filesystem_path = std::filesystem::path(path); auto filesystem_prefix_path = std::filesystem::path(prefix_path); - return symlinkStartsWith(filesystem_path, filesystem_prefix_path); + return fileOrSymlinkPathStartsWith(filesystem_path, filesystem_prefix_path); } + } diff --git a/src/Common/filesystemHelpers.h b/src/Common/filesystemHelpers.h index 71ef7844ef7c..d769ed8839ce 100644 --- a/src/Common/filesystemHelpers.h +++ b/src/Common/filesystemHelpers.h @@ -35,7 +35,9 @@ bool pathStartsWith(const std::filesystem::path & path, const std::filesystem::p /// Returns true if path starts with prefix path bool pathStartsWith(const String & path, const String & prefix_path); -bool symlinkStartsWith(const String & path, const String & prefix_path); +/// Same as pathStartsWith, but without canonization, i.e. allowed to check symlinks. +/// (Path is made absolute and normalized.) 
+bool fileOrSymlinkPathStartsWith(const String & path, const String & prefix_path); } diff --git a/src/Dictionaries/FileDictionarySource.cpp b/src/Dictionaries/FileDictionarySource.cpp index bea14d88d1e8..dc10c0dedbd4 100644 --- a/src/Dictionaries/FileDictionarySource.cpp +++ b/src/Dictionaries/FileDictionarySource.cpp @@ -32,8 +32,9 @@ FileDictionarySource::FileDictionarySource( , sample_block{sample_block_} , context(context_) { - if (created_from_ddl && !pathStartsWith(filepath, context->getUserFilesPath())) - throw Exception(ErrorCodes::PATH_ACCESS_DENIED, "File path {} is not inside {}", filepath, context->getUserFilesPath()); + auto user_files_path = context->getUserFilesPath(); + if (created_from_ddl && !fileOrSymlinkPathStartsWith(filepath, user_files_path)) + throw Exception(ErrorCodes::PATH_ACCESS_DENIED, "File path {} is not inside {}", filepath, user_files_path); } diff --git a/src/Dictionaries/LibraryDictionarySource.cpp b/src/Dictionaries/LibraryDictionarySource.cpp index f2c5cefa5436..73e6e43dea7b 100644 --- a/src/Dictionaries/LibraryDictionarySource.cpp +++ b/src/Dictionaries/LibraryDictionarySource.cpp @@ -41,14 +41,9 @@ LibraryDictionarySource::LibraryDictionarySource( , sample_block{sample_block_} , context(Context::createCopy(context_)) { - bool path_checked = false; - if (fs::is_symlink(path)) - path_checked = symlinkStartsWith(path, context->getDictionariesLibPath()); - else - path_checked = pathStartsWith(path, context->getDictionariesLibPath()); - - if (created_from_ddl && !path_checked) - throw Exception(ErrorCodes::PATH_ACCESS_DENIED, "File path {} is not inside {}", path, context->getDictionariesLibPath()); + auto dictionaries_lib_path = context->getDictionariesLibPath(); + if (created_from_ddl && !fileOrSymlinkPathStartsWith(path, dictionaries_lib_path)) + throw Exception(ErrorCodes::PATH_ACCESS_DENIED, "File path {} is not inside {}", path, dictionaries_lib_path); if (!fs::exists(path)) throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "LibraryDictionarySource: Can't load library {}: file doesn't exist", path); diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index cc8e397b6682..55c780f27a1d 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -123,8 +124,8 @@ void checkCreationIsAllowed(ContextPtr context_global, const std::string & db_di return; /// "/dev/null" is allowed for perf testing - if (!startsWith(table_path, db_dir_path) && table_path != "/dev/null") - throw Exception("File is not inside " + db_dir_path, ErrorCodes::DATABASE_ACCESS_DENIED); + if (!fileOrSymlinkPathStartsWith(table_path, db_dir_path) && table_path != "/dev/null") + throw Exception(ErrorCodes::DATABASE_ACCESS_DENIED, "File `{}` is not inside `{}`", table_path, db_dir_path); if (fs::exists(table_path) && fs::is_directory(table_path)) throw Exception("File must not be a directory", ErrorCodes::INCORRECT_FILE_NAME); @@ -139,7 +140,10 @@ Strings StorageFile::getPathsList(const String & table_path, const String & user fs_table_path = user_files_absolute_path / fs_table_path; Strings paths; - const String path = fs::weakly_canonical(fs_table_path); + /// Do not use fs::canonical or fs::weakly_canonical. + /// Otherwise it will not allow to work with symlinks in `user_files_path` directory. + String path = fs::absolute(fs_table_path); + path = fs::path(path).lexically_normal(); /// Normalize path. 
if (path.find_first_of("*?{") == std::string::npos) { std::error_code error; diff --git a/tests/queries/0_stateless/02051_symlinks_to_user_files.reference b/tests/queries/0_stateless/02051_symlinks_to_user_files.reference new file mode 100644 index 000000000000..d86bac9de59a --- /dev/null +++ b/tests/queries/0_stateless/02051_symlinks_to_user_files.reference @@ -0,0 +1 @@ +OK diff --git a/tests/queries/0_stateless/02051_symlinks_to_user_files.sh b/tests/queries/0_stateless/02051_symlinks_to_user_files.sh new file mode 100755 index 000000000000..dfdc71e0f0b9 --- /dev/null +++ b/tests/queries/0_stateless/02051_symlinks_to_user_files.sh @@ -0,0 +1,32 @@ +#!/usr/bin/env bash +# Tags: no-fasttest, no-parallel + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +# See 01658_read_file_to_string_column.sh +user_files_path=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') + +FILE_PATH="${user_files_path}/file/" +mkdir -p ${FILE_PATH} +chmod 777 ${FILE_PATH} + +FILE="test_symlink_${CLICKHOUSE_DATABASE}" + +symlink_path=${FILE_PATH}/${FILE} +file_path=$CUR_DIR/${FILE} + +touch ${file_path} +ln -s ${file_path} ${symlink_path} +chmod ugo+w ${symlink_path} + +function cleanup() +{ + rm ${symlink_path} ${file_path} +} +trap cleanup EXIT + +${CLICKHOUSE_CLIENT} --query="insert into table function file('${symlink_path}', 'Values', 'a String') select 'OK'"; +${CLICKHOUSE_CLIENT} --query="select * from file('${symlink_path}', 'Values', 'a String')"; + From b2b3961785a5c6b2bbccde2afd628c1210a1690d Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 21 Oct 2021 18:00:02 +0300 Subject: [PATCH 133/472] Backport #29686 to 21.9: MaterializedPostgreSQL fix --- .../DatabaseMaterializedPostgreSQL.cpp | 2 +- .../fetchPostgreSQLTableStructure.cpp | 10 ++--- .../fetchPostgreSQLTableStructure.h | 4 +- .../PostgreSQLReplicationHandler.cpp | 38 ++++++++++++++----- .../PostgreSQL/PostgreSQLReplicationHandler.h | 7 +--- 5 files changed, 38 insertions(+), 23 deletions(-) diff --git a/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp b/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp index 742eb28c7a45..88b356dcca7d 100644 --- a/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp +++ b/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp @@ -67,7 +67,7 @@ void DatabaseMaterializedPostgreSQL::startSynchronization() settings->materialized_postgresql_tables_list.value); postgres::Connection connection(connection_info); - NameSet tables_to_replicate; + std::set tables_to_replicate; try { tables_to_replicate = replication_handler->fetchRequiredTables(connection); diff --git a/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp b/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp index 1b77947264e3..a92e41b43167 100644 --- a/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp +++ b/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp @@ -27,9 +27,9 @@ namespace ErrorCodes template -std::unordered_set fetchPostgreSQLTablesList(T & tx, const String & postgres_schema) +std::set fetchPostgreSQLTablesList(T & tx, const String & postgres_schema) { - std::unordered_set tables; + std::set tables; std::string query = fmt::format("SELECT tablename FROM pg_catalog.pg_tables " "WHERE schemaname != 'pg_catalog' AND {}", postgres_schema.empty() ? 
"schemaname != 'information_schema'" : "schemaname = " + quoteString(postgres_schema)); @@ -271,7 +271,7 @@ PostgreSQLTableStructure fetchPostgreSQLTableStructure(pqxx::connection & connec } -std::unordered_set fetchPostgreSQLTablesList(pqxx::connection & connection, const String & postgres_schema) +std::set fetchPostgreSQLTablesList(pqxx::connection & connection, const String & postgres_schema) { pqxx::ReadTransaction tx(connection); auto result = fetchPostgreSQLTablesList(tx, postgres_schema); @@ -291,10 +291,10 @@ PostgreSQLTableStructure fetchPostgreSQLTableStructure( bool with_primary_key, bool with_replica_identity_index); template -std::unordered_set fetchPostgreSQLTablesList(pqxx::work & tx, const String & postgres_schema); +std::set fetchPostgreSQLTablesList(pqxx::work & tx, const String & postgres_schema); template -std::unordered_set fetchPostgreSQLTablesList(pqxx::ReadTransaction & tx, const String & postgres_schema); +std::set fetchPostgreSQLTablesList(pqxx::ReadTransaction & tx, const String & postgres_schema); } diff --git a/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.h b/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.h index 0097287701c1..62f85e7f4140 100644 --- a/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.h +++ b/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.h @@ -21,7 +21,7 @@ struct PostgreSQLTableStructure using PostgreSQLTableStructurePtr = std::unique_ptr; -std::unordered_set fetchPostgreSQLTablesList(pqxx::connection & connection, const String & postgres_schema); +std::set fetchPostgreSQLTablesList(pqxx::connection & connection, const String & postgres_schema); PostgreSQLTableStructure fetchPostgreSQLTableStructure( pqxx::connection & connection, const String & postgres_table_name, bool use_nulls = true); @@ -32,7 +32,7 @@ PostgreSQLTableStructure fetchPostgreSQLTableStructure( bool with_primary_key = false, bool with_replica_identity_index = false); template -std::unordered_set fetchPostgreSQLTablesList(T & tx, const String & postgres_schema); +std::set fetchPostgreSQLTablesList(T & tx, const String & postgres_schema); } diff --git a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp index c8c74d2ddaa0..c1a5e8d4002d 100644 --- a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp +++ b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp @@ -150,7 +150,7 @@ void PostgreSQLReplicationHandler::startSynchronization(bool throw_on_error) initial_sync(); } /// Always drop replication slot if it is CREATE query and not ATTACH. - else if (!is_attach || new_publication) + else if (!is_attach) { dropReplicationSlot(tx); initial_sync(); @@ -334,7 +334,6 @@ void PostgreSQLReplicationHandler::createPublicationIfNeeded(pqxx::work & tx) { tx.exec(query_str); LOG_TRACE(log, "Created publication {} with tables list: {}", publication_name, tables_list); - new_publication = true; } catch (Exception & e) { @@ -453,7 +452,7 @@ void PostgreSQLReplicationHandler::shutdownFinal() /// Used by MaterializedPostgreSQL database engine. 
-NameSet PostgreSQLReplicationHandler::fetchRequiredTables(postgres::Connection & connection_) +std::set PostgreSQLReplicationHandler::fetchRequiredTables(postgres::Connection & connection_) { pqxx::work tx(connection_.getRef()); NameSet result_tables; @@ -497,6 +496,7 @@ NameSet PostgreSQLReplicationHandler::fetchRequiredTables(postgres::Connection & { result_tables = fetchTablesFromPublication(tx); NameSet diff; + std::sort(expected_tables.begin(), expected_tables.end()); std::set_symmetric_difference(expected_tables.begin(), expected_tables.end(), result_tables.begin(), result_tables.end(), std::inserter(diff, diff.begin())); @@ -509,12 +509,30 @@ NameSet PostgreSQLReplicationHandler::fetchRequiredTables(postgres::Connection & diff_tables += ", "; diff_tables += table_name; } + String publication_tables; + for (const auto & table_name : result_tables) + { + if (!publication_tables.empty()) + publication_tables += ", "; + publication_tables += table_name; + } + String listed_tables; + for (const auto & table_name : expected_tables) + { + if (!listed_tables.empty()) + listed_tables += ", "; + listed_tables += table_name; + } - LOG_WARNING(log, - "Publication {} already exists, but specified tables list differs from publication tables list in tables: {}.", - publication_name, diff_tables); + LOG_ERROR(log, + "Publication {} already exists, but specified tables list differs from publication tables list in tables: {}. ", + "Will use tables list from setting. " + "To avoid redundant work, you can try ALTER PUBLICATION query to remove redundant tables. " + "Or you can you ALTER SETTING. " + "\nPublication tables: {}.\nTables list: {}", + publication_name, diff_tables, publication_tables, listed_tables); - connection->execWithRetry([&](pqxx::nontransaction & tx_){ dropPublication(tx_); }); + return std::set(expected_tables.begin(), expected_tables.end()); } } } @@ -524,7 +542,7 @@ NameSet PostgreSQLReplicationHandler::fetchRequiredTables(postgres::Connection & { if (!tables_list.empty()) { - result_tables = NameSet(expected_tables.begin(), expected_tables.end()); + result_tables = std::set(expected_tables.begin(), expected_tables.end()); } else { @@ -540,10 +558,10 @@ NameSet PostgreSQLReplicationHandler::fetchRequiredTables(postgres::Connection & } -NameSet PostgreSQLReplicationHandler::fetchTablesFromPublication(pqxx::work & tx) +std::set PostgreSQLReplicationHandler::fetchTablesFromPublication(pqxx::work & tx) { std::string query = fmt::format("SELECT tablename FROM pg_publication_tables WHERE pubname = '{}'", publication_name); - std::unordered_set tables; + std::set tables; for (auto table_name : tx.stream(query)) tables.insert(std::get<0>(table_name)); diff --git a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.h b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.h index 3a0bedc08521..5f3990987013 100644 --- a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.h +++ b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.h @@ -43,7 +43,7 @@ class PostgreSQLReplicationHandler void addStorage(const std::string & table_name, StorageMaterializedPostgreSQL * storage); /// Fetch list of tables which are going to be replicated. Used for database engine. - NameSet fetchRequiredTables(postgres::Connection & connection_); + std::set fetchRequiredTables(postgres::Connection & connection_); /// Start replication setup immediately. 
void startSynchronization(bool throw_on_error); @@ -57,7 +57,7 @@ class PostgreSQLReplicationHandler void createPublicationIfNeeded(pqxx::work & tx); - NameSet fetchTablesFromPublication(pqxx::work & tx); + std::set fetchTablesFromPublication(pqxx::work & tx); void dropPublication(pqxx::nontransaction & ntx); @@ -87,9 +87,6 @@ class PostgreSQLReplicationHandler /// If it is not attach, i.e. a create query, then if publication already exists - always drop it. bool is_attach; - /// If new publication is created at start up - always drop replication slot if it exists. - bool new_publication = false; - const String remote_database_name, current_database_name; /// Connection string and address for logs. From 8d41c1a12051c4c5d69e7792886a97fc3d900c1e Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 21 Oct 2021 21:54:20 +0300 Subject: [PATCH 134/472] Backport #30432 to 21.9: Fix segfault on REPLACE PARTITION if session expired --- src/Storages/StorageReplicatedMergeTree.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 021260a7dffc..4ff8ea1bc74a 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -6391,12 +6391,12 @@ void StorageReplicatedMergeTree::replacePartitionFrom( MutableDataPartsVector dst_parts; Strings block_id_paths; Strings part_checksums; + auto zookeeper = getZooKeeper(); std::vector ephemeral_locks; LOG_DEBUG(log, "Cloning {} parts", src_all_parts.size()); static const String TMP_PREFIX = "tmp_replace_from_"; - auto zookeeper = getZooKeeper(); String alter_partition_version_path = zookeeper_path + "/alter_partition_version"; Coordination::Stat alter_partition_version_stat; From 7eb9e26f360d44f3f1dc82616455e0e61f4255e0 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Thu, 21 Oct 2021 23:35:40 +0300 Subject: [PATCH 135/472] Update PostgreSQLReplicationHandler.cpp --- src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp index c1a5e8d4002d..fc6f5bc12f51 100644 --- a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp +++ b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp @@ -455,7 +455,7 @@ void PostgreSQLReplicationHandler::shutdownFinal() std::set PostgreSQLReplicationHandler::fetchRequiredTables(postgres::Connection & connection_) { pqxx::work tx(connection_.getRef()); - NameSet result_tables; + std::set result_tables; bool publication_exists_before_startup = isPublicationExist(tx); LOG_DEBUG(log, "Publication exists: {}, is attach: {}", publication_exists_before_startup, is_attach); From ef4f01fef0881d4ebec725cf6d2aecab7356d9d7 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 22 Oct 2021 03:53:29 +0300 Subject: [PATCH 136/472] Backport #30502 to 21.9: StorageDictionary fix potential configuration race --- src/Storages/StorageDictionary.cpp | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/src/Storages/StorageDictionary.cpp b/src/Storages/StorageDictionary.cpp index c8bbb7039994..e4d34c9a24bf 100644 --- a/src/Storages/StorageDictionary.cpp +++ b/src/Storages/StorageDictionary.cpp @@ -217,11 +217,20 @@ void StorageDictionary::renameInMemory(const StorageID & new_table_id) auto old_table_id = getStorageID(); 
IStorage::renameInMemory(new_table_id); - if (configuration) + bool has_configuration = false; { - configuration->setString("dictionary.database", new_table_id.database_name); - configuration->setString("dictionary.name", new_table_id.table_name); + std::lock_guard lock(dictionary_config_mutex); + if (configuration) + { + has_configuration = true; + configuration->setString("dictionary.database", new_table_id.database_name); + configuration->setString("dictionary.name", new_table_id.table_name); + } + } + + if (has_configuration) + { const auto & external_dictionaries_loader = getContext()->getExternalDictionariesLoader(); auto result = external_dictionaries_loader.getLoadResult(old_table_id.getInternalDictionaryName()); From 66a7439b4a5416afde321e588d1734f79a0794ca Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Sat, 23 Oct 2021 16:05:19 +0300 Subject: [PATCH 137/472] Backport #30492 to 21.9: Fix deadlock on ALTER with scalar subquery to the same table --- src/Interpreters/MutationsInterpreter.cpp | 9 +++-- src/Interpreters/TreeRewriter.cpp | 5 +-- src/Interpreters/TreeRewriter.h | 3 +- ...0_alter_scalar_circular_deadlock.reference | 4 +++ .../02100_alter_scalar_circular_deadlock.sql | 34 +++++++++++++++++++ 5 files changed, 50 insertions(+), 5 deletions(-) create mode 100644 tests/queries/0_stateless/02100_alter_scalar_circular_deadlock.reference create mode 100644 tests/queries/0_stateless/02100_alter_scalar_circular_deadlock.sql diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index 4e5e3b4e86b1..1a4541752acc 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -732,8 +733,12 @@ ASTPtr MutationsInterpreter::prepareInterpreterSelectQuery(std::vector & for (const String & column : stage.output_columns) all_asts->children.push_back(std::make_shared(column)); - auto syntax_result = TreeRewriter(context).analyze(all_asts, all_columns, storage, metadata_snapshot); - if (context->hasQueryContext()) + /// Executing scalar subquery on that stage can lead to deadlock + /// e.g. ALTER referencing the same table in scalar subquery + bool execute_scalar_subqueries = !dry_run; + auto syntax_result = TreeRewriter(context).analyze( + all_asts, all_columns, storage, metadata_snapshot, false, true, execute_scalar_subqueries); + if (execute_scalar_subqueries && context->hasQueryContext()) for (const auto & it : syntax_result->getScalars()) context->getQueryContext()->addScalar(it.first, it.second); diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index a1b74fcd7a6c..fe8440eb3a47 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -1005,7 +1005,8 @@ TreeRewriterResultPtr TreeRewriter::analyze( ConstStoragePtr storage, const StorageMetadataPtr & metadata_snapshot, bool allow_aggregations, - bool allow_self_aliases) const + bool allow_self_aliases, + bool execute_scalar_subqueries) const { if (query->as()) throw Exception("Not select analyze for select asts.", ErrorCodes::LOGICAL_ERROR); @@ -1017,7 +1018,7 @@ TreeRewriterResultPtr TreeRewriter::analyze( normalize(query, result.aliases, result.source_columns_set, false, settings, allow_self_aliases); /// Executing scalar subqueries. Column defaults could be a scalar subquery. 
- executeScalarSubqueries(query, getContext(), 0, result.scalars, false); + executeScalarSubqueries(query, getContext(), 0, result.scalars, !execute_scalar_subqueries); if (settings.legacy_column_name_of_tuple_literal) markTupleLiteralsAsLegacy(query); diff --git a/src/Interpreters/TreeRewriter.h b/src/Interpreters/TreeRewriter.h index 0dca00c285e1..7fb4c8c09723 100644 --- a/src/Interpreters/TreeRewriter.h +++ b/src/Interpreters/TreeRewriter.h @@ -111,7 +111,8 @@ class TreeRewriter : WithContext ConstStoragePtr storage = {}, const StorageMetadataPtr & metadata_snapshot = {}, bool allow_aggregations = false, - bool allow_self_aliases = true) const; + bool allow_self_aliases = true, + bool execute_scalar_subqueries = true) const; /// Analyze and rewrite select query TreeRewriterResultPtr analyzeSelect( diff --git a/tests/queries/0_stateless/02100_alter_scalar_circular_deadlock.reference b/tests/queries/0_stateless/02100_alter_scalar_circular_deadlock.reference new file mode 100644 index 000000000000..98fb6a686563 --- /dev/null +++ b/tests/queries/0_stateless/02100_alter_scalar_circular_deadlock.reference @@ -0,0 +1,4 @@ +1 +1 +1 +1 diff --git a/tests/queries/0_stateless/02100_alter_scalar_circular_deadlock.sql b/tests/queries/0_stateless/02100_alter_scalar_circular_deadlock.sql new file mode 100644 index 000000000000..32b757f54a37 --- /dev/null +++ b/tests/queries/0_stateless/02100_alter_scalar_circular_deadlock.sql @@ -0,0 +1,34 @@ +DROP TABLE IF EXISTS foo; + +CREATE TABLE foo (ts DateTime, x UInt64) +ENGINE = MergeTree PARTITION BY toYYYYMMDD(ts) +ORDER BY (ts); + +INSERT INTO foo (ts, x) SELECT toDateTime('2020-01-01 00:05:00'), number from system.numbers_mt LIMIT 10; + +SET mutations_sync = 1; + +ALTER TABLE foo UPDATE x = 1 WHERE x = (SELECT x from foo WHERE x = 4); + +SELECT sum(x) == 42 FROM foo; + +ALTER TABLE foo UPDATE x = 1 WHERE x IN (SELECT x FROM foo WHERE x != 0); + +SELECT sum(x) == 9 FROM foo; + +DROP TABLE IF EXISTS bar; + +CREATE TABLE bar (ts DateTime, x UInt64) +ENGINE = Memory; + +INSERT INTO bar (ts, x) SELECT toDateTime('2020-01-01 00:05:00'), number from system.numbers_mt LIMIT 10; + +SET mutations_sync = 1; + +ALTER TABLE bar UPDATE x = 1 WHERE x = (SELECT x from bar WHERE x = 4); + +SELECT sum(x) == 42 FROM bar; + +ALTER TABLE bar UPDATE x = 1 WHERE x IN (SELECT x FROM bar WHERE x != 0); + +SELECT sum(x) == 9 FROM bar; From 708c57273a90bd7dc03d0d5933f685f4c3d85b55 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Sun, 24 Oct 2021 02:34:39 +0300 Subject: [PATCH 138/472] Update test.py --- tests/integration/test_storage_mysql/test.py | 45 -------------------- 1 file changed, 45 deletions(-) diff --git a/tests/integration/test_storage_mysql/test.py b/tests/integration/test_storage_mysql/test.py index 040fc1ed8e2f..956b479807b3 100644 --- a/tests/integration/test_storage_mysql/test.py +++ b/tests/integration/test_storage_mysql/test.py @@ -319,51 +319,6 @@ def test_external_settings(started_cluster): conn.close() -# Check that limited connection_wait_timeout (via connection_pool_size=1) will throw. 
-def test_settings_connection_wait_timeout(started_cluster): - table_name = 'test_settings_connection_wait_timeout' - node1.query(f'DROP TABLE IF EXISTS {table_name}') - wait_timeout = 2 - - conn = get_mysql_conn(started_cluster, cluster.mysql_ip) - drop_mysql_table(conn, table_name) - create_mysql_table(conn, table_name) - - node1.query(''' - CREATE TABLE {} - ( - id UInt32, - name String, - age UInt32, - money UInt32 - ) - ENGINE = MySQL('mysql57:3306', 'clickhouse', '{}', 'root', 'clickhouse') - SETTINGS connection_wait_timeout={}, connection_pool_size=1 - '''.format(table_name, table_name, wait_timeout) - ) - - node1.query("INSERT INTO {} (id, name) SELECT number, concat('name_', toString(number)) from numbers(10) ".format(table_name)) - - def worker(): - node1.query("SELECT sleepEachRow(1) FROM {}".format(table_name)) - - worker_thread = threading.Thread(target=worker) - worker_thread.start() - - # ensure that first query started in worker_thread - time.sleep(1) - - started = time.time() - with pytest.raises(QueryRuntimeException, match=r"Exception: mysqlxx::Pool is full \(connection_wait_timeout is exceeded\)"): - node1.query("SELECT sleepEachRow(1) FROM {}".format(table_name)) - ended = time.time() - assert (ended - started) >= wait_timeout - - worker_thread.join() - - drop_mysql_table(conn, table_name) - conn.close() - # Regression for (k, v) IN ((k, v)) def test_mysql_in(started_cluster): table_name = 'test_mysql_in' From 852844b7057ed6355c74eda0111f50386ed35e28 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Sun, 24 Oct 2021 06:07:55 +0300 Subject: [PATCH 139/472] Backport #30562 to 21.9: Fix LimitStep header after limit push down optimization. --- src/Processors/QueryPlan/LimitStep.cpp | 2 +- .../02100_limit_push_down_bug.reference | 0 .../0_stateless/02100_limit_push_down_bug.sql | 21 +++++++++++++++++++ 3 files changed, 22 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/02100_limit_push_down_bug.reference create mode 100644 tests/queries/0_stateless/02100_limit_push_down_bug.sql diff --git a/src/Processors/QueryPlan/LimitStep.cpp b/src/Processors/QueryPlan/LimitStep.cpp index 5f5a0bd0d644..91f7e32cc00f 100644 --- a/src/Processors/QueryPlan/LimitStep.cpp +++ b/src/Processors/QueryPlan/LimitStep.cpp @@ -40,7 +40,7 @@ void LimitStep::updateInputStream(DataStream input_stream) { input_streams.clear(); input_streams.emplace_back(std::move(input_stream)); - output_stream = createOutputStream(input_streams.front(), output_stream->header, getDataStreamTraits()); + output_stream = createOutputStream(input_streams.front(), input_streams.front().header, getDataStreamTraits()); } void LimitStep::transformPipeline(QueryPipeline & pipeline, const BuildQueryPipelineSettings &) diff --git a/tests/queries/0_stateless/02100_limit_push_down_bug.reference b/tests/queries/0_stateless/02100_limit_push_down_bug.reference new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/queries/0_stateless/02100_limit_push_down_bug.sql b/tests/queries/0_stateless/02100_limit_push_down_bug.sql new file mode 100644 index 000000000000..2ba9d2b8818d --- /dev/null +++ b/tests/queries/0_stateless/02100_limit_push_down_bug.sql @@ -0,0 +1,21 @@ +drop table if exists tbl_repr; + +CREATE TABLE tbl_repr( +ts DateTime, +x String) +ENGINE=MergeTree ORDER BY ts; + + +SELECT * +FROM +( + SELECT + x, + length(x) + FROM tbl_repr + WHERE ts > now() + LIMIT 1 +) +WHERE x != ''; + +drop table if exists tbl_repr; From e28dab82226cd8896abf4749ccd3d17f682023ff Mon Sep 17 00:00:00 2001 
From: robot-clickhouse Date: Sun, 24 Oct 2021 06:18:52 +0300 Subject: [PATCH 140/472] Backport #28910 to 21.9: Fix bad optimization of ORDER BY in subquery if it contains WITH FILL --- src/Interpreters/DuplicateOrderByVisitor.cpp | 124 ++++++++++++++++++ src/Interpreters/DuplicateOrderByVisitor.h | 86 +----------- src/Interpreters/ExpressionAnalyzer.cpp | 10 +- src/Interpreters/FillingRow.h | 8 +- src/Interpreters/TreeOptimizer.cpp | 27 +++- .../Transforms/FillingTransform.cpp | 13 +- ...der_by_with_fill_misoptimization.reference | 9 ++ ...015_order_by_with_fill_misoptimization.sql | 1 + ...fill_monotonic_functions_removal.reference | 3 + ..._with_fill_monotonic_functions_removal.sql | 6 + ...by_with_fill_redundant_functions.reference | 9 ++ ...order_by_with_fill_redundant_functions.sql | 1 + ...le_with_fill_for_the_same_column.reference | 0 ...multiple_with_fill_for_the_same_column.sql | 1 + .../2019_multiple_weird_with_fill.reference | 45 +++++++ .../2019_multiple_weird_with_fill.sql | 14 ++ 16 files changed, 265 insertions(+), 92 deletions(-) create mode 100644 src/Interpreters/DuplicateOrderByVisitor.cpp create mode 100644 tests/queries/0_stateless/2015_order_by_with_fill_misoptimization.reference create mode 100644 tests/queries/0_stateless/2015_order_by_with_fill_misoptimization.sql create mode 100644 tests/queries/0_stateless/2016_order_by_with_fill_monotonic_functions_removal.reference create mode 100644 tests/queries/0_stateless/2016_order_by_with_fill_monotonic_functions_removal.sql create mode 100644 tests/queries/0_stateless/2017_order_by_with_fill_redundant_functions.reference create mode 100644 tests/queries/0_stateless/2017_order_by_with_fill_redundant_functions.sql create mode 100644 tests/queries/0_stateless/2018_multiple_with_fill_for_the_same_column.reference create mode 100644 tests/queries/0_stateless/2018_multiple_with_fill_for_the_same_column.sql create mode 100644 tests/queries/0_stateless/2019_multiple_weird_with_fill.reference create mode 100644 tests/queries/0_stateless/2019_multiple_weird_with_fill.sql diff --git a/src/Interpreters/DuplicateOrderByVisitor.cpp b/src/Interpreters/DuplicateOrderByVisitor.cpp new file mode 100644 index 000000000000..df063fc849e4 --- /dev/null +++ b/src/Interpreters/DuplicateOrderByVisitor.cpp @@ -0,0 +1,124 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int UNKNOWN_TYPE_OF_AST_NODE; +} + + +namespace +{ + +/// Checks if SELECT has stateful functions +class ASTFunctionStatefulData +{ +public: + using TypeToVisit = ASTFunction; + + ContextPtr context; + bool & is_stateful; + void visit(ASTFunction & ast_function, ASTPtr &) + { + auto aggregate_function_properties = AggregateFunctionFactory::instance().tryGetProperties(ast_function.name); + + if (aggregate_function_properties && aggregate_function_properties->is_order_dependent) + { + is_stateful = true; + return; + } + + const auto & function = FunctionFactory::instance().tryGet(ast_function.name, context); + + if (function && function->isStateful()) + { + is_stateful = true; + return; + } + } +}; + +using ASTFunctionStatefulMatcher = OneTypeMatcher; +using ASTFunctionStatefulVisitor = InDepthNodeVisitor; + +} + + +void DuplicateOrderByFromSubqueriesData::visit(ASTSelectQuery & select_query, ASTPtr &) +{ + if (done) + return; + done = true; + + if (select_query.orderBy()) + { + /// If we have limits then the ORDER BY is non-removable. 
+ if (select_query.limitBy() + || select_query.limitByOffset() + || select_query.limitByLength() + || select_query.limitLength() + || select_query.limitOffset()) + { + return; + } + + /// If ORDER BY contains filling (in addition to sorting) it is non-removable. + for (const auto & child : select_query.orderBy()->children) + { + auto * ast = child->as(); + if (!ast || ast->children.empty()) + throw Exception("Bad ORDER BY expression AST", ErrorCodes::UNKNOWN_TYPE_OF_AST_NODE); + + if (ast->with_fill) + return; + } + + select_query.setExpression(ASTSelectQuery::Expression::ORDER_BY, nullptr); + } +} + +void DuplicateOrderByData::visit(ASTSelectQuery & select_query, ASTPtr &) +{ + if (select_query.orderBy() || select_query.groupBy()) + { + for (auto & elem : select_query.children) + { + if (elem->as()) + { + bool is_stateful = false; + ASTFunctionStatefulVisitor::Data data{context, is_stateful}; + ASTFunctionStatefulVisitor(data).visit(elem); + if (is_stateful) //-V547 + return; + } + } + + if (auto select_table_ptr = select_query.tables()) + { + if (auto * select_table = select_table_ptr->as()) + { + if (!select_table->children.empty()) + { + DuplicateOrderByFromSubqueriesVisitor::Data data{false}; + DuplicateOrderByFromSubqueriesVisitor(data).visit(select_table->children[0]); + } + } + } + } +} + +} + diff --git a/src/Interpreters/DuplicateOrderByVisitor.h b/src/Interpreters/DuplicateOrderByVisitor.h index 4231b2600af0..de8cb4c8f32a 100644 --- a/src/Interpreters/DuplicateOrderByVisitor.h +++ b/src/Interpreters/DuplicateOrderByVisitor.h @@ -1,51 +1,13 @@ #pragma once -#include -#include -#include #include -#include -#include -#include -#include -#include #include -#include -namespace DB -{ -/// Checks if SELECT has stateful functions -class ASTFunctionStatefulData +namespace DB { -public: - using TypeToVisit = ASTFunction; - - ContextPtr context; - bool & is_stateful; - void visit(ASTFunction & ast_function, ASTPtr &) - { - auto aggregate_function_properties = AggregateFunctionFactory::instance().tryGetProperties(ast_function.name); - - if (aggregate_function_properties && aggregate_function_properties->is_order_dependent) - { - is_stateful = true; - return; - } - - const auto & function = FunctionFactory::instance().tryGet(ast_function.name, context); - - if (function && function->isStateful()) - { - is_stateful = true; - return; - } - } -}; - -using ASTFunctionStatefulMatcher = OneTypeMatcher; -using ASTFunctionStatefulVisitor = InDepthNodeVisitor; +class ASTSelectQuery; /// Erases unnecessary ORDER BY from subquery class DuplicateOrderByFromSubqueriesData @@ -55,19 +17,7 @@ class DuplicateOrderByFromSubqueriesData bool done = false; - void visit(ASTSelectQuery & select_query, ASTPtr &) - { - if (done) - return; - - if (select_query.orderBy() && !select_query.limitBy() && !select_query.limitByOffset() && - !select_query.limitByLength() && !select_query.limitLength() && !select_query.limitOffset()) - { - select_query.setExpression(ASTSelectQuery::Expression::ORDER_BY, nullptr); - } - - done = true; - } + void visit(ASTSelectQuery & select_query, ASTPtr &); }; using DuplicateOrderByFromSubqueriesMatcher = OneTypeMatcher; @@ -82,35 +32,7 @@ class DuplicateOrderByData ContextPtr context; - void visit(ASTSelectQuery & select_query, ASTPtr &) - { - if (select_query.orderBy() || select_query.groupBy()) - { - for (auto & elem : select_query.children) - { - if (elem->as()) - { - bool is_stateful = false; - ASTFunctionStatefulVisitor::Data data{context, is_stateful}; - 
ASTFunctionStatefulVisitor(data).visit(elem); - if (is_stateful) //-V547 - return; - } - } - - if (auto select_table_ptr = select_query.tables()) - { - if (auto * select_table = select_table_ptr->as()) - { - if (!select_table->children.empty()) - { - DuplicateOrderByFromSubqueriesVisitor::Data data{false}; - DuplicateOrderByFromSubqueriesVisitor(data).visit(select_table->children[0]); - } - } - } - } - } + void visit(ASTSelectQuery & select_query, ASTPtr &); }; using DuplicateOrderByMatcher = OneTypeMatcher; diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index c8a5ed6c56af..30ac0148558c 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -1227,7 +1227,15 @@ ActionsDAGPtr SelectQueryExpressionAnalyzer::appendOrderBy(ExpressionActionsChai { const auto * ast = child->as(); if (!ast || ast->children.empty()) - throw Exception("Bad order expression AST", ErrorCodes::UNKNOWN_TYPE_OF_AST_NODE); + throw Exception("Bad ORDER BY expression AST", ErrorCodes::UNKNOWN_TYPE_OF_AST_NODE); + + if (getContext()->getSettingsRef().enable_positional_arguments) + { + auto new_argument = checkPositionalArgument(ast->children.at(0), select_query, ASTSelectQuery::Expression::ORDER_BY); + if (new_argument) + ast->children[0] = new_argument; + } + ASTPtr order_expression = ast->children.at(0); step.addRequiredOutput(order_expression->getColumnName()); diff --git a/src/Interpreters/FillingRow.h b/src/Interpreters/FillingRow.h index 434a92707181..604f4b1ee741 100644 --- a/src/Interpreters/FillingRow.h +++ b/src/Interpreters/FillingRow.h @@ -24,14 +24,14 @@ class FillingRow void initFromDefaults(size_t from_pos = 0); - Field & operator[](size_t ind) { return row[ind]; } - const Field & operator[](size_t ind) const { return row[ind]; } + Field & operator[](size_t index) { return row[index]; } + const Field & operator[](size_t index) const { return row[index]; } size_t size() const { return row.size(); } bool operator<(const FillingRow & other) const; bool operator==(const FillingRow & other) const; - int getDirection(size_t ind) const { return description[ind].direction; } - FillColumnDescription & getFillDescription(size_t ind) { return description[ind].fill_description; } + int getDirection(size_t index) const { return description[index].direction; } + FillColumnDescription & getFillDescription(size_t index) { return description[index].fill_description; } private: Row row; diff --git a/src/Interpreters/TreeOptimizer.cpp b/src/Interpreters/TreeOptimizer.cpp index c1a265d9a06d..04201b97573d 100644 --- a/src/Interpreters/TreeOptimizer.cpp +++ b/src/Interpreters/TreeOptimizer.cpp @@ -39,6 +39,7 @@ namespace DB namespace ErrorCodes { extern const int LOGICAL_ERROR; + extern const int UNKNOWN_TYPE_OF_AST_NODE; } namespace @@ -263,7 +264,8 @@ void optimizeDuplicatesInOrderBy(const ASTSelectQuery * select_query) String name = elem->children.front()->getColumnName(); const auto & order_by_elem = elem->as(); - if (elems_set.emplace(name, order_by_elem.collation ? order_by_elem.collation->getColumnName() : "").second) + if (order_by_elem.with_fill /// Always keep elements WITH FILL as they affects other. + || elems_set.emplace(name, order_by_elem.collation ? 
order_by_elem.collation->getColumnName() : "").second) unique_elems.emplace_back(elem); } @@ -406,6 +408,17 @@ void optimizeMonotonousFunctionsInOrderBy(ASTSelectQuery * select_query, Context if (!order_by) return; + for (const auto & child : order_by->children) + { + auto * order_by_element = child->as(); + + if (!order_by_element || order_by_element->children.empty()) + throw Exception("Bad ORDER BY expression AST", ErrorCodes::UNKNOWN_TYPE_OF_AST_NODE); + + if (order_by_element->with_fill) + return; + } + std::unordered_set group_by_hashes; if (auto group_by = select_query->groupBy()) { @@ -421,6 +434,7 @@ void optimizeMonotonousFunctionsInOrderBy(ASTSelectQuery * select_query, Context for (size_t i = 0; i < order_by->children.size(); ++i) { auto * order_by_element = order_by->children[i]->as(); + auto & ast_func = order_by_element->children[0]; if (!ast_func->as()) continue; @@ -456,6 +470,17 @@ void optimizeRedundantFunctionsInOrderBy(const ASTSelectQuery * select_query, Co if (!order_by) return; + for (const auto & child : order_by->children) + { + auto * order_by_element = child->as(); + + if (!order_by_element || order_by_element->children.empty()) + throw Exception("Bad ORDER BY expression AST", ErrorCodes::UNKNOWN_TYPE_OF_AST_NODE); + + if (order_by_element->with_fill) + return; + } + std::unordered_set prev_keys; ASTs modified; modified.reserve(order_by->children.size()); diff --git a/src/Processors/Transforms/FillingTransform.cpp b/src/Processors/Transforms/FillingTransform.cpp index 831130d06d18..3ff89c302ff8 100644 --- a/src/Processors/Transforms/FillingTransform.cpp +++ b/src/Processors/Transforms/FillingTransform.cpp @@ -81,7 +81,7 @@ FillingTransform::FillingTransform( }; std::vector is_fill_column(header_.columns()); - for (size_t i = 0; i < sort_description.size(); ++i) + for (size_t i = 0, size = sort_description.size(); i < size; ++i) { size_t block_position = header_.getPositionByName(sort_description[i].column_name); is_fill_column[block_position] = true; @@ -103,6 +103,11 @@ FillingTransform::FillingTransform( } } + std::set unique_positions; + for (auto pos : fill_column_positions) + if (!unique_positions.insert(pos).second) + throw Exception("Multiple WITH FILL for identical expressions is not supported in ORDER BY", ErrorCodes::INVALID_WITH_FILL_EXPRESSION); + for (size_t i = 0; i < header_.columns(); ++i) if (!is_fill_column[i]) other_column_positions.push_back(i); @@ -114,7 +119,7 @@ IProcessor::Status FillingTransform::prepare() { should_insert_first = next_row < filling_row; - for (size_t i = 0; i < filling_row.size(); ++i) + for (size_t i = 0, size = filling_row.size(); i < size; ++i) next_row[i] = filling_row.getFillDescription(i).fill_to; if (filling_row < next_row) @@ -227,9 +232,9 @@ void FillingTransform::setResultColumns(Chunk & chunk, MutableColumns & fill_col /// fill_columns always non-empty. 
size_t num_rows = fill_columns[0]->size(); - for (size_t i = 0; i < fill_columns.size(); ++i) + for (size_t i = 0, size = fill_columns.size(); i < size; ++i) result_columns[fill_column_positions[i]] = std::move(fill_columns[i]); - for (size_t i = 0; i < other_columns.size(); ++i) + for (size_t i = 0, size = other_columns.size(); i < size; ++i) result_columns[other_column_positions[i]] = std::move(other_columns[i]); chunk.setColumns(std::move(result_columns), num_rows); diff --git a/tests/queries/0_stateless/2015_order_by_with_fill_misoptimization.reference b/tests/queries/0_stateless/2015_order_by_with_fill_misoptimization.reference new file mode 100644 index 000000000000..07258cd829ac --- /dev/null +++ b/tests/queries/0_stateless/2015_order_by_with_fill_misoptimization.reference @@ -0,0 +1,9 @@ + + + + + + + + +Hello diff --git a/tests/queries/0_stateless/2015_order_by_with_fill_misoptimization.sql b/tests/queries/0_stateless/2015_order_by_with_fill_misoptimization.sql new file mode 100644 index 000000000000..f0d90f151b2c --- /dev/null +++ b/tests/queries/0_stateless/2015_order_by_with_fill_misoptimization.sql @@ -0,0 +1 @@ +SELECT s FROM (SELECT 5 AS x, 'Hello' AS s ORDER BY x WITH FILL FROM 1 TO 10) ORDER BY s; diff --git a/tests/queries/0_stateless/2016_order_by_with_fill_monotonic_functions_removal.reference b/tests/queries/0_stateless/2016_order_by_with_fill_monotonic_functions_removal.reference new file mode 100644 index 000000000000..264f29a6ecd1 --- /dev/null +++ b/tests/queries/0_stateless/2016_order_by_with_fill_monotonic_functions_removal.reference @@ -0,0 +1,3 @@ +2021-07-07 15:21:00 +2021-07-07 15:21:05 +2021-07-07 15:21:10 diff --git a/tests/queries/0_stateless/2016_order_by_with_fill_monotonic_functions_removal.sql b/tests/queries/0_stateless/2016_order_by_with_fill_monotonic_functions_removal.sql new file mode 100644 index 000000000000..bf232ed5c864 --- /dev/null +++ b/tests/queries/0_stateless/2016_order_by_with_fill_monotonic_functions_removal.sql @@ -0,0 +1,6 @@ +SELECT toStartOfMinute(some_time) AS ts +FROM +( + SELECT toDateTime('2021-07-07 15:21:05') AS some_time +) +ORDER BY ts ASC WITH FILL FROM toDateTime('2021-07-07 15:21:00') TO toDateTime('2021-07-07 15:21:15') STEP 5; diff --git a/tests/queries/0_stateless/2017_order_by_with_fill_redundant_functions.reference b/tests/queries/0_stateless/2017_order_by_with_fill_redundant_functions.reference new file mode 100644 index 000000000000..07193989308c --- /dev/null +++ b/tests/queries/0_stateless/2017_order_by_with_fill_redundant_functions.reference @@ -0,0 +1,9 @@ +1 +2 +3 +4 +5 +6 +7 +8 +9 diff --git a/tests/queries/0_stateless/2017_order_by_with_fill_redundant_functions.sql b/tests/queries/0_stateless/2017_order_by_with_fill_redundant_functions.sql new file mode 100644 index 000000000000..6f3e6787c344 --- /dev/null +++ b/tests/queries/0_stateless/2017_order_by_with_fill_redundant_functions.sql @@ -0,0 +1 @@ +SELECT x FROM (SELECT 5 AS x) ORDER BY -x, x WITH FILL FROM 1 TO 10; diff --git a/tests/queries/0_stateless/2018_multiple_with_fill_for_the_same_column.reference b/tests/queries/0_stateless/2018_multiple_with_fill_for_the_same_column.reference new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/queries/0_stateless/2018_multiple_with_fill_for_the_same_column.sql b/tests/queries/0_stateless/2018_multiple_with_fill_for_the_same_column.sql new file mode 100644 index 000000000000..32b38388cf6d --- /dev/null +++ b/tests/queries/0_stateless/2018_multiple_with_fill_for_the_same_column.sql @@ -0,0 +1 
@@ +SELECT x, y FROM (SELECT 5 AS x, 'Hello' AS y) ORDER BY x WITH FILL FROM 3 TO 7, y, x WITH FILL FROM 1 TO 10; -- { serverError 475 } diff --git a/tests/queries/0_stateless/2019_multiple_weird_with_fill.reference b/tests/queries/0_stateless/2019_multiple_weird_with_fill.reference new file mode 100644 index 000000000000..822d290564a8 --- /dev/null +++ b/tests/queries/0_stateless/2019_multiple_weird_with_fill.reference @@ -0,0 +1,45 @@ +3 -10 +3 -9 +3 -8 +3 -7 +3 -6 +3 -5 +3 -4 +3 -3 +3 -2 +4 -10 +4 -9 +4 -8 +4 -7 +4 -6 +4 -5 +4 -4 +4 -3 +4 -2 +5 -10 +5 -9 +5 -8 +5 -7 +5 -6 +5 -5 Hello +5 -4 +5 -3 +5 -2 +6 -10 +6 -9 +6 -8 +6 -7 +6 -6 +6 -5 +6 -4 +6 -3 +6 -2 +7 -10 +7 -9 +7 -8 +7 -7 +7 -6 +7 -5 +7 -4 +7 -3 +7 -2 diff --git a/tests/queries/0_stateless/2019_multiple_weird_with_fill.sql b/tests/queries/0_stateless/2019_multiple_weird_with_fill.sql new file mode 100644 index 000000000000..a2ed33c51ddc --- /dev/null +++ b/tests/queries/0_stateless/2019_multiple_weird_with_fill.sql @@ -0,0 +1,14 @@ +SELECT + x, + -x, + y +FROM +( + SELECT + 5 AS x, + 'Hello' AS y +) +ORDER BY + x ASC WITH FILL FROM 3 TO 7, + y ASC, + -x ASC WITH FILL FROM -10 TO -1; From 35e7ad97d8452ac532a9b1da94b7bbbd5ec54f63 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Sun, 24 Oct 2021 09:07:57 +0300 Subject: [PATCH 141/472] Update ExpressionAnalyzer.cpp --- src/Interpreters/ExpressionAnalyzer.cpp | 7 ------- 1 file changed, 7 deletions(-) diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 30ac0148558c..57bf9e28c488 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -1229,13 +1229,6 @@ ActionsDAGPtr SelectQueryExpressionAnalyzer::appendOrderBy(ExpressionActionsChai if (!ast || ast->children.empty()) throw Exception("Bad ORDER BY expression AST", ErrorCodes::UNKNOWN_TYPE_OF_AST_NODE); - if (getContext()->getSettingsRef().enable_positional_arguments) - { - auto new_argument = checkPositionalArgument(ast->children.at(0), select_query, ASTSelectQuery::Expression::ORDER_BY); - if (new_argument) - ast->children[0] = new_argument; - } - ASTPtr order_expression = ast->children.at(0); step.addRequiredOutput(order_expression->getColumnName()); From 1d42ba67a8c1c03f16723a08542e87920322ed1c Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Mon, 25 Oct 2021 20:19:45 +0300 Subject: [PATCH 142/472] Backport #30244 to 21.9: Fix `LIKE` function --- src/Functions/MatchImpl.h | 6 +++--- .../queries/0_stateless/02045_like_function.reference | 2 ++ tests/queries/0_stateless/02045_like_function.sql | 10 ++++++++++ 3 files changed, 15 insertions(+), 3 deletions(-) create mode 100644 tests/queries/0_stateless/02045_like_function.reference create mode 100644 tests/queries/0_stateless/02045_like_function.sql diff --git a/src/Functions/MatchImpl.h b/src/Functions/MatchImpl.h index e1e8394f7b15..d518f173d16a 100644 --- a/src/Functions/MatchImpl.h +++ b/src/Functions/MatchImpl.h @@ -198,7 +198,7 @@ struct MatchImpl } /// We check that the entry does not pass through the boundaries of strings. - if (pos + strstr_pattern.size() < begin + offsets[i]) + if (pos + required_substring.size() < begin + offsets[i]) { /// And if it does not, if necessary, we check the regexp. @@ -342,7 +342,7 @@ struct MatchImpl const UInt8 * next_pos = begin; /// If required substring is larger than string size - it cannot be found. 
- if (strstr_pattern.size() <= n) + if (required_substring.size() <= n) { Searcher searcher(required_substring.data(), required_substring.size(), end - pos); @@ -358,7 +358,7 @@ struct MatchImpl } next_pos += n; - if (pos + strstr_pattern.size() <= next_pos) + if (pos + required_substring.size() <= next_pos) { /// And if it does not, if necessary, we check the regexp. diff --git a/tests/queries/0_stateless/02045_like_function.reference b/tests/queries/0_stateless/02045_like_function.reference new file mode 100644 index 000000000000..0633853274a0 --- /dev/null +++ b/tests/queries/0_stateless/02045_like_function.reference @@ -0,0 +1,2 @@ +1 +1 1 1 1 1 1 diff --git a/tests/queries/0_stateless/02045_like_function.sql b/tests/queries/0_stateless/02045_like_function.sql new file mode 100644 index 000000000000..d395e8d4579e --- /dev/null +++ b/tests/queries/0_stateless/02045_like_function.sql @@ -0,0 +1,10 @@ +SELECT 'r\\a1bbb' LIKE '%r\\\\a1%bbb%' AS res; + +WITH lower('\RealVNC\WinVNC4 /v password') as CommandLine +SELECT + CommandLine LIKE '%\\\\realvnc\\\\winvnc4%password%' as t1, + CommandLine LIKE '%\\\\realvnc\\\\winvnc4 %password%' as t2, + CommandLine LIKE '%\\\\realvnc\\\\winvnc4%password' as t3, + CommandLine LIKE '%\\\\realvnc\\\\winvnc4 %password' as t4, + CommandLine LIKE '%realvnc%winvnc4%password%' as t5, + CommandLine LIKE '%\\\\winvnc4%password%' as t6; From 9430059e206c9840fc38d13d434721054770ff3e Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Mon, 25 Oct 2021 22:17:59 +0300 Subject: [PATCH 143/472] Backport #30494 to 21.9: Fix reading from empty file on encrypted disk --- src/Disks/DiskEncrypted.cpp | 8 + src/Disks/tests/gtest_disk_encrypted.cpp | 292 +++++++++++++++++++++++ src/IO/ReadBufferFromFileDecorator.cpp | 10 +- src/IO/ReadBufferFromFileDecorator.h | 2 + 4 files changed, 311 insertions(+), 1 deletion(-) create mode 100644 src/Disks/tests/gtest_disk_encrypted.cpp diff --git a/src/Disks/DiskEncrypted.cpp b/src/Disks/DiskEncrypted.cpp index 9980dc0d8dc0..9f4f2b458f12 100644 --- a/src/Disks/DiskEncrypted.cpp +++ b/src/Disks/DiskEncrypted.cpp @@ -4,6 +4,8 @@ #include #include #include +#include +#include #include #include @@ -246,6 +248,12 @@ std::unique_ptr DiskEncrypted::readFile( { auto wrapped_path = wrappedPath(path); auto buffer = delegate->readFile(wrapped_path, buf_size, estimated_size, aio_threshold, mmap_threshold, mmap_cache); + if (buffer->eof()) + { + /// File is empty, that's a normal case, see DiskEncrypted::truncateFile(). + /// There is no header so we just return `ReadBufferFromString("")`. + return std::make_unique(std::make_unique(std::string_view{}), wrapped_path); + } auto settings = current_settings.get(); FileEncryption::Header header = readHeader(*buffer); String key = getKey(path, header, *settings); diff --git a/src/Disks/tests/gtest_disk_encrypted.cpp b/src/Disks/tests/gtest_disk_encrypted.cpp new file mode 100644 index 000000000000..e401df4c72b7 --- /dev/null +++ b/src/Disks/tests/gtest_disk_encrypted.cpp @@ -0,0 +1,292 @@ +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace DB; + +constexpr auto kHeaderSize = FileEncryption::Header::kSize; + + +class DiskEncryptedTest : public ::testing::Test +{ +protected: + void SetUp() override + { + /// Make local disk. 
+ temp_dir = std::make_unique(); + temp_dir->createDirectories(); + local_disk = std::make_shared("local_disk", getDirectory(), 0); + } + + void TearDown() override + { + encrypted_disk.reset(); + local_disk.reset(); + } + + void makeEncryptedDisk(FileEncryption::Algorithm algorithm, const String & key, const String & path = "") + { + auto settings = std::make_unique(); + settings->wrapped_disk = local_disk; + settings->current_algorithm = algorithm; + settings->keys[0] = key; + settings->current_key_id = 0; + settings->disk_path = path; + encrypted_disk = std::make_shared("encrypted_disk", std::move(settings)); + } + + String getFileNames() + { + Strings file_names; + encrypted_disk->listFiles("", file_names); + return boost::algorithm::join(file_names, ", "); + } + + String getDirectory() + { + return temp_dir->path() + "/"; + } + + String getFileContents(const String & file_name) + { + auto buf = encrypted_disk->readFile(file_name, {}, 0); + String str; + readStringUntilEOF(str, *buf); + return str; + } + + static String getBinaryRepresentation(const String & abs_path) + { + auto buf = createReadBufferFromFileBase(abs_path, {}, 0); + String str; + readStringUntilEOF(str, *buf); + return str; + } + + static void checkBinaryRepresentation(const String & abs_path, size_t size) + { + String str = getBinaryRepresentation(abs_path); + EXPECT_EQ(str.size(), size); + if (str.size() >= 3) + { + EXPECT_EQ(str.substr(0, 3), "ENC"); + } + } + + std::unique_ptr temp_dir; + std::shared_ptr local_disk; + std::shared_ptr encrypted_disk; +}; + + +TEST_F(DiskEncryptedTest, WriteAndRead) +{ + makeEncryptedDisk(FileEncryption::Algorithm::AES_128_CTR, "1234567890123456"); + + /// No files + EXPECT_EQ(getFileNames(), ""); + + /// Write a file. + { + auto buf = encrypted_disk->writeFile("a.txt", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite); + writeString(StringRef{"Some text"}, *buf); + } + + /// Now we have one file. + EXPECT_EQ(getFileNames(), "a.txt"); + EXPECT_EQ(encrypted_disk->getFileSize("a.txt"), 9); + + /// Read the file. + EXPECT_EQ(getFileContents("a.txt"), "Some text"); + checkBinaryRepresentation(getDirectory() + "a.txt", kHeaderSize + 9); + + /// Remove the file. + encrypted_disk->removeFile("a.txt"); + + /// No files again. + EXPECT_EQ(getFileNames(), ""); +} + + +TEST_F(DiskEncryptedTest, Append) +{ + makeEncryptedDisk(FileEncryption::Algorithm::AES_128_CTR, "1234567890123456"); + + /// Write a file (we use the append mode). + { + auto buf = encrypted_disk->writeFile("a.txt", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Append); + writeString(StringRef{"Some text"}, *buf); + } + + EXPECT_EQ(encrypted_disk->getFileSize("a.txt"), 9); + EXPECT_EQ(getFileContents("a.txt"), "Some text"); + checkBinaryRepresentation(getDirectory() + "a.txt", kHeaderSize + 9); + + /// Append the file. + { + auto buf = encrypted_disk->writeFile("a.txt", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Append); + writeString(StringRef{" Another text"}, *buf); + } + + EXPECT_EQ(encrypted_disk->getFileSize("a.txt"), 22); + EXPECT_EQ(getFileContents("a.txt"), "Some text Another text"); + checkBinaryRepresentation(getDirectory() + "a.txt", kHeaderSize + 22); +} + + +TEST_F(DiskEncryptedTest, Truncate) +{ + makeEncryptedDisk(FileEncryption::Algorithm::AES_128_CTR, "1234567890123456"); + + /// Write a file (we use the append mode). 
+ { + auto buf = encrypted_disk->writeFile("a.txt", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Append); + writeString(StringRef{"Some text"}, *buf); + } + + EXPECT_EQ(encrypted_disk->getFileSize("a.txt"), 9); + EXPECT_EQ(getFileContents("a.txt"), "Some text"); + checkBinaryRepresentation(getDirectory() + "a.txt", kHeaderSize + 9); + + /// Truncate the file. + encrypted_disk->truncateFile("a.txt", 4); + + EXPECT_EQ(encrypted_disk->getFileSize("a.txt"), 4); + EXPECT_EQ(getFileContents("a.txt"), "Some"); + checkBinaryRepresentation(getDirectory() + "a.txt", kHeaderSize + 4); + + /// Truncate the file to zero size. + encrypted_disk->truncateFile("a.txt", 0); + + EXPECT_EQ(encrypted_disk->getFileSize("a.txt"), 0); + EXPECT_EQ(getFileContents("a.txt"), ""); + checkBinaryRepresentation(getDirectory() + "a.txt", 0); +} + + +TEST_F(DiskEncryptedTest, ZeroFileSize) +{ + makeEncryptedDisk(FileEncryption::Algorithm::AES_128_CTR, "1234567890123456"); + + /// Write nothing to a file. + { + auto buf = encrypted_disk->writeFile("a.txt", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite); + } + + EXPECT_EQ(encrypted_disk->getFileSize("a.txt"), 0); + EXPECT_EQ(getFileContents("a.txt"), ""); + checkBinaryRepresentation(getDirectory() + "a.txt", 0); + + /// Append the file with nothing. + { + auto buf = encrypted_disk->writeFile("a.txt", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Append); + } + + EXPECT_EQ(encrypted_disk->getFileSize("a.txt"), 0); + EXPECT_EQ(getFileContents("a.txt"), ""); + checkBinaryRepresentation(getDirectory() + "a.txt", 0); + + /// Truncate the file to zero size. + encrypted_disk->truncateFile("a.txt", 0); + + EXPECT_EQ(encrypted_disk->getFileSize("a.txt"), 0); + EXPECT_EQ(getFileContents("a.txt"), ""); + checkBinaryRepresentation(getDirectory() + "a.txt", 0); +} + + +TEST_F(DiskEncryptedTest, AnotherFolder) +{ + /// Encrypted disk will store its files at the path "folder1/folder2/". + local_disk->createDirectories("folder1/folder2"); + makeEncryptedDisk(FileEncryption::Algorithm::AES_128_CTR, "1234567890123456", "folder1/folder2/"); + + /// Write a file. + { + auto buf = encrypted_disk->writeFile("a.txt", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite); + writeString(StringRef{"Some text"}, *buf); + } + + /// Now we have one file. + EXPECT_EQ(getFileNames(), "a.txt"); + EXPECT_EQ(encrypted_disk->getFileSize("a.txt"), 9); + + /// Read the file. + EXPECT_EQ(getFileContents("a.txt"), "Some text"); + checkBinaryRepresentation(getDirectory() + "folder1/folder2/a.txt", kHeaderSize + 9); +} + + +TEST_F(DiskEncryptedTest, RandomIV) +{ + makeEncryptedDisk(FileEncryption::Algorithm::AES_128_CTR, "1234567890123456"); + + /// Write two files with the same contents. + { + auto buf = encrypted_disk->writeFile("a.txt", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite); + writeString(StringRef{"Some text"}, *buf); + } + { + auto buf = encrypted_disk->writeFile("b.txt", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite); + writeString(StringRef{"Some text"}, *buf); + } + + /// Now we have two files. + EXPECT_EQ(encrypted_disk->getFileSize("a.txt"), 9); + EXPECT_EQ(encrypted_disk->getFileSize("b.txt"), 9); + + /// Read the files. 
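For orientation, the RandomIV assertions just below pin down the on-disk layout these tests rely on. Written out as constants, with the values inferred from the test itself rather than from a spec:

#include <cstddef>

namespace encrypted_layout_sketch
{
    /// bytes [0, 16): "ENC" magic plus algorithm/key-id fields, equal for files written with the same settings
    constexpr std::size_t iv_offset = 16;
    /// bytes [16, 16 + InitVector::kSize): per-file random init vector, so two files differ here
    /// bytes [end of IV, Header::kSize): remainder of the header, equal again
    /// bytes [Header::kSize, ...): ciphertext, different even for identical plaintexts because the IV differs
}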
+ EXPECT_EQ(getFileContents("a.txt"), "Some text"); + EXPECT_EQ(getFileContents("b.txt"), "Some text"); + checkBinaryRepresentation(getDirectory() + "a.txt", kHeaderSize + 9); + checkBinaryRepresentation(getDirectory() + "b.txt", kHeaderSize + 9); + + String bina = getBinaryRepresentation(getDirectory() + "a.txt"); + String binb = getBinaryRepresentation(getDirectory() + "b.txt"); + constexpr size_t iv_offset = 16; + constexpr size_t iv_size = FileEncryption::InitVector::kSize; + EXPECT_EQ(bina.substr(0, iv_offset), binb.substr(0, iv_offset)); /// Part of the header before IV is the same. + EXPECT_NE(bina.substr(iv_offset, iv_size), binb.substr(iv_offset, iv_size)); /// IV differs. + EXPECT_EQ(bina.substr(iv_offset + iv_size, kHeaderSize - iv_offset - iv_size), + binb.substr(iv_offset + iv_size, kHeaderSize - iv_offset - iv_size)); /// Part of the header after IV is the same. + EXPECT_NE(bina.substr(kHeaderSize), binb.substr(kHeaderSize)); /// Encrypted data differs. +} + + +#if 0 +/// TODO: Try to change DiskEncrypted::writeFile() to fix this test. +/// It fails sometimes with quite an unexpected error: +/// libc++abi: terminating with uncaught exception of type std::__1::__fs::filesystem::filesystem_error: +/// filesystem error: in file_size: No such file or directory [/tmp/tmp14608aaaaaa/a.txt] +/// Aborted (core dumped) +/// It happens because for encrypted disks file appending is not atomic (see DiskEncrypted::writeFile()) +/// and a file could be removed after checking its existence but before getting its size. +TEST_F(DiskEncryptedTest, RemoveFileDuringWriting) +{ + makeEncryptedDisk(FileEncryption::Algorithm::AES_128_CTR, "1234567890123456"); + + size_t n = 100000; + std::thread t1{[&] + { + for (size_t i = 0; i != n; ++i) + encrypted_disk->writeFile("a.txt", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Append); + }}; + + std::thread t2{[&] + { + for (size_t i = 0; i != n; ++i) + encrypted_disk->removeFileIfExists("a.txt"); + }}; + + t1.join(); + t2.join(); +} +#endif diff --git a/src/IO/ReadBufferFromFileDecorator.cpp b/src/IO/ReadBufferFromFileDecorator.cpp index 5810eccbac72..f4a996fc278c 100644 --- a/src/IO/ReadBufferFromFileDecorator.cpp +++ b/src/IO/ReadBufferFromFileDecorator.cpp @@ -5,7 +5,13 @@ namespace DB { ReadBufferFromFileDecorator::ReadBufferFromFileDecorator(std::unique_ptr impl_) - : impl(std::move(impl_)) + : ReadBufferFromFileDecorator(std::move(impl_), "") +{ +} + + +ReadBufferFromFileDecorator::ReadBufferFromFileDecorator(std::unique_ptr impl_, const String & file_name_) + : impl(std::move(impl_)), file_name(file_name_) { swap(*impl); } @@ -13,6 +19,8 @@ ReadBufferFromFileDecorator::ReadBufferFromFileDecorator(std::unique_ptr(impl.get())) return buffer->getFileName(); return std::string(); diff --git a/src/IO/ReadBufferFromFileDecorator.h b/src/IO/ReadBufferFromFileDecorator.h index 1122e02bb206..c83ec669203a 100644 --- a/src/IO/ReadBufferFromFileDecorator.h +++ b/src/IO/ReadBufferFromFileDecorator.h @@ -11,6 +11,7 @@ class ReadBufferFromFileDecorator : public ReadBufferFromFileBase { public: explicit ReadBufferFromFileDecorator(std::unique_ptr impl_); + ReadBufferFromFileDecorator(std::unique_ptr impl_, const String & file_name_); std::string getFileName() const override; @@ -22,6 +23,7 @@ class ReadBufferFromFileDecorator : public ReadBufferFromFileBase protected: std::unique_ptr impl; + String file_name; }; } From f7fc48434761774f344771d1727d82c5614f088e Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Tue, 26 Oct 2021 11:45:14 +0300 Subject: [PATCH 144/472] Fix 
compilation. --- src/Disks/tests/gtest_disk_encrypted.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Disks/tests/gtest_disk_encrypted.cpp b/src/Disks/tests/gtest_disk_encrypted.cpp index e401df4c72b7..53c571769f19 100644 --- a/src/Disks/tests/gtest_disk_encrypted.cpp +++ b/src/Disks/tests/gtest_disk_encrypted.cpp @@ -57,7 +57,7 @@ class DiskEncryptedTest : public ::testing::Test String getFileContents(const String & file_name) { - auto buf = encrypted_disk->readFile(file_name, {}, 0); + auto buf = encrypted_disk->readFile(file_name, DBMS_DEFAULT_BUFFER_SIZE, 0, 0, 0, nullptr); String str; readStringUntilEOF(str, *buf); return str; @@ -65,7 +65,7 @@ class DiskEncryptedTest : public ::testing::Test static String getBinaryRepresentation(const String & abs_path) { - auto buf = createReadBufferFromFileBase(abs_path, {}, 0); + auto buf = createReadBufferFromFileBase(abs_path, 0, 0, 0, nullptr); String str; readStringUntilEOF(str, *buf); return str; From 27919de9666be4031e7938340ad86709deec091d Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 26 Oct 2021 20:23:24 +0300 Subject: [PATCH 145/472] Backport #30668 to 21.9: Fix PREWHERE with WHERE in case of always true PREWHERE --- src/Storages/MergeTree/MergeTreeRangeReader.cpp | 13 +++++++++---- .../02021_prewhere_always_true_where.reference | 1 + .../02021_prewhere_always_true_where.sql | 5 +++++ 3 files changed, 15 insertions(+), 4 deletions(-) create mode 100644 tests/queries/0_stateless/02021_prewhere_always_true_where.reference create mode 100644 tests/queries/0_stateless/02021_prewhere_always_true_where.sql diff --git a/src/Storages/MergeTree/MergeTreeRangeReader.cpp b/src/Storages/MergeTree/MergeTreeRangeReader.cpp index 7517908feb35..2b46fa531277 100644 --- a/src/Storages/MergeTree/MergeTreeRangeReader.cpp +++ b/src/Storages/MergeTree/MergeTreeRangeReader.cpp @@ -1044,10 +1044,15 @@ void MergeTreeRangeReader::executePrewhereActionsAndFilterColumns(ReadResult & r /// Filter in WHERE instead else { - auto type = getSampleBlock().getByName(prewhere_info->prewhere_column_name).type; - ColumnWithTypeAndName col(result.getFilterHolder()->convertToFullColumnIfConst(), std::make_shared(), ""); - result.columns[prewhere_column_pos] = castColumn(col, type); - result.clearFilter(); // Acting as a flag to not filter in PREWHERE + if (prewhere_info->remove_prewhere_column) + result.columns.erase(result.columns.begin() + prewhere_column_pos); + else + { + auto type = getSampleBlock().getByName(prewhere_info->prewhere_column_name).type; + ColumnWithTypeAndName col(result.getFilterHolder()->convertToFullColumnIfConst(), std::make_shared(), ""); + result.columns[prewhere_column_pos] = castColumn(col, type); + result.clearFilter(); // Acting as a flag to not filter in PREWHERE + } } } diff --git a/tests/queries/0_stateless/02021_prewhere_always_true_where.reference b/tests/queries/0_stateless/02021_prewhere_always_true_where.reference new file mode 100644 index 000000000000..d00491fd7e5b --- /dev/null +++ b/tests/queries/0_stateless/02021_prewhere_always_true_where.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/02021_prewhere_always_true_where.sql b/tests/queries/0_stateless/02021_prewhere_always_true_where.sql new file mode 100644 index 000000000000..95dcb6a15c24 --- /dev/null +++ b/tests/queries/0_stateless/02021_prewhere_always_true_where.sql @@ -0,0 +1,5 @@ +drop table if exists data_02021; +create table data_02021 (key Int) engine=MergeTree() order by key; +insert into data_02021 values (1); 
+select count() from data_02021 prewhere 1 or ignore(key) where ignore(key)=0; +drop table data_02021; From 67dc2730d1fafbdb06cc7b8af3fe302b6e683ba0 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 26 Oct 2021 20:24:15 +0300 Subject: [PATCH 146/472] Backport #30667 to 21.9: CompiledExpressionCache limit elements size --- programs/server/Server.cpp | 7 ++++++- programs/server/config.xml | 5 ++++- programs/server/config.yaml.example | 5 ++++- src/Common/LRUCache.h | 17 ++++++++++------- .../JIT/CompiledExpressionCache.cpp | 4 ++-- src/Interpreters/JIT/CompiledExpressionCache.h | 2 +- 6 files changed, 27 insertions(+), 13 deletions(-) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 9dd51b596664..dc0aa8930714 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -946,9 +946,14 @@ if (ThreadFuzzer::instance().isEffective()) global_context->setMMappedFileCache(mmap_cache_size); #if USE_EMBEDDED_COMPILER + /// 128 MB constexpr size_t compiled_expression_cache_size_default = 1024 * 1024 * 128; size_t compiled_expression_cache_size = config().getUInt64("compiled_expression_cache_size", compiled_expression_cache_size_default); - CompiledExpressionCacheFactory::instance().init(compiled_expression_cache_size); + + constexpr size_t compiled_expression_cache_elements_size_default = 10000; + size_t compiled_expression_cache_elements_size = config().getUInt64("compiled_expression_cache_elements_size", compiled_expression_cache_elements_size_default); + + CompiledExpressionCacheFactory::instance().init(compiled_expression_cache_size, compiled_expression_cache_elements_size); #endif /// Set path for format schema files diff --git a/programs/server/config.xml b/programs/server/config.xml index f0a89a34d24b..3640226a4d4d 100644 --- a/programs/server/config.xml +++ b/programs/server/config.xml @@ -329,9 +329,12 @@ --> 1000 - + 134217728 + + 10000 + /var/lib/clickhouse/ diff --git a/programs/server/config.yaml.example b/programs/server/config.yaml.example index ae4eac49a641..455b4713e007 100644 --- a/programs/server/config.yaml.example +++ b/programs/server/config.yaml.example @@ -279,9 +279,12 @@ mark_cache_size: 5368709120 # also it can be dropped manually by the SYSTEM DROP MMAP CACHE query. mmap_cache_size: 1000 -# Cache size for compiled expressions. +# Cache size in bytes for compiled expressions. compiled_expression_cache_size: 134217728 +# Cache size in elements for compiled expressions. +compiled_expression_cache_elements_size: 10000 + # Path to data directory, with trailing slash. path: /var/lib/clickhouse/ diff --git a/src/Common/LRUCache.h b/src/Common/LRUCache.h index 10533a154d10..fedbcb8e4478 100644 --- a/src/Common/LRUCache.h +++ b/src/Common/LRUCache.h @@ -36,12 +36,13 @@ class LRUCache using Mapped = TMapped; using MappedPtr = std::shared_ptr; -private: - using Clock = std::chrono::steady_clock; - -public: - LRUCache(size_t max_size_) - : max_size(std::max(static_cast(1), max_size_)) {} + /** Initialize LRUCache with max_size and max_elements_size. + * max_elements_size == 0 means no elements size restrictions. + */ + LRUCache(size_t max_size_, size_t max_elements_size_ = 0) + : max_size(std::max(static_cast(1), max_size_)) + , max_elements_size(max_elements_size_) + {} MappedPtr get(const Key & key) { @@ -252,6 +253,7 @@ class LRUCache /// Total weight of values. 
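Aside: the LRUCache.h hunks in this commit add a second budget, an element count, next to the existing byte budget, and the eviction loop below now honours both (0 means no element limit, and at least one entry is always kept). A std-only toy with the same eviction rule:

#include <cstddef>
#include <list>
#include <string>
#include <unordered_map>

struct TinyLru
{
    size_t max_bytes = 0;
    size_t max_elements = 0;      /// 0 = no element limit
    size_t current_bytes = 0;
    std::list<std::string> queue;                       /// front = least recently used
    std::unordered_map<std::string, size_t> weights;    /// key -> byte weight

    void evictIfNeeded()
    {
        while (queue.size() > 1
               && (current_bytes > max_bytes
                   || (max_elements != 0 && queue.size() > max_elements)))
        {
            const std::string & key = queue.front();
            current_bytes -= weights[key];
            weights.erase(key);
            queue.pop_front();
        }
    }
};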
size_t current_size = 0; const size_t max_size; + const size_t max_elements_size; std::atomic hits {0}; std::atomic misses {0}; @@ -311,7 +313,8 @@ class LRUCache { size_t current_weight_lost = 0; size_t queue_size = cells.size(); - while ((current_size > max_size) && (queue_size > 1)) + + while ((current_size > max_size || (max_elements_size != 0 && queue_size > max_elements_size)) && (queue_size > 1)) { const Key & key = queue.front(); diff --git a/src/Interpreters/JIT/CompiledExpressionCache.cpp b/src/Interpreters/JIT/CompiledExpressionCache.cpp index 98f4eec982d5..674e02236f58 100644 --- a/src/Interpreters/JIT/CompiledExpressionCache.cpp +++ b/src/Interpreters/JIT/CompiledExpressionCache.cpp @@ -16,12 +16,12 @@ CompiledExpressionCacheFactory & CompiledExpressionCacheFactory::instance() return factory; } -void CompiledExpressionCacheFactory::init(size_t cache_size) +void CompiledExpressionCacheFactory::init(size_t cache_size_in_bytes, size_t cache_size_in_elements) { if (cache) throw Exception(ErrorCodes::LOGICAL_ERROR, "CompiledExpressionCache was already initialized"); - cache = std::make_unique(cache_size); + cache = std::make_unique(cache_size_in_bytes, cache_size_in_elements); } CompiledExpressionCache * CompiledExpressionCacheFactory::tryGetCache() diff --git a/src/Interpreters/JIT/CompiledExpressionCache.h b/src/Interpreters/JIT/CompiledExpressionCache.h index 5182a77d77a8..7d20627d5d26 100644 --- a/src/Interpreters/JIT/CompiledExpressionCache.h +++ b/src/Interpreters/JIT/CompiledExpressionCache.h @@ -52,7 +52,7 @@ class CompiledExpressionCacheFactory public: static CompiledExpressionCacheFactory & instance(); - void init(size_t cache_size); + void init(size_t cache_size_in_bytes, size_t cache_size_in_elements); CompiledExpressionCache * tryGetCache(); }; From a42edc9da212782a9e42719d81ce656edea2b519 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 27 Oct 2021 16:31:55 +0300 Subject: [PATCH 147/472] Backport #30663 to 21.9: Fix case-insensetive search in UTF8 strings --- src/Common/StringSearcher.h | 133 ++++++------------ ...163_search_case_insensetive_utf8.reference | 12 ++ .../01163_search_case_insensetive_utf8.sql | 12 ++ 3 files changed, 66 insertions(+), 91 deletions(-) create mode 100644 tests/queries/0_stateless/01163_search_case_insensetive_utf8.reference create mode 100644 tests/queries/0_stateless/01163_search_case_insensetive_utf8.sql diff --git a/src/Common/StringSearcher.h b/src/Common/StringSearcher.h index 09011d910eae..706e2d56235d 100644 --- a/src/Common/StringSearcher.h +++ b/src/Common/StringSearcher.h @@ -116,9 +116,9 @@ class StringSearcher : private StringSearcherBase /// lower and uppercase variants of the first octet of the first character in `needle` size_t length_l = UTF8::convertCodePointToUTF8(first_l_u32, l_seq, sizeof(l_seq)); - size_t length_r = UTF8::convertCodePointToUTF8(first_u_u32, u_seq, sizeof(u_seq)); + size_t length_u = UTF8::convertCodePointToUTF8(first_u_u32, u_seq, sizeof(u_seq)); - if (length_l != length_r) + if (length_l != length_u) throw Exception{"UTF8 sequences with different lowercase and uppercase lengths are not supported", ErrorCodes::UNSUPPORTED_PARAMETER}; } @@ -183,6 +183,31 @@ class StringSearcher : private StringSearcherBase #endif } + template > + ALWAYS_INLINE bool compareTrivial(const CharT * haystack_pos, const CharT * const haystack_end, const uint8_t * needle_pos) const + { + while (haystack_pos < haystack_end && needle_pos < needle_end) + { + auto haystack_code_point = 
UTF8::convertUTF8ToCodePoint(haystack_pos, haystack_end - haystack_pos); + auto needle_code_point = UTF8::convertUTF8ToCodePoint(needle_pos, needle_end - needle_pos); + + /// Invalid UTF-8, should not compare equals + if (!haystack_code_point || !needle_code_point) + break; + + /// Not equals case insensitive. + if (Poco::Unicode::toLower(*haystack_code_point) != Poco::Unicode::toLower(*needle_code_point)) + break; + + /// @note assuming sequences for lowercase and uppercase have exact same length (that is not always true) + const auto len = UTF8::seqLength(*haystack_pos); + haystack_pos += len; + needle_pos += len; + } + + return needle_pos == needle_end; + } + template > ALWAYS_INLINE bool compare(const CharT * /*haystack*/, const CharT * haystack_end, const CharT * pos) const { @@ -200,34 +225,15 @@ class StringSearcher : private StringSearcherBase { if (mask == cachemask) { - pos += cache_valid_len; - auto needle_pos = needle + cache_valid_len; - - while (needle_pos < needle_end) - { - auto haystack_code_point = UTF8::convertUTF8ToCodePoint(pos, haystack_end - pos); - auto needle_code_point = UTF8::convertUTF8ToCodePoint(needle_pos, needle_end - needle_pos); - - /// Invalid UTF-8, should not compare equals - if (!haystack_code_point || !needle_code_point) - break; - - /// Not equals case insensitive. - if (Poco::Unicode::toLower(*haystack_code_point) != Poco::Unicode::toLower(*needle_code_point)) - break; - - /// @note assuming sequences for lowercase and uppercase have exact same length (that is not always true) - const auto len = UTF8::seqLength(*pos); - pos += len; - needle_pos += len; - } - - if (needle_pos == needle_end) + if (compareTrivial(pos, haystack_end, needle)) return true; } } else if ((mask & cachemask) == cachemask) - return true; + { + if (compareTrivial(pos, haystack_end, needle)) + return true; + } return false; } @@ -238,25 +244,7 @@ class StringSearcher : private StringSearcherBase pos += first_needle_symbol_is_ascii; auto needle_pos = needle + first_needle_symbol_is_ascii; - while (needle_pos < needle_end) - { - auto haystack_code_point = UTF8::convertUTF8ToCodePoint(pos, haystack_end - pos); - auto needle_code_point = UTF8::convertUTF8ToCodePoint(needle_pos, needle_end - needle_pos); - - /// Invalid UTF-8, should not compare equals - if (!haystack_code_point || !needle_code_point) - break; - - /// Not equals case insensitive. 
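As an aside, the net effect of this StringSearcher.h refactor is that both compare() and search() now funnel every vectorised candidate through compareTrivial() instead of trusting the cached 16-byte lower/upper masks alone. The screen-then-verify shape of that loop, shown with an ASCII-only stand-in for the code-point comparison (the real code decodes UTF-8 and lowercases via Poco::Unicode):

#include <cctype>
#include <cstddef>
#include <string_view>

size_t findCaseInsensitive(std::string_view haystack, std::string_view needle)
{
    if (needle.empty())
        return 0;
    auto lower = [](char c) { return static_cast<char>(std::tolower(static_cast<unsigned char>(c))); };
    for (size_t pos = 0; pos + needle.size() <= haystack.size(); ++pos)
    {
        if (lower(haystack[pos]) != lower(needle[0]))
            continue;                                   /// cheap screen on the first character
        size_t i = 1;
        while (i < needle.size() && lower(haystack[pos + i]) == lower(needle[i]))
            ++i;                                        /// full verification, never skipped
        if (i == needle.size())
            return pos;
    }
    return std::string_view::npos;
}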
- if (Poco::Unicode::toLower(*haystack_code_point) != Poco::Unicode::toLower(*needle_code_point)) - break; - - const auto len = UTF8::seqLength(*pos); - pos += len; - needle_pos += len; - } - - if (needle_pos == needle_end) + if (compareTrivial(pos, haystack_end, needle_pos)) return true; } @@ -299,40 +287,21 @@ class StringSearcher : private StringSearcherBase const auto v_against_l_offset = _mm_cmpeq_epi8(v_haystack_offset, cachel); const auto v_against_u_offset = _mm_cmpeq_epi8(v_haystack_offset, cacheu); const auto v_against_l_or_u_offset = _mm_or_si128(v_against_l_offset, v_against_u_offset); - const auto mask_offset = _mm_movemask_epi8(v_against_l_or_u_offset); + const auto mask_offset_both = _mm_movemask_epi8(v_against_l_or_u_offset); if (0xffff == cachemask) { - if (mask_offset == cachemask) + if (mask_offset_both == cachemask) { - auto haystack_pos = haystack + cache_valid_len; - auto needle_pos = needle + cache_valid_len; - - while (haystack_pos < haystack_end && needle_pos < needle_end) - { - auto haystack_code_point = UTF8::convertUTF8ToCodePoint(haystack_pos, haystack_end - haystack_pos); - auto needle_code_point = UTF8::convertUTF8ToCodePoint(needle_pos, needle_end - needle_pos); - - /// Invalid UTF-8, should not compare equals - if (!haystack_code_point || !needle_code_point) - break; - - /// Not equals case insensitive. - if (Poco::Unicode::toLower(*haystack_code_point) != Poco::Unicode::toLower(*needle_code_point)) - break; - - /// @note assuming sequences for lowercase and uppercase have exact same length (that is not always true) - const auto len = UTF8::seqLength(*haystack_pos); - haystack_pos += len; - needle_pos += len; - } - - if (needle_pos == needle_end) + if (compareTrivial(haystack, haystack_end, needle)) return haystack; } } - else if ((mask_offset & cachemask) == cachemask) - return haystack; + else if ((mask_offset_both & cachemask) == cachemask) + { + if (compareTrivial(haystack, haystack_end, needle)) + return haystack; + } /// first octet was ok, but not the first 16, move to start of next sequence and reapply haystack += UTF8::seqLength(*haystack); @@ -349,25 +318,7 @@ class StringSearcher : private StringSearcherBase auto haystack_pos = haystack + first_needle_symbol_is_ascii; auto needle_pos = needle + first_needle_symbol_is_ascii; - while (haystack_pos < haystack_end && needle_pos < needle_end) - { - auto haystack_code_point = UTF8::convertUTF8ToCodePoint(haystack_pos, haystack_end - haystack_pos); - auto needle_code_point = UTF8::convertUTF8ToCodePoint(needle_pos, needle_end - needle_pos); - - /// Invalid UTF-8, should not compare equals - if (!haystack_code_point || !needle_code_point) - break; - - /// Not equals case insensitive. 
- if (Poco::Unicode::toLower(*haystack_code_point) != Poco::Unicode::toLower(*needle_code_point)) - break; - - const auto len = UTF8::seqLength(*haystack_pos); - haystack_pos += len; - needle_pos += len; - } - - if (needle_pos == needle_end) + if (compareTrivial(haystack_pos, haystack_end, needle_pos)) return haystack; } diff --git a/tests/queries/0_stateless/01163_search_case_insensetive_utf8.reference b/tests/queries/0_stateless/01163_search_case_insensetive_utf8.reference new file mode 100644 index 000000000000..66f4ca4a5a83 --- /dev/null +++ b/tests/queries/0_stateless/01163_search_case_insensetive_utf8.reference @@ -0,0 +1,12 @@ +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 diff --git a/tests/queries/0_stateless/01163_search_case_insensetive_utf8.sql b/tests/queries/0_stateless/01163_search_case_insensetive_utf8.sql new file mode 100644 index 000000000000..99bdd38ceae4 --- /dev/null +++ b/tests/queries/0_stateless/01163_search_case_insensetive_utf8.sql @@ -0,0 +1,12 @@ +SELECT positionCaseInsensitiveUTF8(materialize('сссссс'), 'Ё'); +SELECT countSubstringsCaseInsensitiveUTF8(materialize('сссссс'), 'ё'); +SELECT positionCaseInsensitiveUTF8(materialize('сссссссс'), 'ё'); +SELECT countSubstringsCaseInsensitiveUTF8(materialize('сссссссс'), 'Ё'); +SELECT countSubstringsCaseInsensitiveUTF8(materialize('ссссссссссссссссссс'), 'ёёёёёёё'); +SELECT positionCaseInsensitiveUTF8(materialize('ссссссссссссссссссс'), 'ёЁёЁёЁё'); +SELECT countSubstringsCaseInsensitiveUTF8(materialize('ссссссссссссссссссс'), 'ёЁёЁёЁёЁёЁ'); +SELECT positionCaseInsensitiveUTF8(materialize('ссссссссссссссссссс'), 'ЁЁЁЁЁЁЁЁЁЁ'); +SELECT countSubstringsCaseInsensitiveUTF8(materialize('ссссссссссссссссссс'), 'ёЁёЁёЁёссс'); +SELECT positionCaseInsensitiveUTF8(materialize('ссссссссссссссссссс'), 'ёЁёЁёЁёссс'); +SELECT countSubstringsCaseInsensitiveUTF8(materialize('ссссссссссссссссссс'), 'ЁС'); +SELECT positionCaseInsensitiveUTF8(materialize('ссссссссссссссссссс'), 'ёс'); From 3b88d93bf8297fb8bfede85c864d55bf2f94d08e Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 27 Oct 2021 20:34:31 +0300 Subject: [PATCH 148/472] Backport #29328 to 21.9: Fix hanging DDL queries on Replicated database --- src/Databases/DatabaseReplicated.cpp | 43 +++++++++++++++++++++++++--- src/Databases/DatabaseReplicated.h | 1 + 2 files changed, 40 insertions(+), 4 deletions(-) diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp index 8e8fb4e2d6df..1889307fcf95 100644 --- a/src/Databases/DatabaseReplicated.cpp +++ b/src/Databases/DatabaseReplicated.cpp @@ -298,10 +298,30 @@ void DatabaseReplicated::createReplicaNodesInZooKeeper(const zkutil::ZooKeeperPt /// Write host name to replica_path, it will protect from multiple replicas with the same name auto host_id = getHostID(getContext(), db_uuid); - Coordination::Requests ops; - ops.emplace_back(zkutil::makeCreateRequest(replica_path, host_id, zkutil::CreateMode::Persistent)); - ops.emplace_back(zkutil::makeCreateRequest(replica_path + "/log_ptr", "0", zkutil::CreateMode::Persistent)); - current_zookeeper->multi(ops); + for (int attempts = 10; attempts > 0; --attempts) + { + Coordination::Stat stat; + String max_log_ptr_str = current_zookeeper->get(zookeeper_path + "/max_log_ptr", &stat); + Coordination::Requests ops; + ops.emplace_back(zkutil::makeCreateRequest(replica_path, host_id, zkutil::CreateMode::Persistent)); + ops.emplace_back(zkutil::makeCreateRequest(replica_path + "/log_ptr", "0", zkutil::CreateMode::Persistent)); + /// In addition to creating the replica 
nodes, we record the max_log_ptr at the instant where + /// we declared ourself as an existing replica. We'll need this during recoverLostReplica to + /// notify other nodes that issued new queries while this node was recovering. + ops.emplace_back(zkutil::makeCheckRequest(zookeeper_path + "/max_log_ptr", stat.version)); + Coordination::Responses responses; + const auto code = current_zookeeper->tryMulti(ops, responses); + if (code == Coordination::Error::ZOK) + { + max_log_ptr_at_creation = parse(max_log_ptr_str); + break; + } + else if (code == Coordination::Error::ZNODEEXISTS || attempts == 1) + { + /// If its our last attempt, or if the replica already exists, fail immediately. + zkutil::KeeperMultiException::check(code, ops, responses); + } + } createEmptyLogEntry(current_zookeeper); } @@ -612,6 +632,21 @@ void DatabaseReplicated::recoverLostReplica(const ZooKeeperPtr & current_zookeep InterpreterCreateQuery(query_ast, create_query_context).execute(); } + if (max_log_ptr_at_creation != 0) + { + /// If the replica is new and some of the queries applied during recovery + /// where issued after the replica was created, then other nodes might be + /// waiting for this node to notify them that the query was applied. + for (UInt32 ptr = max_log_ptr_at_creation; ptr <= max_log_ptr; ++ptr) + { + auto entry_name = DDLTaskBase::getLogEntryName(ptr); + auto path = fs::path(zookeeper_path) / "log" / entry_name / "finished" / getFullReplicaName(); + auto status = ExecutionStatus(0).serializeText(); + auto res = current_zookeeper->tryCreate(path, status, zkutil::CreateMode::Persistent); + if (res == Coordination::Error::ZOK) + LOG_INFO(log, "Marked recovered {} as finished", entry_name); + } + } current_zookeeper->set(replica_path + "/log_ptr", toString(max_log_ptr)); } diff --git a/src/Databases/DatabaseReplicated.h b/src/Databases/DatabaseReplicated.h index 41b1bf13e5f2..35cb2600f723 100644 --- a/src/Databases/DatabaseReplicated.h +++ b/src/Databases/DatabaseReplicated.h @@ -90,6 +90,7 @@ class DatabaseReplicated : public DatabaseAtomic std::atomic_bool is_readonly = true; std::unique_ptr ddl_worker; + UInt32 max_log_ptr_at_creation = 0; mutable ClusterPtr cluster; }; From 4ef7d2ee2d47ae8dfabdb0ca5400034f594c9086 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 27 Oct 2021 22:32:46 +0300 Subject: [PATCH 149/472] Backport #30717 to 21.9: Fix 00975_move_partition_merge_tree --- src/Storages/StorageMergeTree.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index b097c996107b..eea05ddf2a9a 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -1249,6 +1249,7 @@ bool StorageMergeTree::optimize( ActionLock StorageMergeTree::stopMergesAndWait() { + /// TODO allow to stop merges in specific partition only (like it's done in ReplicatedMergeTree) std::unique_lock lock(currently_processing_in_background_mutex); /// Asks to complete merges and does not allow them to start. 
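Aside: the two StorageMergeTree.cpp hunks below add `auto merges_blocker = stopMergesAndWait();` to replacePartitionFrom() and movePartitionToTable(). The underlying pattern is an RAII action lock: background merges stay blocked for as long as the returned object is alive, so no part can be merged away in the middle of the operation. A toy version of that pattern:

#include <atomic>

struct MergeBlockerSketch
{
    std::atomic<int> & counter;
    explicit MergeBlockerSketch(std::atomic<int> & c) : counter(c) { ++counter; }  /// merges see counter > 0 and do not schedule
    ~MergeBlockerSketch() { --counter; }                                           /// merges resume once the last blocker is gone
};

void movePartitionSketch(std::atomic<int> & merges_blocked)
{
    MergeBlockerSketch blocker(merges_blocked);   /// analogous to `auto merges_blocker = stopMergesAndWait();`
    /// ... detach / clone the data parts while no new merge can start ...
}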
@@ -1402,6 +1403,7 @@ void StorageMergeTree::replacePartitionFrom(const StoragePtr & source_table, con { auto lock1 = lockForShare(local_context->getCurrentQueryId(), local_context->getSettingsRef().lock_acquire_timeout); auto lock2 = source_table->lockForShare(local_context->getCurrentQueryId(), local_context->getSettingsRef().lock_acquire_timeout); + auto merges_blocker = stopMergesAndWait(); auto source_metadata_snapshot = source_table->getInMemoryMetadataPtr(); auto my_metadata_snapshot = getInMemoryMetadataPtr(); @@ -1475,6 +1477,7 @@ void StorageMergeTree::movePartitionToTable(const StoragePtr & dest_table, const { auto lock1 = lockForShare(local_context->getCurrentQueryId(), local_context->getSettingsRef().lock_acquire_timeout); auto lock2 = dest_table->lockForShare(local_context->getCurrentQueryId(), local_context->getSettingsRef().lock_acquire_timeout); + auto merges_blocker = stopMergesAndWait(); auto dest_table_storage = std::dynamic_pointer_cast(dest_table); if (!dest_table_storage) From 389151ee4607ba3d025913b8774ff1d6cfecf920 Mon Sep 17 00:00:00 2001 From: Neng Liu Date: Thu, 28 Oct 2021 11:44:04 +0800 Subject: [PATCH 150/472] add a local_engine demo --- utils/CMakeLists.txt | 1 + utils/local-engine/CMakeLists.txt | 19 +++ utils/local-engine/local_engine.cpp | 196 ++++++++++++++++++++++++++++ utils/local-engine/table.csv | 5 + utils/local-engine/table.json | 20 +++ 5 files changed, 241 insertions(+) create mode 100644 utils/local-engine/CMakeLists.txt create mode 100644 utils/local-engine/local_engine.cpp create mode 100644 utils/local-engine/table.csv create mode 100644 utils/local-engine/table.json diff --git a/utils/CMakeLists.txt b/utils/CMakeLists.txt index a6bf2843e9a7..519509914d86 100644 --- a/utils/CMakeLists.txt +++ b/utils/CMakeLists.txt @@ -16,6 +16,7 @@ add_subdirectory (report) # Not used in package if (NOT DEFINED ENABLE_UTILS OR ENABLE_UTILS) add_subdirectory (compressor) + add_subdirectory (local-engine) add_subdirectory (iotest) add_subdirectory (corrector_utf8) add_subdirectory (zookeeper-cli) diff --git a/utils/local-engine/CMakeLists.txt b/utils/local-engine/CMakeLists.txt new file mode 100644 index 000000000000..3b60786ffefd --- /dev/null +++ b/utils/local-engine/CMakeLists.txt @@ -0,0 +1,19 @@ +set(RAPIDJSON_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/rapidjson/include") +function(add_cxx_compile_options option) + add_compile_options("$<$,CXX>:${option}>") +endfunction() +add_cxx_compile_options(-Wzero-as-null-pointer-constant) +add_executable (local_engine local_engine.cpp) +target_include_directories(local_engine PRIVATE ${RAPIDJSON_INCLUDE_DIR}) + +set (CLICKHOUSE_SERVER_LINK + PRIVATE + dbms + clickhouse_aggregate_functions + clickhouse_common_io + clickhouse_functions + clickhouse_storages_system + ) + +target_link_libraries(local_engine ${CLICKHOUSE_SERVER_LINK}) + diff --git a/utils/local-engine/local_engine.cpp b/utils/local-engine/local_engine.cpp new file mode 100644 index 000000000000..b782975a3450 --- /dev/null +++ b/utils/local-engine/local_engine.cpp @@ -0,0 +1,196 @@ +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + + +using namespace DB; +using namespace rapidjson; + +/** + * SQL example: + * SELECT min(x1),max(x2),sum(x3),count(x4),avg(x5) FROM table1 WHERE x6=* GROUP BY x7 + * + * table defination + * 
SQL columns: + * project + * filter + * aggregate + */ +Block getTableHeader(std::map & cols) +{ + auto internalCols = std::make_shared>(); + internalCols->reserve(cols.size()); + for (const auto & [key, value] : cols) + { + ColumnWithTypeAndName col; + auto & data_type_factory = DataTypeFactory::instance(); + auto type = data_type_factory.get(value); + internalCols->push_back(ColumnWithTypeAndName(type->createColumn(), type, key)); + } + return Block(*internalCols); +} + +std::shared_ptr getSource(const std::string &file, Block &header) { +// std::unique_ptr buf = std::make_unique(file); + auto buf = ReadBufferFromFile(file); + FormatSettings settings; + return std::make_shared(header, buf, RowInputFormatParams{.max_block_size=100}, false, settings); +} + + +std::shared_ptr> getColumns(Document & config) +{ + auto columns = std::make_shared>(); + auto cols = config["columns"].GetArray(); + for (auto * it = cols.Begin(); it != cols.End(); it++) + { + auto col = it->GetObject(); + if (columns->contains(col["name"].GetString())) + { + throw std::logic_error("duplicate column"); + } + columns->emplace(col["name"].GetString(), col["type"].GetString()); + } + return columns; +} + +void registerAllFunctions() +{ + registerFunctions(); + registerAggregateFunctions(); +} + +FunctionOverloadResolverPtr getFunction(const std::string & name) +{ + auto shared_context = Context::createShared(); + auto global_context = Context::createGlobal(shared_context.get()); + auto & factory = FunctionFactory::instance(); + return factory.get(name, global_context); +} + +ActionsDAG::NodeRawConstPtrs getArguments(ActionsDAG::NodeRawConstPtrs nodes, std::vector& args) { + ActionsDAG::NodeRawConstPtrs result; + result.reserve(args.size()); + for (const auto &item : nodes) + { + if (std::find(args.begin(), args.end(), item->result_name) != args.end()) { + result.emplace_back(item); + } + } + return result; +} + +NamesAndTypesList blockToNameAndTypeList(Block & header) +{ + NamesAndTypesList types; + for (const auto &name : header.getNames()) + { + auto column = header.findByName(name); + types.push_back(NameAndTypePair(column->name, column->type)); + } + return types; +} + +QueryPlanStepPtr buildFilter(Block & header) +{ + auto actions_dag = std::make_shared(std::move(blockToNameAndTypeList(header))); +// auto int_type = std::make_shared(); +// auto const_node = actions_dag->addInput(ColumnWithTypeAndName(int_type->createColumnConst(1, 4), int_type, "_1")); +// actions_dag->addOrReplaceInIndex(const_node); + std::string empty_string; + std::vector args = {"x1", "x2"}; + const auto & filter_node = actions_dag->addFunction(std::move(getFunction("less")), getArguments(actions_dag->getIndex(), args), std::move(empty_string)); + actions_dag->getIndex().push_back(&filter_node); + DataStream input_stream = DataStream{.header=header}; + auto filter = std::make_unique(input_stream, actions_dag, std::move(filter_node.result_name), true); + return std::move(filter); +} + +int main(int argc, char ** argv) +{ + registerAllFunctions(); + auto & factory = FunctionFactory::instance(); + std::ifstream ifs("/Users/neng.liu/Documents/GitHub/ClickHouse/utils/local-engine/table.json"); + IStreamWrapper isw(ifs); + + Document d; + d.ParseStream(isw); + auto cols = getColumns(d); + auto header = getTableHeader(*cols); +// std::for_each(header.getNames().begin(), header.getNames().end(), [](const std::string & name) { +// std::cout << name << std::endl; +// }); + + QueryPlan query_plan; +// auto source = 
getSource("/Users/neng.liu/Documents/GitHub/ClickHouse/utils/local-engine/table.csv", header); + auto x1 = ColumnInt32::create({1,2,3}); + auto x2 = ColumnInt32::create({3,2,1}); + Columns columns; + columns.emplace_back(std::move(x1)); + columns.emplace_back(std::move(x2)); + Chunk chunk(std::move(columns),3); + auto source = std::make_shared(header, std::move(chunk)); + std::shared_ptr query_pipelines = std::make_shared(); + + auto source_step = std::make_unique(Pipe(source), "CSV"); + query_plan.addStep(std::move(source_step)); + + auto filter = buildFilter(header); + query_plan.addStep(std::move(filter)); + QueryPlanOptimizationSettings optimization_settings{.optimize_plan=false}; + auto query_pipline = query_plan.buildQueryPipeline(optimization_settings, BuildQueryPipelineSettings()); + + auto buffer = WriteBufferFromFile("/Users/neng.liu/Documents/GitHub/ClickHouse/output.txt"); + auto output = std::make_shared(buffer, header, true, RowOutputFormatParams(), FormatSettings()); + query_pipline->setOutputFormat(output); + auto executor = query_pipline->execute(); + executor->execute(1); +// auto chunk = source->generate(); +// std::cout << chunk.getNumRows(); +} + +// auto col = ColumnUInt8::create(1, 1); +// Columns columns; +// columns.emplace_back(std::move(col)); +// Chunk chunk(std::move(columns), 1); +// +// Block header = {ColumnWithTypeAndName(ColumnUInt8::create(), std::make_shared(), "x")}; +// +// auto source = std::make_shared(std::move(header), std::move(chunk)); +// auto sink = std::make_shared(source->getPort().getHeader()); +// +// connect(source->getPort(), sink->getPort()); +// +// Processors processors; +// processors.emplace_back(std::move(source)); +// processors.emplace_back(std::move(sink)); +// +// PipelineExecutor executor(processors); +// executor.execute(1); diff --git a/utils/local-engine/table.csv b/utils/local-engine/table.csv new file mode 100644 index 000000000000..52ada4d5aa3f --- /dev/null +++ b/utils/local-engine/table.csv @@ -0,0 +1,5 @@ +1,1 +2,5 +3,3 +4,4 +5,3 diff --git a/utils/local-engine/table.json b/utils/local-engine/table.json new file mode 100644 index 000000000000..8176efada2b6 --- /dev/null +++ b/utils/local-engine/table.json @@ -0,0 +1,20 @@ +{ + "columns": [ + { + "name": "x1", + "type": "Int" + }, + { + "name": "x2", + "type": "Int" + } + ], + "query": { + "project": { + "function": "min", + "column" : "x1" + }, + "filter": {}, + "aggregate": ["x2","x3","x4"] + } +} \ No newline at end of file From e4591ccca54df0f2246137ff07b897bd3dd3f9dd Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 28 Oct 2021 20:41:56 +0300 Subject: [PATCH 151/472] Backport #28502 to 21.9: Fix \"Column is not under aggregate function and not in GROUP BY\" with PREWHERE --- src/Interpreters/ActionsDAG.cpp | 14 +++++++------- src/Interpreters/ActionsDAG.h | 6 +++--- src/Interpreters/ExpressionAnalyzer.cpp | 7 ++++++- .../02021_prewhere_column_optimization.reference | 7 +++++++ .../02021_prewhere_column_optimization.sql | 10 ++++++++++ 5 files changed, 33 insertions(+), 11 deletions(-) create mode 100644 tests/queries/0_stateless/02021_prewhere_column_optimization.reference create mode 100644 tests/queries/0_stateless/02021_prewhere_column_optimization.sql diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp index 6375210fd7cb..d6523d8b1aa8 100644 --- a/src/Interpreters/ActionsDAG.cpp +++ b/src/Interpreters/ActionsDAG.cpp @@ -344,7 +344,7 @@ std::string ActionsDAG::dumpNames() const return out.str(); } -void 
ActionsDAG::removeUnusedActions(const NameSet & required_names) +void ActionsDAG::removeUnusedActions(const NameSet & required_names, bool allow_remove_inputs, bool allow_constant_folding) { NodeRawConstPtrs required_nodes; required_nodes.reserve(required_names.size()); @@ -368,10 +368,10 @@ void ActionsDAG::removeUnusedActions(const NameSet & required_names) } index.swap(required_nodes); - removeUnusedActions(); + removeUnusedActions(allow_remove_inputs, allow_constant_folding); } -void ActionsDAG::removeUnusedActions(const Names & required_names) +void ActionsDAG::removeUnusedActions(const Names & required_names, bool allow_remove_inputs, bool allow_constant_folding) { NodeRawConstPtrs required_nodes; required_nodes.reserve(required_names.size()); @@ -391,10 +391,10 @@ void ActionsDAG::removeUnusedActions(const Names & required_names) } index.swap(required_nodes); - removeUnusedActions(); + removeUnusedActions(allow_remove_inputs, allow_constant_folding); } -void ActionsDAG::removeUnusedActions(bool allow_remove_inputs) +void ActionsDAG::removeUnusedActions(bool allow_remove_inputs, bool allow_constant_folding) { std::unordered_set visited_nodes; std::stack stack; @@ -425,9 +425,9 @@ void ActionsDAG::removeUnusedActions(bool allow_remove_inputs) auto * node = stack.top(); stack.pop(); - if (!node->children.empty() && node->column && isColumnConst(*node->column)) + /// Constant folding. + if (allow_constant_folding && !node->children.empty() && node->column && isColumnConst(*node->column)) { - /// Constant folding. node->type = ActionsDAG::ActionType::COLUMN; for (const auto & child : node->children) diff --git a/src/Interpreters/ActionsDAG.h b/src/Interpreters/ActionsDAG.h index d10218bc913b..e87faa80c770 100644 --- a/src/Interpreters/ActionsDAG.h +++ b/src/Interpreters/ActionsDAG.h @@ -164,8 +164,8 @@ class ActionsDAG bool isInputProjected() const { return project_input; } bool isOutputProjected() const { return projected_output; } - void removeUnusedActions(const Names & required_names); - void removeUnusedActions(const NameSet & required_names); + void removeUnusedActions(const Names & required_names, bool allow_remove_inputs = true, bool allow_constant_folding = true); + void removeUnusedActions(const NameSet & required_names, bool allow_remove_inputs = true, bool allow_constant_folding = true); NameSet foldActionsByProjection( const NameSet & required_columns, @@ -273,7 +273,7 @@ class ActionsDAG private: Node & addNode(Node node); - void removeUnusedActions(bool allow_remove_inputs = true); + void removeUnusedActions(bool allow_remove_inputs = true, bool allow_constant_folding = true); #if USE_EMBEDDED_COMPILER void compileFunctions(size_t min_count_to_compile_expression, const std::unordered_set & lazy_executed_nodes = {}); diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 20b30d9dc813..edc4dcd07de1 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -975,7 +975,12 @@ ActionsDAGPtr SelectQueryExpressionAnalyzer::appendPrewhere( /// Remove unused source_columns from prewhere actions. auto tmp_actions_dag = std::make_shared(sourceColumns()); getRootActions(select_query->prewhere(), only_types, tmp_actions_dag); - tmp_actions_dag->removeUnusedActions(NameSet{prewhere_column_name}); + /// Constants cannot be removed since they can be used in other parts of the query. + /// And if they are not used anywhere, except PREWHERE, they will be removed on the next step. 
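Side note: the new allow_constant_folding flag threaded through removeUnusedActions() separates two things that previously always happened together: pruning nodes that no required output reaches, and collapsing nodes whose value is already a known constant into leaf COLUMN nodes. The PREWHERE call site that follows keeps only the pruning, so constants shared with the rest of the query stay in their original form. A tree-shaped toy of the distinction, with hypothetical types:

#include <vector>

struct ToyNode
{
    bool is_known_constant = false;
    std::vector<ToyNode *> children;
};

/// Collect the nodes still needed to produce `root`; optionally fold constants along the way.
void collectNeeded(ToyNode * root, std::vector<ToyNode *> & needed, bool allow_constant_folding)
{
    needed.push_back(root);
    if (allow_constant_folding && root->is_known_constant)
    {
        root->children.clear();   /// folded: the subtree that computed the value is dropped
        return;
    }
    for (ToyNode * child : root->children)
        collectNeeded(child, needed, allow_constant_folding);
}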
+ tmp_actions_dag->removeUnusedActions( + NameSet{prewhere_column_name}, + /* allow_remove_inputs= */ true, + /* allow_constant_folding= */ false); auto required_columns = tmp_actions_dag->getRequiredColumnsNames(); NameSet required_source_columns(required_columns.begin(), required_columns.end()); diff --git a/tests/queries/0_stateless/02021_prewhere_column_optimization.reference b/tests/queries/0_stateless/02021_prewhere_column_optimization.reference new file mode 100644 index 000000000000..7f97e6efd696 --- /dev/null +++ b/tests/queries/0_stateless/02021_prewhere_column_optimization.reference @@ -0,0 +1,7 @@ +-- { echoOn } +select * from data_02021 prewhere 1 or ignore(key); +1 +select * from data_02021 prewhere 1 or ignore(key) where key = 1; +1 +select * from data_02021 prewhere 0 or ignore(key); +select * from data_02021 prewhere 0 or ignore(key) where key = 1; diff --git a/tests/queries/0_stateless/02021_prewhere_column_optimization.sql b/tests/queries/0_stateless/02021_prewhere_column_optimization.sql new file mode 100644 index 000000000000..4fe8b912c3fd --- /dev/null +++ b/tests/queries/0_stateless/02021_prewhere_column_optimization.sql @@ -0,0 +1,10 @@ +drop table if exists data_02021; +create table data_02021 (key Int) engine=MergeTree() order by key; +insert into data_02021 values (1); +-- { echoOn } +select * from data_02021 prewhere 1 or ignore(key); +select * from data_02021 prewhere 1 or ignore(key) where key = 1; +select * from data_02021 prewhere 0 or ignore(key); +select * from data_02021 prewhere 0 or ignore(key) where key = 1; +-- { echoOff } +drop table data_02021; From 0e83437f20a3e2942fa67dcfe58be26dc146f4b3 Mon Sep 17 00:00:00 2001 From: Neng Liu Date: Fri, 29 Oct 2021 10:35:10 +0800 Subject: [PATCH 152/472] add a count aggregate --- utils/local-engine/local_engine.cpp | 101 +++++++++++++++++++++------- utils/local-engine/table.csv | 9 ++- 2 files changed, 80 insertions(+), 30 deletions(-) diff --git a/utils/local-engine/local_engine.cpp b/utils/local-engine/local_engine.cpp index b782975a3450..ee2b1d8f1155 100644 --- a/utils/local-engine/local_engine.cpp +++ b/utils/local-engine/local_engine.cpp @@ -1,4 +1,6 @@ +#include "Poco/Logger.h" #include +#include #include #include #include @@ -12,8 +14,10 @@ #include #include #include +#include #include #include +#include #include #include #include @@ -57,9 +61,7 @@ Block getTableHeader(std::map & cols) return Block(*internalCols); } -std::shared_ptr getSource(const std::string &file, Block &header) { -// std::unique_ptr buf = std::make_unique(file); - auto buf = ReadBufferFromFile(file); +std::shared_ptr getSource(ReadBuffer & buf, Block &header) { FormatSettings settings; return std::make_shared(header, buf, RowInputFormatParams{.max_block_size=100}, false, settings); } @@ -87,12 +89,17 @@ void registerAllFunctions() registerAggregateFunctions(); } -FunctionOverloadResolverPtr getFunction(const std::string & name) +FunctionOverloadResolverPtr getFunction(const std::string & name, ContextPtr context) { - auto shared_context = Context::createShared(); - auto global_context = Context::createGlobal(shared_context.get()); + auto & factory = FunctionFactory::instance(); - return factory.get(name, global_context); + return factory.get(name, context); +} + +AggregateFunctionPtr getAggregateFunction(const std::string & name, DataTypes arg_types) { + auto & factory = AggregateFunctionFactory::instance(); + AggregateFunctionProperties properties; + return factory.get(name, arg_types, Array{}, properties); } 
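As a mental reference while reading the buildFilter()/buildAgg() pieces below: the hand-built pipeline in this demo commit should compute roughly `SELECT x1, count(x2) ... WHERE x1 < x2 GROUP BY x1` over table.csv, since the filter uses less(x1, x2) and the aggregation keys on column 0 while counting x2. A plain C++ restatement of that expected result, with no ClickHouse types:

#include <cstdint>
#include <map>
#include <utility>
#include <vector>

std::map<int32_t, uint64_t> referenceCountByX1(const std::vector<std::pair<int32_t, int32_t>> & rows)
{
    std::map<int32_t, uint64_t> counts;
    for (const auto & [x1, x2] : rows)
        if (x1 < x2)           /// buildFilter(): less(x1, x2)
            ++counts[x1];      /// buildAgg(): group by x1, count(x2)
    return counts;
}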
ActionsDAG::NodeRawConstPtrs getArguments(ActionsDAG::NodeRawConstPtrs nodes, std::vector& args) { @@ -118,7 +125,7 @@ NamesAndTypesList blockToNameAndTypeList(Block & header) return types; } -QueryPlanStepPtr buildFilter(Block & header) +QueryPlanStepPtr buildFilter(Block & header, ContextPtr context) { auto actions_dag = std::make_shared(std::move(blockToNameAndTypeList(header))); // auto int_type = std::make_shared(); @@ -126,15 +133,67 @@ QueryPlanStepPtr buildFilter(Block & header) // actions_dag->addOrReplaceInIndex(const_node); std::string empty_string; std::vector args = {"x1", "x2"}; - const auto & filter_node = actions_dag->addFunction(std::move(getFunction("less")), getArguments(actions_dag->getIndex(), args), std::move(empty_string)); + const auto & filter_node = actions_dag->addFunction(std::move(getFunction("less", context)), getArguments(actions_dag->getIndex(), args), std::move(empty_string)); actions_dag->getIndex().push_back(&filter_node); DataStream input_stream = DataStream{.header=header}; auto filter = std::make_unique(input_stream, actions_dag, std::move(filter_node.result_name), true); return std::move(filter); } +void buildAgg(Block & header, QueryPlan& query_plan, ContextPtr context) +{ + auto aggregates = AggregateDescriptions(); + auto count = AggregateDescription(); + count.column_name = "count(x2)"; + count.arguments = ColumnNumbers{1}; + count.argument_names = Names{"x2"}; + auto int_type = std::make_shared(); + count.function = getAggregateFunction("count", {int_type}); + aggregates.push_back(count); + Settings settings; + Aggregator::Params params( + header, + ColumnNumbers{0}, + aggregates, + false, + settings.max_rows_to_group_by, + settings.group_by_overflow_mode, + settings.group_by_two_level_threshold, + settings.group_by_two_level_threshold_bytes, + settings.max_bytes_before_external_group_by, + settings.empty_result_for_aggregation_by_empty_set, + context->getTemporaryVolume(), + settings.max_threads, + settings.min_free_disk_space_for_temporary_data, + settings.compile_aggregate_expressions, + settings.min_count_to_compile_aggregate_expression); + + SortDescription group_by_sort_description; + + auto merge_threads = 1; + auto temporary_data_merge_threads = settings.aggregation_memory_efficient_merge_threads + ? 
static_cast(settings.aggregation_memory_efficient_merge_threads) + : static_cast(settings.max_threads); + + + auto aggregating_step = std::make_unique( + query_plan.getCurrentDataStream(), + params, + true, + settings.max_block_size, + merge_threads, + temporary_data_merge_threads, + false, + nullptr, + std::move(group_by_sort_description)); + + query_plan.addStep(std::move(aggregating_step)); +} + int main(int argc, char ** argv) { + auto shared_context = Context::createShared(); + auto global_context = Context::createGlobal(shared_context.get()); registerAllFunctions(); auto & factory = FunctionFactory::instance(); std::ifstream ifs("/Users/neng.liu/Documents/GitHub/ClickHouse/utils/local-engine/table.json"); @@ -144,36 +203,28 @@ int main(int argc, char ** argv) d.ParseStream(isw); auto cols = getColumns(d); auto header = getTableHeader(*cols); -// std::for_each(header.getNames().begin(), header.getNames().end(), [](const std::string & name) { -// std::cout << name << std::endl; -// }); QueryPlan query_plan; -// auto source = getSource("/Users/neng.liu/Documents/GitHub/ClickHouse/utils/local-engine/table.csv", header); - auto x1 = ColumnInt32::create({1,2,3}); - auto x2 = ColumnInt32::create({3,2,1}); - Columns columns; - columns.emplace_back(std::move(x1)); - columns.emplace_back(std::move(x2)); - Chunk chunk(std::move(columns),3); - auto source = std::make_shared(header, std::move(chunk)); - std::shared_ptr query_pipelines = std::make_shared(); + auto file = "/Users/neng.liu/Documents/GitHub/ClickHouse/utils/local-engine/table.csv"; + auto buf = std::make_unique(file); + + auto source = getSource(*buf, header); + std::unique_ptr query_pipelines = std::make_unique(); auto source_step = std::make_unique(Pipe(source), "CSV"); query_plan.addStep(std::move(source_step)); - auto filter = buildFilter(header); + auto filter = buildFilter(header, global_context); query_plan.addStep(std::move(filter)); + buildAgg(header, query_plan, global_context); QueryPlanOptimizationSettings optimization_settings{.optimize_plan=false}; auto query_pipline = query_plan.buildQueryPipeline(optimization_settings, BuildQueryPipelineSettings()); auto buffer = WriteBufferFromFile("/Users/neng.liu/Documents/GitHub/ClickHouse/output.txt"); - auto output = std::make_shared(buffer, header, true, RowOutputFormatParams(), FormatSettings()); + auto output = std::make_shared(buffer, query_pipline->getHeader(), true, RowOutputFormatParams(), FormatSettings()); query_pipline->setOutputFormat(output); auto executor = query_pipline->execute(); executor->execute(1); -// auto chunk = source->generate(); -// std::cout << chunk.getNumRows(); } // auto col = ColumnUInt8::create(1, 1); diff --git a/utils/local-engine/table.csv b/utils/local-engine/table.csv index 52ada4d5aa3f..28c8e1259d28 100644 --- a/utils/local-engine/table.csv +++ b/utils/local-engine/table.csv @@ -1,5 +1,4 @@ -1,1 -2,5 -3,3 -4,4 -5,3 +1,2 +2,3 +4,5 +6,7 From b78e5c6892adbd23415f13b73740e9a1400f75aa Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 29 Oct 2021 14:51:12 +0300 Subject: [PATCH 153/472] Backport #29650 to 21.9: AddDefaultDatabaseVisitor support dictGet --- src/Core/QualifiedTableName.h | 65 ++++++++++++++++++ src/Interpreters/AddDefaultDatabaseVisitor.h | 66 ++++++++++++++----- .../ExternalDictionariesLoader.cpp | 22 +++++++ src/Interpreters/ExternalDictionariesLoader.h | 2 + src/Interpreters/InterpreterAlterQuery.cpp | 2 +- src/Interpreters/InterpreterCreateQuery.cpp | 8 ++- src/Interpreters/executeDDLQueryOnCluster.cpp | 2 +- 
src/Storages/LiveView/StorageLiveView.cpp | 2 +- src/Storages/SelectQueryDescription.cpp | 2 +- src/Storages/StorageMaterializedView.cpp | 1 - ...97_default_dict_get_add_database.reference | 1 + .../02097_default_dict_get_add_database.sql | 44 +++++++++++++ 12 files changed, 194 insertions(+), 23 deletions(-) create mode 100644 tests/queries/0_stateless/02097_default_dict_get_add_database.reference create mode 100644 tests/queries/0_stateless/02097_default_dict_get_add_database.sql diff --git a/src/Core/QualifiedTableName.h b/src/Core/QualifiedTableName.h index 453d55d85c72..ed87dff1f5f0 100644 --- a/src/Core/QualifiedTableName.h +++ b/src/Core/QualifiedTableName.h @@ -2,11 +2,20 @@ #include #include +#include +#include #include +#include +#include namespace DB { +namespace ErrorCodes +{ + extern const int SYNTAX_ERROR; +} + //TODO replace with StorageID struct QualifiedTableName { @@ -30,6 +39,62 @@ struct QualifiedTableName hash_state.update(table.data(), table.size()); return hash_state.get64(); } + + std::vector getParts() const + { + if (database.empty()) + return {table}; + else + return {database, table}; + } + + std::string getFullName() const + { + if (database.empty()) + return table; + else + return database + '.' + table; + } + + /// NOTE: It's different from compound identifier parsing and does not support escaping and dots in name. + /// Usually it's better to use ParserIdentifier instead, + /// but we parse DDL dictionary name (and similar things) this way for historical reasons. + static std::optional tryParseFromString(const String & maybe_qualified_name) + { + if (maybe_qualified_name.empty()) + return {}; + + /// Do not allow dot at the beginning and at the end + auto pos = maybe_qualified_name.find('.'); + if (pos == 0 || pos == (maybe_qualified_name.size() - 1)) + return {}; + + QualifiedTableName name; + if (pos == std::string::npos) + { + name.table = std::move(maybe_qualified_name); + } + else if (maybe_qualified_name.find('.', pos + 1) != std::string::npos) + { + /// Do not allow multiple dots + return {}; + } + else + { + name.database = maybe_qualified_name.substr(0, pos); + name.table = maybe_qualified_name.substr(pos + 1); + } + + return name; + } + + static QualifiedTableName parseFromString(const String & maybe_qualified_name) + { + auto name = tryParseFromString(maybe_qualified_name); + if (!name) + throw Exception(ErrorCodes::SYNTAX_ERROR, "Invalid qualified name: {}", maybe_qualified_name); + return *name; + } }; } diff --git a/src/Interpreters/AddDefaultDatabaseVisitor.h b/src/Interpreters/AddDefaultDatabaseVisitor.h index 4da8df64f25a..e3e25b714ea1 100644 --- a/src/Interpreters/AddDefaultDatabaseVisitor.h +++ b/src/Interpreters/AddDefaultDatabaseVisitor.h @@ -10,9 +10,13 @@ #include #include #include +#include #include #include #include +#include +#include +#include namespace DB { @@ -25,11 +29,12 @@ class AddDefaultDatabaseVisitor { public: explicit AddDefaultDatabaseVisitor( - const String & database_name_, bool only_replace_current_database_function_ = false, WriteBuffer * ostr_ = nullptr) - : database_name(database_name_) + ContextPtr context_, + const String & database_name_, + bool only_replace_current_database_function_ = false) + : context(context_) + , database_name(database_name_) , only_replace_current_database_function(only_replace_current_database_function_) - , visit_depth(0) - , ostr(ostr_) {} void visitDDL(ASTPtr & ast) const @@ -62,11 +67,19 @@ class AddDefaultDatabaseVisitor visit(select, unused); } + void visit(ASTColumns & columns) 
const + { + for (auto & child : columns.children) + visit(child); + } + private: + + ContextPtr context; + const String database_name; + bool only_replace_current_database_function = false; - mutable size_t visit_depth; - WriteBuffer * ostr; void visit(ASTSelectWithUnionQuery & select, ASTPtr &) const { @@ -115,15 +128,8 @@ class AddDefaultDatabaseVisitor void visit(ASTFunction & function, ASTPtr &) const { - bool is_operator_in = false; - for (const auto * name : {"in", "notIn", "globalIn", "globalNotIn"}) - { - if (function.name == name) - { - is_operator_in = true; - break; - } - } + bool is_operator_in = functionIsInOrGlobalInOperator(function.name); + bool is_dict_get = functionIsDictGet(function.name); for (auto & child : function.children) { @@ -131,7 +137,30 @@ class AddDefaultDatabaseVisitor { for (size_t i = 0; i < child->children.size(); ++i) { - if (is_operator_in && i == 1) + if (is_dict_get && i == 0) + { + if (auto * identifier = child->children[i]->as()) + { + /// Identifier already qualified + if (identifier->compound()) + continue; + + auto qualified_dictionary_name = context->getExternalDictionariesLoader().qualifyDictionaryNameWithDatabase(identifier->name(), context); + child->children[i] = std::make_shared(qualified_dictionary_name.getParts()); + } + else if (auto * literal = child->children[i]->as()) + { + auto & literal_value = literal->value; + + if (literal_value.getType() != Field::Types::String) + continue; + + auto dictionary_name = literal_value.get(); + auto qualified_dictionary_name = context->getExternalDictionariesLoader().qualifyDictionaryNameWithDatabase(dictionary_name, context); + literal_value = qualified_dictionary_name.getFullName(); + } + } + else if (is_operator_in && i == 1) { /// XXX: for some unknown reason this place assumes that argument can't be an alias, /// like in the similar code in `MarkTableIdentifierVisitor`. 
@@ -149,11 +178,15 @@ class AddDefaultDatabaseVisitor visit(child->children[i]); } else + { visit(child->children[i]); + } } } else + { visit(child); + } } } @@ -168,7 +201,6 @@ class AddDefaultDatabaseVisitor { if (T * t = typeid_cast(ast.get())) { - DumpASTNode dump(*ast, ostr, visit_depth, "addDefaultDatabaseName"); visit(*t, ast); return true; } diff --git a/src/Interpreters/ExternalDictionariesLoader.cpp b/src/Interpreters/ExternalDictionariesLoader.cpp index 83931649443c..f0d17cf54f06 100644 --- a/src/Interpreters/ExternalDictionariesLoader.cpp +++ b/src/Interpreters/ExternalDictionariesLoader.cpp @@ -79,6 +79,28 @@ DictionaryStructure ExternalDictionariesLoader::getDictionaryStructure(const std return ExternalDictionariesLoader::getDictionaryStructure(*load_result.config); } +QualifiedTableName ExternalDictionariesLoader::qualifyDictionaryNameWithDatabase(const std::string & dictionary_name, ContextPtr query_context) const +{ + auto qualified_name = QualifiedTableName::tryParseFromString(dictionary_name); + if (!qualified_name) + { + QualifiedTableName qualified_dictionary_name; + qualified_dictionary_name.table = dictionary_name; + return qualified_dictionary_name; + } + + if (qualified_name->database.empty() && has(dictionary_name)) + { + /// This is xml dictionary + return *qualified_name; + } + + if (qualified_name->database.empty()) + qualified_name->database = query_context->getCurrentDatabase(); + + return *qualified_name; +} + std::string ExternalDictionariesLoader::resolveDictionaryName(const std::string & dictionary_name, const std::string & current_database_name) const { bool has_dictionary = has(dictionary_name); diff --git a/src/Interpreters/ExternalDictionariesLoader.h b/src/Interpreters/ExternalDictionariesLoader.h index 06f64ef30c50..61950c640465 100644 --- a/src/Interpreters/ExternalDictionariesLoader.h +++ b/src/Interpreters/ExternalDictionariesLoader.h @@ -27,6 +27,8 @@ class ExternalDictionariesLoader : public ExternalLoader, WithContext void reloadDictionary(const std::string & dictionary_name, ContextPtr context) const; + QualifiedTableName qualifyDictionaryNameWithDatabase(const std::string & dictionary_name, ContextPtr context) const; + DictionaryStructure getDictionaryStructure(const std::string & dictionary_name, ContextPtr context) const; static DictionaryStructure getDictionaryStructure(const Poco::Util::AbstractConfiguration & config, const std::string & key_in_config = "dictionary"); diff --git a/src/Interpreters/InterpreterAlterQuery.cpp b/src/Interpreters/InterpreterAlterQuery.cpp index 76e7afb7009d..8ae43c5d4024 100644 --- a/src/Interpreters/InterpreterAlterQuery.cpp +++ b/src/Interpreters/InterpreterAlterQuery.cpp @@ -66,7 +66,7 @@ BlockIO InterpreterAlterQuery::execute() auto metadata_snapshot = table->getInMemoryMetadataPtr(); /// Add default database to table identifiers that we can encounter in e.g. default expressions, mutation expression, etc. 
- AddDefaultDatabaseVisitor visitor(table_id.getDatabaseName()); + AddDefaultDatabaseVisitor visitor(getContext(), table_id.getDatabaseName()); ASTPtr command_list_ptr = alter.command_list->ptr(); visitor.visit(command_list_ptr); diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index c1fc9b389cee..ca295477f879 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -908,10 +908,16 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) { // Expand CTE before filling default database ApplyWithSubqueryVisitor().visit(*create.select); - AddDefaultDatabaseVisitor visitor(current_database); + AddDefaultDatabaseVisitor visitor(getContext(), current_database); visitor.visit(*create.select); } + if (create.columns_list) + { + AddDefaultDatabaseVisitor visitor(getContext(), current_database); + visitor.visit(*create.columns_list); + } + /// Set and retrieve list of columns, indices and constraints. Set table engine if needed. Rewrite query in canonical way. TableProperties properties = getTablePropertiesAndNormalizeCreateQuery(create); diff --git a/src/Interpreters/executeDDLQueryOnCluster.cpp b/src/Interpreters/executeDDLQueryOnCluster.cpp index 180a4f9af3e2..6f9fac959d5f 100644 --- a/src/Interpreters/executeDDLQueryOnCluster.cpp +++ b/src/Interpreters/executeDDLQueryOnCluster.cpp @@ -151,7 +151,7 @@ BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr_, ContextPtr context, } } - AddDefaultDatabaseVisitor visitor(current_database, !use_local_default_database); + AddDefaultDatabaseVisitor visitor(context, current_database, !use_local_default_database); visitor.visitDDL(query_ptr); /// Check access rights, assume that all servers have the same users config diff --git a/src/Storages/LiveView/StorageLiveView.cpp b/src/Storages/LiveView/StorageLiveView.cpp index 69390850ccc5..858fdcc23b87 100644 --- a/src/Storages/LiveView/StorageLiveView.cpp +++ b/src/Storages/LiveView/StorageLiveView.cpp @@ -69,7 +69,7 @@ static StorageID extractDependentTable(ASTPtr & query, ContextPtr context, const if (db_and_table->database.empty()) { db_and_table->database = select_database_name; - AddDefaultDatabaseVisitor visitor(select_database_name); + AddDefaultDatabaseVisitor visitor(context, select_database_name); visitor.visit(select_query); } else diff --git a/src/Storages/SelectQueryDescription.cpp b/src/Storages/SelectQueryDescription.cpp index 05747a9a2608..018a9f0ea980 100644 --- a/src/Storages/SelectQueryDescription.cpp +++ b/src/Storages/SelectQueryDescription.cpp @@ -48,7 +48,7 @@ StorageID extractDependentTableFromSelectQuery(ASTSelectQuery & query, ContextPt { if (add_default_db) { - AddDefaultDatabaseVisitor visitor(context->getCurrentDatabase(), false, nullptr); + AddDefaultDatabaseVisitor visitor(context, context->getCurrentDatabase()); visitor.visit(query); } diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index f72f6fee1807..6bf9a3d0fdfe 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -9,7 +9,6 @@ #include #include #include -#include #include #include #include diff --git a/tests/queries/0_stateless/02097_default_dict_get_add_database.reference b/tests/queries/0_stateless/02097_default_dict_get_add_database.reference new file mode 100644 index 000000000000..9b0ac07a68a1 --- /dev/null +++ b/tests/queries/0_stateless/02097_default_dict_get_add_database.reference @@ -0,0 +1 @@ 
+CREATE TABLE `02097_db`.test_table_default (`data_1` UInt64 DEFAULT dictGetUInt64(\'02097_db.test_dictionary\', \'data_column_1\', toUInt64(0)), `data_2` UInt8 DEFAULT dictGet(`02097_db`.test_dictionary, \'data_column_2\', toUInt64(0))) ENGINE = TinyLog diff --git a/tests/queries/0_stateless/02097_default_dict_get_add_database.sql b/tests/queries/0_stateless/02097_default_dict_get_add_database.sql new file mode 100644 index 000000000000..af177566476b --- /dev/null +++ b/tests/queries/0_stateless/02097_default_dict_get_add_database.sql @@ -0,0 +1,44 @@ +-- Tags: no-parallel + +DROP DATABASE IF EXISTS 02097_db; +CREATE DATABASE 02097_db; + +USE 02097_db; + +DROP TABLE IF EXISTS test_table; +CREATE TABLE test_table +( + key_column UInt64, + data_column_1 UInt64, + data_column_2 UInt8 +) +ENGINE = MergeTree +ORDER BY key_column; + +DROP DICTIONARY IF EXISTS test_dictionary; +CREATE DICTIONARY test_dictionary +( + key_column UInt64 DEFAULT 0, + data_column_1 UInt64 DEFAULT 1, + data_column_2 UInt8 DEFAULT 1 +) +PRIMARY KEY key_column +LAYOUT(DIRECT()) +SOURCE(CLICKHOUSE(TABLE 'test_table')); + +DROP TABLE IF EXISTS test_table_default; +CREATE TABLE test_table_default +( + data_1 DEFAULT dictGetUInt64('test_dictionary', 'data_column_1', toUInt64(0)), + data_2 DEFAULT dictGet(test_dictionary, 'data_column_2', toUInt64(0)) +) +ENGINE=TinyLog; + +SELECT create_table_query FROM system.tables WHERE name = 'test_table_default' AND database = '02097_db'; + +DROP DICTIONARY test_dictionary; +DROP TABLE test_table; +DROP TABLE test_table_default; + +DROP DATABASE 02097_db; + From 75d08699e220d274f407fd4dd1b33308a0ed933d Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 29 Oct 2021 16:47:26 +0300 Subject: [PATCH 154/472] Backport #30822 to 21.9: Fix ambiguity when extracting auxiliary ZooKeeper name --- src/Interpreters/Context.cpp | 3 ++ src/Storages/StorageReplicatedMergeTree.cpp | 41 ++++++++++++------- .../test.py | 10 +++++ 3 files changed, 39 insertions(+), 15 deletions(-) diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index b8537dce8226..108c2ef7712c 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -1684,6 +1684,9 @@ zkutil::ZooKeeperPtr Context::getAuxiliaryZooKeeper(const String & name) const auto zookeeper = shared->auxiliary_zookeepers.find(name); if (zookeeper == shared->auxiliary_zookeepers.end()) { + if (name.find(':') != std::string::npos || name.find('/') != std::string::npos) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Invalid auxiliary ZooKeeper name {}: ':' and '/' are not allowed", name); + const auto & config = shared->auxiliary_zookeepers_config ? *shared->auxiliary_zookeepers_config : getConfigRef(); if (!config.has("auxiliary_zookeepers." 
+ name)) throw Exception( diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index fd48360f17ee..c06ceaff1a40 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -192,43 +192,54 @@ zkutil::ZooKeeperPtr StorageReplicatedMergeTree::getZooKeeper() const return res; } -static std::string normalizeZooKeeperPath(std::string zookeeper_path) +static std::string normalizeZooKeeperPath(std::string zookeeper_path, bool check_starts_with_slash, Poco::Logger * log = nullptr) { if (!zookeeper_path.empty() && zookeeper_path.back() == '/') zookeeper_path.resize(zookeeper_path.size() - 1); /// If zookeeper chroot prefix is used, path should start with '/', because chroot concatenates without it. if (!zookeeper_path.empty() && zookeeper_path.front() != '/') + { + /// Do not allow this for new tables, print warning for tables created in old versions + if (check_starts_with_slash) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "ZooKeeper path must starts with '/', got '{}'", zookeeper_path); + if (log) + LOG_WARNING(log, "ZooKeeper path ('{}') does not start with '/'. It will not be supported in future releases"); zookeeper_path = "/" + zookeeper_path; + } return zookeeper_path; } static String extractZooKeeperName(const String & path) { + static constexpr auto default_zookeeper_name = "default"; if (path.empty()) - throw Exception("ZooKeeper path should not be empty", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - auto pos = path.find(':'); - if (pos != String::npos) + throw Exception("ZooKeeper path should not be empty", ErrorCodes::BAD_ARGUMENTS); + if (path[0] == '/') + return default_zookeeper_name; + auto pos = path.find(":/"); + if (pos != String::npos && pos < path.find('/')) { auto zookeeper_name = path.substr(0, pos); if (zookeeper_name.empty()) - throw Exception("Zookeeper path should start with '/' or ':/'", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception("Zookeeper path should start with '/' or ':/'", ErrorCodes::BAD_ARGUMENTS); return zookeeper_name; } - static constexpr auto default_zookeeper_name = "default"; return default_zookeeper_name; } -static String extractZooKeeperPath(const String & path) +static String extractZooKeeperPath(const String & path, bool check_starts_with_slash, Poco::Logger * log = nullptr) { if (path.empty()) - throw Exception("ZooKeeper path should not be empty", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - auto pos = path.find(':'); - if (pos != String::npos) + throw Exception("ZooKeeper path should not be empty", ErrorCodes::BAD_ARGUMENTS); + if (path[0] == '/') + return normalizeZooKeeperPath(path, check_starts_with_slash, log); + auto pos = path.find(":/"); + if (pos != String::npos && pos < path.find('/')) { - return normalizeZooKeeperPath(path.substr(pos + 1, String::npos)); + return normalizeZooKeeperPath(path.substr(pos + 1, String::npos), check_starts_with_slash, log); } - return normalizeZooKeeperPath(path); + return normalizeZooKeeperPath(path, check_starts_with_slash, log); } static MergeTreePartInfo makeDummyDropRangeForMovePartitionOrAttachPartitionFrom(const String & partition_id) @@ -275,7 +286,7 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree( attach, [this] (const std::string & name) { enqueuePartForCheck(name); }) , zookeeper_name(extractZooKeeperName(zookeeper_path_)) - , zookeeper_path(extractZooKeeperPath(zookeeper_path_)) + , zookeeper_path(extractZooKeeperPath(zookeeper_path_, /* check_starts_with_slash */ !attach, log)) , 
replica_name(replica_name_) , replica_path(fs::path(zookeeper_path) / "replicas" / replica_name_) , reader(*this) @@ -5722,7 +5733,7 @@ void StorageReplicatedMergeTree::fetchPartition( info.table_id.uuid = UUIDHelpers::Nil; auto expand_from = query_context->getMacros()->expand(from_, info); String auxiliary_zookeeper_name = extractZooKeeperName(expand_from); - String from = extractZooKeeperPath(expand_from); + String from = extractZooKeeperPath(expand_from, /* check_starts_with_slash */ true); if (from.empty()) throw Exception("ZooKeeper path should not be empty", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); @@ -6794,7 +6805,7 @@ void StorageReplicatedMergeTree::movePartitionToShard( if (!move_part) throw Exception("MOVE PARTITION TO SHARD is not supported, use MOVE PART instead", ErrorCodes::NOT_IMPLEMENTED); - if (normalizeZooKeeperPath(zookeeper_path) == normalizeZooKeeperPath(to)) + if (normalizeZooKeeperPath(zookeeper_path, /* check_starts_with_slash */ true) == normalizeZooKeeperPath(to, /* check_starts_with_slash */ true)) throw Exception("Source and destination are the same", ErrorCodes::BAD_ARGUMENTS); auto zookeeper = getZooKeeper(); diff --git a/tests/integration/test_replicated_merge_tree_with_auxiliary_zookeepers/test.py b/tests/integration/test_replicated_merge_tree_with_auxiliary_zookeepers/test.py index a9dcce1b9d4f..4644790ff946 100644 --- a/tests/integration/test_replicated_merge_tree_with_auxiliary_zookeepers/test.py +++ b/tests/integration/test_replicated_merge_tree_with_auxiliary_zookeepers/test.py @@ -101,3 +101,13 @@ def test_drop_replicated_merge_tree_with_auxiliary_zookeeper(started_cluster): assert zk.exists('/clickhouse/tables/test/test_auxiliary_zookeeper') drop_table([node1, node2], "test_auxiliary_zookeeper") assert zk.exists('/clickhouse/tables/test/test_auxiliary_zookeeper') is None + +def test_path_ambiguity(started_cluster): + drop_table([node1, node2], "test_path_ambiguity1") + drop_table([node1, node2], "test_path_ambiguity2") + node1.query("create table test_path_ambiguity1 (n int) engine=ReplicatedMergeTree('/test:bad:/path', '1') order by n") + assert "Invalid auxiliary ZooKeeper name" in node1.query_and_get_error("create table test_path_ambiguity2 (n int) engine=ReplicatedMergeTree('test:bad:/path', '1') order by n") + assert "ZooKeeper path must starts with '/'" in node1.query_and_get_error("create table test_path_ambiguity2 (n int) engine=ReplicatedMergeTree('test/bad:/path', '1') order by n") + node1.query("create table test_path_ambiguity2 (n int) engine=ReplicatedMergeTree('zookeeper2:/bad:/path', '1') order by n") + drop_table([node1, node2], "test_path_ambiguity1") + drop_table([node1, node2], "test_path_ambiguity2") From ae36a4ebe9201ad4c1d2ca3ddd19e5e69e9aad95 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Sun, 31 Oct 2021 16:54:47 +0300 Subject: [PATCH 155/472] Backport #30888 to 21.9: Fix WITH FILL with set TO and FROM and no rows in result set --- .../Transforms/FillingTransform.cpp | 7 +++++-- .../02111_with_fill_no_rows.reference | 4 ++++ .../0_stateless/02111_with_fill_no_rows.sql | 19 +++++++++++++++++++ 3 files changed, 28 insertions(+), 2 deletions(-) create mode 100644 tests/queries/0_stateless/02111_with_fill_no_rows.reference create mode 100644 tests/queries/0_stateless/02111_with_fill_no_rows.sql diff --git a/src/Processors/Transforms/FillingTransform.cpp b/src/Processors/Transforms/FillingTransform.cpp index 3ff89c302ff8..5fe051e9498f 100644 --- a/src/Processors/Transforms/FillingTransform.cpp +++ 
b/src/Processors/Transforms/FillingTransform.cpp @@ -117,12 +117,12 @@ IProcessor::Status FillingTransform::prepare() { if (!on_totals && input.isFinished() && !output.isFinished() && !has_input && !generate_suffix) { - should_insert_first = next_row < filling_row; + should_insert_first = next_row < filling_row || first; for (size_t i = 0, size = filling_row.size(); i < size; ++i) next_row[i] = filling_row.getFillDescription(i).fill_to; - if (filling_row < next_row) + if (first || filling_row < next_row) { generate_suffix = true; return Status::Ready; @@ -160,6 +160,9 @@ void FillingTransform::transform(Chunk & chunk) init_columns_by_positions(empty_columns, old_fill_columns, res_fill_columns, fill_column_positions); init_columns_by_positions(empty_columns, old_other_columns, res_other_columns, other_column_positions); + if (first) + filling_row.initFromDefaults(); + if (should_insert_first && filling_row < next_row) insertFromFillingRow(res_fill_columns, res_other_columns, filling_row); diff --git a/tests/queries/0_stateless/02111_with_fill_no_rows.reference b/tests/queries/0_stateless/02111_with_fill_no_rows.reference new file mode 100644 index 000000000000..c0cc69a2d636 --- /dev/null +++ b/tests/queries/0_stateless/02111_with_fill_no_rows.reference @@ -0,0 +1,4 @@ +2019 0 +2020 0 +2021 0 +2022 0 diff --git a/tests/queries/0_stateless/02111_with_fill_no_rows.sql b/tests/queries/0_stateless/02111_with_fill_no_rows.sql new file mode 100644 index 000000000000..e671dd5f0f2c --- /dev/null +++ b/tests/queries/0_stateless/02111_with_fill_no_rows.sql @@ -0,0 +1,19 @@ +SELECT toYear(d) AS y, count() +FROM ( SELECT today() AS d WHERE 0) +GROUP BY y +ORDER BY y ASC WITH FILL FROM 2019 TO 2023; + +SELECT toYear(d) AS y, count() +FROM ( SELECT today() AS d WHERE 0) +GROUP BY y +ORDER BY y ASC WITH FILL FROM 2019; + +SELECT toYear(d) AS y, count() +FROM ( SELECT today() AS d WHERE 0) +GROUP BY y +ORDER BY y ASC WITH FILL TO 2023; + +SELECT toYear(d) AS y, count() +FROM ( SELECT today() AS d WHERE 0) +GROUP BY y +ORDER BY y ASC WITH FILL; From a98abe776cc2a9fc82eec4c207e34cc57d6569cb Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Sun, 31 Oct 2021 18:50:51 +0300 Subject: [PATCH 156/472] Backport #30887 to 21.9: Fix index analysis for set index --- src/Storages/MergeTree/MergeTreeIndexSet.cpp | 4 ++-- .../0_stateless/02112_skip_index_set_and_or.reference | 0 tests/queries/0_stateless/02112_skip_index_set_and_or.sql | 6 ++++++ 3 files changed, 8 insertions(+), 2 deletions(-) create mode 100644 tests/queries/0_stateless/02112_skip_index_set_and_or.reference create mode 100644 tests/queries/0_stateless/02112_skip_index_set_and_or.sql diff --git a/src/Storages/MergeTree/MergeTreeIndexSet.cpp b/src/Storages/MergeTree/MergeTreeIndexSet.cpp index 60b9ddae3296..a08ae4499f27 100644 --- a/src/Storages/MergeTree/MergeTreeIndexSet.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexSet.cpp @@ -451,9 +451,9 @@ bool MergeTreeIndexConditionSet::checkASTUseless(const ASTPtr & node, bool atomi const ASTs & args = func->arguments->children; if (func->name == "and" || func->name == "indexHint") - return checkASTUseless(args[0], atomic) && checkASTUseless(args[1], atomic); + return std::all_of(args.begin(), args.end(), [this, atomic](const auto & arg) { return checkASTUseless(arg, atomic); }); else if (func->name == "or") - return checkASTUseless(args[0], atomic) || checkASTUseless(args[1], atomic); + return std::any_of(args.begin(), args.end(), [this, atomic](const auto & arg) { return checkASTUseless(arg, atomic); }); 
else if (func->name == "not") return checkASTUseless(args[0], atomic); else diff --git a/tests/queries/0_stateless/02112_skip_index_set_and_or.reference b/tests/queries/0_stateless/02112_skip_index_set_and_or.reference new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/queries/0_stateless/02112_skip_index_set_and_or.sql b/tests/queries/0_stateless/02112_skip_index_set_and_or.sql new file mode 100644 index 000000000000..7b52e5de9c99 --- /dev/null +++ b/tests/queries/0_stateless/02112_skip_index_set_and_or.sql @@ -0,0 +1,6 @@ +drop table if exists set_index; + +create table set_index (a Int32, b Int32, INDEX b_set b type set(0) granularity 1) engine MergeTree order by tuple(); +insert into set_index values (1, 2); + +select b from set_index where a = 1 and a = 1 and b = 1 settings force_data_skipping_indices = 'b_set', optimize_move_to_prewhere=0; From f72adb71e4667cd0f97b634c097346b9f7afca19 Mon Sep 17 00:00:00 2001 From: Neng Liu Date: Wed, 3 Nov 2021 10:21:28 +0800 Subject: [PATCH 157/472] add substrait --- utils/local-engine/Builder/CMakeLists.txt | 0 .../Builder/SerializedPlanBuilder.cpp | 1 + .../Builder/SerializedPlanBuilder.h | 36 +++++++++ utils/local-engine/CMakeLists.txt | 19 ++++- utils/local-engine/Substrait/CMakeLists.txt | 9 +++ utils/local-engine/local_engine.cpp | 81 ++++++++++--------- 6 files changed, 107 insertions(+), 39 deletions(-) create mode 100644 utils/local-engine/Builder/CMakeLists.txt create mode 100644 utils/local-engine/Builder/SerializedPlanBuilder.cpp create mode 100644 utils/local-engine/Builder/SerializedPlanBuilder.h create mode 100644 utils/local-engine/Substrait/CMakeLists.txt diff --git a/utils/local-engine/Builder/CMakeLists.txt b/utils/local-engine/Builder/CMakeLists.txt new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/utils/local-engine/Builder/SerializedPlanBuilder.cpp b/utils/local-engine/Builder/SerializedPlanBuilder.cpp new file mode 100644 index 000000000000..11cd00611936 --- /dev/null +++ b/utils/local-engine/Builder/SerializedPlanBuilder.cpp @@ -0,0 +1 @@ +#include "SerializedPlanBuilder.h" diff --git a/utils/local-engine/Builder/SerializedPlanBuilder.h b/utils/local-engine/Builder/SerializedPlanBuilder.h new file mode 100644 index 000000000000..545906a603d0 --- /dev/null +++ b/utils/local-engine/Builder/SerializedPlanBuilder.h @@ -0,0 +1,36 @@ +#pragma once + +#include + + +namespace dbms +{ +class SerializedPlanBuilder +{ +public: + SerializedPlanBuilder& filter(); + SerializedPlanBuilder& aggregate(); + SerializedPlanBuilder& project(); + io::substrait::Plan build(); +public: + static SerializedPlanBuilder& read(); +}; + +/** + * build a schema, need define column name and column. + * 1. column name + * 2. column type + * 3. 
nullability + */ +class SerializedSchemaBuilder { +public: + io::substrait::Type_NamedStruct build(); + SerializedPlanBuilder& column(std::string name, std::string type, bool nullable = false); +public: + static SerializedSchemaBuilder& builder(); + +private: + std::map type_map; + std::map nullability_map; +}; +} diff --git a/utils/local-engine/CMakeLists.txt b/utils/local-engine/CMakeLists.txt index 3b60786ffefd..40106276a68a 100644 --- a/utils/local-engine/CMakeLists.txt +++ b/utils/local-engine/CMakeLists.txt @@ -3,8 +3,20 @@ function(add_cxx_compile_options option) add_compile_options("$<$,CXX>:${option}>") endfunction() add_cxx_compile_options(-Wzero-as-null-pointer-constant) -add_executable (local_engine local_engine.cpp) -target_include_directories(local_engine PRIVATE ${RAPIDJSON_INCLUDE_DIR}) +add_subdirectory(Substrait) +add_subdirectory(Builder) +add_headers_and_sources(builder Builder) +include_directories(${CMAKE_CURRENT_BINARY_DIR}) +add_executable (local_engine + local_engine.cpp + ${builder_headers} + ${builder_sources} + ) +target_include_directories(local_engine PRIVATE + ${RAPIDJSON_INCLUDE_DIR} + ${SUBSTRAIT_HEADERS} + ) + set (CLICKHOUSE_SERVER_LINK PRIVATE @@ -13,7 +25,8 @@ set (CLICKHOUSE_SERVER_LINK clickhouse_common_io clickhouse_functions clickhouse_storages_system + substrait ) -target_link_libraries(local_engine ${CLICKHOUSE_SERVER_LINK}) +target_link_libraries(local_engine ${CLICKHOUSE_SERVER_LINK} ) diff --git a/utils/local-engine/Substrait/CMakeLists.txt b/utils/local-engine/Substrait/CMakeLists.txt new file mode 100644 index 000000000000..1b22913a8bae --- /dev/null +++ b/utils/local-engine/Substrait/CMakeLists.txt @@ -0,0 +1,9 @@ +set(protobuf_generate_PROTOC_OUT_DIR "${ClickHouse_SOURCE_DIR}/utils/local-engine/Substrait") +file(GLOB PROTOBUF_DEFINITION_FILES "/Users/neng.liu/Documents/GitHub/substrait/binary/*.proto") +include_directories(${Protobuf_INCLUDE_DIRS}) +include_directories(${CMAKE_CURRENT_BINARY_DIR}) +PROTOBUF_GENERATE_CPP(SUBSTRAIT_SRCS SUBSTRAIT_HEADERS ${PROTOBUF_DEFINITION_FILES}) +add_library(substrait ${SUBSTRAIT_SRCS}) +target_include_directories(substrait PRIVATE ${PROTOBUF_INCLUDE_DIR}) +target_link_libraries(substrait libprotobuf) + diff --git a/utils/local-engine/local_engine.cpp b/utils/local-engine/local_engine.cpp index ee2b1d8f1155..125881a355ee 100644 --- a/utils/local-engine/local_engine.cpp +++ b/utils/local-engine/local_engine.cpp @@ -27,12 +27,12 @@ #include #include -#include #include #include #include #include - +#include +#include using namespace DB; using namespace rapidjson; @@ -190,41 +190,50 @@ void buildAgg(Block & header, QueryPlan& query_plan, ContextPtr context) query_plan.addStep(std::move(aggregating_step)); } -int main(int argc, char ** argv) +int main(int, char **) { - auto shared_context = Context::createShared(); - auto global_context = Context::createGlobal(shared_context.get()); - registerAllFunctions(); - auto & factory = FunctionFactory::instance(); - std::ifstream ifs("/Users/neng.liu/Documents/GitHub/ClickHouse/utils/local-engine/table.json"); - IStreamWrapper isw(ifs); - - Document d; - d.ParseStream(isw); - auto cols = getColumns(d); - auto header = getTableHeader(*cols); - - QueryPlan query_plan; - auto file = "/Users/neng.liu/Documents/GitHub/ClickHouse/utils/local-engine/table.csv"; - auto buf = std::make_unique(file); - - auto source = getSource(*buf, header); - - std::unique_ptr query_pipelines = std::make_unique(); - auto source_step = std::make_unique(Pipe(source), "CSV"); - 
query_plan.addStep(std::move(source_step)); - - auto filter = buildFilter(header, global_context); - query_plan.addStep(std::move(filter)); - buildAgg(header, query_plan, global_context); - QueryPlanOptimizationSettings optimization_settings{.optimize_plan=false}; - auto query_pipline = query_plan.buildQueryPipeline(optimization_settings, BuildQueryPipelineSettings()); - - auto buffer = WriteBufferFromFile("/Users/neng.liu/Documents/GitHub/ClickHouse/output.txt"); - auto output = std::make_shared(buffer, query_pipline->getHeader(), true, RowOutputFormatParams(), FormatSettings()); - query_pipline->setOutputFormat(output); - auto executor = query_pipline->execute(); - executor->execute(1); + auto plan = io::substrait::Plan(); + plan.add_relations()->read(); + auto table = plan.mutable_relations(0); + auto local_files = table->mutable_read()->mutable_local_files(); + auto file = io::substrait::ReadRel_LocalFiles_FileOrFiles(); + file.set_uri_path("test.txt"); + local_files->mutable_items()->Add(std::move(file)); + std::cout << plan.SerializeAsString(); + +// auto shared_context = Context::createShared(); +// auto global_context = Context::createGlobal(shared_context.get()); +// registerAllFunctions(); +// auto & factory = FunctionFactory::instance(); +// std::ifstream ifs("/Users/neng.liu/Documents/GitHub/ClickHouse/utils/local-engine/table.json"); +// IStreamWrapper isw(ifs); +// +// Document d; +// d.ParseStream(isw); +// auto cols = getColumns(d); +// auto header = getTableHeader(*cols); +// +// QueryPlan query_plan; +// auto file = "/Users/neng.liu/Documents/GitHub/ClickHouse/utils/local-engine/table.csv"; +// auto buf = std::make_unique(file); +// +// auto source = getSource(*buf, header); +// +// std::unique_ptr query_pipelines = std::make_unique(); +// auto source_step = std::make_unique(Pipe(source), "CSV"); +// query_plan.addStep(std::move(source_step)); +// +// auto filter = buildFilter(header, global_context); +// query_plan.addStep(std::move(filter)); +// buildAgg(header, query_plan, global_context); +// QueryPlanOptimizationSettings optimization_settings{.optimize_plan=false}; +// auto query_pipline = query_plan.buildQueryPipeline(optimization_settings, BuildQueryPipelineSettings()); +// +// auto buffer = WriteBufferFromFile("/Users/neng.liu/Documents/GitHub/ClickHouse/output.txt"); +// auto output = std::make_shared(buffer, query_pipline->getHeader(), true, RowOutputFormatParams(), FormatSettings()); +// query_pipline->setOutputFormat(output); +// auto executor = query_pipline->execute(); +// executor->execute(1); } // auto col = ColumnUInt8::create(1, 1); From 7f391c91c7fd1c144217a238833da2d83fccb9fd Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 3 Nov 2021 13:17:10 +0300 Subject: [PATCH 158/472] Backport #31001 to 21.9: Fix segfault in formatRow function --- src/Functions/formatRow.cpp | 7 +++++++ tests/queries/0_stateless/02113_format_row_bug.reference | 0 tests/queries/0_stateless/02113_format_row_bug.sql | 6 ++++++ 3 files changed, 13 insertions(+) create mode 100644 tests/queries/0_stateless/02113_format_row_bug.reference create mode 100644 tests/queries/0_stateless/02113_format_row_bug.sql diff --git a/src/Functions/formatRow.cpp b/src/Functions/formatRow.cpp index e4389fa45f61..b728f22f1112 100644 --- a/src/Functions/formatRow.cpp +++ b/src/Functions/formatRow.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include @@ -19,6 +20,7 @@ namespace ErrorCodes extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; 
extern const int UNKNOWN_FORMAT; + extern const int BAD_ARGUMENTS; } namespace @@ -71,6 +73,11 @@ class FunctionFormatRow : public IFunction writeChar('\0', buffer); offsets[row] = buffer.count(); }); + + /// This function make sense only for row output formats. + if (!dynamic_cast(out.get())) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot turn rows into a {} format strings. {} function supports only row output formats", format_name, getName()); + out->write(arg_columns); return col_str; } diff --git a/tests/queries/0_stateless/02113_format_row_bug.reference b/tests/queries/0_stateless/02113_format_row_bug.reference new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/queries/0_stateless/02113_format_row_bug.sql b/tests/queries/0_stateless/02113_format_row_bug.sql new file mode 100644 index 000000000000..c2144ca1537b --- /dev/null +++ b/tests/queries/0_stateless/02113_format_row_bug.sql @@ -0,0 +1,6 @@ +-- Tags: no-fasttest + +select formatRow('ORC', number, toDate(number)) from numbers(5); -- { serverError 36 } +select formatRow('Parquet', number, toDate(number)) from numbers(5); -- { serverError 36 } +select formatRow('Arrow', number, toDate(number)) from numbers(5); -- { serverError 36 } +select formatRow('Native', number, toDate(number)) from numbers(5); -- { serverError 36 } From 5bd0f8790c0ede1b0e38c1bb89481caecea2d50f Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Mon, 8 Nov 2021 09:36:07 +0300 Subject: [PATCH 159/472] Backport #31003 to 21.9: Allow spaces in JSONPath. --- .../JSONPath/Parsers/ParserJSONPathMemberAccess.cpp | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/src/Functions/JSONPath/Parsers/ParserJSONPathMemberAccess.cpp b/src/Functions/JSONPath/Parsers/ParserJSONPathMemberAccess.cpp index c7f047eb8fbd..0ea3bfbd0135 100644 --- a/src/Functions/JSONPath/Parsers/ParserJSONPathMemberAccess.cpp +++ b/src/Functions/JSONPath/Parsers/ParserJSONPathMemberAccess.cpp @@ -17,22 +17,17 @@ namespace DB bool ParserJSONPathMemberAccess::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { if (pos->type != TokenType::Dot) - { return false; - } + ++pos; - if (pos->type != TokenType::BareWord) - { + if (pos->type != TokenType::BareWord && pos->type !=TokenType::QuotedIdentifier) return false; - } ParserIdentifier name_p; ASTPtr member_name; if (!name_p.parse(pos, member_name, expected)) - { return false; - } auto member_access = std::make_shared(); node = member_access; From f1e188381e14cf53693518a7ccd31c9522fa78de Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Fri, 20 Aug 2021 13:25:07 +0300 Subject: [PATCH 160/472] Merge pull request #27743 from nikitamikhaylov/lz4-checks Better code around decompression [2] (cherry picked from commit ea3f7bee97587243d5e18fdf05f5dde881099af2) --- src/Compression/LZ4_decompress_faster.cpp | 24 +++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/src/Compression/LZ4_decompress_faster.cpp b/src/Compression/LZ4_decompress_faster.cpp index 6972457f11b8..28a285f00f4b 100644 --- a/src/Compression/LZ4_decompress_faster.cpp +++ b/src/Compression/LZ4_decompress_faster.cpp @@ -439,11 +439,14 @@ bool NO_INLINE decompressImpl( { s = *ip++; length += s; - } while (unlikely(s == 255)); + } while (unlikely(s == 255 && ip < input_end)); }; /// Get literal length. 
+ if (unlikely(ip >= input_end)) + return false; + const unsigned token = *ip++; length = token >> 4; if (length == 0x0F) @@ -464,18 +467,18 @@ bool NO_INLINE decompressImpl( /// output: xyzHello, w /// ^-op (we will overwrite excessive bytes on next iteration) - { - auto * target = std::min(copy_end, output_end); - wildCopy(op, ip, target); /// Here we can write up to copy_amount - 1 bytes after buffer. + if (unlikely(copy_end > output_end)) + return false; - if (target == output_end) - return true; - } + wildCopy(op, ip, copy_end); /// Here we can write up to copy_amount - 1 bytes after buffer. + + if (copy_end == output_end) + return true; ip += length; op = copy_end; - if (unlikely(ip > input_end)) + if (unlikely(ip + 1 >= input_end)) return false; /// Get match offset. @@ -528,8 +531,9 @@ bool NO_INLINE decompressImpl( copy(op, match); /// copy_amount + copy_amount - 1 - 4 * 2 bytes after buffer. if (length > copy_amount * 2) { - auto * target = std::min(copy_end, output_end); - wildCopy(op + copy_amount, match + copy_amount, target); + if (unlikely(copy_end > output_end)) + return false; + wildCopy(op + copy_amount, match + copy_amount, copy_end); } op = copy_end; From 287c7f6cb139137f53b5f6b0305bba016bcf951d Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Mon, 8 Nov 2021 16:12:26 +0300 Subject: [PATCH 161/472] Backport #30995 to 21.9: Skip max_partition_size_to_drop check in case of ATTACH PARTITION ... FROM --- src/Storages/MergeTree/MergeTreeData.cpp | 4 +- .../__init__.py | 0 .../configs/config.xml | 4 ++ .../test.py | 50 +++++++++++++++++++ 4 files changed, 56 insertions(+), 2 deletions(-) create mode 100644 tests/integration/test_attach_partition_with_large_destination/__init__.py create mode 100644 tests/integration/test_attach_partition_with_large_destination/configs/config.xml create mode 100644 tests/integration/test_attach_partition_with_large_destination/test.py diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 386ee3ea3fc5..9bcd1555812f 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -3141,7 +3141,6 @@ Pipe MergeTreeData::alterPartition( case PartitionCommand::MoveDestinationType::TABLE: { - checkPartitionCanBeDropped(command.partition); String dest_database = query_context->resolveDatabase(command.to_database); auto dest_storage = DatabaseCatalog::instance().getTable({dest_database, command.to_table}, query_context); movePartitionToTable(dest_storage, command.partition, query_context); @@ -3163,7 +3162,8 @@ Pipe MergeTreeData::alterPartition( case PartitionCommand::REPLACE_PARTITION: { - checkPartitionCanBeDropped(command.partition); + if (command.replace) + checkPartitionCanBeDropped(command.partition); String from_database = query_context->resolveDatabase(command.from_database); auto from_storage = DatabaseCatalog::instance().getTable({from_database, command.from_table}, query_context); replacePartitionFrom(from_storage, command.partition, command.replace, query_context); diff --git a/tests/integration/test_attach_partition_with_large_destination/__init__.py b/tests/integration/test_attach_partition_with_large_destination/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/integration/test_attach_partition_with_large_destination/configs/config.xml b/tests/integration/test_attach_partition_with_large_destination/configs/config.xml new file mode 100644 index 000000000000..0500e2ad5542 --- /dev/null +++ 
b/tests/integration/test_attach_partition_with_large_destination/configs/config.xml @@ -0,0 +1,4 @@ + + 1 + 1 + diff --git a/tests/integration/test_attach_partition_with_large_destination/test.py b/tests/integration/test_attach_partition_with_large_destination/test.py new file mode 100644 index 000000000000..50f24f7a01e5 --- /dev/null +++ b/tests/integration/test_attach_partition_with_large_destination/test.py @@ -0,0 +1,50 @@ +import pytest + +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) +node = cluster.add_instance('node', main_configs=["configs/config.xml"], with_zookeeper=True) + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + +def create_force_drop_flag(node): + force_drop_flag_path = "/var/lib/clickhouse/flags/force_drop_table" + node.exec_in_container(["bash", "-c", "touch {} && chmod a=rw {}".format(force_drop_flag_path, force_drop_flag_path)], user="root") + +@pytest.mark.parametrize("engine", ['Ordinary', 'Atomic']) +def test_attach_partition_with_large_destination(started_cluster, engine): + # Initialize + node.query("CREATE DATABASE db ENGINE={}".format(engine)) + node.query("CREATE TABLE db.destination (n UInt64) ENGINE=ReplicatedMergeTree('/test/destination', 'r1') ORDER BY n PARTITION BY n % 2") + node.query("CREATE TABLE db.source_1 (n UInt64) ENGINE=ReplicatedMergeTree('/test/source_1', 'r1') ORDER BY n PARTITION BY n % 2") + node.query("INSERT INTO db.source_1 VALUES (1), (2), (3), (4)") + node.query("CREATE TABLE db.source_2 (n UInt64) ENGINE=ReplicatedMergeTree('/test/source_2', 'r1') ORDER BY n PARTITION BY n % 2") + node.query("INSERT INTO db.source_2 VALUES (5), (6), (7), (8)") + + # Attach partition when destination partition is empty + node.query("ALTER TABLE db.destination ATTACH PARTITION 0 FROM db.source_1") + assert node.query("SELECT n FROM db.destination ORDER BY n") == "2\n4\n" + + # REPLACE PARTITION should still respect max_partition_size_to_drop + assert node.query_and_get_error("ALTER TABLE db.destination REPLACE PARTITION 0 FROM db.source_2") + assert node.query("SELECT n FROM db.destination ORDER BY n") == "2\n4\n" + + # Attach partition when destination partition is larger than max_partition_size_to_drop + node.query("ALTER TABLE db.destination ATTACH PARTITION 0 FROM db.source_2") + assert node.query("SELECT n FROM db.destination ORDER BY n") == "2\n4\n6\n8\n" + + # Cleanup + create_force_drop_flag(node) + node.query("DROP TABLE db.source_1 SYNC") + create_force_drop_flag(node) + node.query("DROP TABLE db.source_2 SYNC") + create_force_drop_flag(node) + node.query("DROP TABLE db.destination SYNC") + node.query("DROP DATABASE db") \ No newline at end of file From f2f9a3ba1721bd4f3a5e33cc77d8cafa58b3761c Mon Sep 17 00:00:00 2001 From: tavplubix Date: Tue, 9 Nov 2021 00:57:59 +0300 Subject: [PATCH 162/472] Update config.xml --- .../configs/config.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_attach_partition_with_large_destination/configs/config.xml b/tests/integration/test_attach_partition_with_large_destination/configs/config.xml index 0500e2ad5542..e5f133953a6d 100644 --- a/tests/integration/test_attach_partition_with_large_destination/configs/config.xml +++ b/tests/integration/test_attach_partition_with_large_destination/configs/config.xml @@ -1,4 +1,4 @@ - + 1 1 - + From b44a4e87275055f09348e1c2976ecee2a767c11d Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 9 
Nov 2021 20:22:22 +0000 Subject: [PATCH 163/472] Backport #31042 to 21.9: Add additional hdfs url check --- src/Disks/HDFS/DiskHDFS.cpp | 3 +++ src/Storages/HDFS/HDFSCommon.cpp | 8 ++++++ src/Storages/HDFS/HDFSCommon.h | 4 +++ src/Storages/HDFS/StorageHDFS.cpp | 1 + .../test_disk_types/configs/storage.xml | 2 +- tests/integration/test_storage_hdfs/test.py | 2 +- .../0_stateless/02114_hdfs_bad_url.reference | 17 ++++++++++++ .../queries/0_stateless/02114_hdfs_bad_url.sh | 26 +++++++++++++++++++ 8 files changed, 61 insertions(+), 2 deletions(-) create mode 100644 tests/queries/0_stateless/02114_hdfs_bad_url.reference create mode 100755 tests/queries/0_stateless/02114_hdfs_bad_url.sh diff --git a/src/Disks/HDFS/DiskHDFS.cpp b/src/Disks/HDFS/DiskHDFS.cpp index 4eb43eaf7b59..b0373c0bc3ca 100644 --- a/src/Disks/HDFS/DiskHDFS.cpp +++ b/src/Disks/HDFS/DiskHDFS.cpp @@ -2,6 +2,8 @@ #include #include +#include + #include #include #include @@ -185,6 +187,7 @@ void registerDiskHDFS(DiskFactory & factory) fs::create_directories(disk); String uri{config.getString(config_prefix + ".endpoint")}; + checkHDFSURL(uri); if (uri.back() != '/') throw Exception(ErrorCodes::BAD_ARGUMENTS, "HDFS path must ends with '/', but '{}' doesn't.", uri); diff --git a/src/Storages/HDFS/HDFSCommon.cpp b/src/Storages/HDFS/HDFSCommon.cpp index d7e57a0f9ebb..d7dfeff5ec54 100644 --- a/src/Storages/HDFS/HDFSCommon.cpp +++ b/src/Storages/HDFS/HDFSCommon.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #if USE_HDFS #include @@ -21,6 +22,7 @@ namespace ErrorCodes } const String HDFSBuilderWrapper::CONFIG_PREFIX = "hdfs"; +const String HDFS_URL_REGEXP = "^hdfs://[^:/]*:[0-9]*/.*"; void HDFSBuilderWrapper::loadFromConfig(const Poco::Util::AbstractConfiguration & config, const String & config_path, bool isUser) @@ -191,6 +193,12 @@ HDFSFSPtr createHDFSFS(hdfsBuilder * builder) return fs; } +void checkHDFSURL(const String & url) +{ + if (!re2::RE2::FullMatch(url, HDFS_URL_REGEXP)) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Bad hdfs url: {}. 
It should have structure 'hdfs://:/'", url); +} + } #endif diff --git a/src/Storages/HDFS/HDFSCommon.h b/src/Storages/HDFS/HDFSCommon.h index 5c70a8997c33..b564f2ff9fdf 100644 --- a/src/Storages/HDFS/HDFSCommon.h +++ b/src/Storages/HDFS/HDFSCommon.h @@ -100,5 +100,9 @@ using HDFSFSPtr = std::unique_ptr, detail::HDFSFsD HDFSBuilderWrapper createHDFSBuilder(const String & uri_str, const Poco::Util::AbstractConfiguration &); HDFSFSPtr createHDFSFS(hdfsBuilder * builder); +/// Check that url satisfy structure 'hdfs://:/' +/// and throw exception if it doesn't; +void checkHDFSURL(const String & url); + } #endif diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp index 9600eb975b4c..c49ce0dfa1e7 100644 --- a/src/Storages/HDFS/StorageHDFS.cpp +++ b/src/Storages/HDFS/StorageHDFS.cpp @@ -49,6 +49,7 @@ StorageHDFS::StorageHDFS( : IStorage(table_id_), WithContext(context_), uri(uri_), format_name(format_name_), compression_method(compression_method_) { context_->getRemoteHostFilter().checkURL(Poco::URI(uri)); + checkHDFSURL(uri); StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns(columns_); diff --git a/tests/integration/test_disk_types/configs/storage.xml b/tests/integration/test_disk_types/configs/storage.xml index 4d8050c050c4..def8edc21794 100644 --- a/tests/integration/test_disk_types/configs/storage.xml +++ b/tests/integration/test_disk_types/configs/storage.xml @@ -13,7 +13,7 @@ hdfs - http://hdfs1:9000/data/ + hdfs://hdfs1:9000/data/ encrypted diff --git a/tests/integration/test_storage_hdfs/test.py b/tests/integration/test_storage_hdfs/test.py index f3c83166b46a..a6ba01fdb907 100644 --- a/tests/integration/test_storage_hdfs/test.py +++ b/tests/integration/test_storage_hdfs/test.py @@ -100,7 +100,7 @@ def test_bad_hdfs_uri(started_cluster): "create table BadStorage1 (id UInt32, name String, weight Float64) ENGINE = HDFS('hads:hgsdfs100500:9000/other_storage', 'TSV')") except Exception as ex: print(ex) - assert "Illegal HDFS URI" in str(ex) + assert "Bad hdfs url" in str(ex) try: node1.query( "create table BadStorage2 (id UInt32, name String, weight Float64) ENGINE = HDFS('hdfs://hdfs100500:9000/other_storage', 'TSV')") diff --git a/tests/queries/0_stateless/02114_hdfs_bad_url.reference b/tests/queries/0_stateless/02114_hdfs_bad_url.reference new file mode 100644 index 000000000000..a588883cf708 --- /dev/null +++ b/tests/queries/0_stateless/02114_hdfs_bad_url.reference @@ -0,0 +1,17 @@ +OK +OK +OK +OK +OK +OK +OK +OK +OK +OK +OK +OK +OK +OK +OK +OK +OK diff --git a/tests/queries/0_stateless/02114_hdfs_bad_url.sh b/tests/queries/0_stateless/02114_hdfs_bad_url.sh new file mode 100755 index 000000000000..5117568b67f1 --- /dev/null +++ b/tests/queries/0_stateless/02114_hdfs_bad_url.sh @@ -0,0 +1,26 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + + +$CLICKHOUSE_CLIENT -q "SELECT * FROM hdfs('abcd', 'CSV', 'x UInt32')" 2>&1 | grep -F -q "BAD_ARGUMENTS" && echo 'OK' || echo 'FAIL'; +$CLICKHOUSE_CLIENT -q "SELECT * FROM hdfs('abcd/', 'CSV', 'x UInt32')" 2>&1 | grep -F -q "BAD_ARGUMENTS" && echo 'OK' || echo 'FAIL'; +$CLICKHOUSE_CLIENT -q "SELECT * FROM hdfs('//abcd', 'CSV', 'x UInt32')" 2>&1 | grep -F -q "BAD_ARGUMENTS" && echo 'OK' || echo 'FAIL'; +$CLICKHOUSE_CLIENT -q "SELECT * FROM hdfs('//abcd/', 'CSV', 'x UInt32')" 2>&1 | grep -F -q "BAD_ARGUMENTS" && echo 'OK' || echo 'FAIL'; +$CLICKHOUSE_CLIENT -q "SELECT * FROM hdfs('//abcd/data', 'CSV', 'x UInt32')" 2>&1 | grep -F -q "BAD_ARGUMENTS" && echo 'OK' || echo 'FAIL'; +$CLICKHOUSE_CLIENT -q "SELECT * FROM hdfs('://abcd', 'CSV', 'x UInt32')" 2>&1 | grep -F -q "BAD_ARGUMENTS" && echo 'OK' || echo 'FAIL'; +$CLICKHOUSE_CLIENT -q "SELECT * FROM hdfs('://abcd/data', 'CSV', 'x UInt32')" 2>&1 | grep -F -q "BAD_ARGUMENTS" && echo 'OK' || echo 'FAIL'; +$CLICKHOUSE_CLIENT -q "SELECT * FROM hdfs('abcd:9000', 'CSV', 'x UInt32')" 2>&1 | grep -F -q "BAD_ARGUMENTS" && echo 'OK' || echo 'FAIL'; +$CLICKHOUSE_CLIENT -q "SELECT * FROM hdfs('abcd:9000/data', 'CSV', 'x UInt32')" 2>&1 | grep -F -q "BAD_ARGUMENTS" && echo 'OK' || echo 'FAIL'; +$CLICKHOUSE_CLIENT -q "SELECT * FROM hdfs('//abcd:9000/data', 'CSV', 'x UInt32')" 2>&1 | grep -F -q "BAD_ARGUMENTS" && echo 'OK' || echo 'FAIL'; +$CLICKHOUSE_CLIENT -q "SELECT * FROM hdfs('://abcd:9000/data', 'CSV', 'x UInt32')" 2>&1 | grep -F -q "BAD_ARGUMENTS" && echo 'OK' || echo 'FAIL'; +$CLICKHOUSE_CLIENT -q "SELECT * FROM hdfs('abcd/', 'CSV', 'x UInt32')" 2>&1 | grep -F -q "BAD_ARGUMENTS" && echo 'OK' || echo 'FAIL'; +$CLICKHOUSE_CLIENT -q "SELECT * FROM hdfs('hdfs://abcd', 'CSV', 'x UInt32')" 2>&1 | grep -F -q "BAD_ARGUMENTS" && echo 'OK' || echo 'FAIL'; +$CLICKHOUSE_CLIENT -q "SELECT * FROM hdfs('hdfs1:9000/data', 'CSV', 'x UInt32')" 2>&1 | grep -F -q "BAD_ARGUMENTS" && echo 'OK' || echo 'FAIL'; +$CLICKHOUSE_CLIENT -q "SELECT * FROM hdfs('hdfs://hdfs1/data', 'CSV', 'x UInt32')" 2>&1 | grep -F -q "BAD_ARGUMENTS" && echo 'OK' || echo 'FAIL'; +$CLICKHOUSE_CLIENT -q "SELECT * FROM hdfs('http://hdfs1:9000/data', 'CSV', 'x UInt32')" 2>&1 | grep -F -q "BAD_ARGUMENTS" && echo 'OK' || echo 'FAIL'; +$CLICKHOUSE_CLIENT -q "SELECT * FROM hdfs('hdfs://hdfs1/abcd:9000/data', 'CSV', 'x UInt32')" 2>&1 | grep -F -q "BAD_ARGUMENTS" && echo 'OK' || echo 'FAIL'; + From 0880d5c69b20a348e2f0749974c14f57820b1d99 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 10 Nov 2021 16:03:27 +0000 Subject: [PATCH 164/472] Backport #31150 to 21.9: Fix bug in Keeper when some logs was lost --- src/Coordination/Changelog.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/Coordination/Changelog.cpp b/src/Coordination/Changelog.cpp index 9b89ab22fa28..70ca5973fa75 100644 --- a/src/Coordination/Changelog.cpp +++ b/src/Coordination/Changelog.cpp @@ -349,6 +349,14 @@ void Changelog::readChangelogAndInitWriter(uint64_t last_commited_log_index, uin min_log_id = last_commited_log_index; max_log_id = last_commited_log_index == 0 ? 0 : last_commited_log_index - 1; } + else if (last_commited_log_index != 0 && max_log_id < last_commited_log_index - 1) /// If we have more fresh snapshot than our logs + { + LOG_WARNING(log, "Our most fresh log_id {} is smaller than stored data in snapshot {}. It can indicate data loss. 
Removing outdated logs.", max_log_id, last_commited_log_index - 1); + + removeAllLogs(); + min_log_id = last_commited_log_index; + max_log_id = last_commited_log_index - 1; + } else if (last_log_is_not_complete) /// if it's complete just start new one { assert(last_log_read_result != std::nullopt); From 6da478028c7fb440249602a82c81a6f90db00c23 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 11 Nov 2021 09:54:56 +0000 Subject: [PATCH 165/472] Backport #30965 to 21.9: Fix cases when intersect / except is not checked --- src/Interpreters/AddDefaultDatabaseVisitor.h | 21 ++++++- src/Interpreters/ApplyWithGlobalVisitor.cpp | 30 ++++++++- src/Interpreters/ApplyWithGlobalVisitor.h | 2 + .../InterpreterSelectIntersectExceptQuery.cpp | 2 +- src/Interpreters/InterpreterSelectQuery.cpp | 62 ++++++++----------- .../InterpreterSelectWithUnionQuery.cpp | 4 +- src/Interpreters/PredicateRewriteVisitor.cpp | 42 +++++++++++-- src/Interpreters/PredicateRewriteVisitor.h | 6 ++ src/Interpreters/ProcessList.cpp | 1 + src/Parsers/ASTSelectIntersectExceptQuery.cpp | 21 ++++++- src/Parsers/ASTSelectIntersectExceptQuery.h | 14 +++-- ...02004_intersect_except_operators.reference | 5 ++ .../02004_intersect_except_operators.sql | 4 ++ 13 files changed, 161 insertions(+), 53 deletions(-) diff --git a/src/Interpreters/AddDefaultDatabaseVisitor.h b/src/Interpreters/AddDefaultDatabaseVisitor.h index e3e25b714ea1..e1092b74e0f2 100644 --- a/src/Interpreters/AddDefaultDatabaseVisitor.h +++ b/src/Interpreters/AddDefaultDatabaseVisitor.h @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -84,7 +85,12 @@ class AddDefaultDatabaseVisitor void visit(ASTSelectWithUnionQuery & select, ASTPtr &) const { for (auto & child : select.list_of_selects->children) - tryVisit(child); + { + if (child->as()) + tryVisit(child); + else if (child->as()) + tryVisit(child); + } } void visit(ASTSelectQuery & select, ASTPtr &) const @@ -95,6 +101,19 @@ class AddDefaultDatabaseVisitor visitChildren(select); } + void visit(ASTSelectIntersectExceptQuery & select, ASTPtr &) const + { + for (auto & child : select.getListOfSelects()) + { + if (child->as()) + tryVisit(child); + else if (child->as()) + tryVisit(child); + else if (child->as()) + tryVisit(child); + } + } + void visit(ASTTablesInSelectQuery & tables, ASTPtr &) const { for (auto & child : tables.children) diff --git a/src/Interpreters/ApplyWithGlobalVisitor.cpp b/src/Interpreters/ApplyWithGlobalVisitor.cpp index df238e27abf7..1ac2b19a04e2 100644 --- a/src/Interpreters/ApplyWithGlobalVisitor.cpp +++ b/src/Interpreters/ApplyWithGlobalVisitor.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include namespace DB @@ -40,6 +41,31 @@ void ApplyWithGlobalVisitor::visit( { visit(*node_select, exprs, with_expression_list); } + else if (ASTSelectIntersectExceptQuery * node_intersect_except = select->as()) + { + visit(*node_intersect_except, exprs, with_expression_list); + } + } +} + +void ApplyWithGlobalVisitor::visit( + ASTSelectIntersectExceptQuery & selects, const std::map & exprs, const ASTPtr & with_expression_list) +{ + auto selects_list = selects.getListOfSelects(); + for (auto & select : selects_list) + { + if (ASTSelectWithUnionQuery * node_union = select->as()) + { + visit(*node_union, exprs, with_expression_list); + } + else if (ASTSelectQuery * node_select = select->as()) + { + visit(*node_select, exprs, with_expression_list); + } + else if (ASTSelectIntersectExceptQuery * node_intersect_except = select->as()) + { + 
visit(*node_intersect_except, exprs, with_expression_list); + } } } @@ -47,7 +73,7 @@ void ApplyWithGlobalVisitor::visit(ASTPtr & ast) { if (ASTSelectWithUnionQuery * node_union = ast->as()) { - if (auto * first_select = node_union->list_of_selects->children[0]->as()) + if (auto * first_select = typeid_cast(node_union->list_of_selects->children[0].get())) { ASTPtr with_expression_list = first_select->with(); if (with_expression_list) @@ -64,6 +90,8 @@ void ApplyWithGlobalVisitor::visit(ASTPtr & ast) visit(*union_child, exprs, with_expression_list); else if (auto * select_child = (*it)->as()) visit(*select_child, exprs, with_expression_list); + else if (auto * intersect_except_child = (*it)->as()) + visit(*intersect_except_child, exprs, with_expression_list); } } } diff --git a/src/Interpreters/ApplyWithGlobalVisitor.h b/src/Interpreters/ApplyWithGlobalVisitor.h index a42203c68ef2..2f7c554da406 100644 --- a/src/Interpreters/ApplyWithGlobalVisitor.h +++ b/src/Interpreters/ApplyWithGlobalVisitor.h @@ -8,6 +8,7 @@ namespace DB class ASTSelectWithUnionQuery; class ASTSelectQuery; +class ASTSelectIntersectExceptQuery; /// Pull out the WITH statement from the first child of ASTSelectWithUnion query if any. class ApplyWithGlobalVisitor @@ -18,6 +19,7 @@ class ApplyWithGlobalVisitor private: static void visit(ASTSelectWithUnionQuery & selects, const std::map & exprs, const ASTPtr & with_expression_list); static void visit(ASTSelectQuery & select, const std::map & exprs, const ASTPtr & with_expression_list); + static void visit(ASTSelectIntersectExceptQuery & select, const std::map & exprs, const ASTPtr & with_expression_list); }; } diff --git a/src/Interpreters/InterpreterSelectIntersectExceptQuery.cpp b/src/Interpreters/InterpreterSelectIntersectExceptQuery.cpp index 9c8dda56b44f..42f48c48d2b0 100644 --- a/src/Interpreters/InterpreterSelectIntersectExceptQuery.cpp +++ b/src/Interpreters/InterpreterSelectIntersectExceptQuery.cpp @@ -56,7 +56,7 @@ InterpreterSelectIntersectExceptQuery::InterpreterSelectIntersectExceptQuery( ASTSelectIntersectExceptQuery * ast = query_ptr->as(); final_operator = ast->final_operator; - const auto & children = ast->children; + const auto & children = ast->getListOfSelects(); size_t num_children = children.size(); /// AST must have been changed by the visitor. diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index f5a9c1f9fefa..fff4325a66aa 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -872,52 +872,44 @@ static bool hasWithTotalsInAnySubqueryInFromClause(const ASTSelectQuery & query) return true; /** NOTE You can also check that the table in the subquery is distributed, and that it only looks at one shard. - * In other cases, totals will be computed on the initiating server of the query, and it is not necessary to read the data to the end. - */ - + * In other cases, totals will be computed on the initiating server of the query, and it is not necessary to read the data to the end. + */ if (auto query_table = extractTableExpression(query, 0)) { if (const auto * ast_union = query_table->as()) { - for (const auto & elem : ast_union->list_of_selects->children) + /** NOTE + * 1. For ASTSelectWithUnionQuery after normalization for union child node the height of the AST tree is at most 2. + * 2. 
For ASTSelectIntersectExceptQuery after normalization in case there are intersect or except nodes, + * the height of the AST tree can have any depth (each intersect/except adds a level), but the + * number of children in those nodes is always 2. + */ + std::function traverse_recursively = [&](ASTPtr child_ast) -> bool { - /// After normalization for union child node the height of the AST tree is at most 2. - if (const auto * child_union = elem->as()) + if (const auto * select_child = child_ast->as ()) { - for (const auto & child_elem : child_union->list_of_selects->children) - if (hasWithTotalsInAnySubqueryInFromClause(child_elem->as())) - return true; + if (hasWithTotalsInAnySubqueryInFromClause(select_child->as())) + return true; } - /// After normalization in case there are intersect or except nodes, the height of - /// the AST tree can have any depth (each intersect/except adds a level), but the - /// number of children in those nodes is always 2. - else if (elem->as()) + else if (const auto * union_child = child_ast->as()) { - std::function traverse_recursively = [&](ASTPtr child_ast) -> bool - { - if (const auto * child = child_ast->as ()) - return hasWithTotalsInAnySubqueryInFromClause(child->as()); - - if (const auto * child = child_ast->as()) - for (const auto & subchild : child->list_of_selects->children) - if (traverse_recursively(subchild)) - return true; - - if (const auto * child = child_ast->as()) - for (const auto & subchild : child->children) - if (traverse_recursively(subchild)) - return true; - return false; - }; - if (traverse_recursively(elem)) - return true; + for (const auto & subchild : union_child->list_of_selects->children) + if (traverse_recursively(subchild)) + return true; } - else + else if (const auto * intersect_child = child_ast->as()) { - if (hasWithTotalsInAnySubqueryInFromClause(elem->as())) - return true; + auto selects = intersect_child->getListOfSelects(); + for (const auto & subchild : selects) + if (traverse_recursively(subchild)) + return true; } - } + return false; + }; + + for (const auto & elem : ast_union->list_of_selects->children) + if (traverse_recursively(elem)) + return true; } } diff --git a/src/Interpreters/InterpreterSelectWithUnionQuery.cpp b/src/Interpreters/InterpreterSelectWithUnionQuery.cpp index b7494a6c965c..dffb5f30cff4 100644 --- a/src/Interpreters/InterpreterSelectWithUnionQuery.cpp +++ b/src/Interpreters/InterpreterSelectWithUnionQuery.cpp @@ -86,7 +86,9 @@ InterpreterSelectWithUnionQuery::InterpreterSelectWithUnionQuery( if (num_children == 1 && settings_limit_offset_needed) { const ASTPtr first_select_ast = ast->list_of_selects->children.at(0); - ASTSelectQuery * select_query = first_select_ast->as(); + ASTSelectQuery * select_query = dynamic_cast(first_select_ast.get()); + if (!select_query) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid type in list_of_selects: {}", first_select_ast->getID()); if (!select_query->withFill() && !select_query->limit_with_ties) { diff --git a/src/Interpreters/PredicateRewriteVisitor.cpp b/src/Interpreters/PredicateRewriteVisitor.cpp index 0f2a11e6ff17..b3425750b56f 100644 --- a/src/Interpreters/PredicateRewriteVisitor.cpp +++ b/src/Interpreters/PredicateRewriteVisitor.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -37,13 +38,44 @@ void PredicateRewriteVisitorData::visit(ASTSelectWithUnionQuery & union_select_q for (size_t index = 0; index < internal_select_list.size(); ++index) { if (auto * child_union = internal_select_list[index]->as()) + { 
visit(*child_union, internal_select_list[index]); - else + } + else if (auto * child_select = internal_select_list[index]->as()) + { + visitInternalSelect(index, *child_select, internal_select_list[index]); + } + else if (auto * child_intersect_except = internal_select_list[index]->as()) + { + visit(*child_intersect_except, internal_select_list[index]); + } + } +} + +void PredicateRewriteVisitorData::visitInternalSelect(size_t index, ASTSelectQuery & select_node, ASTPtr & node) +{ + if (index == 0) + visitFirstInternalSelect(select_node, node); + else + visitOtherInternalSelect(select_node, node); +} + +void PredicateRewriteVisitorData::visit(ASTSelectIntersectExceptQuery & intersect_except_query, ASTPtr &) +{ + auto internal_select_list = intersect_except_query.getListOfSelects(); + for (size_t index = 0; index < internal_select_list.size(); ++index) + { + if (auto * union_node = internal_select_list[index]->as()) + { + visit(*union_node, internal_select_list[index]); + } + else if (auto * select_node = internal_select_list[index]->as()) + { + visitInternalSelect(index, *select_node, internal_select_list[index]); + } + else if (auto * intersect_node = internal_select_list[index]->as()) { - if (index == 0) - visitFirstInternalSelect(*internal_select_list[0]->as(), internal_select_list[0]); - else - visitOtherInternalSelect(*internal_select_list[index]->as(), internal_select_list[index]); + visit(*intersect_node, internal_select_list[index]); } } } diff --git a/src/Interpreters/PredicateRewriteVisitor.h b/src/Interpreters/PredicateRewriteVisitor.h index fc076464925f..e75127115960 100644 --- a/src/Interpreters/PredicateRewriteVisitor.h +++ b/src/Interpreters/PredicateRewriteVisitor.h @@ -10,6 +10,8 @@ namespace DB { +class ASTSelectIntersectExceptQuery; + class PredicateRewriteVisitorData : WithContext { public: @@ -40,7 +42,11 @@ class PredicateRewriteVisitorData : WithContext void visitOtherInternalSelect(ASTSelectQuery & select_query, ASTPtr &); + void visit(ASTSelectIntersectExceptQuery & intersect_except_query, ASTPtr &); + bool rewriteSubquery(ASTSelectQuery & subquery, const Names & inner_columns); + + void visitInternalSelect(size_t index, ASTSelectQuery & select_node, ASTPtr & node); }; using PredicateRewriteMatcher = OneTypeMatcher; diff --git a/src/Interpreters/ProcessList.cpp b/src/Interpreters/ProcessList.cpp index 06320f00dfa6..9d1c0f6d0479 100644 --- a/src/Interpreters/ProcessList.cpp +++ b/src/Interpreters/ProcessList.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Parsers/ASTSelectIntersectExceptQuery.cpp b/src/Parsers/ASTSelectIntersectExceptQuery.cpp index 3b9cb0a2c167..62eeefba3857 100644 --- a/src/Parsers/ASTSelectIntersectExceptQuery.cpp +++ b/src/Parsers/ASTSelectIntersectExceptQuery.cpp @@ -15,12 +15,10 @@ ASTPtr ASTSelectIntersectExceptQuery::clone() const res->children.push_back(child->clone()); res->final_operator = final_operator; - - cloneOutputOptions(*res); return res; } -void ASTSelectIntersectExceptQuery::formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const +void ASTSelectIntersectExceptQuery::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const { std::string indent_str = settings.one_line ? 
"" : std::string(4 * frame.indent, ' '); @@ -38,4 +36,21 @@ void ASTSelectIntersectExceptQuery::formatQueryImpl(const FormatSettings & setti } } +ASTs ASTSelectIntersectExceptQuery::getListOfSelects() const +{ + /** + * Because of normalization actual number of selects is 2. + * But this is checked in InterpreterSelectIntersectExceptQuery. + */ + ASTs selects; + for (const auto & child : children) + { + if (typeid_cast(child.get()) + || typeid_cast(child.get()) + || typeid_cast(child.get())) + selects.push_back(child); + } + return selects; +} + } diff --git a/src/Parsers/ASTSelectIntersectExceptQuery.h b/src/Parsers/ASTSelectIntersectExceptQuery.h index 97a8296ce2c3..86475fcba5cb 100644 --- a/src/Parsers/ASTSelectIntersectExceptQuery.h +++ b/src/Parsers/ASTSelectIntersectExceptQuery.h @@ -1,22 +1,18 @@ #pragma once -#include +#include namespace DB { -class ASTSelectIntersectExceptQuery : public ASTQueryWithOutput +class ASTSelectIntersectExceptQuery : public ASTSelectQuery { public: String getID(char) const override { return "SelectIntersectExceptQuery"; } ASTPtr clone() const override; - void formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; - - const char * getQueryKindString() const override { return "SelectIntersectExcept"; } - enum class Operator { UNKNOWN, @@ -24,6 +20,12 @@ class ASTSelectIntersectExceptQuery : public ASTQueryWithOutput EXCEPT }; + void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; + + const char * getQueryKindString() const override { return "SelectIntersectExcept"; } + + ASTs getListOfSelects() const; + /// Final operator after applying visitor. Operator final_operator = Operator::UNKNOWN; }; diff --git a/tests/queries/0_stateless/02004_intersect_except_operators.reference b/tests/queries/0_stateless/02004_intersect_except_operators.reference index 85559496f2f1..a96a6bc72649 100644 --- a/tests/queries/0_stateless/02004_intersect_except_operators.reference +++ b/tests/queries/0_stateless/02004_intersect_except_operators.reference @@ -134,3 +134,8 @@ UNION ALL SELECT 1 EXCEPT SELECT 4 +set limit=1; +select 1 intersect select 1; +1 +(((select 1) intersect select 1)); +1 diff --git a/tests/queries/0_stateless/02004_intersect_except_operators.sql b/tests/queries/0_stateless/02004_intersect_except_operators.sql index b95051cba65c..7ed756cc56b6 100644 --- a/tests/queries/0_stateless/02004_intersect_except_operators.sql +++ b/tests/queries/0_stateless/02004_intersect_except_operators.sql @@ -48,3 +48,7 @@ select 1 intersect select count() from (select 1 except select 2 intersect selec explain syntax select 1 intersect select 1; explain syntax select 1 except select 1; explain syntax select 1 union all select 2 except (select 2 except select 1 union all select 1) except select 4; + +set limit=1; +select 1 intersect select 1; +(((select 1) intersect select 1)); From 386e391cbac924d0241fc25609540c1f39421a1a Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 12 Nov 2021 22:00:10 +0000 Subject: [PATCH 166/472] Backport #31249 to 21.9: Fix SHOW GRANTS when partial revokes are used. 
--- src/Access/AccessRights.cpp | 40 +++++++------------ .../01073_grant_and_revoke.reference | 9 ++++- .../0_stateless/01073_grant_and_revoke.sql | 18 ++++++--- 3 files changed, 35 insertions(+), 32 deletions(-) diff --git a/src/Access/AccessRights.cpp b/src/Access/AccessRights.cpp index d4b2dc8a2522..aaca059767ea 100644 --- a/src/Access/AccessRights.cpp +++ b/src/Access/AccessRights.cpp @@ -23,37 +23,27 @@ namespace friend bool operator<(const ProtoElement & left, const ProtoElement & right) { - static constexpr auto compare_name = [](const boost::container::small_vector & left_name, - const boost::container::small_vector & right_name, - size_t i) + /// Compare components alphabetically. + size_t min_size = std::min(left.full_name.size(), right.full_name.size()); + for (size_t i = 0; i != min_size; ++i) { - if (i < left_name.size()) - { - if (i < right_name.size()) - return left_name[i].compare(right_name[i]); - else - return 1; /// left_name is longer => left_name > right_name - } - else if (i < right_name.size()) - return 1; /// right_name is longer => left < right - else - return 0; /// left_name == right_name - }; - - if (int cmp = compare_name(left.full_name, right.full_name, 0)) - return cmp < 0; + int cmp = left.full_name[i].compare(right.full_name[i]); + if (cmp != 0) + return cmp < 0; + } - if (int cmp = compare_name(left.full_name, right.full_name, 1)) - return cmp < 0; + /// Names with less number of components first. + if (left.full_name.size() != right.full_name.size()) + return left.full_name.size() < right.full_name.size(); + /// Grants before partial revokes. if (left.is_partial_revoke != right.is_partial_revoke) - return right.is_partial_revoke; + return right.is_partial_revoke; /// if left is grant, right is partial revoke, we assume left < right + /// Grants with grant option after other grants. + /// Revoke grant option after normal revokes. 
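A hedged illustration of the ordering the comparator above is meant to produce, using the statements this patch adds to 01073_grant_and_revoke.sql:

CREATE ROLE test_role_01073;
GRANT SELECT ON db1.* TO test_role_01073;
REVOKE SELECT(c1, c2, c3, c4, c5) ON db1.table1 FROM test_role_01073;
REVOKE SELECT(c1) ON db1.table2 FROM test_role_01073;
SHOW GRANTS FOR test_role_01073;
-- Expected output (per the updated .reference file): grants first, then partial revokes:
--   GRANT SELECT ON db1.* TO test_role_01073
--   REVOKE SELECT(c1, c2, c3, c4, c5) ON db1.table1 FROM test_role_01073
--   REVOKE SELECT(c1) ON db1.table2 FROM test_role_01073
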
if (left.grant_option != right.grant_option) - return right.grant_option; - - if (int cmp = compare_name(left.full_name, right.full_name, 2)) - return cmp < 0; + return right.grant_option; /// if left is without grant option, and right is with grant option, we assume left < right return (left.access_flags < right.access_flags); } diff --git a/tests/queries/0_stateless/01073_grant_and_revoke.reference b/tests/queries/0_stateless/01073_grant_and_revoke.reference index a19caf195330..449f21e896a2 100644 --- a/tests/queries/0_stateless/01073_grant_and_revoke.reference +++ b/tests/queries/0_stateless/01073_grant_and_revoke.reference @@ -1,11 +1,16 @@ -CREATE USER test_user_01073 A +CREATE USER test_user_01073 B +C GRANT INSERT, ALTER DELETE ON *.* TO test_user_01073 GRANT SELECT ON db1.* TO test_user_01073 GRANT SELECT ON db2.table TO test_user_01073 GRANT SELECT(col1) ON db3.table TO test_user_01073 GRANT SELECT(col1, col2) ON db4.table TO test_user_01073 -C +D GRANT ALTER DELETE ON *.* TO test_user_01073 GRANT SELECT(col1) ON db4.table TO test_user_01073 +E +GRANT SELECT ON db1.* TO test_role_01073 +REVOKE SELECT(c1, c2, c3, c4, c5) ON db1.table1 FROM test_role_01073 +REVOKE SELECT(c1) ON db1.table2 FROM test_role_01073 diff --git a/tests/queries/0_stateless/01073_grant_and_revoke.sql b/tests/queries/0_stateless/01073_grant_and_revoke.sql index ef40b26dde55..4cffd916e9f4 100644 --- a/tests/queries/0_stateless/01073_grant_and_revoke.sql +++ b/tests/queries/0_stateless/01073_grant_and_revoke.sql @@ -1,28 +1,36 @@ DROP USER IF EXISTS test_user_01073; +DROP ROLE IF EXISTS test_role_01073; +SELECT 'A'; CREATE USER test_user_01073; SHOW CREATE USER test_user_01073; -SELECT 'A'; +SELECT 'B'; SHOW GRANTS FOR test_user_01073; +SELECT 'C'; GRANT SELECT ON db1.* TO test_user_01073; GRANT SELECT ON db2.table TO test_user_01073; GRANT SELECT(col1) ON db3.table TO test_user_01073; GRANT SELECT(col1, col2) ON db4.table TO test_user_01073; GRANT INSERT ON *.* TO test_user_01073; GRANT DELETE ON *.* TO test_user_01073; - -SELECT 'B'; SHOW GRANTS FOR test_user_01073; +SELECT 'D'; REVOKE SELECT ON db1.* FROM test_user_01073; REVOKE SELECT ON db2.table FROM test_user_01073; REVOKE SELECT ON db3.table FROM test_user_01073; REVOKE SELECT(col2) ON db4.table FROM test_user_01073; REVOKE INSERT ON *.* FROM test_user_01073; - -SELECT 'C'; SHOW GRANTS FOR test_user_01073; +SELECT 'E'; +CREATE ROLE test_role_01073; +GRANT SELECT ON db1.* TO test_role_01073; +REVOKE SELECT(c1, c2, c3, c4, c5) ON db1.table1 FROM test_role_01073; +REVOKE SELECT(c1) ON db1.table2 FROM test_role_01073; +SHOW GRANTS FOR test_role_01073; + DROP USER test_user_01073; +DROP ROLE test_role_01073; From 6525df5d5bc674cbec53f12508cf78f67d9aa0db Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 12 Nov 2021 22:01:35 +0000 Subject: [PATCH 167/472] Backport #31044 to 21.9: Fix StorageMerge with aliases and where --- src/Storages/StorageMerge.cpp | 103 +++++++++++------- src/Storages/StorageMerge.h | 3 + ...storage_merge_aliases_with_where.reference | 53 +++++++++ ..._test_storage_merge_aliases_with_where.sql | 34 ++++++ 4 files changed, 153 insertions(+), 40 deletions(-) create mode 100644 tests/queries/0_stateless/01214_test_storage_merge_aliases_with_where.reference create mode 100644 tests/queries/0_stateless/01214_test_storage_merge_aliases_with_where.sql diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index cbe4a287919f..3c210ed1b47b 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ 
-179,6 +179,28 @@ QueryProcessingStage::Enum StorageMerge::getQueryProcessingStage( } +SelectQueryInfo StorageMerge::getModifiedQueryInfo( + const SelectQueryInfo & query_info, ContextPtr modified_context, const StorageID & current_storage_id, bool is_merge_engine) +{ + SelectQueryInfo modified_query_info = query_info; + modified_query_info.query = query_info.query->clone(); + + /// Original query could contain JOIN but we need only the first joined table and its columns. + auto & modified_select = modified_query_info.query->as(); + TreeRewriterResult new_analyzer_res = *modified_query_info.syntax_analyzer_result; + removeJoin(modified_select, new_analyzer_res, modified_context); + modified_query_info.syntax_analyzer_result = std::make_shared(std::move(new_analyzer_res)); + + if (!is_merge_engine) + { + VirtualColumnUtils::rewriteEntityInAst(modified_query_info.query, "_table", current_storage_id.table_name); + VirtualColumnUtils::rewriteEntityInAst(modified_query_info.query, "_database", current_storage_id.database_name); + } + + return modified_query_info; +} + + Pipe StorageMerge::read( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, @@ -221,10 +243,12 @@ Pipe StorageMerge::read( = getSelectedTables(local_context, query_info.query, has_database_virtual_column, has_table_virtual_column); if (selected_tables.empty()) + { + auto modified_query_info = getModifiedQueryInfo(query_info, modified_context, getStorageID(), false); /// FIXME: do we support sampling in this case? return createSources( {}, - query_info, + modified_query_info, processed_stage, max_block_size, header, @@ -235,6 +259,7 @@ Pipe StorageMerge::read( 0, has_database_virtual_column, has_table_virtual_column); + } size_t tables_count = selected_tables.size(); Float64 num_streams_multiplier @@ -263,7 +288,6 @@ Pipe StorageMerge::read( } auto sample_block = getInMemoryMetadataPtr()->getSampleBlock(); - Names required_columns; for (const auto & table : selected_tables) { @@ -282,12 +306,16 @@ Pipe StorageMerge::read( auto storage_metadata_snapshot = storage->getInMemoryMetadataPtr(); auto storage_columns = storage_metadata_snapshot->getColumns(); - if (processed_stage == QueryProcessingStage::FetchColumns && !storage_columns.getAliases().empty()) + auto modified_query_info = getModifiedQueryInfo(query_info, modified_context, storage->getStorageID(), storage->as()); + auto syntax_result = TreeRewriter(local_context).analyzeSelect(modified_query_info.query, TreeRewriterResult({}, storage, storage_metadata_snapshot)); + + Names column_names_as_aliases; + bool with_aliases = processed_stage == QueryProcessingStage::FetchColumns && !storage_columns.getAliases().empty(); + if (with_aliases) { - auto syntax_result = TreeRewriter(local_context).analyzeSelect(query_info.query, TreeRewriterResult({}, storage, storage_metadata_snapshot)); ASTPtr required_columns_expr_list = std::make_shared(); - ASTPtr column_expr; + for (const auto & column : real_column_names) { const auto column_default = storage_columns.getDefault(column); @@ -313,21 +341,24 @@ Pipe StorageMerge::read( required_columns_expr_list->children.emplace_back(std::move(column_expr)); } - syntax_result = TreeRewriter(local_context).analyze(required_columns_expr_list, storage_columns.getAllPhysical(), - storage, storage_metadata_snapshot); + syntax_result = TreeRewriter(local_context).analyze( + required_columns_expr_list, storage_columns.getAllPhysical(), storage, storage_metadata_snapshot); auto alias_actions = 
ExpressionAnalyzer(required_columns_expr_list, syntax_result, local_context).getActionsDAG(true); - required_columns = alias_actions->getRequiredColumns().getNames(); + + column_names_as_aliases = alias_actions->getRequiredColumns().getNames(); + if (column_names_as_aliases.empty()) + column_names_as_aliases.push_back(ExpressionActions::getSmallestColumn(storage_metadata_snapshot->getColumns().getAllPhysical())); } auto source_pipe = createSources( storage_metadata_snapshot, - query_info, + modified_query_info, processed_stage, max_block_size, header, aliases, table, - required_columns.empty() ? real_column_names : required_columns, + column_names_as_aliases.empty() ? real_column_names : column_names_as_aliases, modified_context, current_streams, has_database_virtual_column, @@ -349,7 +380,7 @@ Pipe StorageMerge::read( Pipe StorageMerge::createSources( const StorageMetadataPtr & metadata_snapshot, - SelectQueryInfo & query_info, + SelectQueryInfo & modified_query_info, const QueryProcessingStage::Enum & processed_stage, const UInt64 max_block_size, const Block & header, @@ -363,19 +394,8 @@ Pipe StorageMerge::createSources( bool concat_streams) { const auto & [database_name, storage, struct_lock, table_name] = storage_with_lock; - SelectQueryInfo modified_query_info = query_info; - modified_query_info.query = query_info.query->clone(); - - /// Original query could contain JOIN but we need only the first joined table and its columns. auto & modified_select = modified_query_info.query->as(); - TreeRewriterResult new_analyzer_res = *query_info.syntax_analyzer_result; - removeJoin(modified_select, new_analyzer_res, modified_context); - modified_query_info.syntax_analyzer_result = std::make_shared(std::move(new_analyzer_res)); - - VirtualColumnUtils::rewriteEntityInAst(modified_query_info.query, "_table", table_name); - VirtualColumnUtils::rewriteEntityInAst(modified_query_info.query, "_database", database_name); - Pipe pipe; if (!storage) @@ -705,27 +725,30 @@ void StorageMerge::convertingSourceStream( if (!where_expression) return; - for (size_t column_index : collections::range(0, header.columns())) + if (processed_stage > QueryProcessingStage::FetchColumns) { - ColumnWithTypeAndName header_column = header.getByPosition(column_index); - ColumnWithTypeAndName before_column = before_block_header.getByName(header_column.name); - /// If the processed_stage greater than FetchColumns and the block structure between streams is different. - /// the where expression maybe invalid because of convertingBlockInputStream. - /// So we need to throw exception. 
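To make the StorageMerge scenario concrete, here is an abridged version of the test this backport adds (01214_test_storage_merge_aliases_with_where.sql), in which an ALIAS column defined differently in each underlying table is used in the WHERE clause:

CREATE TABLE tt1 (a UInt32, b UInt32 ALIAS a) ENGINE = Memory;
CREATE TABLE tt2 (a UInt32, b UInt32 ALIAS a * 2) ENGINE = Memory;
CREATE TABLE tt_m (a UInt32, b UInt32) ENGINE = Merge(currentDatabase(), 'tt1|tt2');

INSERT INTO tt1 VALUES (1);
INSERT INTO tt2 VALUES (2);

-- The alias must be rewritten per underlying table before the WHERE is applied.
SELECT * FROM tt_m WHERE b != 0 ORDER BY b;  -- expected rows: (1, 1) and (2, 4)
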
- if (!header_column.type->equals(*before_column.type.get()) && processed_stage > QueryProcessingStage::FetchColumns) + for (size_t column_index : collections::range(0, header.columns())) { - NamesAndTypesList source_columns = metadata_snapshot->getSampleBlock().getNamesAndTypesList(); - auto virtual_column = *getVirtuals().tryGetByName("_table"); - source_columns.emplace_back(NameAndTypePair{virtual_column.name, virtual_column.type}); - auto syntax_result = TreeRewriter(local_context).analyze(where_expression, source_columns); - ExpressionActionsPtr actions = ExpressionAnalyzer{where_expression, syntax_result, local_context}.getActions(false, false); - Names required_columns = actions->getRequiredColumns(); - - for (const auto & required_column : required_columns) + ColumnWithTypeAndName header_column = header.getByPosition(column_index); + ColumnWithTypeAndName before_column = before_block_header.getByName(header_column.name); + /// If the processed_stage greater than FetchColumns and the block structure between streams is different. + /// the where expression maybe invalid because of convertingBlockInputStream. + /// So we need to throw exception. + if (!header_column.type->equals(*before_column.type.get())) { - if (required_column == header_column.name) - throw Exception("Block structure mismatch in Merge Storage: different types:\n" + before_block_header.dumpStructure() - + "\n" + header.dumpStructure(), ErrorCodes::LOGICAL_ERROR); + NamesAndTypesList source_columns = metadata_snapshot->getSampleBlock().getNamesAndTypesList(); + auto virtual_column = *getVirtuals().tryGetByName("_table"); + source_columns.emplace_back(NameAndTypePair{virtual_column.name, virtual_column.type}); + auto syntax_result = TreeRewriter(local_context).analyze(where_expression, source_columns); + ExpressionActionsPtr actions = ExpressionAnalyzer{where_expression, syntax_result, local_context}.getActions(false, false); + Names required_columns = actions->getRequiredColumns(); + + for (const auto & required_column : required_columns) + { + if (required_column == header_column.name) + throw Exception("Block structure mismatch in Merge Storage: different types:\n" + before_block_header.dumpStructure() + + "\n" + header.dumpStructure(), ErrorCodes::LOGICAL_ERROR); + } } } } diff --git a/src/Storages/StorageMerge.h b/src/Storages/StorageMerge.h index 20460e951564..cb4727fb0b69 100644 --- a/src/Storages/StorageMerge.h +++ b/src/Storages/StorageMerge.h @@ -126,6 +126,9 @@ class StorageMerge final : public shared_ptr_helper, public IStora const Block & header, const StorageMetadataPtr & metadata_snapshot, const Aliases & aliases, ContextPtr context, ASTPtr & query, Pipe & pipe, QueryProcessingStage::Enum processed_stage); + + static SelectQueryInfo getModifiedQueryInfo( + const SelectQueryInfo & query_info, ContextPtr modified_context, const StorageID & current_storage_id, bool is_merge_engine); }; } diff --git a/tests/queries/0_stateless/01214_test_storage_merge_aliases_with_where.reference b/tests/queries/0_stateless/01214_test_storage_merge_aliases_with_where.reference new file mode 100644 index 000000000000..569b21af1978 --- /dev/null +++ b/tests/queries/0_stateless/01214_test_storage_merge_aliases_with_where.reference @@ -0,0 +1,53 @@ +-- { echo } +SELECT * FROM tt_m order by a; +1 1 +2 4 +3 4 +5 12 +SELECT * FROM tt_m WHERE b != 0 order by b; +1 1 +2 4 +3 4 +5 12 +SELECT * FROM tt_m WHERE b != 1 order by b; +2 4 +3 4 +5 12 +SELECT * FROM tt_m WHERE b != a * 2 order by b; +1 1 +3 4 +5 12 +SELECT * FROM tt_m WHERE 
b / 2 != a order by b; +1 1 +3 4 +5 12 +SELECT b FROM tt_m WHERE b >= 0 order by b; +1 +4 +4 +12 +SELECT b FROM tt_m WHERE b == 12; +12 +SELECT b FROM tt_m ORDER BY b; +1 +4 +4 +12 +SELECT b, count() FROM tt_m GROUP BY b order by b; +1 1 +4 2 +12 1 +SELECT b FROM tt_m order by b LIMIT 1 BY b; +1 +4 +12 +SELECT a FROM tt_m WHERE b = 12; +5 +SELECT max(a) FROM tt_m group by b order by b; +1 +3 +5 +SELECT a FROM tt_m order by b LIMIT 1 BY b; +1 +2 +5 diff --git a/tests/queries/0_stateless/01214_test_storage_merge_aliases_with_where.sql b/tests/queries/0_stateless/01214_test_storage_merge_aliases_with_where.sql new file mode 100644 index 000000000000..20a22eb48b1b --- /dev/null +++ b/tests/queries/0_stateless/01214_test_storage_merge_aliases_with_where.sql @@ -0,0 +1,34 @@ +DROP TABLE IF EXISTS tt1; +DROP TABLE IF EXISTS tt2; +DROP TABLE IF EXISTS tt3; +DROP TABLE IF EXISTS tt4; +DROP TABLE IF EXISTS tt_m; + +CREATE TABLE tt1 (a UInt32, b UInt32 ALIAS a) ENGINE = Memory; +CREATE TABLE tt2 (a UInt32, b UInt32 ALIAS a * 2) ENGINE = Memory; +CREATE TABLE tt3 (a UInt32, b UInt32 ALIAS c, c UInt32) ENGINE = Memory; +CREATE TABLE tt4 (a UInt32, b UInt32 ALIAS 12) ENGINE = Memory; +CREATE TABLE tt_m (a UInt32, b UInt32) ENGINE = Merge(currentDatabase(), 'tt1|tt2|tt3|tt4'); + +INSERT INTO tt1 VALUES (1); +INSERT INTO tt2 VALUES (2); +INSERT INTO tt3(a, c) VALUES (3, 4); +INSERT INTO tt4 VALUES (5); + +-- { echo } +SELECT * FROM tt_m order by a; +SELECT * FROM tt_m WHERE b != 0 order by b; +SELECT * FROM tt_m WHERE b != 1 order by b; +SELECT * FROM tt_m WHERE b != a * 2 order by b; +SELECT * FROM tt_m WHERE b / 2 != a order by b; + +SELECT b FROM tt_m WHERE b >= 0 order by b; +SELECT b FROM tt_m WHERE b == 12; +SELECT b FROM tt_m ORDER BY b; +SELECT b, count() FROM tt_m GROUP BY b order by b; +SELECT b FROM tt_m order by b LIMIT 1 BY b; + +SELECT a FROM tt_m WHERE b = 12; +SELECT max(a) FROM tt_m group by b order by b; +SELECT a FROM tt_m order by b LIMIT 1 BY b; + From 0598aaec0fd899e7fee1c2902be810d77420ef4a Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Sat, 13 Nov 2021 03:57:31 +0000 Subject: [PATCH 168/472] Backport #31334 to 21.9: BloomFilter index check fix From 4b849227008cb296c6d95f1a09956055f42ff7d6 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 19 Nov 2021 01:06:34 +0000 Subject: [PATCH 169/472] Backport #31169 to 21.9: ISSUES-30801: Remove not like function into RPNElement --- src/Storages/MergeTree/KeyCondition.cpp | 21 ------------------- .../01891_not_like_partition_prune.reference | 6 ++++++ .../01891_not_like_partition_prune.sql | 9 ++++++++ 3 files changed, 15 insertions(+), 21 deletions(-) create mode 100644 tests/queries/0_stateless/01891_not_like_partition_prune.reference create mode 100644 tests/queries/0_stateless/01891_not_like_partition_prune.sql diff --git a/src/Storages/MergeTree/KeyCondition.cpp b/src/Storages/MergeTree/KeyCondition.cpp index 9521feabac5a..2e1e5f3361db 100644 --- a/src/Storages/MergeTree/KeyCondition.cpp +++ b/src/Storages/MergeTree/KeyCondition.cpp @@ -276,27 +276,6 @@ const KeyCondition::AtomMap KeyCondition::atom_map return true; } }, - { - "notLike", - [] (RPNElement & out, const Field & value) - { - if (value.getType() != Field::Types::String) - return false; - - String prefix = extractFixedPrefixFromLikePattern(value.get()); - if (prefix.empty()) - return false; - - String right_bound = firstStringThatIsGreaterThanAllStringsWithPrefix(prefix); - - out.function = RPNElement::FUNCTION_NOT_IN_RANGE; - out.range = !right_bound.empty() - 
? Range(prefix, true, right_bound, false) - : Range::createLeftBounded(prefix, true); - - return true; - } - }, { "startsWith", [] (RPNElement & out, const Field & value) diff --git a/tests/queries/0_stateless/01891_not_like_partition_prune.reference b/tests/queries/0_stateless/01891_not_like_partition_prune.reference new file mode 100644 index 000000000000..249697548b7b --- /dev/null +++ b/tests/queries/0_stateless/01891_not_like_partition_prune.reference @@ -0,0 +1,6 @@ +1.1 +1 +1.12 +1.2 +1 +1.1 diff --git a/tests/queries/0_stateless/01891_not_like_partition_prune.sql b/tests/queries/0_stateless/01891_not_like_partition_prune.sql new file mode 100644 index 000000000000..5346a7f08a8a --- /dev/null +++ b/tests/queries/0_stateless/01891_not_like_partition_prune.sql @@ -0,0 +1,9 @@ +drop table if exists test; + +create table test (a String) Engine MergeTree order by a partition by a; +insert into test values('1'), ('1.1'), ('1.2'), ('1.12'); + +select * from test where a like '1%1' order by a; +select * from test where a not like '1%1' order by a; +select * from test where a not like '1%2' order by a; +drop table test; From e55dfa0e6ed942fa40c45671ffd3198456ebdfdd Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Sat, 20 Nov 2021 01:05:08 +0000 Subject: [PATCH 170/472] Backport #31409 to 21.9: Resolve `nullptr` in STS credentials provider for S3 --- src/IO/S3Common.cpp | 159 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 158 insertions(+), 1 deletion(-) diff --git a/src/IO/S3Common.cpp b/src/IO/S3Common.cpp index 74c328661c48..d6c28b13387e 100644 --- a/src/IO/S3Common.cpp +++ b/src/IO/S3Common.cpp @@ -13,12 +13,14 @@ # include # include # include +# include # include # include # include # include # include # include +# include # include # include @@ -29,6 +31,8 @@ # include # include +# include + namespace { @@ -360,6 +364,155 @@ class AWSInstanceProfileCredentialsProvider : public Aws::Auth::AWSCredentialsPr Poco::Logger * logger; }; +class AwsAuthSTSAssumeRoleWebIdentityCredentialsProvider : public Aws::Auth::AWSCredentialsProvider +{ + /// See STSAssumeRoleWebIdentityCredentialsProvider. + +public: + explicit AwsAuthSTSAssumeRoleWebIdentityCredentialsProvider(DB::S3::PocoHTTPClientConfiguration & aws_client_configuration) + : logger(&Poco::Logger::get("AwsAuthSTSAssumeRoleWebIdentityCredentialsProvider")) + { + // check environment variables + String tmp_region = Aws::Environment::GetEnv("AWS_DEFAULT_REGION"); + role_arn = Aws::Environment::GetEnv("AWS_ROLE_ARN"); + token_file = Aws::Environment::GetEnv("AWS_WEB_IDENTITY_TOKEN_FILE"); + session_name = Aws::Environment::GetEnv("AWS_ROLE_SESSION_NAME"); + + // check profile_config if either m_roleArn or m_tokenFile is not loaded from environment variable + // region source is not enforced, but we need it to construct sts endpoint, if we can't find from environment, we should check if it's set in config file. 
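As a usage sketch for the new web-identity provider: all identifiers below are made up, and the s3() call assumes the server is configured to pick credentials from the environment (for example via the s3 use_environment_credentials setting).

-- Assumed server environment, illustrative values only:
--   AWS_ROLE_ARN=arn:aws:iam::111111111111:role/clickhouse-s3-reader
--   AWS_WEB_IDENTITY_TOKEN_FILE=/var/run/secrets/eks.amazonaws.com/serviceaccount/token
--   AWS_ROLE_SESSION_NAME=clickhouse
-- No access key or secret is passed; the credentials chain resolves them via STS.
SELECT count()
FROM s3('https://my-bucket.s3.amazonaws.com/data/*.csv', 'CSVWithNames', 'id UInt64, value String');
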
+ if (role_arn.empty() || token_file.empty() || tmp_region.empty()) + { + auto profile = Aws::Config::GetCachedConfigProfile(Aws::Auth::GetConfigProfileName()); + if (tmp_region.empty()) + { + tmp_region = profile.GetRegion(); + } + // If either of these two were not found from environment, use whatever found for all three in config file + if (role_arn.empty() || token_file.empty()) + { + role_arn = profile.GetRoleArn(); + token_file = profile.GetValue("web_identity_token_file"); + session_name = profile.GetValue("role_session_name"); + } + } + + if (token_file.empty()) + { + LOG_WARNING(logger, "Token file must be specified to use STS AssumeRole web identity creds provider."); + return; // No need to do further constructing + } + else + { + LOG_DEBUG(logger, "Resolved token_file from profile_config or environment variable to be {}", token_file); + } + + if (role_arn.empty()) + { + LOG_WARNING(logger, "RoleArn must be specified to use STS AssumeRole web identity creds provider."); + return; // No need to do further constructing + } + else + { + LOG_DEBUG(logger, "Resolved role_arn from profile_config or environment variable to be {}", role_arn); + } + + if (tmp_region.empty()) + { + tmp_region = Aws::Region::US_EAST_1; + } + else + { + LOG_DEBUG(logger, "Resolved region from profile_config or environment variable to be {}", tmp_region); + } + + if (session_name.empty()) + { + session_name = Aws::Utils::UUID::RandomUUID(); + } + else + { + LOG_DEBUG(logger, "Resolved session_name from profile_config or environment variable to be {}", session_name); + } + + aws_client_configuration.scheme = Aws::Http::Scheme::HTTPS; + aws_client_configuration.region = tmp_region; + + std::vector retryable_errors; + retryable_errors.push_back("IDPCommunicationError"); + retryable_errors.push_back("InvalidIdentityToken"); + + aws_client_configuration.retryStrategy = std::make_shared( + retryable_errors, /* maxRetries = */3); + + client = std::make_unique(aws_client_configuration); + initialized = true; + LOG_INFO(logger, "Creating STS AssumeRole with web identity creds provider."); + } + + Aws::Auth::AWSCredentials GetAWSCredentials() override + { + // A valid client means required information like role arn and token file were constructed correctly. + // We can use this provider to load creds, otherwise, we can just return empty creds. 
+ if (!initialized) + { + return Aws::Auth::AWSCredentials(); + } + refreshIfExpired(); + Aws::Utils::Threading::ReaderLockGuard guard(m_reloadLock); + return credentials; + } + +protected: + void Reload() override + { + LOG_INFO(logger, "Credentials have expired, attempting to renew from STS."); + + std::ifstream token_stream(token_file.data()); + if (token_stream) + { + String token_string((std::istreambuf_iterator(token_stream)), std::istreambuf_iterator()); + token = token_string; + } + else + { + LOG_INFO(logger, "Can't open token file: {}", token_file); + return; + } + Aws::Internal::STSCredentialsClient::STSAssumeRoleWithWebIdentityRequest request{session_name, role_arn, token}; + + auto result = client->GetAssumeRoleWithWebIdentityCredentials(request); + LOG_TRACE(logger, "Successfully retrieved credentials with AWS_ACCESS_KEY: {}", result.creds.GetAWSAccessKeyId()); + credentials = result.creds; + } + +private: + void refreshIfExpired() + { + Aws::Utils::Threading::ReaderLockGuard guard(m_reloadLock); + if (!credentials.IsExpiredOrEmpty()) + { + return; + } + + guard.UpgradeToWriterLock(); + if (!credentials.IsExpiredOrEmpty()) // double-checked lock to avoid refreshing twice + { + return; + } + + Reload(); + } + + std::unique_ptr client; + Aws::Auth::AWSCredentials credentials; + Aws::String role_arn; + Aws::String token_file; + Aws::String session_name; + Aws::String token; + bool initialized = false; + Poco::Logger * logger; +}; + class S3CredentialsProviderChain : public Aws::Auth::AWSCredentialsProviderChain { public: @@ -380,7 +533,11 @@ class S3CredentialsProviderChain : public Aws::Auth::AWSCredentialsProviderChain AddProvider(std::make_shared()); AddProvider(std::make_shared()); AddProvider(std::make_shared()); - AddProvider(std::make_shared()); + + { + DB::S3::PocoHTTPClientConfiguration aws_client_configuration = DB::S3::ClientFactory::instance().createClientConfiguration(configuration.region, configuration.remote_host_filter, configuration.s3_max_redirects); + AddProvider(std::make_shared(aws_client_configuration)); + } /// ECS TaskRole Credentials only available when ENVIRONMENT VARIABLE is set. const auto relative_uri = Aws::Environment::GetEnv(AWS_ECS_CONTAINER_CREDENTIALS_RELATIVE_URI); From eae65282bc4400aa3202df191b26899c3c8b5ef9 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Sat, 20 Nov 2021 12:59:58 +0000 Subject: [PATCH 171/472] Backport #31337 to 21.9: fix: quota limit was not reached, but the limit was exceeded --- src/Access/EnabledQuota.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/Access/EnabledQuota.cpp b/src/Access/EnabledQuota.cpp index 4affa4f3ec16..2945a205c182 100644 --- a/src/Access/EnabledQuota.cpp +++ b/src/Access/EnabledQuota.cpp @@ -52,9 +52,7 @@ struct EnabledQuota::Impl return end; } - /// We reset counters only if the interval's end has been calculated before. - /// If it hasn't we just calculate the interval's end for the first time and don't reset counters yet. - bool need_reset_counters = (end_loaded.count() != 0); + bool need_reset_counters = false; do { @@ -66,7 +64,12 @@ struct EnabledQuota::Impl UInt64 n = static_cast((current_time - end + duration) / duration); end = end + duration * n; if (end_of_interval.compare_exchange_strong(end_loaded, end.time_since_epoch())) + { + /// We reset counters only if the interval's end has been calculated before. + /// If it hasn't we just calculate the interval's end for the first time and don't reset counters yet. 
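For context, the behaviour this quota fix addresses shows up with an ordinary quota; a hypothetical setup (quota name and limits are illustrative only):

-- Before the fix, a query could be rejected as exceeding the quota even though
-- the limit had not actually been reached within the current interval.
CREATE QUOTA IF NOT EXISTS q_demo FOR INTERVAL 1 hour MAX queries = 100 TO default;
SHOW QUOTA;  -- consumption of the current user within the active intervals
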
+ need_reset_counters = (end_loaded.count() != 0); break; + } end = std::chrono::system_clock::time_point{end_loaded}; } while (current_time >= end); From 6574e388a58e841d479c93de96ec6de7d64f2ad1 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Sat, 20 Nov 2021 15:58:29 +0000 Subject: [PATCH 172/472] Backport #31528 to 21.9: Disable partial merge join left table buffer bytes --- src/Core/Settings.h | 4 ++-- src/Interpreters/MergeJoin.cpp | 11 +++++++---- src/Interpreters/TableJoin.cpp | 1 - src/Interpreters/TableJoin.h | 2 -- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index cf06e62e5951..fe3822182c12 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -326,9 +326,9 @@ class IColumn; M(OverflowMode, join_overflow_mode, OverflowMode::THROW, "What to do when the limit is exceeded.", 0) \ M(Bool, join_any_take_last_row, false, "When disabled (default) ANY JOIN will take the first found row for a key. When enabled, it will take the last row seen if there are multiple rows for the same key.", IMPORTANT) \ M(JoinAlgorithm, join_algorithm, JoinAlgorithm::HASH, "Specify join algorithm: 'auto', 'hash', 'partial_merge', 'prefer_partial_merge'. 'auto' tries to change HashJoin to MergeJoin on the fly to avoid out of memory.", 0) \ - M(Bool, partial_merge_join_optimizations, true, "Enable optimizations in partial merge join", 0) \ + M(Bool, partial_merge_join_optimizations, false, "Enable optimizations in partial merge join, obsolete", 0) \ M(UInt64, default_max_bytes_in_join, 1000000000, "Maximum size of right-side table if limit is required but max_bytes_in_join is not set.", 0) \ - M(UInt64, partial_merge_join_left_table_buffer_bytes, 32000000, "If not 0 group left table blocks in bigger ones for left-side table in partial merge join. It uses up to 2x of specified memory per joining thread. In current version work only with 'partial_merge_join_optimizations = 1'.", 0) \ + M(UInt64, partial_merge_join_left_table_buffer_bytes, 0, "If not 0 group left table blocks in bigger ones for left-side table in partial merge join. It uses up to 2x of specified memory per joining thread.", 0) \ M(UInt64, partial_merge_join_rows_in_right_blocks, 65536, "Split right-hand joining data in blocks of specified size. It's a portion of data indexed by min-max values and possibly unloaded on disk.", 0) \ M(UInt64, join_on_disk_max_files_to_merge, 64, "For MergeJoin on disk set how much files it's allowed to sort simultaneously. Then this value bigger then more memory used and then less disk I/O needed. Minimum is 2.", 0) \ M(String, temporary_files_codec, "LZ4", "Set compression codec for temporary files (sort and join on disk). I.e. 
LZ4, NONE.", 0) \ diff --git a/src/Interpreters/MergeJoin.cpp b/src/Interpreters/MergeJoin.cpp index a2c63a4693b1..38ba43df260d 100644 --- a/src/Interpreters/MergeJoin.cpp +++ b/src/Interpreters/MergeJoin.cpp @@ -545,10 +545,13 @@ MergeJoin::MergeJoin(std::shared_ptr table_join_, const Block & right makeSortAndMerge(key_names_left, left_sort_description, left_merge_description); makeSortAndMerge(key_names_right, right_sort_description, right_merge_description); - /// Temporary disable 'partial_merge_join_left_table_buffer_bytes' without 'partial_merge_join_optimizations' - if (table_join->enablePartialMergeJoinOptimizations()) - if (size_t max_bytes = table_join->maxBytesInLeftBuffer()) - left_blocks_buffer = std::make_shared(left_sort_description, max_bytes); + if (size_t max_bytes = table_join->maxBytesInLeftBuffer(); max_bytes > 0) + { + /// Disabled due to https://github.com/ClickHouse/ClickHouse/issues/31009 + // left_blocks_buffer = std::make_shared(left_sort_description, max_bytes); + LOG_WARNING(log, "`partial_merge_join_left_table_buffer_bytes` is disabled in current version of ClickHouse"); + UNUSED(left_blocks_buffer); + } } /// Has to be called even if totals are empty diff --git a/src/Interpreters/TableJoin.cpp b/src/Interpreters/TableJoin.cpp index 86c84d9c8c9c..1938f4b4f8bf 100644 --- a/src/Interpreters/TableJoin.cpp +++ b/src/Interpreters/TableJoin.cpp @@ -28,7 +28,6 @@ TableJoin::TableJoin(const Settings & settings, VolumePtr tmp_volume_) , join_use_nulls(settings.join_use_nulls) , max_joined_block_rows(settings.max_joined_block_size_rows) , join_algorithm(settings.join_algorithm) - , partial_merge_join_optimizations(settings.partial_merge_join_optimizations) , partial_merge_join_rows_in_right_blocks(settings.partial_merge_join_rows_in_right_blocks) , partial_merge_join_left_table_buffer_bytes(settings.partial_merge_join_left_table_buffer_bytes) , max_files_to_merge(settings.join_on_disk_max_files_to_merge) diff --git a/src/Interpreters/TableJoin.h b/src/Interpreters/TableJoin.h index 4fe9565666fd..88f6b5ef7d7a 100644 --- a/src/Interpreters/TableJoin.h +++ b/src/Interpreters/TableJoin.h @@ -65,7 +65,6 @@ class TableJoin const bool join_use_nulls = false; const size_t max_joined_block_rows = 0; JoinAlgorithm join_algorithm = JoinAlgorithm::AUTO; - const bool partial_merge_join_optimizations = false; const size_t partial_merge_join_rows_in_right_blocks = 0; const size_t partial_merge_join_left_table_buffer_bytes = 0; const size_t max_files_to_merge = 0; @@ -152,7 +151,6 @@ class TableJoin size_t maxBytesInLeftBuffer() const { return partial_merge_join_left_table_buffer_bytes; } size_t maxFilesToMerge() const { return max_files_to_merge; } const String & temporaryFilesCodec() const { return temporary_files_codec; } - bool enablePartialMergeJoinOptimizations() const { return partial_merge_join_optimizations; } bool needStreamWithNonJoinedRows() const; void resetCollected(); From d007c3900d0cc472036c8369bab907761e2d30d4 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Sun, 21 Nov 2021 12:59:12 +0000 Subject: [PATCH 173/472] Backport #31534 to 21.9: Fix invalid JSON in column names --- .../Formats/Impl/JSONRowOutputFormat.cpp | 6 ++++-- .../02111_json_column_name_encoding.reference | 16 ++++++++++++++++ .../02111_json_column_name_encoding.sql | 7 +++++++ 3 files changed, 27 insertions(+), 2 deletions(-) create mode 100644 tests/queries/0_stateless/02111_json_column_name_encoding.reference create mode 100644 tests/queries/0_stateless/02111_json_column_name_encoding.sql diff 
--git a/src/Processors/Formats/Impl/JSONRowOutputFormat.cpp b/src/Processors/Formats/Impl/JSONRowOutputFormat.cpp index 38c6eefac1c7..877edc671161 100644 --- a/src/Processors/Formats/Impl/JSONRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONRowOutputFormat.cpp @@ -26,8 +26,10 @@ JSONRowOutputFormat::JSONRowOutputFormat( need_validate_utf8 = true; WriteBufferFromOwnString buf; - writeJSONString(fields[i].name, buf, settings); - + { + WriteBufferValidUTF8 validating_buf(buf); + writeJSONString(fields[i].name, validating_buf, settings); + } fields[i].name = buf.str(); } diff --git a/tests/queries/0_stateless/02111_json_column_name_encoding.reference b/tests/queries/0_stateless/02111_json_column_name_encoding.reference new file mode 100644 index 000000000000..dd1bf2f5982d --- /dev/null +++ b/tests/queries/0_stateless/02111_json_column_name_encoding.reference @@ -0,0 +1,16 @@ +{ + "meta": + [ + { + "name": "length('�')", + "type": "UInt64" + } + ], + + "data": + [ + ["1"] + ], + + "rows": 1 +} diff --git a/tests/queries/0_stateless/02111_json_column_name_encoding.sql b/tests/queries/0_stateless/02111_json_column_name_encoding.sql new file mode 100644 index 000000000000..69af75072959 --- /dev/null +++ b/tests/queries/0_stateless/02111_json_column_name_encoding.sql @@ -0,0 +1,7 @@ +-- Tags: no-fasttest + +SET output_format_write_statistics = 0; + +SELECT + length('\x80') + FORMAT JSONCompact; From 7b0a778c382240ab5dfb101729d4992ae6e406b6 Mon Sep 17 00:00:00 2001 From: Vladimir C Date: Mon, 22 Nov 2021 11:33:24 +0300 Subject: [PATCH 174/472] Update src/Interpreters/MergeJoin.cpp --- src/Interpreters/MergeJoin.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/MergeJoin.cpp b/src/Interpreters/MergeJoin.cpp index 38ba43df260d..16acae59888d 100644 --- a/src/Interpreters/MergeJoin.cpp +++ b/src/Interpreters/MergeJoin.cpp @@ -549,7 +549,7 @@ MergeJoin::MergeJoin(std::shared_ptr table_join_, const Block & right { /// Disabled due to https://github.com/ClickHouse/ClickHouse/issues/31009 // left_blocks_buffer = std::make_shared(left_sort_description, max_bytes); - LOG_WARNING(log, "`partial_merge_join_left_table_buffer_bytes` is disabled in current version of ClickHouse"); + LOG_WARNING(&Poco::Logger::get("MergeJoin"), "`partial_merge_join_left_table_buffer_bytes` is disabled in current version of ClickHouse"); UNUSED(left_blocks_buffer); } } From c5a0e6933e83913a0e053eacdf1ce68e03636e1c Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 22 Nov 2021 11:44:04 +0300 Subject: [PATCH 175/472] Add CI scripts to release branch --- tests/ci/approve_lambda/Dockerfile | 13 + tests/ci/approve_lambda/app.py | 300 ++++++++ tests/ci/approve_lambda/requirements.txt | 3 + tests/ci/ast_fuzzer_check.py | 144 ++++ tests/ci/build_check.py | 184 +++++ tests/ci/build_download_helper.py | 97 +++ tests/ci/build_report_check.py | 159 ++++ tests/ci/ccache_utils.py | 105 +++ tests/ci/cherry_pick.py | 39 + tests/ci/cherry_pick_utils/__init__.py | 1 + tests/ci/cherry_pick_utils/backport.py | 128 ++++ tests/ci/cherry_pick_utils/cherrypick.py | 197 +++++ tests/ci/cherry_pick_utils/local.py | 91 +++ tests/ci/cherry_pick_utils/parser.py | 60 ++ tests/ci/cherry_pick_utils/query.py | 427 +++++++++++ tests/ci/cherry_pick_utils/readme.md | 3 + tests/ci/ci_config.json | 144 ++-- tests/ci/ci_config.py | 736 +++++++++++++++++++ tests/ci/clickhouse_helper.py | 163 ++++ tests/ci/commit_status_helper.py | 12 + tests/ci/compatibility_check.py | 164 +++++ tests/ci/compress_files.py | 51 ++ 
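For reference, the JSONRowOutputFormat fix above (backport #31534) is covered by the query added in 02111_json_column_name_encoding.sql:

SET output_format_write_statistics = 0;
SELECT length('\x80') FORMAT JSONCompact;
-- The column name written into "meta" contains a byte that is not valid UTF-8;
-- it is now emitted with the invalid byte replaced, so the JSON stays parseable.
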
tests/ci/docker_images_check.py | 216 ++++++ tests/ci/docker_pull_helper.py | 59 ++ tests/ci/docs_check.py | 99 +++ tests/ci/docs_release.py | 96 +++ tests/ci/fast_test_check.py | 144 ++++ tests/ci/finish_check.py | 39 + tests/ci/functional_test_check.py | 173 +++++ tests/ci/get_robot_token.py | 20 + tests/ci/integration_test_check.py | 166 +++++ tests/ci/metrics_lambda/Dockerfile | 13 + tests/ci/metrics_lambda/app.py | 194 +++++ tests/ci/metrics_lambda/requirements.txt | 3 + tests/ci/pr_info.py | 103 +++ tests/ci/pvs_check.py | 113 +++ tests/ci/report.py | 324 ++++++++ tests/ci/run_check.py | 126 ++++ tests/ci/s3_helper.py | 114 +++ tests/ci/split_build_smoke_check.py | 118 +++ tests/ci/ssh.py | 116 +++ tests/ci/stopwatch.py | 16 + tests/ci/stress_check.py | 130 ++++ tests/ci/style_check.py | 79 ++ tests/ci/termination_lambda/Dockerfile | 13 + tests/ci/termination_lambda/app.py | 283 +++++++ tests/ci/termination_lambda/requirements.txt | 3 + tests/ci/token_lambda/Dockerfile | 13 + tests/ci/token_lambda/app.py | 106 +++ tests/ci/token_lambda/requirements.txt | 3 + tests/ci/unit_tests_check.py | 150 ++++ tests/ci/upload_result_helper.py | 64 ++ tests/ci/version_helper.py | 139 ++++ tests/ci/worker/init_builder.sh | 20 + tests/ci/worker/init_func_tester.sh | 20 + tests/ci/worker/init_stress_tester.sh | 20 + tests/ci/worker/init_style_checker.sh | 20 + tests/ci/worker/ubuntu_style_check.sh | 57 ++ 58 files changed, 6532 insertions(+), 61 deletions(-) create mode 100644 tests/ci/approve_lambda/Dockerfile create mode 100644 tests/ci/approve_lambda/app.py create mode 100644 tests/ci/approve_lambda/requirements.txt create mode 100644 tests/ci/ast_fuzzer_check.py create mode 100644 tests/ci/build_check.py create mode 100644 tests/ci/build_download_helper.py create mode 100644 tests/ci/build_report_check.py create mode 100644 tests/ci/ccache_utils.py create mode 100644 tests/ci/cherry_pick.py create mode 100644 tests/ci/cherry_pick_utils/__init__.py create mode 100644 tests/ci/cherry_pick_utils/backport.py create mode 100644 tests/ci/cherry_pick_utils/cherrypick.py create mode 100644 tests/ci/cherry_pick_utils/local.py create mode 100644 tests/ci/cherry_pick_utils/parser.py create mode 100644 tests/ci/cherry_pick_utils/query.py create mode 100644 tests/ci/cherry_pick_utils/readme.md create mode 100644 tests/ci/ci_config.py create mode 100644 tests/ci/clickhouse_helper.py create mode 100644 tests/ci/commit_status_helper.py create mode 100644 tests/ci/compatibility_check.py create mode 100644 tests/ci/compress_files.py create mode 100644 tests/ci/docker_images_check.py create mode 100644 tests/ci/docker_pull_helper.py create mode 100644 tests/ci/docs_check.py create mode 100644 tests/ci/docs_release.py create mode 100644 tests/ci/fast_test_check.py create mode 100644 tests/ci/finish_check.py create mode 100644 tests/ci/functional_test_check.py create mode 100644 tests/ci/get_robot_token.py create mode 100644 tests/ci/integration_test_check.py create mode 100644 tests/ci/metrics_lambda/Dockerfile create mode 100644 tests/ci/metrics_lambda/app.py create mode 100644 tests/ci/metrics_lambda/requirements.txt create mode 100644 tests/ci/pr_info.py create mode 100644 tests/ci/pvs_check.py create mode 100644 tests/ci/report.py create mode 100644 tests/ci/run_check.py create mode 100644 tests/ci/s3_helper.py create mode 100644 tests/ci/split_build_smoke_check.py create mode 100644 tests/ci/ssh.py create mode 100644 tests/ci/stopwatch.py create mode 100644 tests/ci/stress_check.py create mode 100644 
tests/ci/style_check.py create mode 100644 tests/ci/termination_lambda/Dockerfile create mode 100644 tests/ci/termination_lambda/app.py create mode 100644 tests/ci/termination_lambda/requirements.txt create mode 100644 tests/ci/token_lambda/Dockerfile create mode 100644 tests/ci/token_lambda/app.py create mode 100644 tests/ci/token_lambda/requirements.txt create mode 100644 tests/ci/unit_tests_check.py create mode 100644 tests/ci/upload_result_helper.py create mode 100644 tests/ci/version_helper.py create mode 100644 tests/ci/worker/init_builder.sh create mode 100644 tests/ci/worker/init_func_tester.sh create mode 100644 tests/ci/worker/init_stress_tester.sh create mode 100644 tests/ci/worker/init_style_checker.sh create mode 100644 tests/ci/worker/ubuntu_style_check.sh diff --git a/tests/ci/approve_lambda/Dockerfile b/tests/ci/approve_lambda/Dockerfile new file mode 100644 index 000000000000..f53be71a8931 --- /dev/null +++ b/tests/ci/approve_lambda/Dockerfile @@ -0,0 +1,13 @@ +FROM public.ecr.aws/lambda/python:3.9 + +# Copy function code +COPY app.py ${LAMBDA_TASK_ROOT} + +# Install the function's dependencies using file requirements.txt +# from your project folder. + +COPY requirements.txt . +RUN pip3 install -r requirements.txt --target "${LAMBDA_TASK_ROOT}" + +# Set the CMD to your handler (could also be done as a parameter override outside of the Dockerfile) +CMD [ "app.handler" ] diff --git a/tests/ci/approve_lambda/app.py b/tests/ci/approve_lambda/app.py new file mode 100644 index 000000000000..ffc5afa2f86c --- /dev/null +++ b/tests/ci/approve_lambda/app.py @@ -0,0 +1,300 @@ +#!/usr/bin/env python3 + +import json +import time +import fnmatch +from collections import namedtuple +import jwt + +import requests +import boto3 + +API_URL = 'https://api.github.com/repos/ClickHouse/ClickHouse' + +SUSPICIOUS_CHANGED_FILES_NUMBER = 200 + +SUSPICIOUS_PATTERNS = [ + "tests/ci/*", + "docs/tools/*", + ".github/*", + "utils/release/*", + "docker/*", + "release", +] + +MAX_RETRY = 5 + +WorkflowDescription = namedtuple('WorkflowDescription', + ['name', 'action', 'run_id', 'event', 'sender_login', + 'workflow_id', 'fork_owner_login', 'fork_branch', 'sender_orgs']) + +TRUSTED_WORKFLOW_IDS = { + 14586616, # Cancel workflows, always trusted +} + +TRUSTED_ORG_IDS = { + 7409213, # yandex + 28471076, # altinity + 54801242, # clickhouse +} + +# Individual trusted contirbutors who are not in any trusted organization. +# Can be changed in runtime: we will append users that we learned to be in +# a trusted org, to save GitHub API calls. +TRUSTED_CONTRIBUTORS = { + "achimbab", + "adevyatova ", # DOCSUP + "Algunenano", # Raúl Marín, Tinybird + "AnaUvarova", # DOCSUP + "anauvarova", # technical writer, Yandex + "annvsh", # technical writer, Yandex + "atereh", # DOCSUP + "azat", + "bharatnc", # Newbie, but already with many contributions. 
+ "bobrik", # Seasoned contributor, CloundFlare + "BohuTANG", + "damozhaeva", # DOCSUP + "den-crane", + "gyuton", # DOCSUP + "hagen1778", # Roman Khavronenko, seasoned contributor + "hczhcz", + "hexiaoting", # Seasoned contributor + "ildus", # adjust, ex-pgpro + "javisantana", # a Spanish ClickHouse enthusiast, ex-Carto + "ka1bi4", # DOCSUP + "kirillikoff", # DOCSUP + "kreuzerkrieg", + "lehasm", # DOCSUP + "michon470", # DOCSUP + "MyroTk", # Tester in Altinity + "myrrc", # Michael Kot, Altinity + "nikvas0", + "nvartolomei", + "olgarev", # DOCSUP + "otrazhenia", # Yandex docs contractor + "pdv-ru", # DOCSUP + "podshumok", # cmake expert from QRator Labs + "s-mx", # Maxim Sabyanin, former employee, present contributor + "sevirov", # technical writer, Yandex + "spongedu", # Seasoned contributor + "ucasfl", # Amos Bird's friend + "vdimir", # Employee + "vzakaznikov", + "YiuRULE", + "zlobober" # Developer of YT +} + + +def get_installation_id(jwt_token): + headers = { + "Authorization": f"Bearer {jwt_token}", + "Accept": "application/vnd.github.v3+json", + } + response = requests.get("https://api.github.com/app/installations", headers=headers) + response.raise_for_status() + data = response.json() + return data[0]['id'] + +def get_access_token(jwt_token, installation_id): + headers = { + "Authorization": f"Bearer {jwt_token}", + "Accept": "application/vnd.github.v3+json", + } + response = requests.post(f"https://api.github.com/app/installations/{installation_id}/access_tokens", headers=headers) + response.raise_for_status() + data = response.json() + return data['token'] + +def get_key_and_app_from_aws(): + secret_name = "clickhouse_github_secret_key" + session = boto3.session.Session() + client = session.client( + service_name='secretsmanager', + ) + get_secret_value_response = client.get_secret_value( + SecretId=secret_name + ) + data = json.loads(get_secret_value_response['SecretString']) + return data['clickhouse-app-key'], int(data['clickhouse-app-id']) + + +def is_trusted_sender(pr_user_login, pr_user_orgs): + if pr_user_login in TRUSTED_CONTRIBUTORS: + print(f"User '{pr_user_login}' is trusted") + return True + + print(f"User '{pr_user_login}' is not trusted") + + for org_id in pr_user_orgs: + if org_id in TRUSTED_ORG_IDS: + print(f"Org '{org_id}' is trusted; will mark user {pr_user_login} as trusted") + return True + print(f"Org '{org_id}' is not trusted") + + return False + +def _exec_get_with_retry(url): + for i in range(MAX_RETRY): + try: + response = requests.get(url) + response.raise_for_status() + return response.json() + except Exception as ex: + print("Got exception executing request", ex) + time.sleep(i + 1) + + raise Exception("Cannot execute GET request with retries") + +def _exec_post_with_retry(url, token, data=None): + headers = { + "Authorization": f"token {token}" + } + for i in range(MAX_RETRY): + try: + if data: + response = requests.post(url, headers=headers, json=data) + else: + response = requests.post(url, headers=headers) + if response.status_code == 403: + data = response.json() + if 'message' in data and data['message'] == 'This workflow run is not waiting for approval': + print("Workflow doesn't need approval") + return data + response.raise_for_status() + return response.json() + except Exception as ex: + print("Got exception executing request", ex) + time.sleep(i + 1) + + raise Exception("Cannot execute POST request with retry") + +def _get_pull_requests_from(owner, branch): + url = f"{API_URL}/pulls?head={owner}:{branch}" + return _exec_get_with_retry(url) + 
+def get_workflow_description_from_event(event): + action = event['action'] + sender_login = event['sender']['login'] + run_id = event['workflow_run']['id'] + event_type = event['workflow_run']['event'] + fork_owner = event['workflow_run']['head_repository']['owner']['login'] + fork_branch = event['workflow_run']['head_branch'] + orgs_data = _exec_get_with_retry(event['sender']['organizations_url']) + sender_orgs = [org['id'] for org in orgs_data] + name = event['workflow_run']['name'] + workflow_id = event['workflow_run']['workflow_id'] + return WorkflowDescription( + name=name, + action=action, + sender_login=sender_login, + run_id=run_id, + event=event_type, + fork_owner_login=fork_owner, + fork_branch=fork_branch, + sender_orgs=sender_orgs, + workflow_id=workflow_id, + ) + + +def get_changed_files_for_pull_request(pull_request): + number = pull_request['number'] + + changed_files = set([]) + for i in range(1, 31): + print("Requesting changed files page", i) + url = f"{API_URL}/pulls/{number}/files?page={i}&per_page=100" + data = _exec_get_with_retry(url) + print(f"Got {len(data)} changed files") + if len(data) == 0: + print("No more changed files") + break + + for change in data: + #print("Adding changed file", change['filename']) + changed_files.add(change['filename']) + + if len(changed_files) >= SUSPICIOUS_CHANGED_FILES_NUMBER: + print(f"More than {len(changed_files)} changed files. Will stop fetching new files.") + break + + return changed_files + +def check_suspicious_changed_files(changed_files): + if len(changed_files) >= SUSPICIOUS_CHANGED_FILES_NUMBER: + print(f"Too many files changed {len(changed_files)}, need manual approve") + return True + + for path in changed_files: + for pattern in SUSPICIOUS_PATTERNS: + if fnmatch.fnmatch(path, pattern): + print(f"File {path} match suspicious pattern {pattern}, will not approve automatically") + return True + + print("No changed files match suspicious patterns, run will be approved") + return False + +def approve_run(run_id, token): + url = f"{API_URL}/actions/runs/{run_id}/approve" + _exec_post_with_retry(url, token) + +def label_manual_approve(pull_request, token): + number = pull_request['number'] + url = f"{API_URL}/issues/{number}/labels" + data = {"labels" : "manual approve"} + + _exec_post_with_retry(url, token, data) + +def get_token_from_aws(): + private_key, app_id = get_key_and_app_from_aws() + payload = { + "iat": int(time.time()) - 60, + "exp": int(time.time()) + (10 * 60), + "iss": app_id, + } + + encoded_jwt = jwt.encode(payload, private_key, algorithm="RS256") + installation_id = get_installation_id(encoded_jwt) + return get_access_token(encoded_jwt, installation_id) + +def main(event): + token = get_token_from_aws() + event_data = json.loads(event['body']) + workflow_description = get_workflow_description_from_event(event_data) + + print("Got workflow description", workflow_description) + if workflow_description.action != "requested": + print("Exiting, event action is", workflow_description.action) + return + + if workflow_description.workflow_id in TRUSTED_WORKFLOW_IDS: + print("Workflow in trusted list, approving run") + approve_run(workflow_description.run_id, token) + return + + if is_trusted_sender(workflow_description.sender_login, workflow_description.sender_orgs): + print("Sender is trusted, approving run") + approve_run(workflow_description.run_id, token) + return + + pull_requests = _get_pull_requests_from(workflow_description.fork_owner_login, workflow_description.fork_branch) + print("Got pull requests for 
workflow", len(pull_requests)) + if len(pull_requests) > 1: + raise Exception("Received more than one PR for workflow run") + + if len(pull_requests) < 1: + raise Exception("Cannot find any pull requests for workflow run") + + pull_request = pull_requests[0] + print("Pull request for workflow number", pull_request['number']) + + changed_files = get_changed_files_for_pull_request(pull_request) + print(f"Totally have {len(changed_files)} changed files in PR:", changed_files) + if check_suspicious_changed_files(changed_files): + print(f"Pull Request {pull_request['number']} has suspicious changes, label it for manuall approve") + label_manual_approve(pull_request, token) + else: + print(f"Pull Request {pull_request['number']} has no suspicious changes") + approve_run(workflow_description.run_id, token) + +def handler(event, _): + main(event) diff --git a/tests/ci/approve_lambda/requirements.txt b/tests/ci/approve_lambda/requirements.txt new file mode 100644 index 000000000000..c0dcf4a4dde7 --- /dev/null +++ b/tests/ci/approve_lambda/requirements.txt @@ -0,0 +1,3 @@ +requests +PyJWT +cryptography diff --git a/tests/ci/ast_fuzzer_check.py b/tests/ci/ast_fuzzer_check.py new file mode 100644 index 000000000000..d842d4848413 --- /dev/null +++ b/tests/ci/ast_fuzzer_check.py @@ -0,0 +1,144 @@ +#!/usr/bin/env python3 + +import logging +import subprocess +import os +import json +import sys + +from github import Github + +from s3_helper import S3Helper +from get_robot_token import get_best_robot_token +from pr_info import PRInfo +from ci_config import build_config_to_string +from build_download_helper import get_build_config_for_check, get_build_urls +from docker_pull_helper import get_image_with_version +from commit_status_helper import post_commit_status +from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse +from stopwatch import Stopwatch + +IMAGE_NAME = 'clickhouse/fuzzer' + +def get_run_command(pr_number, sha, download_url, workspace_path, image): + return f'docker run --network=host --volume={workspace_path}:/workspace ' \ + '--cap-add syslog --cap-add sys_admin ' \ + f'-e PR_TO_TEST={pr_number} -e SHA_TO_TEST={sha} -e BINARY_URL_TO_DOWNLOAD="{download_url}" '\ + f'{image}' + +def get_commit(gh, commit_sha): + repo = gh.get_repo(os.getenv("GITHUB_REPOSITORY", "ClickHouse/ClickHouse")) + commit = repo.get_commit(commit_sha) + return commit + +if __name__ == "__main__": + logging.basicConfig(level=logging.INFO) + + stopwatch = Stopwatch() + + temp_path = os.getenv("TEMP_PATH", os.path.abspath(".")) + repo_path = os.getenv("REPO_COPY", os.path.abspath("../../")) + reports_path = os.getenv("REPORTS_PATH", "./reports") + + check_name = sys.argv[1] + + if not os.path.exists(temp_path): + os.makedirs(temp_path) + + with open(os.getenv('GITHUB_EVENT_PATH'), 'r', encoding='utf-8') as event_file: + event = json.load(event_file) + + pr_info = PRInfo(event) + + gh = Github(get_best_robot_token()) + + docker_image = get_image_with_version(temp_path, IMAGE_NAME) + + build_config = get_build_config_for_check(check_name) + print(build_config) + build_config_str = build_config_to_string(build_config) + print(build_config_str) + urls = get_build_urls(build_config_str, reports_path) + if not urls: + raise Exception("No build URLs found") + + for url in urls: + if url.endswith('/clickhouse'): + build_url = url + break + else: + raise Exception("Cannot binary clickhouse among build results") + + logging.info("Got build url %s", build_url) + + workspace_path = os.path.join(temp_path, 
'workspace') + if not os.path.exists(workspace_path): + os.makedirs(workspace_path) + + run_command = get_run_command(pr_info.number, pr_info.sha, build_url, workspace_path, docker_image) + logging.info("Going to run %s", run_command) + + run_log_path = os.path.join(temp_path, "runlog.log") + with open(run_log_path, 'w', encoding='utf-8') as log: + with subprocess.Popen(run_command, shell=True, stderr=log, stdout=log) as process: + retcode = process.wait() + if retcode == 0: + logging.info("Run successfully") + else: + logging.info("Run failed") + + subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True) + + check_name_lower = check_name.lower().replace('(', '').replace(')', '').replace(' ', '') + s3_prefix = f'{pr_info.number}/{pr_info.sha}/fuzzer_{check_name_lower}/' + paths = { + 'runlog.log': run_log_path, + 'main.log': os.path.join(workspace_path, 'main.log'), + 'server.log': os.path.join(workspace_path, 'server.log'), + 'fuzzer.log': os.path.join(workspace_path, 'fuzzer.log'), + 'report.html': os.path.join(workspace_path, 'report.html'), + } + + s3_helper = S3Helper('https://s3.amazonaws.com') + for f in paths: + try: + paths[f] = s3_helper.upload_test_report_to_s3(paths[f], s3_prefix + '/' + f) + except Exception as ex: + logging.info("Exception uploading file %s text %s", f, ex) + paths[f] = '' + + report_url = f"{os.getenv('GITHUB_SERVER_URL')}/{os.getenv('GITHUB_REPOSITORY')}/actions/runs/{os.getenv('GITHUB_RUN_ID')}" + if paths['runlog.log']: + report_url = paths['runlog.log'] + if paths['main.log']: + report_url = paths['main.log'] + if paths['server.log']: + report_url = paths['server.log'] + if paths['fuzzer.log']: + report_url = paths['fuzzer.log'] + if paths['report.html']: + report_url = paths['report.html'] + + # Try to get status message saved by the fuzzer + try: + with open(os.path.join(workspace_path, 'status.txt'), 'r', encoding='utf-8') as status_f: + status = status_f.readline().rstrip('\n') + + with open(os.path.join(workspace_path, 'description.txt'), 'r', encoding='utf-8') as desc_f: + description = desc_f.readline().rstrip('\n')[:140] + except: + status = 'failure' + description = 'Task failed: $?=' + str(retcode) + + if 'fail' in status: + test_result = [(description, 'FAIL')] + else: + test_result = [(description, 'OK')] + + ch_helper = ClickHouseHelper() + + prepared_events = prepare_tests_results_for_clickhouse(pr_info, test_result, status, stopwatch.duration_seconds, stopwatch.start_time_str, report_url, check_name) + + logging.info("Result: '%s', '%s', '%s'", status, description, report_url) + print(f"::notice ::Report url: {report_url}") + post_commit_status(gh, pr_info.sha, check_name, description, status, report_url) diff --git a/tests/ci/build_check.py b/tests/ci/build_check.py new file mode 100644 index 000000000000..1ba5589965c9 --- /dev/null +++ b/tests/ci/build_check.py @@ -0,0 +1,184 @@ +#!/usr/bin/env python3 +# +import subprocess +import logging +import json +import os +import sys +import time +from github import Github +from s3_helper import S3Helper +from pr_info import PRInfo +from get_robot_token import get_best_robot_token +from version_helper import get_version_from_repo, update_version_local +from ccache_utils import get_ccache_if_not_exists, upload_ccache +from ci_config import build_config_to_string, CI_CONFIG +from docker_pull_helper import get_image_with_version + + +def get_build_config(build_check_name, build_number): + if build_check_name == 'ClickHouse build check (actions)': + build_config_name = 
'build_config' + elif build_check_name == 'ClickHouse special build check (actions)': + build_config_name = 'special_build_config' + else: + raise Exception(f"Unknown build check name {build_check_name}") + + return CI_CONFIG[build_config_name][build_number] + + +def _can_export_binaries(build_config): + if build_config['package_type'] != 'deb': + return False + if build_config['bundled'] != "bundled": + return False + if build_config['splitted'] == 'splitted': + return False + if build_config['sanitizer'] != '': + return True + if build_config['build_type'] != '': + return True + return False + + +def get_packager_cmd(build_config, packager_path, output_path, build_version, image_version, ccache_path, pr_info): + package_type = build_config['package_type'] + comp = build_config['compiler'] + cmd = f"cd {packager_path} && ./packager --output-dir={output_path} --package-type={package_type} --compiler={comp}" + + if build_config['build_type']: + cmd += ' --build-type={}'.format(build_config['build_type']) + if build_config['sanitizer']: + cmd += ' --sanitizer={}'.format(build_config['sanitizer']) + if build_config['bundled'] == 'unbundled': + cmd += ' --unbundled' + if build_config['splitted'] == 'splitted': + cmd += ' --split-binary' + if build_config['tidy'] == 'enable': + cmd += ' --clang-tidy' + + cmd += ' --cache=ccache' + cmd += ' --ccache_dir={}'.format(ccache_path) + + if 'alien_pkgs' in build_config and build_config['alien_pkgs']: + if pr_info == 0 or 'release' in pr_info.labels: + cmd += ' --alien-pkgs rpm tgz' + + cmd += ' --docker-image-version={}'.format(image_version) + cmd += ' --version={}'.format(build_version) + + if _can_export_binaries(build_config): + cmd += ' --with-binaries=tests' + + return cmd + +def get_image_name(build_config): + if build_config['bundled'] != 'bundled': + return 'clickhouse/unbundled-builder' + elif build_config['package_type'] != 'deb': + return 'clickhouse/binary-builder' + else: + return 'clickhouse/deb-builder' + + +def build_clickhouse(packager_cmd, logs_path): + build_log_path = os.path.join(logs_path, 'build_log.log') + with open(build_log_path, 'w') as log_file: + retcode = subprocess.Popen(packager_cmd, shell=True, stderr=log_file, stdout=log_file).wait() + if retcode == 0: + logging.info("Built successfully") + else: + logging.info("Build failed") + return build_log_path, retcode == 0 + +if __name__ == "__main__": + logging.basicConfig(level=logging.INFO) + repo_path = os.getenv("REPO_COPY", os.path.abspath("../../")) + temp_path = os.getenv("TEMP_PATH", os.path.abspath(".")) + caches_path = os.getenv("CACHES_PATH", temp_path) + + build_check_name = sys.argv[1] + build_number = int(sys.argv[2]) + + build_config = get_build_config(build_check_name, build_number) + + if not os.path.exists(temp_path): + os.makedirs(temp_path) + + with open(os.getenv('GITHUB_EVENT_PATH'), 'r') as event_file: + event = json.load(event_file) + + pr_info = PRInfo(event) + + logging.info("Repo copy path %s", repo_path) + + gh = Github(get_best_robot_token()) + + image_name = get_image_name(build_config) + docker_image = get_image_with_version(os.getenv("IMAGES_PATH"), image_name) + image_version = docker_image.version + + version = get_version_from_repo(repo_path) + version.tweak_update() + update_version_local(repo_path, pr_info.sha, version) + + build_name = build_config_to_string(build_config) + logging.info("Build short name %s", build_name) + subprocess.check_call(f"echo 'BUILD_NAME=build_urls_{build_name}' >> $GITHUB_ENV", shell=True) + + build_output_path 
= os.path.join(temp_path, build_name) + if not os.path.exists(build_output_path): + os.makedirs(build_output_path) + + ccache_path = os.path.join(caches_path, build_name + '_ccache') + s3_helper = S3Helper('https://s3.amazonaws.com') + + logging.info("Will try to fetch cache for our build") + get_ccache_if_not_exists(ccache_path, s3_helper, pr_info.number, temp_path) + + if not os.path.exists(ccache_path): + logging.info("cache was not fetched, will create empty dir") + os.makedirs(ccache_path) + + packager_cmd = get_packager_cmd(build_config, os.path.join(repo_path, "docker/packager"), build_output_path, version.get_version_string(), image_version, ccache_path, pr_info) + logging.info("Going to run packager with %s", packager_cmd) + + build_clickhouse_log = os.path.join(temp_path, "build_log") + if not os.path.exists(build_clickhouse_log): + os.makedirs(build_clickhouse_log) + + start = time.time() + log_path, success = build_clickhouse(packager_cmd, build_clickhouse_log) + elapsed = int(time.time() - start) + subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {build_output_path}", shell=True) + subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {ccache_path}", shell=True) + logging.info("Build finished with %s, log path %s", success, log_path) + + + logging.info("Will upload cache") + upload_ccache(ccache_path, s3_helper, pr_info.number, temp_path) + + s3_path_prefix = str(pr_info.number) + "/" + pr_info.sha + "/" + build_check_name.lower().replace(' ', '_') + "/" + build_name + if os.path.exists(log_path): + log_url = s3_helper.upload_build_file_to_s3(log_path, s3_path_prefix + "/" + os.path.basename(log_path)) + logging.info("Log url %s", log_url) + else: + logging.info("Build log doesn't exist") + + build_urls = s3_helper.upload_build_folder_to_s3(build_output_path, s3_path_prefix, keep_dirs_in_s3_path=False, upload_symlinks=False) + logging.info("Got build URLs %s", build_urls) + + print("::notice ::Build URLs: {}".format('\n'.join(build_urls))) + + result = { + "log_url": log_url, + "build_urls": build_urls, + "build_config": build_config, + "elapsed_seconds": elapsed, + "status": success, + } + + print("::notice ::Log URL: {}".format(log_url)) + + with open(os.path.join(temp_path, "build_urls_" + build_name + '.json'), 'w') as build_links: + json.dump(result, build_links) diff --git a/tests/ci/build_download_helper.py b/tests/ci/build_download_helper.py new file mode 100644 index 000000000000..2770b7370415 --- /dev/null +++ b/tests/ci/build_download_helper.py @@ -0,0 +1,97 @@ +#!/usr/bin/env python3 + +import os +import json +import logging +import sys +import time + +import requests + +from ci_config import CI_CONFIG, build_config_to_string + +DOWNLOAD_RETRIES_COUNT = 5 + +def get_build_config_for_check(check_name): + return CI_CONFIG["tests_config"][check_name]['required_build_properties'] + +def get_build_urls(build_config_str, reports_path): + for root, _, files in os.walk(reports_path): + for f in files: + if build_config_str in f : + logging.info("Found build report json %s", f) + with open(os.path.join(root, f), 'r', encoding='utf-8') as file_handler: + build_report = json.load(file_handler) + return build_report['build_urls'] + return [] + +def dowload_build_with_progress(url, path): + logging.info("Downloading from %s to temp path %s", url, path) + for i in range(DOWNLOAD_RETRIES_COUNT): + try: + with open(path, 'wb') as f: + response = requests.get(url, stream=True) + response.raise_for_status() + total_length = response.headers.get('content-length') + if total_length 
is None or int(total_length) == 0: + logging.info("No content-length, will download file without progress") + f.write(response.content) + else: + dl = 0 + total_length = int(total_length) + logging.info("Content length is %ld bytes", total_length) + for data in response.iter_content(chunk_size=4096): + dl += len(data) + f.write(data) + if sys.stdout.isatty(): + done = int(50 * dl / total_length) + percent = int(100 * float(dl) / total_length) + eq_str = '=' * done + space_str = ' ' * (50 - done) + sys.stdout.write(f"\r[{eq_str}{space_str}] {percent}%") + sys.stdout.flush() + break + except Exception as ex: + sys.stdout.write("\n") + time.sleep(3) + logging.info("Exception while downloading %s, retry %s", ex, i + 1) + if os.path.exists(path): + os.remove(path) + else: + raise Exception(f"Cannot download dataset from {url}, all retries exceeded") + + sys.stdout.write("\n") + logging.info("Downloading finished") + + +def download_builds(result_path, build_urls, filter_fn): + for url in build_urls: + if filter_fn(url): + fname = os.path.basename(url.replace('%2B', '+').replace('%20', ' ')) + logging.info("Will download %s to %s", fname, result_path) + dowload_build_with_progress(url, os.path.join(result_path, fname)) + +def download_builds_filter(check_name, reports_path, result_path, filter_fn=lambda _: True): + build_config = get_build_config_for_check(check_name) + print(build_config) + build_config_str = build_config_to_string(build_config) + print(build_config_str) + urls = get_build_urls(build_config_str, reports_path) + print(urls) + + if not urls: + raise Exception("No build URLs found") + + download_builds(result_path, urls, filter_fn) + +def download_all_deb_packages(check_name, reports_path, result_path): + download_builds_filter(check_name, reports_path, result_path, lambda x: x.endswith('deb')) + +def download_shared_build(check_name, reports_path, result_path): + download_builds_filter(check_name, reports_path, result_path, lambda x: x.endswith('shared_build.tgz')) + +def download_unit_tests(check_name, reports_path, result_path): + download_builds_filter(check_name, reports_path, result_path, lambda x: x.endswith('unit_tests_dbms')) + +def download_clickhouse_binary(check_name, reports_path, result_path): + download_builds_filter(check_name, reports_path, result_path, lambda x: x.endswith('clickhouse')) diff --git a/tests/ci/build_report_check.py b/tests/ci/build_report_check.py new file mode 100644 index 000000000000..402db7c27404 --- /dev/null +++ b/tests/ci/build_report_check.py @@ -0,0 +1,159 @@ +#!/usr/bin/env python3 + +import json +import logging +import os +import sys +from github import Github +from report import create_build_html_report +from s3_helper import S3Helper +from get_robot_token import get_best_robot_token +from pr_info import PRInfo +from commit_status_helper import get_commit + +class BuildResult(): + def __init__(self, compiler, build_type, sanitizer, bundled, splitted, status, elapsed_seconds, with_coverage): + self.compiler = compiler + self.build_type = build_type + self.sanitizer = sanitizer + self.bundled = bundled + self.splitted = splitted + self.status = status + self.elapsed_seconds = elapsed_seconds + self.with_coverage = with_coverage + +def group_by_artifacts(build_urls): + groups = {'deb': [], 'binary': [], 'tgz': [], 'rpm': [], 'preformance': []} + for url in build_urls: + if url.endswith('performance.tgz'): + groups['performance'].append(url) + elif url.endswith('.deb') or url.endswith('.buildinfo') or url.endswith('.changes') or 
url.endswith('.tar.gz'): + groups['deb'].append(url) + elif url.endswith('.rpm'): + groups['rpm'].append(url) + elif url.endswith('.tgz'): + groups['tgz'].append(url) + else: + groups['binary'].append(url) + return groups + +def process_report(build_report): + build_config = build_report['build_config'] + build_result = BuildResult( + compiler=build_config['compiler'], + build_type=build_config['build_type'], + sanitizer=build_config['sanitizer'], + bundled=build_config['bundled'], + splitted=build_config['splitted'], + status="success" if build_report['status'] else "failure", + elapsed_seconds=build_report['elapsed_seconds'], + with_coverage=False + ) + build_results = [] + build_urls = [] + build_logs_urls = [] + urls_groups = group_by_artifacts(build_report['build_urls']) + found_group = False + for _, group_urls in urls_groups.items(): + if group_urls: + build_results.append(build_result) + build_urls.append(group_urls) + build_logs_urls.append(build_report['log_url']) + found_group = True + + if not found_group: + build_results.append(build_result) + build_urls.append([""]) + build_logs_urls.append(build_report['log_url']) + + return build_results, build_urls, build_logs_urls + + +if __name__ == "__main__": + logging.basicConfig(level=logging.INFO) + reports_path = os.getenv("REPORTS_PATH", "./reports") + temp_path = os.path.join(os.getenv("TEMP_PATH", ".")) + logging.info("Reports path %s", reports_path) + + if not os.path.exists(temp_path): + os.makedirs(temp_path) + + build_check_name = sys.argv[1] + + build_reports = [] + for root, dirs, files in os.walk(reports_path): + for f in files: + if f.startswith("build_urls_") and f.endswith('.json'): + logging.info("Found build report json %s", f) + with open(os.path.join(root, f), 'r') as file_handler: + build_report = json.load(file_handler) + build_reports.append(build_report) + + + build_results = [] + build_artifacts = [] + build_logs = [] + + for build_report in build_reports: + build_result, build_artifacts_url, build_logs_url = process_report(build_report) + logging.info("Got %s result for report", len(build_result)) + build_results += build_result + build_artifacts += build_artifacts_url + build_logs += build_logs_url + + logging.info("Totally got %s results", len(build_results)) + + gh = Github(get_best_robot_token()) + s3_helper = S3Helper('https://s3.amazonaws.com') + with open(os.getenv('GITHUB_EVENT_PATH'), 'r') as event_file: + event = json.load(event_file) + + pr_info = PRInfo(event) + + branch_url = f"{os.getenv('GITHUB_SERVER_URL')}/{os.getenv('GITHUB_REPOSITORY')}/commits/master" + branch_name = "master" + if pr_info.number != 0: + branch_name = "PR #{}".format(pr_info.number) + branch_url = f"{os.getenv('GITHUB_SERVER_URL')}/{os.getenv('GITHUB_REPOSITORY')}/pull/{pr_info.number}" + commit_url = f"{os.getenv('GITHUB_SERVER_URL')}/{os.getenv('GITHUB_REPOSITORY')}/commit/{pr_info.sha}" + task_url = f"{os.getenv('GITHUB_SERVER_URL')}/{os.getenv('GITHUB_REPOSITORY')}/actions/runs/{os.getenv('GITHUB_RUN_ID', '0')}" + report = create_build_html_report( + build_check_name, + build_results, + build_logs, + build_artifacts, + task_url, + branch_url, + branch_name, + commit_url + ) + + report_path = os.path.join(temp_path, 'report.html') + with open(report_path, 'w') as f: + f.write(report) + + logging.info("Going to upload prepared report") + context_name_for_path = build_check_name.lower().replace(' ', '_') + s3_path_prefix = str(pr_info.number) + "/" + pr_info.sha + "/" + context_name_for_path + + url = 
s3_helper.upload_build_file_to_s3(report_path, s3_path_prefix + "/report.html") + logging.info("Report url %s", url) + + total_builds = len(build_results) + ok_builds = 0 + summary_status = "success" + for build_result in build_results: + if build_result.status == "failure" and summary_status != "error": + summary_status = "failure" + if build_result.status == "error" or not build_result.status: + summary_status = "error" + + if build_result.status == "success": + ok_builds += 1 + + description = "{}/{} builds are OK".format(ok_builds, total_builds) + + print("::notice ::Report url: {}".format(url)) + + commit = get_commit(gh, pr_info.sha) + commit.create_status(context=build_check_name, description=description, state=summary_status, target_url=url) diff --git a/tests/ci/ccache_utils.py b/tests/ci/ccache_utils.py new file mode 100644 index 000000000000..f21f1a8c9655 --- /dev/null +++ b/tests/ci/ccache_utils.py @@ -0,0 +1,105 @@ +#!/usr/bin/env python3 + +import logging +import time +import sys +import os +import shutil +from pathlib import Path + +import requests + +from compress_files import decompress_fast, compress_fast + +DOWNLOAD_RETRIES_COUNT = 5 + +def dowload_file_with_progress(url, path): + logging.info("Downloading from %s to temp path %s", url, path) + for i in range(DOWNLOAD_RETRIES_COUNT): + try: + with open(path, 'wb') as f: + response = requests.get(url, stream=True) + response.raise_for_status() + total_length = response.headers.get('content-length') + if total_length is None or int(total_length) == 0: + logging.info("No content-length, will download file without progress") + f.write(response.content) + else: + dl = 0 + total_length = int(total_length) + logging.info("Content length is %ld bytes", total_length) + for data in response.iter_content(chunk_size=4096): + dl += len(data) + f.write(data) + if sys.stdout.isatty(): + done = int(50 * dl / total_length) + percent = int(100 * float(dl) / total_length) + eq_str = '=' * done + space_str = ' ' * (50 - done) + sys.stdout.write(f"\r[{eq_str}{space_str}] {percent}%") + sys.stdout.flush() + break + except Exception as ex: + sys.stdout.write("\n") + time.sleep(3) + logging.info("Exception while downloading %s, retry %s", ex, i + 1) + if os.path.exists(path): + os.remove(path) + else: + raise Exception(f"Cannot download dataset from {url}, all retries exceeded") + + sys.stdout.write("\n") + logging.info("Downloading finished") + + +def get_ccache_if_not_exists(path_to_ccache_dir, s3_helper, current_pr_number, temp_path): + ccache_name = os.path.basename(path_to_ccache_dir) + cache_found = False + prs_to_check = [current_pr_number] + if current_pr_number != 0: + prs_to_check.append(0) + for pr_number in prs_to_check: + logging.info("Searching cache for pr %s", pr_number) + s3_path_prefix = str(pr_number) + "/ccaches" + objects = s3_helper.list_prefix(s3_path_prefix) + logging.info("Found %s objects for pr", len(objects)) + for obj in objects: + if ccache_name in obj: + logging.info("Found ccache on path %s", obj) + url = "https://s3.amazonaws.com/clickhouse-builds/" + obj + compressed_cache = os.path.join(temp_path, os.path.basename(obj)) + dowload_file_with_progress(url, compressed_cache) + + path_to_decompress = str(Path(path_to_ccache_dir).parent) + if not os.path.exists(path_to_decompress): + os.makedirs(path_to_decompress) + + if os.path.exists(path_to_ccache_dir): + shutil.rmtree(path_to_ccache_dir) + logging.info("Ccache already exists, removing it") + + logging.info("Decompressing cache to path %s", path_to_decompress) + 
decompress_fast(compressed_cache, path_to_decompress) + logging.info("Files on path %s", os.listdir(path_to_decompress)) + cache_found = True + break + if cache_found: + break + + if not cache_found: + logging.info("ccache not found anywhere, cannot download anything :(") + if os.path.exists(path_to_ccache_dir): + logging.info("But at least we have some local cache") + else: + logging.info("ccache downloaded") + +def upload_ccache(path_to_ccache_dir, s3_helper, current_pr_number, temp_path): + logging.info("Uploading cache %s for pr %s", path_to_ccache_dir, current_pr_number) + ccache_name = os.path.basename(path_to_ccache_dir) + compressed_cache_path = os.path.join(temp_path, ccache_name + ".tar.gz") + compress_fast(path_to_ccache_dir, compressed_cache_path) + + s3_path = str(current_pr_number) + "/ccaches/" + os.path.basename(compressed_cache_path) + logging.info("Will upload %s to path %s", compressed_cache_path, s3_path) + s3_helper.upload_build_file_to_s3(compressed_cache_path, s3_path) + logging.info("Upload finished") diff --git a/tests/ci/cherry_pick.py b/tests/ci/cherry_pick.py new file mode 100644 index 000000000000..112b58ef1cf2 --- /dev/null +++ b/tests/ci/cherry_pick.py @@ -0,0 +1,39 @@ +#!/usr/bin/env python3 + +import sys +import logging +import os +import subprocess + +from get_robot_token import get_parameter_from_ssm +from ssh import SSHKey +from cherry_pick_utils.backport import Backport +from cherry_pick_utils.cherrypick import CherryPick + + +if __name__ == "__main__": + logging.basicConfig(level=logging.INFO) + repo_path = os.path.join(os.getenv("GITHUB_WORKSPACE", os.path.abspath("../../"))) + temp_path = os.path.join(os.getenv("TEMP_PATH")) + + if not os.path.exists(temp_path): + os.makedirs(temp_path) + + + sys.path.append(os.path.join(repo_path, "utils/github")) + + + with SSHKey("ROBOT_CLICKHOUSE_SSH_KEY"): + token = get_parameter_from_ssm("github_robot_token_1") + + bp = Backport(token, os.environ.get("REPO_OWNER"), os.environ.get("REPO_NAME"), os.environ.get("REPO_TEAM")) + def cherrypick_run(token, pr, branch): + return CherryPick(token, + os.environ.get("REPO_OWNER"), os.environ.get("REPO_NAME"), + os.environ.get("REPO_TEAM"), pr, branch + ).execute(repo_path, False) + + try: + bp.execute(repo_path, 'origin', None, cherrypick_run) + except subprocess.CalledProcessError as e: + logging.error(e.output) diff --git a/tests/ci/cherry_pick_utils/__init__.py b/tests/ci/cherry_pick_utils/__init__.py new file mode 100644 index 000000000000..40a96afc6ff0 --- /dev/null +++ b/tests/ci/cherry_pick_utils/__init__.py @@ -0,0 +1 @@ +# -*- coding: utf-8 -*- diff --git a/tests/ci/cherry_pick_utils/backport.py b/tests/ci/cherry_pick_utils/backport.py new file mode 100644 index 000000000000..a28a15106946 --- /dev/null +++ b/tests/ci/cherry_pick_utils/backport.py @@ -0,0 +1,128 @@ +# -*- coding: utf-8 -*- + +try: + from clickhouse.utils.github.cherrypick import CherryPick + from clickhouse.utils.github.query import Query as RemoteRepo + from clickhouse.utils.github.local import Repository as LocalRepo +except: + from .cherrypick import CherryPick + from .query import Query as RemoteRepo + from .local import Repository as LocalRepo + +import argparse +import logging +import re +import sys + + +class Backport: + def __init__(self, token, owner, name, team): + self._gh = RemoteRepo(token, owner=owner, name=name, team=team, max_page_size=30, min_page_size=7) + self._token = token + self.default_branch_name = self._gh.default_branch + self.ssh_url = self._gh.ssh_url + + def 
getPullRequests(self, from_commit): + return self._gh.get_pull_requests(from_commit) + + def getBranchesWithRelease(self): + branches = set() + for pull_request in self._gh.find_pull_requests("release"): + branches.add(pull_request['headRefName']) + return branches + + def execute(self, repo, upstream, until_commit, run_cherrypick): + repo = LocalRepo(repo, upstream, self.default_branch_name) + all_branches = repo.get_release_branches() # [(branch_name, base_commit)] + + release_branches = self.getBranchesWithRelease() + + branches = [] + # iterate over all branches to preserve their precedence. + for branch in all_branches: + if branch[0] in release_branches: + branches.append(branch) + + if not branches: + logging.info('No release branches found!') + return + + for branch in branches: + logging.info('Found release branch: %s', branch[0]) + + if not until_commit: + until_commit = branches[0][1] + pull_requests = self.getPullRequests(until_commit) + + backport_map = {} + + RE_MUST_BACKPORT = re.compile(r'^v(\d+\.\d+)-must-backport$') + RE_NO_BACKPORT = re.compile(r'^v(\d+\.\d+)-no-backport$') + RE_BACKPORTED = re.compile(r'^v(\d+\.\d+)-backported$') + + # pull-requests are sorted by ancestry from the most recent. + for pr in pull_requests: + while repo.comparator(branches[-1][1]) >= repo.comparator(pr['mergeCommit']['oid']): + logging.info("PR #{} is already inside {}. Dropping this branch for further PRs".format(pr['number'], branches[-1][0])) + branches.pop() + + logging.info("Processing PR #{}".format(pr['number'])) + + assert len(branches) + + branch_set = set([branch[0] for branch in branches]) + + # First pass. Find all must-backports + for label in pr['labels']['nodes']: + if label['name'] == 'pr-bugfix' or label['name'] == 'pr-must-backport': + backport_map[pr['number']] = branch_set.copy() + continue + matched = RE_MUST_BACKPORT.match(label['name']) + if matched: + if pr['number'] not in backport_map: + backport_map[pr['number']] = set() + backport_map[pr['number']].add(matched.group(1)) + + # Second pass. 
Find all no-backports + for label in pr['labels']['nodes']: + if label['name'] == 'pr-no-backport' and pr['number'] in backport_map: + del backport_map[pr['number']] + break + matched_no_backport = RE_NO_BACKPORT.match(label['name']) + matched_backported = RE_BACKPORTED.match(label['name']) + if matched_no_backport and pr['number'] in backport_map and matched_no_backport.group(1) in backport_map[pr['number']]: + backport_map[pr['number']].remove(matched_no_backport.group(1)) + logging.info('\tskipping %s because of forced no-backport', matched_no_backport.group(1)) + elif matched_backported and pr['number'] in backport_map and matched_backported.group(1) in backport_map[pr['number']]: + backport_map[pr['number']].remove(matched_backported.group(1)) + logging.info('\tskipping %s because it\'s already backported manually', matched_backported.group(1)) + + for pr, branches in list(backport_map.items()): + logging.info('PR #%s needs to be backported to:', pr) + for branch in branches: + logging.info('\t%s, and the status is: %s', branch, run_cherrypick(self._token, pr, branch)) + + # print API costs + logging.info('\nGitHub API total costs per query:') + for name, value in list(self._gh.api_costs.items()): + logging.info('%s : %s', name, value) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument('--token', type=str, required=True, help='token for Github access') + parser.add_argument('--repo', type=str, required=True, help='path to full repository', metavar='PATH') + parser.add_argument('--til', type=str, help='check PRs from HEAD til this commit', metavar='COMMIT') + parser.add_argument('--dry-run', action='store_true', help='do not create or merge any PRs', default=False) + parser.add_argument('--verbose', '-v', action='store_true', help='more verbose output', default=False) + parser.add_argument('--upstream', '-u', type=str, help='remote name of upstream in repository', default='origin') + args = parser.parse_args() + + if args.verbose: + logging.basicConfig(format='%(message)s', stream=sys.stdout, level=logging.DEBUG) + else: + logging.basicConfig(format='%(message)s', stream=sys.stdout, level=logging.INFO) + + cherrypick_run = lambda token, pr, branch: CherryPick(token, 'ClickHouse', 'ClickHouse', 'core', pr, branch).execute(args.repo, args.dry_run) + bp = Backport(args.token, 'ClickHouse', 'ClickHouse', 'core') + bp.execute(args.repo, args.upstream, args.til, cherrypick_run) diff --git a/tests/ci/cherry_pick_utils/cherrypick.py b/tests/ci/cherry_pick_utils/cherrypick.py new file mode 100644 index 000000000000..8bedf54fefae --- /dev/null +++ b/tests/ci/cherry_pick_utils/cherrypick.py @@ -0,0 +1,197 @@ +# -*- coding: utf-8 -*- + +''' +Backports changes from PR to release branch. +Requires multiple separate runs as part of the implementation. + +First run should do the following: +1. Merge release branch with a first parent of merge-commit of PR (using 'ours' strategy). (branch: backport/{branch}/{pr}) +2. Create temporary branch over merge-commit to use it for PR creation. (branch: cherrypick/{merge_commit}) +3. Create PR from temporary branch to backport branch (emulating cherry-pick). + +Second run checks PR from previous run to be merged or at least being mergeable. If it's not merged then try to merge it. + +Third run creates PR from backport branch (with merged previous PR) to release branch. 
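+
+As a purely hypothetical illustration (the PR number below is made up): backporting PR #12345
+into release branch 21.9 would use the backport branch backport/21.9/12345 and the temporary
+branch cherrypick/21.9/<merge-commit-oid>, matching the backport_branch and cherrypick_branch
+templates defined in CherryPick.__init__ below.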
+'''
+
+try:
+    from clickhouse.utils.github.query import Query as RemoteRepo
+except:
+    from .query import Query as RemoteRepo
+
+import argparse
+from enum import Enum
+import logging
+import os
+import subprocess
+import sys
+
+
+class CherryPick:
+    class Status(Enum):
+        DISCARDED = 'discarded'
+        NOT_INITIATED = 'not started'
+        FIRST_MERGEABLE = 'waiting for 1st stage'
+        FIRST_CONFLICTS = 'conflicts on 1st stage'
+        SECOND_MERGEABLE = 'waiting for 2nd stage'
+        SECOND_CONFLICTS = 'conflicts on 2nd stage'
+        MERGED = 'backported'
+
+    def _run(self, args):
+        out = subprocess.check_output(args).rstrip()
+        logging.debug(out)
+        return out
+
+    def __init__(self, token, owner, name, team, pr_number, target_branch):
+        self._gh = RemoteRepo(token, owner=owner, name=name, team=team)
+        self._pr = self._gh.get_pull_request(pr_number)
+
+        self.ssh_url = self._gh.ssh_url
+
+        # TODO: check if pull-request is merged.
+
+        self.merge_commit_oid = self._pr['mergeCommit']['oid']
+
+        self.target_branch = target_branch
+        self.backport_branch = 'backport/{branch}/{pr}'.format(branch=target_branch, pr=pr_number)
+        self.cherrypick_branch = 'cherrypick/{branch}/{oid}'.format(branch=target_branch, oid=self.merge_commit_oid)
+
+    def getCherryPickPullRequest(self):
+        return self._gh.find_pull_request(base=self.backport_branch, head=self.cherrypick_branch)
+
+    def createCherryPickPullRequest(self, repo_path):
+        DESCRIPTION = (
+            'This pull-request is the first step of an automated backporting.\n'
+            'It contains changes similar to those of a local `git cherry-pick` call.\n'
+            'If you intend to continue backporting these changes, then resolve all conflicts if any.\n'
+            'Otherwise, if you do not want to backport them, then just close this pull-request.\n'
+            '\n'
+            'The check results do not matter at this step - you can safely ignore them.\n'
+            'Also, this pull-request will be merged automatically once it reaches the mergeable state, but you can always merge it manually.\n'
+        )
+
+        # FIXME: replace with something better than os.system()
+        git_prefix = ['git', '-C', repo_path, '-c', 'user.email=robot-clickhouse@yandex-team.ru', '-c', 'user.name=robot-clickhouse']
+        base_commit_oid = self._pr['mergeCommit']['parents']['nodes'][0]['oid']
+
+        # Create separate branch for backporting, and make it look like real cherry-pick.
+        self._run(git_prefix + ['checkout', '-f', self.target_branch])
+        self._run(git_prefix + ['checkout', '-B', self.backport_branch])
+        self._run(git_prefix + ['merge', '-s', 'ours', '--no-edit', base_commit_oid])
+
+        # Create secondary branch to allow pull request with cherry-picked commit.
+        self._run(git_prefix + ['branch', '-f', self.cherrypick_branch, self.merge_commit_oid])
+
+        self._run(git_prefix + ['push', '-f', 'origin', '{branch}:{branch}'.format(branch=self.backport_branch)])
+        self._run(git_prefix + ['push', '-f', 'origin', '{branch}:{branch}'.format(branch=self.cherrypick_branch)])
+
+        # Create pull-request like a local cherry-pick
+        pr = self._gh.create_pull_request(source=self.cherrypick_branch, target=self.backport_branch,
+                                          title='Cherry pick #{number} to {target}: {title}'.format(
+                                              number=self._pr['number'], target=self.target_branch,
+                                              title=self._pr['title'].replace('"', '\\"')),
+                                          description='Original pull-request #{}\n\n{}'.format(self._pr['number'], DESCRIPTION))
+
+        # FIXME: use `team` to leave a single eligible assignee.
+ self._gh.add_assignee(pr, self._pr['author']) + self._gh.add_assignee(pr, self._pr['mergedBy']) + + self._gh.set_label(pr, "do not test") + self._gh.set_label(pr, "pr-cherrypick") + + return pr + + def mergeCherryPickPullRequest(self, cherrypick_pr): + return self._gh.merge_pull_request(cherrypick_pr['id']) + + def getBackportPullRequest(self): + return self._gh.find_pull_request(base=self.target_branch, head=self.backport_branch) + + def createBackportPullRequest(self, cherrypick_pr, repo_path): + DESCRIPTION = ( + 'This pull-request is a last step of an automated backporting.\n' + 'Treat it as a standard pull-request: look at the checks and resolve conflicts.\n' + 'Merge it only if you intend to backport changes to the target branch, otherwise just close it.\n' + ) + + git_prefix = ['git', '-C', repo_path, '-c', 'user.email=robot-clickhouse@clickhouse.com', '-c', 'user.name=robot-clickhouse'] + + pr_title = 'Backport #{number} to {target}: {title}'.format( + number=self._pr['number'], target=self.target_branch, + title=self._pr['title'].replace('"', '\\"')) + + self._run(git_prefix + ['checkout', '-f', self.backport_branch]) + self._run(git_prefix + ['pull', '--ff-only', 'origin', self.backport_branch]) + self._run(git_prefix + ['reset', '--soft', self._run(git_prefix + ['merge-base', 'origin/' + self.target_branch, self.backport_branch])]) + self._run(git_prefix + ['commit', '-a', '--allow-empty', '-m', pr_title]) + self._run(git_prefix + ['push', '-f', 'origin', '{branch}:{branch}'.format(branch=self.backport_branch)]) + + pr = self._gh.create_pull_request(source=self.backport_branch, target=self.target_branch, title=pr_title, + description='Original pull-request #{}\nCherry-pick pull-request #{}\n\n{}'.format(self._pr['number'], cherrypick_pr['number'], DESCRIPTION)) + + # FIXME: use `team` to leave a single eligible assignee. 
+ self._gh.add_assignee(pr, self._pr['author']) + self._gh.add_assignee(pr, self._pr['mergedBy']) + + self._gh.set_label(pr, "pr-backport") + + return pr + + def execute(self, repo_path, dry_run=False): + pr1 = self.getCherryPickPullRequest() + if not pr1: + if not dry_run: + pr1 = self.createCherryPickPullRequest(repo_path) + logging.debug('Created PR with cherry-pick of %s to %s: %s', self._pr['number'], self.target_branch, pr1['url']) + else: + return CherryPick.Status.NOT_INITIATED + else: + logging.debug('Found PR with cherry-pick of %s to %s: %s', self._pr['number'], self.target_branch, pr1['url']) + + if not pr1['merged'] and pr1['mergeable'] == 'MERGEABLE' and not pr1['closed']: + if not dry_run: + pr1 = self.mergeCherryPickPullRequest(pr1) + logging.debug('Merged PR with cherry-pick of %s to %s: %s', self._pr['number'], self.target_branch, pr1['url']) + + if not pr1['merged']: + logging.debug('Waiting for PR with cherry-pick of %s to %s: %s', self._pr['number'], self.target_branch, pr1['url']) + + if pr1['closed']: + return CherryPick.Status.DISCARDED + elif pr1['mergeable'] == 'CONFLICTING': + return CherryPick.Status.FIRST_CONFLICTS + else: + return CherryPick.Status.FIRST_MERGEABLE + + pr2 = self.getBackportPullRequest() + if not pr2: + if not dry_run: + pr2 = self.createBackportPullRequest(pr1, repo_path) + logging.debug('Created PR with backport of %s to %s: %s', self._pr['number'], self.target_branch, pr2['url']) + else: + return CherryPick.Status.FIRST_MERGEABLE + else: + logging.debug('Found PR with backport of %s to %s: %s', self._pr['number'], self.target_branch, pr2['url']) + + if pr2['merged']: + return CherryPick.Status.MERGED + elif pr2['closed']: + return CherryPick.Status.DISCARDED + elif pr2['mergeable'] == 'CONFLICTING': + return CherryPick.Status.SECOND_CONFLICTS + else: + return CherryPick.Status.SECOND_MERGEABLE + + +if __name__ == "__main__": + logging.basicConfig(format='%(message)s', stream=sys.stdout, level=logging.DEBUG) + + parser = argparse.ArgumentParser() + parser.add_argument('--token', '-t', type=str, required=True, help='token for Github access') + parser.add_argument('--pr', type=str, required=True, help='PR# to cherry-pick') + parser.add_argument('--branch', '-b', type=str, required=True, help='target branch name for cherry-pick') + parser.add_argument('--repo', '-r', type=str, required=True, help='path to full repository', metavar='PATH') + args = parser.parse_args() + + cp = CherryPick(args.token, 'ClickHouse', 'ClickHouse', 'core', args.pr, args.branch) + cp.execute(args.repo) diff --git a/tests/ci/cherry_pick_utils/local.py b/tests/ci/cherry_pick_utils/local.py new file mode 100644 index 000000000000..2ad8d4b8b715 --- /dev/null +++ b/tests/ci/cherry_pick_utils/local.py @@ -0,0 +1,91 @@ +# -*- coding: utf-8 -*- + +import functools +import logging +import os +import re + + +class RepositoryBase: + def __init__(self, repo_path): + import git + + self._repo = git.Repo(repo_path, search_parent_directories=(not repo_path)) + + # comparator of commits + def cmp(x, y): + if str(x) == str(y): + return 0 + if self._repo.is_ancestor(x, y): + return -1 + else: + return 1 + self.comparator = functools.cmp_to_key(cmp) + + def get_head_commit(self): + return self._repo.commit(self._default) + + def iterate(self, begin, end): + rev_range = '{}...{}'.format(begin, end) + for commit in self._repo.iter_commits(rev_range, first_parent=True): + yield commit + + +class Repository(RepositoryBase): + def __init__(self, repo_path, remote_name, default_branch_name): + 
super(Repository, self).__init__(repo_path) + self._remote = self._repo.remotes[remote_name] + self._remote.fetch() + self._default = self._remote.refs[default_branch_name] + + def get_release_branches(self): + ''' + Returns sorted list of tuples: + * remote branch (git.refs.remote.RemoteReference), + * base commit (git.Commit), + * head (git.Commit)). + List is sorted by commits in ascending order. + ''' + release_branches = [] + + RE_RELEASE_BRANCH_REF = re.compile(r'^refs/remotes/.+/\d+\.\d+$') + + for branch in [r for r in self._remote.refs if RE_RELEASE_BRANCH_REF.match(r.path)]: + base = self._repo.merge_base(self._default, self._repo.commit(branch)) + if not base: + logging.info('Branch %s is not based on branch %s. Ignoring.', branch.path, self._default) + elif len(base) > 1: + logging.info('Branch %s has more than one base commit. Ignoring.', branch.path) + else: + release_branches.append((os.path.basename(branch.name), base[0])) + + return sorted(release_branches, key=lambda x : self.comparator(x[1])) + + +class BareRepository(RepositoryBase): + def __init__(self, repo_path, default_branch_name): + super(BareRepository, self).__init__(repo_path) + self._default = self._repo.branches[default_branch_name] + + def get_release_branches(self): + ''' + Returns sorted list of tuples: + * branch (git.refs.head?), + * base commit (git.Commit), + * head (git.Commit)). + List is sorted by commits in ascending order. + ''' + release_branches = [] + + RE_RELEASE_BRANCH_REF = re.compile(r'^refs/heads/\d+\.\d+$') + + for branch in [r for r in self._repo.branches if RE_RELEASE_BRANCH_REF.match(r.path)]: + base = self._repo.merge_base(self._default, self._repo.commit(branch)) + if not base: + logging.info('Branch %s is not based on branch %s. Ignoring.', branch.path, self._default) + elif len(base) > 1: + logging.info('Branch %s has more than one base commit. 
Ignoring.', branch.path) + else: + release_branches.append((os.path.basename(branch.name), base[0])) + + return sorted(release_branches, key=lambda x : self.comparator(x[1])) diff --git a/tests/ci/cherry_pick_utils/parser.py b/tests/ci/cherry_pick_utils/parser.py new file mode 100644 index 000000000000..570410ba23d4 --- /dev/null +++ b/tests/ci/cherry_pick_utils/parser.py @@ -0,0 +1,60 @@ +# -*- coding: utf-8 -*- + +class Description: + '''Parsed description representation + ''' + MAP_CATEGORY_TO_LABEL = { + 'New Feature': 'pr-feature', + 'Bug Fix': 'pr-bugfix', + 'Improvement': 'pr-improvement', + 'Performance Improvement': 'pr-performance', + # 'Backward Incompatible Change': doesn't match anything + 'Build/Testing/Packaging Improvement': 'pr-build', + 'Non-significant (changelog entry is not needed)': 'pr-non-significant', + 'Non-significant (changelog entry is not required)': 'pr-non-significant', + 'Non-significant': 'pr-non-significant', + 'Documentation (changelog entry is not required)': 'pr-documentation', + # 'Other': doesn't match anything + } + + def __init__(self, pull_request): + self.label_name = str() + self.legal = False + + self._parse(pull_request['bodyText']) + + def _parse(self, text): + lines = text.splitlines() + next_category = False + category = str() + + for line in lines: + stripped = line.strip() + + if not stripped: + continue + + if next_category: + category = stripped + next_category = False + + if stripped == 'I hereby agree to the terms of the CLA available at: https://yandex.ru/legal/cla/?lang=en': + self.legal = True + + category_headers = ( + 'Category (leave one):', + 'Changelog category (leave one):', + 'Changelog category:', + 'Category:' + ) + + if stripped in category_headers: + next_category = True + + if category in Description.MAP_CATEGORY_TO_LABEL: + self.label_name = Description.MAP_CATEGORY_TO_LABEL[category] + else: + if not category: + print('Cannot find category in pr description') + else: + print(('Unknown category: ' + category)) diff --git a/tests/ci/cherry_pick_utils/query.py b/tests/ci/cherry_pick_utils/query.py new file mode 100644 index 000000000000..a9a8f4f1cd14 --- /dev/null +++ b/tests/ci/cherry_pick_utils/query.py @@ -0,0 +1,427 @@ +# -*- coding: utf-8 -*- + +import requests +import time + + +class Query: + ''' + Implements queries to the Github API using GraphQL + ''' + + _PULL_REQUEST = ''' + author {{ + ... on User {{ + id + login + }} + }} + + baseRepository {{ + nameWithOwner + }} + + mergeCommit {{ + oid + parents(first: {min_page_size}) {{ + totalCount + nodes {{ + oid + }} + }} + }} + + mergedBy {{ + ... 
on User {{ + id + login + }} + }} + + baseRefName + closed + headRefName + id + mergeable + merged + number + title + url + ''' + + def __init__(self, token, owner, name, team, max_page_size=100, min_page_size=10): + self._PULL_REQUEST = Query._PULL_REQUEST.format(min_page_size=min_page_size) + + self._token = token + self._owner = owner + self._name = name + self._team = team + + self._max_page_size = max_page_size + self._min_page_size = min_page_size + + self.api_costs = {} + + repo = self.get_repository() + self._id = repo['id'] + self.ssh_url = repo['sshUrl'] + self.default_branch = repo['defaultBranchRef']['name'] + + self.members = set(self.get_members()) + + def get_repository(self): + _QUERY = ''' + repository(owner: "{owner}" name: "{name}") {{ + defaultBranchRef {{ + name + }} + id + sshUrl + }} + ''' + + query = _QUERY.format(owner=self._owner, name=self._name) + return self._run(query)['repository'] + + def get_members(self): + '''Get all team members for organization + + Returns: + members: a map of members' logins to ids + ''' + + _QUERY = ''' + organization(login: "{organization}") {{ + team(slug: "{team}") {{ + members(first: {max_page_size} {next}) {{ + pageInfo {{ + hasNextPage + endCursor + }} + nodes {{ + id + login + }} + }} + }} + }} + ''' + + members = {} + not_end = True + query = _QUERY.format(organization=self._owner, team=self._team, + max_page_size=self._max_page_size, + next='') + + while not_end: + result = self._run(query)['organization']['team'] + if result is None: + break + result = result['members'] + not_end = result['pageInfo']['hasNextPage'] + query = _QUERY.format(organization=self._owner, team=self._team, + max_page_size=self._max_page_size, + next='after: "{}"'.format(result["pageInfo"]["endCursor"])) + + members += dict([(node['login'], node['id']) for node in result['nodes']]) + + return members + + def get_pull_request(self, number): + _QUERY = ''' + repository(owner: "{owner}" name: "{name}") {{ + pullRequest(number: {number}) {{ + {pull_request_data} + }} + }} + ''' + + query = _QUERY.format(owner=self._owner, name=self._name, number=number, + pull_request_data=self._PULL_REQUEST, min_page_size=self._min_page_size) + return self._run(query)['repository']['pullRequest'] + + def find_pull_request(self, base, head): + _QUERY = ''' + repository(owner: "{owner}" name: "{name}") {{ + pullRequests(first: {min_page_size} baseRefName: "{base}" headRefName: "{head}") {{ + nodes {{ + {pull_request_data} + }} + totalCount + }} + }} + ''' + + query = _QUERY.format(owner=self._owner, name=self._name, base=base, head=head, + pull_request_data=self._PULL_REQUEST, min_page_size=self._min_page_size) + result = self._run(query)['repository']['pullRequests'] + if result['totalCount'] > 0: + return result['nodes'][0] + else: + return {} + + def find_pull_requests(self, label_name): + ''' + Get all pull-requests filtered by label name + ''' + _QUERY = ''' + repository(owner: "{owner}" name: "{name}") {{ + pullRequests(first: {min_page_size} labels: "{label_name}" states: OPEN) {{ + nodes {{ + {pull_request_data} + }} + }} + }} + ''' + + query = _QUERY.format(owner=self._owner, name=self._name, label_name=label_name, + pull_request_data=self._PULL_REQUEST, min_page_size=self._min_page_size) + return self._run(query)['repository']['pullRequests']['nodes'] + + def get_pull_requests(self, before_commit): + ''' + Get all merged pull-requests from the HEAD of default branch to the last commit (excluding) + ''' + + _QUERY = ''' + repository(owner: "{owner}" name: 
"{name}") {{ + defaultBranchRef {{ + target {{ + ... on Commit {{ + history(first: {max_page_size} {next}) {{ + pageInfo {{ + hasNextPage + endCursor + }} + nodes {{ + oid + associatedPullRequests(first: {min_page_size}) {{ + totalCount + nodes {{ + ... on PullRequest {{ + {pull_request_data} + + labels(first: {min_page_size}) {{ + totalCount + pageInfo {{ + hasNextPage + endCursor + }} + nodes {{ + name + color + }} + }} + }} + }} + }} + }} + }} + }} + }} + }} + }} + ''' + + pull_requests = [] + not_end = True + query = _QUERY.format(owner=self._owner, name=self._name, + max_page_size=self._max_page_size, + min_page_size=self._min_page_size, + pull_request_data=self._PULL_REQUEST, + next='') + + while not_end: + result = self._run(query)['repository']['defaultBranchRef']['target']['history'] + not_end = result['pageInfo']['hasNextPage'] + query = _QUERY.format(owner=self._owner, name=self._name, + max_page_size=self._max_page_size, + min_page_size=self._min_page_size, + pull_request_data=self._PULL_REQUEST, + next='after: "{}"'.format(result["pageInfo"]["endCursor"])) + + for commit in result['nodes']: + # FIXME: maybe include `before_commit`? + if str(commit['oid']) == str(before_commit): + not_end = False + break + + # TODO: fetch all pull-requests that were merged in a single commit. + assert commit['associatedPullRequests']['totalCount'] <= self._min_page_size + + for pull_request in commit['associatedPullRequests']['nodes']: + if(pull_request['baseRepository']['nameWithOwner'] == '{}/{}'.format(self._owner, self._name) and + pull_request['baseRefName'] == self.default_branch and + pull_request['mergeCommit']['oid'] == commit['oid']): + pull_requests.append(pull_request) + + return pull_requests + + def create_pull_request(self, source, target, title, description="", draft=False, can_modify=True): + _QUERY = ''' + createPullRequest(input: {{ + baseRefName: "{target}", + headRefName: "{source}", + repositoryId: "{id}", + title: "{title}", + body: "{body}", + draft: {draft}, + maintainerCanModify: {modify} + }}) {{ + pullRequest {{ + {pull_request_data} + }} + }} + ''' + + query = _QUERY.format(target=target, source=source, id=self._id, title=title, body=description, + draft="true" if draft else "false", modify="true" if can_modify else "false", + pull_request_data=self._PULL_REQUEST) + return self._run(query, is_mutation=True)['createPullRequest']['pullRequest'] + + def merge_pull_request(self, id): + _QUERY = ''' + mergePullRequest(input: {{ + pullRequestId: "{id}" + }}) {{ + pullRequest {{ + {pull_request_data} + }} + }} + ''' + + query = _QUERY.format(id=id, pull_request_data=self._PULL_REQUEST) + return self._run(query, is_mutation=True)['mergePullRequest']['pullRequest'] + + # FIXME: figure out how to add more assignees at once + def add_assignee(self, pr, assignee): + _QUERY = ''' + addAssigneesToAssignable(input: {{ + assignableId: "{id1}", + assigneeIds: "{id2}" + }}) {{ + clientMutationId + }} + ''' + + query = _QUERY.format(id1=pr['id'], id2=assignee['id']) + self._run(query, is_mutation=True) + + def set_label(self, pull_request, label_name): + ''' + Set label by name to the pull request + + Args: + pull_request: JSON object returned by `get_pull_requests()` + label_name (string): label name + ''' + + _GET_LABEL = ''' + repository(owner: "{owner}" name: "{name}") {{ + labels(first: {max_page_size} {next} query: "{label_name}") {{ + pageInfo {{ + hasNextPage + endCursor + }} + nodes {{ + id + name + color + }} + }} + }} + ''' + + _SET_LABEL = ''' + addLabelsToLabelable(input: 
{{ + labelableId: "{pr_id}", + labelIds: "{label_id}" + }}) {{ + clientMutationId + }} + ''' + + labels = [] + not_end = True + query = _GET_LABEL.format(owner=self._owner, name=self._name, label_name=label_name, + max_page_size=self._max_page_size, + next='') + + while not_end: + result = self._run(query)['repository']['labels'] + not_end = result['pageInfo']['hasNextPage'] + query = _GET_LABEL.format(owner=self._owner, name=self._name, label_name=label_name, + max_page_size=self._max_page_size, + next='after: "{}"'.format(result["pageInfo"]["endCursor"])) + + labels += [label for label in result['nodes']] + + if not labels: + return + + query = _SET_LABEL.format(pr_id=pull_request['id'], label_id=labels[0]['id']) + self._run(query, is_mutation=True) + + def _run(self, query, is_mutation=False): + from requests.adapters import HTTPAdapter + from urllib3.util.retry import Retry + + # sleep a little, because we querying github too often + print("Request, is mutation", is_mutation) + time.sleep(0.5) + + def requests_retry_session( + retries=5, + backoff_factor=0.5, + status_forcelist=(403, 500, 502, 504), + session=None, + ): + session = session or requests.Session() + retry = Retry( + total=retries, + read=retries, + connect=retries, + backoff_factor=backoff_factor, + status_forcelist=status_forcelist, + ) + adapter = HTTPAdapter(max_retries=retry) + session.mount('http://', adapter) + session.mount('https://', adapter) + return session + + headers = {'Authorization': 'bearer {}'.format(self._token)} + if is_mutation: + query = ''' + mutation {{ + {query} + }} + '''.format(query=query) + else: + query = ''' + query {{ + {query} + rateLimit {{ + cost + remaining + }} + }} + '''.format(query=query) + + while True: + request = requests_retry_session().post('https://api.github.com/graphql', json={'query': query}, headers=headers) + if request.status_code == 200: + result = request.json() + if 'errors' in result: + raise Exception('Errors occurred: {}\nOriginal query: {}'.format(result["errors"], query)) + + if not is_mutation: + import inspect + caller = inspect.getouterframes(inspect.currentframe(), 2)[1][3] + if caller not in list(self.api_costs.keys()): + self.api_costs[caller] = 0 + self.api_costs[caller] += result['data']['rateLimit']['cost'] + + return result['data'] + else: + import json + raise Exception('Query failed with code {code}:\n{json}'.format(code=request.status_code, json=json.dumps(request.json(), indent=4))) diff --git a/tests/ci/cherry_pick_utils/readme.md b/tests/ci/cherry_pick_utils/readme.md new file mode 100644 index 000000000000..10ae9ca4b0b8 --- /dev/null +++ b/tests/ci/cherry_pick_utils/readme.md @@ -0,0 +1,3 @@ +# Some scripts for backports implementation + +TODO: Remove copy from utils/github diff --git a/tests/ci/ci_config.json b/tests/ci/ci_config.json index 52a101728eaa..4feae56b93cb 100644 --- a/tests/ci/ci_config.json +++ b/tests/ci/ci_config.json @@ -1,7 +1,7 @@ { "build_config": [ { - "compiler": "clang-11", + "compiler": "clang-13", "build-type": "", "sanitizer": "", "package-type": "deb", @@ -12,7 +12,7 @@ "with_coverage": false }, { - "compiler": "clang-11", + "compiler": "clang-13", "build-type": "", "sanitizer": "", "package-type": "performance", @@ -22,7 +22,7 @@ "with_coverage": false }, { - "compiler": "gcc-10", + "compiler": "gcc-11", "build-type": "", "sanitizer": "", "package-type": "binary", @@ -32,7 +32,7 @@ "with_coverage": false }, { - "compiler": "clang-11", + "compiler": "clang-13", "build-type": "", "sanitizer": "address", "package-type": 
"deb", @@ -42,7 +42,7 @@ "with_coverage": false }, { - "compiler": "clang-11", + "compiler": "clang-13", "build-type": "", "sanitizer": "undefined", "package-type": "deb", @@ -52,7 +52,7 @@ "with_coverage": false }, { - "compiler": "clang-11", + "compiler": "clang-13", "build-type": "", "sanitizer": "thread", "package-type": "deb", @@ -62,7 +62,7 @@ "with_coverage": false }, { - "compiler": "clang-11", + "compiler": "clang-13", "build-type": "", "sanitizer": "memory", "package-type": "deb", @@ -72,7 +72,7 @@ "with_coverage": false }, { - "compiler": "clang-11", + "compiler": "clang-13", "build-type": "debug", "sanitizer": "", "package-type": "deb", @@ -82,7 +82,7 @@ "with_coverage": false }, { - "compiler": "gcc-10", + "compiler": "gcc-11", "build-type": "", "sanitizer": "", "package-type": "deb", @@ -92,7 +92,7 @@ "with_coverage": false }, { - "compiler": "clang-11", + "compiler": "clang-13", "build-type": "", "sanitizer": "", "package-type": "binary", @@ -104,7 +104,7 @@ ], "special_build_config": [ { - "compiler": "clang-11", + "compiler": "clang-13", "build-type": "debug", "sanitizer": "", "package-type": "deb", @@ -114,7 +114,7 @@ "with_coverage": false }, { - "compiler": "clang-11", + "compiler": "clang-13", "build-type": "", "sanitizer": "", "package-type": "binary", @@ -124,7 +124,7 @@ "with_coverage": false }, { - "compiler": "clang-11-darwin", + "compiler": "clang-13-darwin", "build-type": "", "sanitizer": "", "package-type": "binary", @@ -134,7 +134,7 @@ "with_coverage": false }, { - "compiler": "clang-11-aarch64", + "compiler": "clang-13-aarch64", "build-type": "", "sanitizer": "", "package-type": "binary", @@ -144,7 +144,7 @@ "with_coverage": false }, { - "compiler": "clang-11-freebsd", + "compiler": "clang-13-freebsd", "build-type": "", "sanitizer": "", "package-type": "binary", @@ -154,7 +154,17 @@ "with_coverage": false }, { - "compiler": "clang-11-darwin-aarch64", + "compiler": "clang-13-darwin-aarch64", + "build-type": "", + "sanitizer": "", + "package-type": "binary", + "bundled": "bundled", + "splitted": "unsplitted", + "tidy": "disable", + "with_coverage": false + }, + { + "compiler": "clang-13-ppc64le", "build-type": "", "sanitizer": "", "package-type": "binary", @@ -167,7 +177,7 @@ "tests_config": { "Functional stateful tests (address)": { "required_build_properties": { - "compiler": "clang-11", + "compiler": "clang-13", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "address", @@ -179,7 +189,7 @@ }, "Functional stateful tests (thread)": { "required_build_properties": { - "compiler": "clang-11", + "compiler": "clang-13", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "thread", @@ -191,7 +201,7 @@ }, "Functional stateful tests (memory)": { "required_build_properties": { - "compiler": "clang-11", + "compiler": "clang-13", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "memory", @@ -203,7 +213,7 @@ }, "Functional stateful tests (ubsan)": { "required_build_properties": { - "compiler": "clang-11", + "compiler": "clang-13", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "undefined", @@ -215,7 +225,7 @@ }, "Functional stateful tests (debug)": { "required_build_properties": { - "compiler": "clang-11", + "compiler": "clang-13", "package_type": "deb", "build_type": "debug", "sanitizer": "none", @@ -227,7 +237,7 @@ }, "Functional stateful tests (release)": { "required_build_properties": { - "compiler": "clang-11", + "compiler": "clang-13", "package_type": "deb", "build_type": 
"relwithdebuginfo", "sanitizer": "none", @@ -239,7 +249,7 @@ }, "Functional stateful tests (release, DatabaseOrdinary)": { "required_build_properties": { - "compiler": "clang-11", + "compiler": "clang-13", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "none", @@ -251,7 +261,7 @@ }, "Functional stateful tests (release, DatabaseReplicated)": { "required_build_properties": { - "compiler": "clang-11", + "compiler": "clang-13", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "none", @@ -263,7 +273,7 @@ }, "Functional stateless tests (address)": { "required_build_properties": { - "compiler": "clang-11", + "compiler": "clang-13", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "address", @@ -275,7 +285,7 @@ }, "Functional stateless tests (thread)": { "required_build_properties": { - "compiler": "clang-11", + "compiler": "clang-13", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "thread", @@ -287,7 +297,7 @@ }, "Functional stateless tests (memory)": { "required_build_properties": { - "compiler": "clang-11", + "compiler": "clang-13", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "memory", @@ -299,7 +309,7 @@ }, "Functional stateless tests (ubsan)": { "required_build_properties": { - "compiler": "clang-11", + "compiler": "clang-13", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "undefined", @@ -311,7 +321,7 @@ }, "Functional stateless tests (debug)": { "required_build_properties": { - "compiler": "clang-11", + "compiler": "clang-13", "package_type": "deb", "build_type": "debug", "sanitizer": "none", @@ -323,7 +333,7 @@ }, "Functional stateless tests (release)": { "required_build_properties": { - "compiler": "clang-11", + "compiler": "clang-13", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "none", @@ -335,7 +345,7 @@ }, "Functional stateless tests (pytest)": { "required_build_properties": { - "compiler": "clang-11", + "compiler": "clang-13", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "none", @@ -347,7 +357,7 @@ }, "Functional stateless tests (unbundled)": { "required_build_properties": { - "compiler": "gcc-10", + "compiler": "gcc-11", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "none", @@ -359,7 +369,7 @@ }, "Functional stateless tests (release, wide parts enabled)": { "required_build_properties": { - "compiler": "clang-11", + "compiler": "clang-13", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "none", @@ -371,7 +381,7 @@ }, "Functional stateless tests (release, DatabaseOrdinary)": { "required_build_properties": { - "compiler": "clang-11", + "compiler": "clang-13", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "none", @@ -383,7 +393,7 @@ }, "Functional stateless tests (release, DatabaseReplicated)": { "required_build_properties": { - "compiler": "clang-11", + "compiler": "clang-13", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "none", @@ -395,7 +405,7 @@ }, "Stress test (address)": { "required_build_properties": { - "compiler": "clang-11", + "compiler": "clang-13", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "address", @@ -407,7 +417,7 @@ }, "Stress test (thread)": { "required_build_properties": { - "compiler": "clang-11", + "compiler": "clang-13", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "thread", @@ -419,7 +429,7 @@ }, "Stress test (undefined)": { 
"required_build_properties": { - "compiler": "clang-11", + "compiler": "clang-13", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "undefined", @@ -431,7 +441,7 @@ }, "Stress test (memory)": { "required_build_properties": { - "compiler": "clang-11", + "compiler": "clang-13", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "memory", @@ -443,7 +453,7 @@ }, "Stress test (debug)": { "required_build_properties": { - "compiler": "clang-11", + "compiler": "clang-13", "package_type": "deb", "build_type": "debug", "sanitizer": "none", @@ -455,7 +465,7 @@ }, "Integration tests (asan)": { "required_build_properties": { - "compiler": "clang-11", + "compiler": "clang-13", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "address", @@ -467,7 +477,7 @@ }, "Integration tests (thread)": { "required_build_properties": { - "compiler": "clang-11", + "compiler": "clang-13", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "thread", @@ -479,7 +489,7 @@ }, "Integration tests (release)": { "required_build_properties": { - "compiler": "clang-11", + "compiler": "clang-13", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "none", @@ -491,7 +501,7 @@ }, "Integration tests (memory)": { "required_build_properties": { - "compiler": "clang-11", + "compiler": "clang-13", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "memory", @@ -503,7 +513,7 @@ }, "Integration tests flaky check (asan)": { "required_build_properties": { - "compiler": "clang-11", + "compiler": "clang-13", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "address", @@ -515,7 +525,7 @@ }, "Compatibility check": { "required_build_properties": { - "compiler": "clang-11", + "compiler": "clang-13", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "none", @@ -527,7 +537,7 @@ }, "Split build smoke test": { "required_build_properties": { - "compiler": "clang-11", + "compiler": "clang-13", "package_type": "binary", "build_type": "relwithdebuginfo", "sanitizer": "none", @@ -539,7 +549,7 @@ }, "Testflows check": { "required_build_properties": { - "compiler": "clang-11", + "compiler": "clang-13", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "none", @@ -551,7 +561,7 @@ }, "Unit tests release gcc": { "required_build_properties": { - "compiler": "gcc-10", + "compiler": "gcc-11", "package_type": "binary", "build_type": "relwithdebuginfo", "sanitizer": "none", @@ -563,7 +573,7 @@ }, "Unit tests release clang": { "required_build_properties": { - "compiler": "clang-11", + "compiler": "clang-13", "package_type": "binary", "build_type": "relwithdebuginfo", "sanitizer": "none", @@ -575,7 +585,7 @@ }, "Unit tests ASAN": { "required_build_properties": { - "compiler": "clang-11", + "compiler": "clang-13", "package_type": "binary", "build_type": "relwithdebuginfo", "sanitizer": "address", @@ -587,7 +597,7 @@ }, "Unit tests MSAN": { "required_build_properties": { - "compiler": "clang-11", + "compiler": "clang-13", "package_type": "binary", "build_type": "relwithdebuginfo", "sanitizer": "memory", @@ -599,7 +609,7 @@ }, "Unit tests TSAN": { "required_build_properties": { - "compiler": "clang-11", + "compiler": "clang-13", "package_type": "binary", "build_type": "relwithdebuginfo", "sanitizer": "thread", @@ -611,7 +621,7 @@ }, "Unit tests UBSAN": { "required_build_properties": { - "compiler": "clang-11", + "compiler": "clang-13", "package_type": "binary", "build_type": "relwithdebuginfo", 
"sanitizer": "thread", @@ -623,7 +633,7 @@ }, "AST fuzzer (debug)": { "required_build_properties": { - "compiler": "clang-11", + "compiler": "clang-13", "package_type": "binary", "build_type": "debug", "sanitizer": "none", @@ -635,7 +645,7 @@ }, "AST fuzzer (ASan)": { "required_build_properties": { - "compiler": "clang-11", + "compiler": "clang-13", "package_type": "binary", "build_type": "relwithdebuginfo", "sanitizer": "address", @@ -647,7 +657,7 @@ }, "AST fuzzer (MSan)": { "required_build_properties": { - "compiler": "clang-11", + "compiler": "clang-13", "package_type": "binary", "build_type": "relwithdebuginfo", "sanitizer": "memory", @@ -659,7 +669,7 @@ }, "AST fuzzer (TSan)": { "required_build_properties": { - "compiler": "clang-11", + "compiler": "clang-13", "package_type": "binary", "build_type": "relwithdebuginfo", "sanitizer": "thread", @@ -671,7 +681,7 @@ }, "AST fuzzer (UBSan)": { "required_build_properties": { - "compiler": "clang-11", + "compiler": "clang-13", "package_type": "binary", "build_type": "relwithdebuginfo", "sanitizer": "undefined", @@ -683,7 +693,7 @@ }, "Release": { "required_build_properties": { - "compiler": "clang-11", + "compiler": "clang-13", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "none", @@ -695,7 +705,7 @@ }, "Functional stateless tests flaky check (address)": { "required_build_properties": { - "compiler": "clang-11", + "compiler": "clang-13", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "address", @@ -704,6 +714,18 @@ "clang-tidy": "disable", "with_coverage": false } + }, + "ClickHouse Keeper Jepsen": { + "required_build_properties": { + "compiler": "clang-13", + "package_type": "binary", + "build_type": "relwithdebuginfo", + "sanitizer": "none", + "bundled": "bundled", + "splitted": "unsplitted", + "clang-tidy": "disable", + "with_coverage": false + } } } } diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py new file mode 100644 index 000000000000..7924a726a2e0 --- /dev/null +++ b/tests/ci/ci_config.py @@ -0,0 +1,736 @@ +#!/usr/bin/env python3 + +CI_CONFIG = { + "build_config": [ + { + "compiler": "clang-13", + "build_type": "", + "sanitizer": "", + "package_type": "deb", + "bundled": "bundled", + "splitted": "unsplitted", + "alien_pkgs": True, + "tidy": "disable", + "with_coverage": False + }, + { + "compiler": "clang-13", + "build_type": "", + "sanitizer": "", + "package_type": "performance", + "bundled": "bundled", + "splitted": "unsplitted", + "tidy": "disable", + "with_coverage": False + }, + { + "compiler": "gcc-11", + "build_type": "", + "sanitizer": "", + "package_type": "binary", + "bundled": "bundled", + "splitted": "unsplitted", + "tidy": "disable", + "with_coverage": False + }, + { + "compiler": "clang-13", + "build_type": "", + "sanitizer": "address", + "package_type": "deb", + "bundled": "bundled", + "splitted": "unsplitted", + "tidy": "disable", + "with_coverage": False + }, + { + "compiler": "clang-13", + "build_type": "", + "sanitizer": "undefined", + "package_type": "deb", + "bundled": "bundled", + "splitted": "unsplitted", + "tidy": "disable", + "with_coverage": False + }, + { + "compiler": "clang-13", + "build_type": "", + "sanitizer": "thread", + "package_type": "deb", + "bundled": "bundled", + "splitted": "unsplitted", + "tidy": "disable", + "with_coverage": False + }, + { + "compiler": "clang-13", + "build_type": "", + "sanitizer": "memory", + "package_type": "deb", + "bundled": "bundled", + "splitted": "unsplitted", + "tidy": "disable", + "with_coverage": False + }, 
+ { + "compiler": "clang-13", + "build_type": "debug", + "sanitizer": "", + "package_type": "deb", + "bundled": "bundled", + "splitted": "unsplitted", + "tidy": "disable", + "with_coverage": False + }, + { + "compiler": "gcc-11", + "build_type": "", + "sanitizer": "", + "package_type": "deb", + "bundled": "unbundled", + "splitted": "unsplitted", + "tidy": "disable", + "with_coverage": False + }, + { + "compiler": "clang-13", + "build_type": "", + "sanitizer": "", + "package_type": "binary", + "bundled": "bundled", + "splitted": "unsplitted", + "tidy": "disable", + "with_coverage": False + } + ], + "special_build_config": [ + { + "compiler": "clang-13", + "build_type": "debug", + "sanitizer": "", + "package_type": "deb", + "bundled": "bundled", + "splitted": "unsplitted", + "tidy": "enable", + "with_coverage": False + }, + { + "compiler": "clang-13", + "build_type": "", + "sanitizer": "", + "package_type": "binary", + "bundled": "bundled", + "splitted": "splitted", + "tidy": "disable", + "with_coverage": False + }, + { + "compiler": "clang-13-darwin", + "build_type": "", + "sanitizer": "", + "package_type": "binary", + "bundled": "bundled", + "splitted": "unsplitted", + "tidy": "disable", + "with_coverage": False + }, + { + "compiler": "clang-13-aarch64", + "build_type": "", + "sanitizer": "", + "package_type": "binary", + "bundled": "bundled", + "splitted": "unsplitted", + "tidy": "disable", + "with_coverage": False + }, + { + "compiler": "clang-13-freebsd", + "build_type": "", + "sanitizer": "", + "package_type": "binary", + "bundled": "bundled", + "splitted": "unsplitted", + "tidy": "disable", + "with_coverage": False + }, + { + "compiler": "clang-13-darwin-aarch64", + "build_type": "", + "sanitizer": "", + "package_type": "binary", + "bundled": "bundled", + "splitted": "unsplitted", + "tidy": "disable", + "with_coverage": False + }, + { + "compiler": "clang-13-ppc64le", + "build_type": "", + "sanitizer": "", + "package_type": "binary", + "bundled": "bundled", + "splitted": "unsplitted", + "tidy": "disable", + "with_coverage": False + } + ], + "tests_config": { + "Stateful tests (address, actions)": { + "required_build_properties": { + "compiler": "clang-13", + "package_type": "deb", + "build_type": "relwithdebuginfo", + "sanitizer": "address", + "bundled": "bundled", + "splitted": "unsplitted", + "clang_tidy": "disable", + "with_coverage": False + } + }, + "Stateful tests (thread, actions)": { + "required_build_properties": { + "compiler": "clang-13", + "package_type": "deb", + "build_type": "relwithdebuginfo", + "sanitizer": "thread", + "bundled": "bundled", + "splitted": "unsplitted", + "clang_tidy": "disable", + "with_coverage": False + } + }, + "Stateful tests (memory, actions)": { + "required_build_properties": { + "compiler": "clang-13", + "package_type": "deb", + "build_type": "relwithdebuginfo", + "sanitizer": "memory", + "bundled": "bundled", + "splitted": "unsplitted", + "clang_tidy": "disable", + "with_coverage": False + } + }, + "Stateful tests (ubsan, actions)": { + "required_build_properties": { + "compiler": "clang-13", + "package_type": "deb", + "build_type": "relwithdebuginfo", + "sanitizer": "undefined", + "bundled": "bundled", + "splitted": "unsplitted", + "clang_tidy": "disable", + "with_coverage": False + } + }, + "Stateful tests (debug, actions)": { + "required_build_properties": { + "compiler": "clang-13", + "package_type": "deb", + "build_type": "debug", + "sanitizer": "none", + "bundled": "bundled", + "splitted": "unsplitted", + "clang_tidy": "disable", + 
"with_coverage": False + } + }, + "Stateful tests (release, actions)": { + "required_build_properties": { + "compiler": "clang-13", + "package_type": "deb", + "build_type": "relwithdebuginfo", + "sanitizer": "none", + "bundled": "bundled", + "splitted": "unsplitted", + "clang_tidy": "disable", + "with_coverage": False + } + }, + "Stateful tests (release, DatabaseOrdinary, actions)": { + "required_build_properties": { + "compiler": "clang-13", + "package_type": "deb", + "build_type": "relwithdebuginfo", + "sanitizer": "none", + "bundled": "bundled", + "splitted": "unsplitted", + "clang_tidy": "disable", + "with_coverage": False + } + }, + "Stateful tests (release, DatabaseReplicated, actions)": { + "required_build_properties": { + "compiler": "clang-13", + "package_type": "deb", + "build_type": "relwithdebuginfo", + "sanitizer": "none", + "bundled": "bundled", + "splitted": "unsplitted", + "clang_tidy": "disable", + "with_coverage": False + } + }, + "Stateless tests (address, actions)": { + "required_build_properties": { + "compiler": "clang-13", + "package_type": "deb", + "build_type": "relwithdebuginfo", + "sanitizer": "address", + "bundled": "bundled", + "splitted": "unsplitted", + "clang_tidy": "disable", + "with_coverage": False + } + }, + "Stateless tests (thread, actions)": { + "required_build_properties": { + "compiler": "clang-13", + "package_type": "deb", + "build_type": "relwithdebuginfo", + "sanitizer": "thread", + "bundled": "bundled", + "splitted": "unsplitted", + "clang_tidy": "disable", + "with_coverage": False + } + }, + "Stateless tests (memory, actions)": { + "required_build_properties": { + "compiler": "clang-13", + "package_type": "deb", + "build_type": "relwithdebuginfo", + "sanitizer": "memory", + "bundled": "bundled", + "splitted": "unsplitted", + "clang_tidy": "disable", + "with_coverage": False + } + }, + "Stateless tests (ubsan, actions)": { + "required_build_properties": { + "compiler": "clang-13", + "package_type": "deb", + "build_type": "relwithdebuginfo", + "sanitizer": "undefined", + "bundled": "bundled", + "splitted": "unsplitted", + "clang_tidy": "disable", + "with_coverage": False + } + }, + "Stateless tests (debug, actions)": { + "required_build_properties": { + "compiler": "clang-13", + "package_type": "deb", + "build_type": "debug", + "sanitizer": "none", + "bundled": "bundled", + "splitted": "unsplitted", + "clang_tidy": "disable", + "with_coverage": False + } + }, + "Stateless tests (release, actions)": { + "required_build_properties": { + "compiler": "clang-13", + "package_type": "deb", + "build_type": "relwithdebuginfo", + "sanitizer": "none", + "bundled": "bundled", + "splitted": "unsplitted", + "clang_tidy": "disable", + "with_coverage": False + } + }, + "Stateless tests (unbundled, actions)": { + "required_build_properties": { + "compiler": "gcc-11", + "package_type": "deb", + "build_type": "relwithdebuginfo", + "sanitizer": "none", + "bundled": "unbundled", + "splitted": "unsplitted", + "clang_tidy": "disable", + "with_coverage": False + } + }, + "Stateless tests (release, wide parts enabled, actions)": { + "required_build_properties": { + "compiler": "clang-13", + "package_type": "deb", + "build_type": "relwithdebuginfo", + "sanitizer": "none", + "bundled": "bundled", + "splitted": "unsplitted", + "clang_tidy": "disable", + "with_coverage": False + } + }, + "Stateless tests (release, DatabaseOrdinary, actions)": { + "required_build_properties": { + "compiler": "clang-13", + "package_type": "deb", + "build_type": "relwithdebuginfo", + 
"sanitizer": "none", + "bundled": "bundled", + "splitted": "unsplitted", + "clang_tidy": "disable", + "with_coverage": False + } + }, + "Stateless tests (release, DatabaseReplicated, actions)": { + "required_build_properties": { + "compiler": "clang-13", + "package_type": "deb", + "build_type": "relwithdebuginfo", + "sanitizer": "none", + "bundled": "bundled", + "splitted": "unsplitted", + "clang_tidy": "disable", + "with_coverage": False + } + }, + "Stress test (address, actions)": { + "required_build_properties": { + "compiler": "clang-13", + "package_type": "deb", + "build_type": "relwithdebuginfo", + "sanitizer": "address", + "bundled": "bundled", + "splitted": "unsplitted", + "clang_tidy": "disable", + "with_coverage": False + } + }, + "Stress test (thread, actions)": { + "required_build_properties": { + "compiler": "clang-13", + "package_type": "deb", + "build_type": "relwithdebuginfo", + "sanitizer": "thread", + "bundled": "bundled", + "splitted": "unsplitted", + "clang_tidy": "disable", + "with_coverage": False + } + }, + "Stress test (undefined, actions)": { + "required_build_properties": { + "compiler": "clang-13", + "package_type": "deb", + "build_type": "relwithdebuginfo", + "sanitizer": "undefined", + "bundled": "bundled", + "splitted": "unsplitted", + "clang_tidy": "disable", + "with_coverage": False + } + }, + "Stress test (memory, actions)": { + "required_build_properties": { + "compiler": "clang-13", + "package_type": "deb", + "build_type": "relwithdebuginfo", + "sanitizer": "memory", + "bundled": "bundled", + "splitted": "unsplitted", + "clang_tidy": "disable", + "with_coverage": False + } + }, + "Stress test (debug, actions)": { + "required_build_properties": { + "compiler": "clang-13", + "package_type": "deb", + "build_type": "debug", + "sanitizer": "none", + "bundled": "bundled", + "splitted": "unsplitted", + "clang_tidy": "disable", + "with_coverage": False + } + }, + "Integration tests (asan, actions)": { + "required_build_properties": { + "compiler": "clang-13", + "package_type": "deb", + "build_type": "relwithdebuginfo", + "sanitizer": "address", + "bundled": "bundled", + "splitted": "unsplitted", + "clang_tidy": "disable", + "with_coverage": False + } + }, + "Integration tests (thread, actions)": { + "required_build_properties": { + "compiler": "clang-13", + "package_type": "deb", + "build_type": "relwithdebuginfo", + "sanitizer": "thread", + "bundled": "bundled", + "splitted": "unsplitted", + "clang_tidy": "disable", + "with_coverage": False + } + }, + "Integration tests (release, actions)": { + "required_build_properties": { + "compiler": "clang-13", + "package_type": "deb", + "build_type": "relwithdebuginfo", + "sanitizer": "none", + "bundled": "bundled", + "splitted": "unsplitted", + "clang_tidy": "disable", + "with_coverage": False + } + }, + "Integration tests (memory, actions)": { + "required_build_properties": { + "compiler": "clang-13", + "package_type": "deb", + "build_type": "relwithdebuginfo", + "sanitizer": "memory", + "bundled": "bundled", + "splitted": "unsplitted", + "clang_tidy": "disable", + "with_coverage": False + } + }, + "Integration tests flaky check (asan, actions)": { + "required_build_properties": { + "compiler": "clang-13", + "package_type": "deb", + "build_type": "relwithdebuginfo", + "sanitizer": "address", + "bundled": "bundled", + "splitted": "unsplitted", + "clang_tidy": "disable", + "with_coverage": False + } + }, + "Compatibility check (actions)": { + "required_build_properties": { + "compiler": "clang-13", + "package_type": 
"deb", + "build_type": "relwithdebuginfo", + "sanitizer": "none", + "bundled": "bundled", + "splitted": "unsplitted", + "clang_tidy": "disable", + "with_coverage": False + } + }, + "Split build smoke test (actions)": { + "required_build_properties": { + "compiler": "clang-13", + "package_type": "binary", + "build_type": "relwithdebuginfo", + "sanitizer": "none", + "bundled": "bundled", + "splitted": "splitted", + "clang_tidy": "disable", + "with_coverage": False + } + }, + "Testflows check (actions)": { + "required_build_properties": { + "compiler": "clang-13", + "package_type": "deb", + "build_type": "relwithdebuginfo", + "sanitizer": "none", + "bundled": "bundled", + "splitted": "unsplitted", + "clang_tidy": "disable", + "with_coverage": False + } + }, + "Unit tests (release-gcc, actions)": { + "required_build_properties": { + "compiler": "gcc-11", + "package_type": "deb", + "build_type": "relwithdebuginfo", + "sanitizer": "none", + "bundled": "bundled", + "splitted": "unsplitted", + "clang_tidy": "disable", + "with_coverage": False + } + }, + "Unit tests (release-clang, actions)": { + "required_build_properties": { + "compiler": "clang-13", + "package_type": "binary", + "build_type": "relwithdebuginfo", + "sanitizer": "none", + "bundled": "bundled", + "splitted": "unsplitted", + "clang_tidy": "disable", + "with_coverage": False + } + }, + "Unit tests (asan, actions)": { + "required_build_properties": { + "compiler": "clang-13", + "package_type": "deb", + "build_type": "relwithdebuginfo", + "sanitizer": "address", + "bundled": "bundled", + "splitted": "unsplitted", + "clang_tidy": "disable", + "with_coverage": False + } + }, + "Unit tests (msan, actions)": { + "required_build_properties": { + "compiler": "clang-13", + "package_type": "deb", + "build_type": "relwithdebuginfo", + "sanitizer": "memory", + "bundled": "bundled", + "splitted": "unsplitted", + "clang_tidy": "disable", + "with_coverage": False + } + }, + "Unit tests (tsan, actions)": { + "required_build_properties": { + "compiler": "clang-13", + "package_type": "deb", + "build_type": "relwithdebuginfo", + "sanitizer": "thread", + "bundled": "bundled", + "splitted": "unsplitted", + "clang_tidy": "disable", + "with_coverage": False + } + }, + "Unit tests (ubsan, actions)": { + "required_build_properties": { + "compiler": "clang-13", + "package_type": "deb", + "build_type": "relwithdebuginfo", + "sanitizer": "thread", + "bundled": "bundled", + "splitted": "unsplitted", + "clang_tidy": "disable", + "with_coverage": False + } + }, + "AST fuzzer (debug, actions)": { + "required_build_properties": { + "compiler": "clang-13", + "package_type": "deb", + "build_type": "debug", + "sanitizer": "none", + "bundled": "bundled", + "splitted": "unsplitted", + "clang_tidy": "disable", + "with_coverage": False + } + }, + "AST fuzzer (ASan, actions)": { + "required_build_properties": { + "compiler": "clang-13", + "package_type": "deb", + "build_type": "relwithdebuginfo", + "sanitizer": "address", + "bundled": "bundled", + "splitted": "unsplitted", + "clang_tidy": "disable", + "with_coverage": False + } + }, + "AST fuzzer (MSan, actions)": { + "required_build_properties": { + "compiler": "clang-13", + "package_type": "deb", + "build_type": "relwithdebuginfo", + "sanitizer": "memory", + "bundled": "bundled", + "splitted": "unsplitted", + "clang_tidy": "disable", + "with_coverage": False + } + }, + "AST fuzzer (TSan, actions)": { + "required_build_properties": { + "compiler": "clang-13", + "package_type": "deb", + "build_type": "relwithdebuginfo", + 
"sanitizer": "thread", + "bundled": "bundled", + "splitted": "unsplitted", + "clang_tidy": "disable", + "with_coverage": False + } + }, + "AST fuzzer (UBSan, actions)": { + "required_build_properties": { + "compiler": "clang-13", + "package_type": "deb", + "build_type": "relwithdebuginfo", + "sanitizer": "undefined", + "bundled": "bundled", + "splitted": "unsplitted", + "clang_tidy": "disable", + "with_coverage": False + } + }, + "Release (actions)": { + "required_build_properties": { + "compiler": "clang-13", + "package_type": "deb", + "build_type": "relwithdebuginfo", + "sanitizer": "none", + "bundled": "bundled", + "splitted": "unsplitted", + "clang_tidy": "disable", + "with_coverage": False + } + }, + "Stateless tests flaky check (address, actions)": { + "required_build_properties": { + "compiler": "clang-13", + "package_type": "deb", + "build_type": "relwithdebuginfo", + "sanitizer": "address", + "bundled": "bundled", + "splitted": "unsplitted", + "clang_tidy": "disable", + "with_coverage": False + } + }, + "ClickHouse Keeper Jepsen (actions)": { + "required_build_properties": { + "compiler": "clang-13", + "package_type": "binary", + "build_type": "relwithdebuginfo", + "sanitizer": "none", + "bundled": "bundled", + "splitted": "unsplitted", + "clang_tidy": "disable", + "with_coverage": False + } + } + } +} + +def build_config_to_string(build_config): + if build_config["package_type"] == "performance": + return "performance" + + return "_".join([ + build_config['compiler'], + build_config['build_type'] if build_config['build_type'] else "relwithdebuginfo", + build_config['sanitizer'] if build_config['sanitizer'] else "none", + build_config['bundled'], + build_config['splitted'], + 'tidy' if 'tidy' in build_config and build_config['tidy'] == 'enable' else 'notidy', + 'with_coverage' if 'with_coverage' in build_config and build_config['with_coverage'] else 'without_coverage', + build_config['package_type'], + ]) diff --git a/tests/ci/clickhouse_helper.py b/tests/ci/clickhouse_helper.py new file mode 100644 index 000000000000..0b9df6cb8683 --- /dev/null +++ b/tests/ci/clickhouse_helper.py @@ -0,0 +1,163 @@ +#!/usr/bin/env python3 +import time +import logging +import json + +import requests +from get_robot_token import get_parameter_from_ssm + +class ClickHouseHelper: + def __init__(self, url=None, user=None, password=None): + if url is None: + url = get_parameter_from_ssm("clickhouse-test-stat-url") + + self.url = url + self.auth = { + 'X-ClickHouse-User': user if user is not None else get_parameter_from_ssm("clickhouse-test-stat-login"), + 'X-ClickHouse-Key': password if password is not None else get_parameter_from_ssm("clickhouse-test-stat-password") + } + + def _insert_json_str_info(self, db, table, json_str): + params = { + 'database': db, + 'query': 'INSERT INTO {table} FORMAT JSONEachRow'.format(table=table), + 'date_time_input_format': 'best_effort', + 'send_logs_level': 'warning', + } + + for i in range(5): + response = requests.post(self.url, params=params, data=json_str, headers=self.auth, verify=False) + + logging.info("Response content '%s'", response.content) + + if response.ok: + break + + error = ( + "Cannot insert data into clickhouse at try " + str(i) + + ": HTTP code " + str(response.status_code) + ": '" + + str(response.text) + "'") + + if response.status_code >= 500: + # A retriable error + time.sleep(1) + continue + + logging.info("Request headers '%s', body '%s'", response.request.headers, response.request.body) + + raise Exception(error) + else: + raise 
Exception(error) + + def insert_event_into(self, db, table, event): + event_str = json.dumps(event) + self._insert_json_str_info(db, table, event_str) + + def insert_events_into(self, db, table, events): + jsons = [] + for event in events: + jsons.append(json.dumps(event)) + + self._insert_json_str_info(db, table, ','.join(jsons)) + + def _select_and_get_json_each_row(self, db, query): + params = { + 'database': db, + 'query': query, + 'default_format': 'JSONEachRow', + } + for i in range(5): + response = None + try: + response = requests.get(self.url, params=params, headers=self.auth, verify=False) + response.raise_for_status() + return response.text + except Exception as ex: + logging.warning("Cannot insert with exception %s", str(ex)) + if response: + logging.warning("Reponse text %s", response.text) + time.sleep(0.1 * i) + + raise Exception("Cannot insert data into clickhouse") + + def select_json_each_row(self, db, query): + text = self._select_and_get_json_each_row(db, query) + result = [] + for line in text.split('\n'): + if line: + result.append(json.loads(line)) + return result + +def prepare_tests_results_for_clickhouse( + pr_info, test_results, + check_status, check_duration, check_start_time, + report_url, check_name): + + pull_request_url = "https://github.com/ClickHouse/ClickHouse/commits/master" + base_ref = "master" + head_ref = "master" + base_repo = pr_info.repo_full_name + head_repo = pr_info.repo_full_name + if pr_info.number != 0: + pull_request_url = pr_info.pr_html_url + base_ref = pr_info.base_ref + base_repo = pr_info.base_name + head_ref = pr_info.head_ref + head_repo = pr_info.head_name + + common_properties = dict( + pull_request_number=pr_info.number, + commit_sha=pr_info.sha, + commit_url=pr_info.commit_html_url, + check_name=check_name, + check_status=check_status, + check_duration_ms=int(float(check_duration) * 1000), + check_start_time=check_start_time, + report_url=report_url, + pull_request_url=pull_request_url, + base_ref=base_ref, + base_repo=base_repo, + head_ref=head_ref, + head_repo=head_repo, + task_url=pr_info.task_url, + ) + + # Always publish a total record for all checks. For checks with individual + # tests, also publish a record per test. 
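+    # Illustrative example (hypothetical values): a per-test row below is common_properties plus {"test_name": "00001_select_1", "test_status": "FAIL", "test_duration_ms": 1370}.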
+ result = [common_properties] + for test_result in test_results: + current_row = common_properties.copy() + test_name = test_result[0] + test_status = test_result[1] + + test_time = 0 + if len(test_result) > 2 and test_result[2]: + test_time = test_result[2] + current_row['test_duration_ms'] = int(float(test_time) * 1000) + current_row['test_name'] = test_name + current_row['test_status'] = test_status + result.append(current_row) + + return result + +def mark_flaky_tests(clickhouse_helper, check_name, test_results): + try: + query = """ + SELECT DISTINCT test_name + FROM checks + WHERE + check_start_time BETWEEN now() - INTERVAL 3 DAY AND now() + AND check_name = '{check_name}' + AND (test_status = 'FAIL' OR test_status = 'FLAKY') + AND pull_request_number = 0 + """.format(check_name=check_name) + + tests_data = clickhouse_helper.select_json_each_row('gh-data', query) + master_failed_tests = {row['test_name'] for row in tests_data} + logging.info("Found flaky tests: %s", ', '.join(master_failed_tests)) + + for test_result in test_results: + if test_result[1] == 'FAIL' and test_result[0] in master_failed_tests: + test_result[1] = 'FLAKY' + except Exception as ex: + logging.info("Exception happened during flaky tests fetch %s", ex) diff --git a/tests/ci/commit_status_helper.py b/tests/ci/commit_status_helper.py new file mode 100644 index 000000000000..5bdbf6347159 --- /dev/null +++ b/tests/ci/commit_status_helper.py @@ -0,0 +1,12 @@ +#!/usr/bin/env python3 + +import os + +def get_commit(gh, commit_sha): + repo = gh.get_repo(os.getenv("GITHUB_REPOSITORY", "ClickHouse/ClickHouse")) + commit = repo.get_commit(commit_sha) + return commit + +def post_commit_status(gh, sha, check_name, description, state, report_url): + commit = get_commit(gh, sha) + commit.create_status(context=check_name, description=description, state=state, target_url=report_url) diff --git a/tests/ci/compatibility_check.py b/tests/ci/compatibility_check.py new file mode 100644 index 000000000000..b6a8f67aa5f4 --- /dev/null +++ b/tests/ci/compatibility_check.py @@ -0,0 +1,164 @@ +#!/usr/bin/env python3 + +from distutils.version import StrictVersion +import logging +import os +import json +import subprocess + +from github import Github + +from s3_helper import S3Helper +from get_robot_token import get_best_robot_token +from pr_info import PRInfo +from build_download_helper import download_builds_filter +from upload_result_helper import upload_results +from docker_pull_helper import get_images_with_versions +from commit_status_helper import post_commit_status +from clickhouse_helper import ClickHouseHelper, mark_flaky_tests, prepare_tests_results_for_clickhouse +from stopwatch import Stopwatch + +IMAGE_UBUNTU = "clickhouse/test-old-ubuntu" +IMAGE_CENTOS = "clickhouse/test-old-centos" +MAX_GLIBC_VERSION = '2.4' +DOWNLOAD_RETRIES_COUNT = 5 +CHECK_NAME = "Compatibility check (actions)" + +def process_os_check(log_path): + name = os.path.basename(log_path) + with open(log_path, 'r') as log: + line = log.read().split('\n')[0].strip() + if line != 'OK': + return (name, "FAIL") + else: + return (name, "OK") + +def process_glibc_check(log_path): + bad_lines = [] + with open(log_path, 'r') as log: + for line in log: + if line.strip(): + columns = line.strip().split(' ') + symbol_with_glibc = columns[-2] # sysconf@GLIBC_2.2.5 + _, version = symbol_with_glibc.split('@GLIBC_') + if version == 'PRIVATE': + bad_lines.append((symbol_with_glibc, "FAIL")) + elif StrictVersion(version) > MAX_GLIBC_VERSION: + bad_lines.append((symbol_with_glibc, 
"FAIL")) + if not bad_lines: + bad_lines.append(("glibc check", "OK")) + return bad_lines + +def process_result(result_folder, server_log_folder): + summary = process_glibc_check(os.path.join(result_folder, "glibc.log")) + + status = "success" + description = "Compatibility check passed" + if len(summary) > 1 or summary[0][1] != "OK": + status = "failure" + description = "glibc check failed" + + if status == "success": + for operating_system in ("ubuntu:12.04", "centos:5"): + result = process_os_check(os.path.join(result_folder, operating_system)) + if result[1] != "OK": + status = "failure" + description = f"Old {operating_system} failed" + summary += [result] + break + summary += [result] + + server_log_path = os.path.join(server_log_folder, "clickhouse-server.log") + stderr_log_path = os.path.join(server_log_folder, "stderr.log") + client_stderr_log_path = os.path.join(server_log_folder, "clientstderr.log") + result_logs = [] + if os.path.exists(server_log_path): + result_logs.append(server_log_path) + + if os.path.exists(stderr_log_path): + result_logs.append(stderr_log_path) + + if os.path.exists(client_stderr_log_path): + result_logs.append(client_stderr_log_path) + + return status, description, summary, result_logs + + +def get_run_commands(build_path, result_folder, server_log_folder, image_centos, image_ubuntu): + return [ + f"readelf -s {build_path}/usr/bin/clickhouse | grep '@GLIBC_' > {result_folder}/glibc.log", + f"readelf -s {build_path}/usr/bin/clickhouse-odbc-bridge | grep '@GLIBC_' >> {result_folder}/glibc.log", + f"docker run --network=host --volume={build_path}/usr/bin/clickhouse:/clickhouse " \ + f"--volume={build_path}/etc/clickhouse-server:/config " \ + f"--volume={server_log_folder}:/var/log/clickhouse-server {image_ubuntu} > {result_folder}/ubuntu:12.04", + f"docker run --network=host --volume={build_path}/usr/bin/clickhouse:/clickhouse " \ + f"--volume={build_path}/etc/clickhouse-server:/config " \ + f"--volume={server_log_folder}:/var/log/clickhouse-server {image_centos} > {result_folder}/centos:5", + ] + + +if __name__ == "__main__": + logging.basicConfig(level=logging.INFO) + + stopwatch = Stopwatch() + + temp_path = os.getenv("TEMP_PATH", os.path.abspath(".")) + repo_path = os.getenv("REPO_COPY", os.path.abspath("../../")) + reports_path = os.getenv("REPORTS_PATH", "./reports") + + with open(os.getenv('GITHUB_EVENT_PATH'), 'r', encoding='utf-8') as event_file: + event = json.load(event_file) + + pr_info = PRInfo(event) + + gh = Github(get_best_robot_token()) + + docker_images = get_images_with_versions(reports_path, [IMAGE_CENTOS, IMAGE_UBUNTU]) + + packages_path = os.path.join(temp_path, "packages") + if not os.path.exists(packages_path): + os.makedirs(packages_path) + + def url_filter(url): + return url.endswith('.deb') and ('clickhouse-common-static_' in url or 'clickhouse-server_' in url) + + download_builds_filter(CHECK_NAME, reports_path, packages_path, url_filter) + + for f in os.listdir(packages_path): + if '.deb' in f: + full_path = os.path.join(packages_path, f) + subprocess.check_call(f"dpkg -x {full_path} {packages_path} && rm {full_path}", shell=True) + + server_log_path = os.path.join(temp_path, "server_log") + if not os.path.exists(server_log_path): + os.makedirs(server_log_path) + + result_path = os.path.join(temp_path, "result_path") + if not os.path.exists(result_path): + os.makedirs(result_path) + + run_commands = get_run_commands(packages_path, result_path, server_log_path, docker_images[0], docker_images[1]) + + state = "success" + for 
run_command in run_commands: + try: + logging.info("Running command %s", run_command) + subprocess.check_call(run_command, shell=True) + except subprocess.CalledProcessError as ex: + logging.info("Exception calling command %s", ex) + state = "failure" + + subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True) + + s3_helper = S3Helper('https://s3.amazonaws.com') + state, description, test_results, additional_logs = process_result(result_path, server_log_path) + + ch_helper = ClickHouseHelper() + mark_flaky_tests(ch_helper, CHECK_NAME, test_results) + + report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, additional_logs, CHECK_NAME) + print(f"::notice ::Report url: {report_url}") + post_commit_status(gh, pr_info.sha, CHECK_NAME, description, state, report_url) + + prepared_events = prepare_tests_results_for_clickhouse(pr_info, test_results, state, stopwatch.duration_seconds, stopwatch.start_time_str, report_url, CHECK_NAME) + ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events) diff --git a/tests/ci/compress_files.py b/tests/ci/compress_files.py new file mode 100644 index 000000000000..f3d2349408f3 --- /dev/null +++ b/tests/ci/compress_files.py @@ -0,0 +1,51 @@ +#!/usr/bin/env python3 +import subprocess +import logging +import os + +def compress_file_fast(path, archive_path): + if os.path.exists('/usr/bin/pigz'): + subprocess.check_call("pigz < {} > {}".format(path, archive_path), shell=True) + else: + subprocess.check_call("gzip < {} > {}".format(path, archive_path), shell=True) + + +def compress_fast(path, archive_path, exclude=None): + pigz_part = '' + if os.path.exists('/usr/bin/pigz'): + logging.info("pigz found, will compress and decompress faster") + pigz_part = "--use-compress-program='pigz'" + else: + pigz_part = '-z' + logging.info("no pigz, compressing with default tar") + + if exclude is None: + exclude_part = "" + elif isinstance(exclude, list): + exclude_part = " ".join(["--exclude {}".format(x) for x in exclude]) + else: + exclude_part = "--exclude {}".format(str(exclude)) + + fname = os.path.basename(path) + if os.path.isfile(path): + path = os.path.dirname(path) + else: + path += "/.." 
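+    # For a regular file the command built below looks roughly like: tar --use-compress-program='pigz' -cf <archive_path> -C <dirname(path)> <basename(path)> (or with -z when pigz is not installed).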
+ cmd = "tar {} {} -cf {} -C {} {}".format(pigz_part, exclude_part, archive_path, path, fname) + logging.debug("compress_fast cmd: %s", cmd) + subprocess.check_call(cmd, shell=True) + + +def decompress_fast(archive_path, result_path=None): + pigz_part = '' + if os.path.exists('/usr/bin/pigz'): + logging.info("pigz found, will compress and decompress faster ('%s' -> '%s')", archive_path, result_path) + pigz_part = "--use-compress-program='pigz'" + else: + pigz_part = '-z' + logging.info("no pigz, decompressing with default tar ('%s' -> '%s')", archive_path, result_path) + + if result_path is None: + subprocess.check_call("tar {} -xf {}".format(pigz_part, archive_path), shell=True) + else: + subprocess.check_call("tar {} -xf {} -C {}".format(pigz_part, archive_path, result_path), shell=True) diff --git a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py new file mode 100644 index 000000000000..0482f05f284d --- /dev/null +++ b/tests/ci/docker_images_check.py @@ -0,0 +1,216 @@ +#!/usr/bin/env python3 +import subprocess +import logging +import json +import os +import time +import shutil +from github import Github +from s3_helper import S3Helper +from pr_info import PRInfo +from get_robot_token import get_best_robot_token, get_parameter_from_ssm +from upload_result_helper import upload_results +from commit_status_helper import get_commit +from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse +from stopwatch import Stopwatch + +NAME = "Push to Dockerhub (actions)" + +def get_changed_docker_images(pr_info, repo_path, image_file_path): + images_dict = {} + path_to_images_file = os.path.join(repo_path, image_file_path) + if os.path.exists(path_to_images_file): + with open(path_to_images_file, 'r') as dict_file: + images_dict = json.load(dict_file) + else: + logging.info("Image file %s doesnt exists in repo %s", image_file_path, repo_path) + + dockerhub_repo_name = 'yandex' + if not images_dict: + return [], dockerhub_repo_name + + files_changed = pr_info.changed_files + + logging.info("Changed files for PR %s @ %s: %s", pr_info.number, pr_info.sha, str(files_changed)) + + changed_images = [] + + for dockerfile_dir, image_description in images_dict.items(): + if image_description['name'].startswith('clickhouse/'): + dockerhub_repo_name = 'clickhouse' + + for f in files_changed: + if f.startswith(dockerfile_dir): + logging.info( + "Found changed file '%s' which affects docker image '%s' with path '%s'", + f, image_description['name'], dockerfile_dir) + changed_images.append(dockerfile_dir) + break + + # The order is important: dependents should go later than bases, so that + # they are built with updated base versions. + index = 0 + while index < len(changed_images): + image = changed_images[index] + for dependent in images_dict[image]['dependent']: + logging.info( + "Marking docker image '%s' as changed because it depends on changed docker image '%s'", + dependent, image) + changed_images.append(dependent) + index += 1 + if index > 100: + # Sanity check to prevent infinite loop. + raise RuntimeError("Too many changed docker images, this is a bug." + str(changed_images)) + + # If a dependent image was already in the list because its own files + # changed, but then it was added as a dependent of a changed base, we + # must remove the earlier entry so that it doesn't go earlier than its + # base. This way, the dependent will be rebuilt later than the base, and + # will correctly use the updated version of the base. 
+ seen = set() + no_dups_reversed = [] + for x in reversed(changed_images): + if x not in seen: + seen.add(x) + no_dups_reversed.append(x) + + result = [(x, images_dict[x]['name']) for x in reversed(no_dups_reversed)] + logging.info("Changed docker images for PR %s @ %s: '%s'", pr_info.number, pr_info.sha, result) + return result, dockerhub_repo_name + +def build_and_push_one_image(path_to_dockerfile_folder, image_name, version_string): + logging.info("Building docker image %s with version %s from path %s", image_name, version_string, path_to_dockerfile_folder) + build_log = None + push_log = None + with open('build_log_' + str(image_name).replace('/', '_') + "_" + version_string, 'w') as pl: + cmd = "docker build --network=host -t {im}:{ver} {path}".format(im=image_name, ver=version_string, path=path_to_dockerfile_folder) + retcode = subprocess.Popen(cmd, shell=True, stderr=pl, stdout=pl).wait() + build_log = str(pl.name) + if retcode != 0: + return False, build_log, None + + with open('tag_log_' + str(image_name).replace('/', '_') + "_" + version_string, 'w') as pl: + cmd = "docker build --network=host -t {im} {path}".format(im=image_name, path=path_to_dockerfile_folder) + retcode = subprocess.Popen(cmd, shell=True, stderr=pl, stdout=pl).wait() + build_log = str(pl.name) + if retcode != 0: + return False, build_log, None + + logging.info("Pushing image %s to dockerhub", image_name) + + with open('push_log_' + str(image_name).replace('/', '_') + "_" + version_string, 'w') as pl: + cmd = "docker push {im}:{ver}".format(im=image_name, ver=version_string) + retcode = subprocess.Popen(cmd, shell=True, stderr=pl, stdout=pl).wait() + push_log = str(pl.name) + if retcode != 0: + return False, build_log, push_log + + logging.info("Processing of %s successfully finished", image_name) + return True, build_log, push_log + +def process_single_image(versions, path_to_dockerfile_folder, image_name): + logging.info("Image will be pushed with versions %s", ', '.join(versions)) + result = [] + for ver in versions: + for i in range(5): + success, build_log, push_log = build_and_push_one_image(path_to_dockerfile_folder, image_name, ver) + if success: + result.append((image_name + ":" + ver, build_log, push_log, 'OK')) + break + logging.info("Got error will retry %s time and sleep for %s seconds", i, i * 5) + time.sleep(i * 5) + else: + result.append((image_name + ":" + ver, build_log, push_log, 'FAIL')) + + logging.info("Processing finished") + return result + + +def process_test_results(s3_client, test_results, s3_path_prefix): + overall_status = 'success' + processed_test_results = [] + for image, build_log, push_log, status in test_results: + if status != 'OK': + overall_status = 'failure' + url_part = '' + if build_log is not None and os.path.exists(build_log): + build_url = s3_client.upload_test_report_to_s3( + build_log, + s3_path_prefix + "/" + os.path.basename(build_log)) + url_part += '<a href="{}">build_log</a>'.format(build_url) + if push_log is not None and os.path.exists(push_log): + push_url = s3_client.upload_test_report_to_s3( + push_log, + s3_path_prefix + "/" + os.path.basename(push_log)) + if url_part: + url_part += ', ' + url_part += '<a href="{}">push_log</a>'.format(push_url) + if url_part: + test_name = image + ' (' + url_part + ')' + else: + test_name = image + processed_test_results.append((test_name, status)) + return overall_status, processed_test_results + +if __name__ == "__main__": + logging.basicConfig(level=logging.INFO) + + stopwatch = Stopwatch() + + repo_path = os.getenv("GITHUB_WORKSPACE", 
os.path.abspath("../../")) + temp_path = os.path.join(os.getenv("RUNNER_TEMP", os.path.abspath("./temp")), 'docker_images_check') + dockerhub_password = get_parameter_from_ssm('dockerhub_robot_password') + + if os.path.exists(temp_path): + shutil.rmtree(temp_path) + + if not os.path.exists(temp_path): + os.makedirs(temp_path) + + with open(os.getenv('GITHUB_EVENT_PATH'), 'r') as event_file: + event = json.load(event_file) + + pr_info = PRInfo(event, False, True) + changed_images, dockerhub_repo_name = get_changed_docker_images(pr_info, repo_path, "docker/images.json") + logging.info("Has changed images %s", ', '.join([str(image[0]) for image in changed_images])) + pr_commit_version = str(pr_info.number) + '-' + pr_info.sha + versions = [str(pr_info.number), pr_commit_version] + if pr_info.number == 0: + versions.append("latest") + + subprocess.check_output("docker login --username 'robotclickhouse' --password '{}'".format(dockerhub_password), shell=True) + + result_images = {} + images_processing_result = [] + for rel_path, image_name in changed_images: + full_path = os.path.join(repo_path, rel_path) + images_processing_result += process_single_image(versions, full_path, image_name) + result_images[image_name] = pr_commit_version + + if changed_images: + description = "Updated " + ','.join([im[1] for im in changed_images]) + else: + description = "Nothing to update" + + if len(description) >= 140: + description = description[:136] + "..." + + s3_helper = S3Helper('https://s3.amazonaws.com') + + s3_path_prefix = str(pr_info.number) + "/" + pr_info.sha + "/" + NAME.lower().replace(' ', '_') + status, test_results = process_test_results(s3_helper, images_processing_result, s3_path_prefix) + + ch_helper = ClickHouseHelper() + url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, [], NAME) + + with open(os.path.join(temp_path, 'changed_images.json'), 'w') as images_file: + json.dump(result_images, images_file) + + print("::notice ::Report url: {}".format(url)) + print("::set-output name=url_output::\"{}\"".format(url)) + gh = Github(get_best_robot_token()) + commit = get_commit(gh, pr_info.sha) + commit.create_status(context=NAME, description=description, state=status, target_url=url) + + prepared_events = prepare_tests_results_for_clickhouse(pr_info, test_results, status, stopwatch.duration_seconds, stopwatch.start_time_str, url, NAME) + ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events) diff --git a/tests/ci/docker_pull_helper.py b/tests/ci/docker_pull_helper.py new file mode 100644 index 000000000000..f98047448208 --- /dev/null +++ b/tests/ci/docker_pull_helper.py @@ -0,0 +1,59 @@ +#!/usr/bin/env python3 + +import os +import json +import time +import subprocess +import logging + +class DockerImage: + def __init__(self, name, version=None): + self.name = name + if version is None: + self.version = 'latest' + else: + self.version = version + + def __str__(self): + return f"{self.name}:{self.version}" + +def get_images_with_versions(reports_path, required_image, pull=True): + images_path = None + for root, _, files in os.walk(reports_path): + for f in files: + if f == 'changed_images.json': + images_path = os.path.join(root, 'changed_images.json') + break + + if images_path is not None and os.path.exists(images_path): + logging.info("Images file exists") + with open(images_path, 'r', encoding='utf-8') as images_fd: + images = json.load(images_fd) + logging.info("Got images %s", images) + else: + images = {} + + docker_images = [] + for 
image_name in required_image: + docker_image = DockerImage(image_name) + if image_name in images: + docker_image.version = images[image_name] + docker_images.append(docker_image) + + if pull: + for docker_image in docker_images: + for i in range(10): + try: + logging.info("Pulling image %s", docker_image) + latest_error = subprocess.check_output(f"docker pull {docker_image}", stderr=subprocess.STDOUT, shell=True) + break + except Exception as ex: + time.sleep(i * 3) + logging.info("Got execption pulling docker %s", ex) + else: + raise Exception(f"Cannot pull dockerhub for image docker pull {docker_image} because of {latest_error}") + + return docker_images + +def get_image_with_version(reports_path, image, pull=True): + return get_images_with_versions(reports_path, [image], pull)[0] diff --git a/tests/ci/docs_check.py b/tests/ci/docs_check.py new file mode 100644 index 000000000000..11ff68e0286c --- /dev/null +++ b/tests/ci/docs_check.py @@ -0,0 +1,99 @@ +#!/usr/bin/env python3 +import logging +import subprocess +import os +import json +import sys +from github import Github +from s3_helper import S3Helper +from pr_info import PRInfo +from get_robot_token import get_best_robot_token +from upload_result_helper import upload_results +from docker_pull_helper import get_image_with_version +from commit_status_helper import post_commit_status, get_commit +from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse +from stopwatch import Stopwatch + + +NAME = "Docs Check (actions)" + +if __name__ == "__main__": + logging.basicConfig(level=logging.INFO) + + stopwatch = Stopwatch() + + temp_path = os.path.join(os.getenv("TEMP_PATH")) + repo_path = os.path.join(os.getenv("REPO_COPY")) + + with open(os.getenv('GITHUB_EVENT_PATH'), 'r', encoding='utf-8') as event_file: + event = json.load(event_file) + + pr_info = PRInfo(event, need_changed_files=True) + + gh = Github(get_best_robot_token()) + if not pr_info.has_changes_in_documentation(): + logging.info ("No changes in documentation") + commit = get_commit(gh, pr_info.sha) + commit.create_status(context=NAME, description="No changes in docs", state="success") + sys.exit(0) + + logging.info("Has changes in docs") + + if not os.path.exists(temp_path): + os.makedirs(temp_path) + + docker_image = get_image_with_version(temp_path, 'clickhouse/docs-check') + + test_output = os.path.join(temp_path, 'docs_check_log') + if not os.path.exists(test_output): + os.makedirs(test_output) + + cmd = f"docker run --cap-add=SYS_PTRACE --volume={repo_path}:/repo_path --volume={test_output}:/output_path {docker_image}" + + run_log_path = os.path.join(test_output, 'runlog.log') + + with open(run_log_path, 'w', encoding='utf-8') as log: + with subprocess.Popen(cmd, shell=True, stderr=log, stdout=log) as process: + retcode = process.wait() + if retcode == 0: + logging.info("Run successfully") + status = "success" + description = "Docs check passed" + else: + description = "Docs check failed (non zero exit code)" + status = "failure" + logging.info("Run failed") + + subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True) + files = os.listdir(test_output) + lines = [] + additional_files = [] + if not files: + logging.error("No output files after docs check") + description = "No output files after docs check" + status = "failure" + else: + for f in files: + path = os.path.join(test_output, f) + additional_files.append(path) + with open(path, 'r', encoding='utf-8') as check_file: + for line in check_file: + if "ERROR" in line: + 
lines.append((line.split(':')[-1], "FAIL"))
+        if lines:
+            status = "failure"
+            description = "Found errors in docs"
+        elif status != "failure":
+            lines.append(("No errors found", "OK"))
+        else:
+            lines.append(("Non zero exit code", "FAIL"))
+
+    s3_helper = S3Helper('https://s3.amazonaws.com')
+    ch_helper = ClickHouseHelper()
+
+    report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, lines, additional_files, NAME)
+    print(f"::notice ::Report url: {report_url}")
+    post_commit_status(gh, pr_info.sha, NAME, description, status, report_url)
+
+    prepared_events = prepare_tests_results_for_clickhouse(pr_info, lines, status, stopwatch.duration_seconds, stopwatch.start_time_str, report_url, NAME)
+    ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events)
diff --git a/tests/ci/docs_release.py b/tests/ci/docs_release.py
new file mode 100644
index 000000000000..6ca45d638582
--- /dev/null
+++ b/tests/ci/docs_release.py
@@ -0,0 +1,96 @@
+#!/usr/bin/env python3
+import logging
+import subprocess
+import os
+import json
+import sys
+
+from github import Github
+
+from s3_helper import S3Helper
+from pr_info import PRInfo
+from get_robot_token import get_best_robot_token
+from ssh import SSHKey
+from upload_result_helper import upload_results
+from docker_pull_helper import get_image_with_version
+from commit_status_helper import get_commit
+
+NAME = "Docs Release (actions)"
+
+if __name__ == "__main__":
+    logging.basicConfig(level=logging.INFO)
+
+    temp_path = os.path.join(os.getenv("TEMP_PATH"))
+    repo_path = os.path.join(os.getenv("REPO_COPY"))
+
+    with open(os.getenv('GITHUB_EVENT_PATH'), 'r', encoding='utf-8') as event_file:
+        event = json.load(event_file)
+
+    pr_info = PRInfo(event, need_changed_files=True)
+
+    gh = Github(get_best_robot_token())
+    if not pr_info.has_changes_in_documentation():
+        logging.info("No changes in documentation")
+        commit = get_commit(gh, pr_info.sha)
+        commit.create_status(context=NAME, description="No changes in docs", state="success")
+        sys.exit(0)
+
+    logging.info("Has changes in docs")
+
+    if not os.path.exists(temp_path):
+        os.makedirs(temp_path)
+
+    docker_image = get_image_with_version(temp_path, 'clickhouse/docs-release')
+
+    test_output = os.path.join(temp_path, 'docs_release_log')
+    if not os.path.exists(test_output):
+        os.makedirs(test_output)
+
+    token = os.getenv('CLOUDFLARE_TOKEN')
+    cmd = "docker run --cap-add=SYS_PTRACE --volume=$SSH_AUTH_SOCK:/ssh-agent -e SSH_AUTH_SOCK=/ssh-agent " \
+        f"-e CLOUDFLARE_TOKEN={token} --volume={repo_path}:/repo_path --volume={test_output}:/output_path {docker_image}"
+
+    run_log_path = os.path.join(test_output, 'runlog.log')
+
+    with open(run_log_path, 'w', encoding='utf-8') as log, SSHKey("ROBOT_CLICKHOUSE_SSH_KEY"):
+        with subprocess.Popen(cmd, shell=True, stderr=log, stdout=log) as process:
+            retcode = process.wait()
+            if retcode == 0:
+                logging.info("Run successfully")
+                status = "success"
+                description = "Released successfully"
+            else:
+                description = "Release failed (non zero exit code)"
+                status = "failure"
+                logging.info("Run failed")
+
+    subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True)
+    files = os.listdir(test_output)
+    lines = []
+    additional_files = []
+    if not files:
+        logging.error("No output files after docs release")
+        description = "No output files after docs release"
+        status = "failure"
+    else:
+        for f in files:
+            path = os.path.join(test_output, f)
+            additional_files.append(path)
+            with open(path, 'r', encoding='utf-8') as check_file:
for line in check_file: + if "ERROR" in line: + lines.append((line.split(':')[-1], "FAIL")) + if lines: + status = "failure" + description = "Found errors in docs" + elif status != "failure": + lines.append(("No errors found", "OK")) + else: + lines.append(("Non zero exit code", "FAIL")) + + s3_helper = S3Helper('https://s3.amazonaws.com') + + report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, lines, additional_files, NAME) + print("::notice ::Report url: {report_url}") + commit = get_commit(gh, pr_info.sha) + commit.create_status(context=NAME, description=description, state=status, target_url=report_url) diff --git a/tests/ci/fast_test_check.py b/tests/ci/fast_test_check.py new file mode 100644 index 000000000000..2734102be3f5 --- /dev/null +++ b/tests/ci/fast_test_check.py @@ -0,0 +1,144 @@ +#!/usr/bin/env python3 + +import logging +import subprocess +import os +import json +import csv +from github import Github +from pr_info import PRInfo +from s3_helper import S3Helper +from get_robot_token import get_best_robot_token +from upload_result_helper import upload_results +from docker_pull_helper import get_image_with_version +from commit_status_helper import post_commit_status +from clickhouse_helper import ClickHouseHelper, mark_flaky_tests, prepare_tests_results_for_clickhouse +from stopwatch import Stopwatch + +NAME = 'Fast test (actions)' + +def get_fasttest_cmd(workspace, output_path, ccache_path, repo_path, pr_number, commit_sha, image): + return f"docker run --cap-add=SYS_PTRACE " \ + f"-e FASTTEST_WORKSPACE=/fasttest-workspace -e FASTTEST_OUTPUT=/test_output " \ + f"-e FASTTEST_SOURCE=/ClickHouse --cap-add=SYS_PTRACE " \ + f"-e PULL_REQUEST_NUMBER={pr_number} -e COMMIT_SHA={commit_sha} -e COPY_CLICKHOUSE_BINARY_TO_OUTPUT=1 " \ + f"--volume={workspace}:/fasttest-workspace --volume={repo_path}:/ClickHouse --volume={output_path}:/test_output "\ + f"--volume={ccache_path}:/fasttest-workspace/ccache {image}" + + +def process_results(result_folder): + test_results = [] + additional_files = [] + # Just upload all files from result_folder. + # If task provides processed results, then it's responsible for content of result_folder. 
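+    # The fast test image is expected to leave two tab-separated files in result_folder,
+    # which the code below parses:
+    #   check_status.tsv  - a single "state<TAB>description" row, e.g. "success<TAB>Fast test passed"
+    #   test_results.tsv  - roughly one "test_name<TAB>status" row per test (extra columns are optional)
+    # The example values above are illustrative only, not taken from a real run.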
+ if os.path.exists(result_folder): + test_files = [f for f in os.listdir(result_folder) if os.path.isfile(os.path.join(result_folder, f))] + additional_files = [os.path.join(result_folder, f) for f in test_files] + + status_path = os.path.join(result_folder, "check_status.tsv") + logging.info("Found test_results.tsv") + status = list(csv.reader(open(status_path, 'r'), delimiter='\t')) + if len(status) != 1 or len(status[0]) != 2: + return "error", "Invalid check_status.tsv", test_results, additional_files + state, description = status[0][0], status[0][1] + + results_path = os.path.join(result_folder, "test_results.tsv") + test_results = list(csv.reader(open(results_path, 'r'), delimiter='\t')) + if len(test_results) == 0: + raise Exception("Empty results") + + return state, description, test_results, additional_files + + + +if __name__ == "__main__": + logging.basicConfig(level=logging.INFO) + + stopwatch = Stopwatch() + + temp_path = os.getenv("TEMP_PATH", os.path.abspath(".")) + caches_path = os.getenv("CACHES_PATH", temp_path) + + if not os.path.exists(temp_path): + os.makedirs(temp_path) + + with open(os.getenv('GITHUB_EVENT_PATH'), 'r') as event_file: + event = json.load(event_file) + + pr_info = PRInfo(event) + + gh = Github(get_best_robot_token()) + + docker_image = get_image_with_version(temp_path, 'clickhouse/fasttest') + + s3_helper = S3Helper('https://s3.amazonaws.com') + + workspace = os.path.join(temp_path, "fasttest-workspace") + if not os.path.exists(workspace): + os.makedirs(workspace) + + output_path = os.path.join(temp_path, "fasttest-output") + if not os.path.exists(output_path): + os.makedirs(output_path) + + cache_path = os.path.join(caches_path, "fasttest") + if not os.path.exists(cache_path): + os.makedirs(cache_path) + + repo_path = os.path.join(temp_path, "fasttest-repo") + if not os.path.exists(repo_path): + os.makedirs(repo_path) + + run_cmd = get_fasttest_cmd(workspace, output_path, cache_path, repo_path, pr_info.number, pr_info.sha, docker_image) + logging.info("Going to run fasttest with cmd %s", run_cmd) + + logs_path = os.path.join(temp_path, "fasttest-logs") + if not os.path.exists(logs_path): + os.makedirs(logs_path) + + run_log_path = os.path.join(logs_path, 'runlog.log') + with open(run_log_path, 'w') as log: + retcode = subprocess.Popen(run_cmd, shell=True, stderr=log, stdout=log).wait() + if retcode == 0: + logging.info("Run successfully") + else: + logging.info("Run failed") + + subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True) + subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {cache_path}", shell=True) + + test_output_files = os.listdir(output_path) + additional_logs = [] + for f in test_output_files: + additional_logs.append(os.path.join(output_path, f)) + + test_log_exists = 'test_log.txt' in test_output_files or 'test_result.txt' in test_output_files + test_result_exists = 'test_results.tsv' in test_output_files + test_results = [] + if 'submodule_log.txt' not in test_output_files: + description = "Cannot clone repository" + state = "failure" + elif 'cmake_log.txt' not in test_output_files: + description = "Cannot fetch submodules" + state = "failure" + elif 'build_log.txt' not in test_output_files: + description = "Cannot finish cmake" + state = "failure" + elif 'install_log.txt' not in test_output_files: + description = "Cannot build ClickHouse" + state = "failure" + elif not test_log_exists and not test_result_exists: + description = "Cannot install or start ClickHouse" + state = "failure" + else: + state, 
description, test_results, additional_logs = process_results(output_path) + + ch_helper = ClickHouseHelper() + mark_flaky_tests(ch_helper, NAME, test_results) + + report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, [run_log_path] + additional_logs, NAME, True) + print("::notice ::Report url: {}".format(report_url)) + post_commit_status(gh, pr_info.sha, NAME, description, state, report_url) + + prepared_events = prepare_tests_results_for_clickhouse(pr_info, test_results, state, stopwatch.duration_seconds, stopwatch.start_time_str, report_url, NAME) + ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events) diff --git a/tests/ci/finish_check.py b/tests/ci/finish_check.py new file mode 100644 index 000000000000..c38b3c094485 --- /dev/null +++ b/tests/ci/finish_check.py @@ -0,0 +1,39 @@ +#!/usr/bin/env python3 +import logging +import json +import os +from github import Github +from pr_info import PRInfo +from get_robot_token import get_best_robot_token +from commit_status_helper import get_commit + +NAME = 'Run Check (actions)' + +def filter_statuses(statuses): + """ + Squash statuses to latest state + 1. context="first", state="success", update_time=1 + 2. context="second", state="success", update_time=2 + 3. context="first", stat="failure", update_time=3 + =========> + 1. context="second", state="success" + 2. context="first", stat="failure" + """ + filt = {} + for status in sorted(statuses, key=lambda x: x.updated_at): + filt[status.context] = status + return filt + +if __name__ == "__main__": + logging.basicConfig(level=logging.INFO) + with open(os.getenv('GITHUB_EVENT_PATH'), 'r') as event_file: + event = json.load(event_file) + + pr_info = PRInfo(event, need_orgs=True) + gh = Github(get_best_robot_token()) + commit = get_commit(gh, pr_info.sha) + + url = f"{os.getenv('GITHUB_SERVER_URL')}/{os.getenv('GITHUB_REPOSITORY')}/actions/runs/{os.getenv('GITHUB_RUN_ID')}" + statuses = filter_statuses(list(commit.get_statuses())) + if NAME in statuses and statuses[NAME].state == "pending": + commit.create_status(context=NAME, description="All checks finished", state="success", target_url=url) diff --git a/tests/ci/functional_test_check.py b/tests/ci/functional_test_check.py new file mode 100644 index 000000000000..dc91ec071639 --- /dev/null +++ b/tests/ci/functional_test_check.py @@ -0,0 +1,173 @@ +#!/usr/bin/env python3 + +import csv +import logging +import subprocess +import os +import json +import sys + +from github import Github + +from s3_helper import S3Helper +from get_robot_token import get_best_robot_token +from pr_info import PRInfo +from build_download_helper import download_all_deb_packages +from upload_result_helper import upload_results +from docker_pull_helper import get_image_with_version +from commit_status_helper import post_commit_status, get_commit +from clickhouse_helper import ClickHouseHelper, mark_flaky_tests, prepare_tests_results_for_clickhouse +from stopwatch import Stopwatch + + +def get_image_name(check_name): + if 'stateless' in check_name.lower(): + return 'clickhouse/stateless-test' + if 'stateful' in check_name.lower(): + return 'clickhouse/stateful-test' + else: + raise Exception(f"Cannot deduce image name based on check name {check_name}") + +def get_run_command(builds_path, result_path, server_log_path, kill_timeout, additional_envs, image, flaky_check, tests_to_run): + additional_options = ['--hung-check'] + additional_options.append('--print-time') + + if tests_to_run: + additional_options += tests_to_run 
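+    # ADDITIONAL_OPTIONS is forwarded into the test image as an environment variable via "-e";
+    # for a flaky-check run it would expand to something roughly like
+    #   ADDITIONAL_OPTIONS="--hung-check --print-time 01234_some_test."
+    # (the test name above is only an illustration).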
+ + additional_options_str = '-e ADDITIONAL_OPTIONS="' + ' '.join(additional_options) + '"' + + envs = [f'-e MAX_RUN_TIME={int(0.9 * kill_timeout)}', '-e S3_URL="https://clickhouse-datasets.s3.amazonaws.com"'] + + if flaky_check: + envs += ['-e NUM_TRIES=100', '-e MAX_RUN_TIME=1800'] + + envs += [f'-e {e}' for e in additional_envs] + + env_str = ' '.join(envs) + + return f"docker run --volume={builds_path}:/package_folder " \ + f"--volume={result_path}:/test_output --volume={server_log_path}:/var/log/clickhouse-server " \ + f"--cap-add=SYS_PTRACE {env_str} {additional_options_str} {image}" + + +def get_tests_to_run(pr_info): + result = set([]) + + if pr_info.changed_files is None: + return [] + + for fpath in pr_info.changed_files: + if 'tests/queries/0_stateless/0' in fpath: + logging.info('File %s changed and seems like stateless test', fpath) + fname = fpath.split('/')[3] + fname_without_ext = os.path.splitext(fname)[0] + result.add(fname_without_ext + '.') + return list(result) + +def process_results(result_folder, server_log_path): + test_results = [] + additional_files = [] + # Just upload all files from result_folder. + # If task provides processed results, then it's responsible for content of result_folder. + if os.path.exists(result_folder): + test_files = [f for f in os.listdir(result_folder) if os.path.isfile(os.path.join(result_folder, f))] + additional_files = [os.path.join(result_folder, f) for f in test_files] + + if os.path.exists(server_log_path): + server_log_files = [f for f in os.listdir(server_log_path) if os.path.isfile(os.path.join(server_log_path, f))] + additional_files = additional_files + [os.path.join(server_log_path, f) for f in server_log_files] + + status_path = os.path.join(result_folder, "check_status.tsv") + logging.info("Found test_results.tsv") + with open(status_path, 'r', encoding='utf-8') as status_file: + status = list(csv.reader(status_file, delimiter='\t')) + + if len(status) != 1 or len(status[0]) != 2: + return "error", "Invalid check_status.tsv", test_results, additional_files + state, description = status[0][0], status[0][1] + + results_path = os.path.join(result_folder, "test_results.tsv") + with open(results_path, 'r', encoding='utf-8') as results_file: + test_results = list(csv.reader(results_file, delimiter='\t')) + if len(test_results) == 0: + raise Exception("Empty results") + + return state, description, test_results, additional_files + + +if __name__ == "__main__": + logging.basicConfig(level=logging.INFO) + + stopwatch = Stopwatch() + + temp_path = os.getenv("TEMP_PATH", os.path.abspath(".")) + repo_path = os.getenv("REPO_COPY", os.path.abspath("../../")) + reports_path = os.getenv("REPORTS_PATH", "./reports") + + check_name = sys.argv[1] + kill_timeout = int(sys.argv[2]) + flaky_check = 'flaky' in check_name.lower() + + if not os.path.exists(temp_path): + os.makedirs(temp_path) + + with open(os.getenv('GITHUB_EVENT_PATH'), 'r', encoding='utf-8') as event_file: + event = json.load(event_file) + + gh = Github(get_best_robot_token()) + pr_info = PRInfo(event, need_changed_files=flaky_check) + tests_to_run = [] + if flaky_check: + tests_to_run = get_tests_to_run(pr_info) + if not tests_to_run: + commit = get_commit(gh, pr_info.sha) + commit.create_status(context=check_name, description='Not found changed stateless tests', state='success') + sys.exit(0) + + + image_name = get_image_name(check_name) + docker_image = get_image_with_version(reports_path, image_name) + + packages_path = os.path.join(temp_path, "packages") + if not 
os.path.exists(packages_path): + os.makedirs(packages_path) + + download_all_deb_packages(check_name, reports_path, packages_path) + + server_log_path = os.path.join(temp_path, "server_log") + if not os.path.exists(server_log_path): + os.makedirs(server_log_path) + + result_path = os.path.join(temp_path, "result_path") + if not os.path.exists(result_path): + os.makedirs(result_path) + + run_log_path = os.path.join(result_path, "runlog.log") + + run_command = get_run_command(packages_path, result_path, server_log_path, kill_timeout, [], docker_image, flaky_check, tests_to_run) + logging.info("Going to run func tests: %s", run_command) + + with open(run_log_path, 'w', encoding='utf-8') as log: + with subprocess.Popen(run_command, shell=True, stderr=log, stdout=log) as process: + retcode = process.wait() + if retcode == 0: + logging.info("Run successfully") + else: + logging.info("Run failed") + + subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True) + + s3_helper = S3Helper('https://s3.amazonaws.com') + state, description, test_results, additional_logs = process_results(result_path, server_log_path) + + ch_helper = ClickHouseHelper() + mark_flaky_tests(ch_helper, check_name, test_results) + + report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, [run_log_path] + additional_logs, check_name) + + print(f"::notice ::Report url: {report_url}") + post_commit_status(gh, pr_info.sha, check_name, description, state, report_url) + + prepared_events = prepare_tests_results_for_clickhouse(pr_info, test_results, state, stopwatch.duration_seconds, stopwatch.start_time_str, report_url, check_name) + ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events) diff --git a/tests/ci/get_robot_token.py b/tests/ci/get_robot_token.py new file mode 100644 index 000000000000..db37ee311c53 --- /dev/null +++ b/tests/ci/get_robot_token.py @@ -0,0 +1,20 @@ +#!/usr/bin/env python3 +import boto3 +from github import Github + +def get_parameter_from_ssm(name, decrypt=True, client=None): + if not client: + client = boto3.client('ssm', region_name='us-east-1') + return client.get_parameter(Name=name, WithDecryption=decrypt)['Parameter']['Value'] + +def get_best_robot_token(token_prefix_env_name="github_robot_token_", total_tokens=4): + client = boto3.client('ssm', region_name='us-east-1') + tokens = {} + for i in range(1, total_tokens + 1): + token_name = token_prefix_env_name + str(i) + token = get_parameter_from_ssm(token_name, True, client) + gh = Github(token) + rest, _ = gh.rate_limiting + tokens[token] = rest + + return max(tokens.items(), key=lambda x: x[1])[0] diff --git a/tests/ci/integration_test_check.py b/tests/ci/integration_test_check.py new file mode 100644 index 000000000000..f6a46e72e848 --- /dev/null +++ b/tests/ci/integration_test_check.py @@ -0,0 +1,166 @@ +#!/usr/bin/env python3 + +import os +import logging +import sys +import json +import subprocess +import csv + +from github import Github + +from s3_helper import S3Helper +from get_robot_token import get_best_robot_token +from pr_info import PRInfo +from build_download_helper import download_all_deb_packages +from upload_result_helper import upload_results +from docker_pull_helper import get_images_with_versions +from commit_status_helper import post_commit_status +from clickhouse_helper import ClickHouseHelper, mark_flaky_tests, prepare_tests_results_for_clickhouse +from stopwatch import Stopwatch + + +DOWNLOAD_RETRIES_COUNT = 5 + +IMAGES = [ + 
"yandex/clickhouse-integration-tests-runner", + "yandex/clickhouse-mysql-golang-client", + "yandex/clickhouse-mysql-java-client", + "yandex/clickhouse-mysql-js-client", + "yandex/clickhouse-mysql-php-client", + "yandex/clickhouse-postgresql-java-client", + "yandex/clickhouse-integration-test", + "yandex/clickhouse-kerberos-kdc", + "yandex/clickhouse-integration-helper", +] + +def get_json_params_dict(check_name, commit_sha, pr_number, docker_images): + return { + 'context_name': check_name, + 'commit': commit_sha, + 'pull_request': pr_number, + 'pr_info': None, + 'docker_images_with_versions': docker_images, + 'shuffle_test_groups': False, + 'use_tmpfs': False, + 'disable_net_host': True, + } + +def get_env_for_runner(build_path, repo_path, result_path, work_path): + binary_path = os.path.join(build_path, 'clickhouse') + odbc_bridge_path = os.path.join(build_path, 'clickhouse-odbc-bridge') + library_bridge_path = os.path.join(build_path, 'clickhouse-library-bridge') + + my_env = os.environ.copy() + my_env["CLICKHOUSE_TESTS_BUILD_PATH"] = build_path + my_env["CLICKHOUSE_TESTS_SERVER_BIN_PATH"] = binary_path + my_env["CLICKHOUSE_TESTS_CLIENT_BIN_PATH"] = binary_path + my_env["CLICKHOUSE_TESTS_ODBC_BRIDGE_BIN_PATH"] = odbc_bridge_path + my_env["CLICKHOUSE_TESTS_LIBRARY_BRIDGE_BIN_PATH"] = library_bridge_path + my_env["CLICKHOUSE_TESTS_REPO_PATH"] = repo_path + my_env["CLICKHOUSE_TESTS_RESULT_PATH"] = result_path + my_env["CLICKHOUSE_TESTS_BASE_CONFIG_DIR"] = f"{repo_path}/programs/server" + my_env["CLICKHOUSE_TESTS_JSON_PARAMS_PATH"] = os.path.join(work_path, "params.json") + my_env["CLICKHOUSE_TESTS_RUNNER_RESTART_DOCKER"] = '0' + + return my_env + +def process_results(result_folder): + test_results = [] + additional_files = [] + # Just upload all files from result_folder. + # If task provides processed results, then it's responsible for content of result_folder. 
+ if os.path.exists(result_folder): + test_files = [f for f in os.listdir(result_folder) if os.path.isfile(os.path.join(result_folder, f))] + additional_files = [os.path.join(result_folder, f) for f in test_files] + + status_path = os.path.join(result_folder, "check_status.tsv") + if os.path.exists(status_path): + logging.info("Found test_results.tsv") + with open(status_path, 'r', encoding='utf-8') as status_file: + status = list(csv.reader(status_file, delimiter='\t')) + else: + status = [] + + if len(status) != 1 or len(status[0]) != 2: + return "error", "Invalid check_status.tsv", test_results, additional_files + state, description = status[0][0], status[0][1] + + results_path = os.path.join(result_folder, "test_results.tsv") + with open(results_path, 'r', encoding='utf-8') as results_file: + test_results = list(csv.reader(results_file, delimiter='\t')) + if len(test_results) == 0: + raise Exception("Empty results") + + return state, description, test_results, additional_files + +if __name__ == "__main__": + logging.basicConfig(level=logging.INFO) + + stopwatch = Stopwatch() + + temp_path = os.getenv("TEMP_PATH", os.path.abspath(".")) + repo_path = os.getenv("REPO_COPY", os.path.abspath("../../")) + reports_path = os.getenv("REPORTS_PATH", "./reports") + + check_name = sys.argv[1] + + if not os.path.exists(temp_path): + os.makedirs(temp_path) + + with open(os.getenv('GITHUB_EVENT_PATH'), 'r', encoding='utf-8') as event_file: + event = json.load(event_file) + + pr_info = PRInfo(event) + + gh = Github(get_best_robot_token()) + + images = get_images_with_versions(temp_path, IMAGES) + images_with_versions = {i.name: i.version for i in images} + result_path = os.path.join(temp_path, "output_dir") + if not os.path.exists(result_path): + os.makedirs(result_path) + + work_path = os.path.join(temp_path, "workdir") + if not os.path.exists(work_path): + os.makedirs(work_path) + + build_path = os.path.join(temp_path, "build") + if not os.path.exists(build_path): + os.makedirs(build_path) + + download_all_deb_packages(check_name, reports_path, build_path) + + my_env = get_env_for_runner(build_path, repo_path, result_path, work_path) + + json_path = os.path.join(work_path, 'params.json') + with open(json_path, 'w', encoding='utf-8') as json_params: + json_params.write(json.dumps(get_json_params_dict(check_name, pr_info.sha, pr_info.number, images_with_versions))) + + output_path_log = os.path.join(result_path, "main_script_log.txt") + + runner_path = os.path.join(repo_path, "tests/integration", "ci-runner.py") + run_command = f"sudo -E {runner_path} | tee {output_path_log}" + + with open(output_path_log, 'w', encoding='utf-8') as log: + with subprocess.Popen(run_command, shell=True, stderr=log, stdout=log, env=my_env) as process: + retcode = process.wait() + if retcode == 0: + logging.info("Run tests successfully") + else: + logging.info("Some tests failed") + + subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True) + + state, description, test_results, additional_logs = process_results(result_path) + + ch_helper = ClickHouseHelper() + mark_flaky_tests(ch_helper, check_name, test_results) + + s3_helper = S3Helper('https://s3.amazonaws.com') + report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, [output_path_log] + additional_logs, check_name, False) + print(f"::notice ::Report url: {report_url}") + post_commit_status(gh, pr_info.sha, check_name, description, state, report_url) + + prepared_events = prepare_tests_results_for_clickhouse(pr_info, 
test_results, state, stopwatch.duration_seconds, stopwatch.start_time_str, report_url, check_name) + ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events) diff --git a/tests/ci/metrics_lambda/Dockerfile b/tests/ci/metrics_lambda/Dockerfile new file mode 100644 index 000000000000..f53be71a8931 --- /dev/null +++ b/tests/ci/metrics_lambda/Dockerfile @@ -0,0 +1,13 @@ +FROM public.ecr.aws/lambda/python:3.9 + +# Copy function code +COPY app.py ${LAMBDA_TASK_ROOT} + +# Install the function's dependencies using file requirements.txt +# from your project folder. + +COPY requirements.txt . +RUN pip3 install -r requirements.txt --target "${LAMBDA_TASK_ROOT}" + +# Set the CMD to your handler (could also be done as a parameter override outside of the Dockerfile) +CMD [ "app.handler" ] diff --git a/tests/ci/metrics_lambda/app.py b/tests/ci/metrics_lambda/app.py new file mode 100644 index 000000000000..4bf967a51e17 --- /dev/null +++ b/tests/ci/metrics_lambda/app.py @@ -0,0 +1,194 @@ +#!/usr/bin/env python3 + +import requests +import argparse +import jwt +import sys +import json +import time +from collections import namedtuple + +def get_key_and_app_from_aws(): + import boto3 + secret_name = "clickhouse_github_secret_key" + session = boto3.session.Session() + client = session.client( + service_name='secretsmanager', + ) + get_secret_value_response = client.get_secret_value( + SecretId=secret_name + ) + data = json.loads(get_secret_value_response['SecretString']) + return data['clickhouse-app-key'], int(data['clickhouse-app-id']) + +def handler(event, context): + private_key, app_id = get_key_and_app_from_aws() + main(private_key, app_id, True, False) + +def get_installation_id(jwt_token): + headers = { + "Authorization": f"Bearer {jwt_token}", + "Accept": "application/vnd.github.v3+json", + } + response = requests.get("https://api.github.com/app/installations", headers=headers) + response.raise_for_status() + data = response.json() + return data[0]['id'] + +def get_access_token(jwt_token, installation_id): + headers = { + "Authorization": f"Bearer {jwt_token}", + "Accept": "application/vnd.github.v3+json", + } + response = requests.post(f"https://api.github.com/app/installations/{installation_id}/access_tokens", headers=headers) + response.raise_for_status() + data = response.json() + return data['token'] + + +RunnerDescription = namedtuple('RunnerDescription', ['id', 'name', 'tags', 'offline', 'busy']) + +def list_runners(access_token): + headers = { + "Authorization": f"token {access_token}", + "Accept": "application/vnd.github.v3+json", + } + response = requests.get("https://api.github.com/orgs/ClickHouse/actions/runners?per_page=100", headers=headers) + response.raise_for_status() + data = response.json() + total_runners = data['total_count'] + runners = data['runners'] + + total_pages = int(total_runners / 100 + 1) + print("Total pages", total_pages) + for i in range(2, total_pages + 1): + response = requests.get(f"https://api.github.com/orgs/ClickHouse/actions/runners?page={i}&per_page=100", headers=headers) + response.raise_for_status() + data = response.json() + runners += data['runners'] + + print("Total runners", len(runners)) + result = [] + for runner in runners: + tags = [tag['name'] for tag in runner['labels']] + desc = RunnerDescription(id=runner['id'], name=runner['name'], tags=tags, + offline=runner['status']=='offline', busy=runner['busy']) + result.append(desc) + return result + +def group_runners_by_tag(listed_runners): + result = {} + + RUNNER_TYPE_LABELS 
= ['style-checker', 'builder', 'func-tester', 'stress-tester'] + for runner in listed_runners: + for tag in runner.tags: + if tag in RUNNER_TYPE_LABELS: + if tag not in result: + result[tag] = [] + result[tag].append(runner) + break + else: + if 'unlabeled' not in result: + result['unlabeled'] = [] + result['unlabeled'].append(runner) + return result + + +def push_metrics_to_cloudwatch(listed_runners, namespace): + import boto3 + client = boto3.client('cloudwatch') + metrics_data = [] + busy_runners = sum(1 for runner in listed_runners if runner.busy) + metrics_data.append({ + 'MetricName': 'BusyRunners', + 'Value': busy_runners, + 'Unit': 'Count', + }) + total_active_runners = sum(1 for runner in listed_runners if not runner.offline) + metrics_data.append({ + 'MetricName': 'ActiveRunners', + 'Value': total_active_runners, + 'Unit': 'Count', + }) + total_runners = len(listed_runners) + metrics_data.append({ + 'MetricName': 'TotalRunners', + 'Value': total_runners, + 'Unit': 'Count', + }) + if total_active_runners == 0: + busy_ratio = 100 + else: + busy_ratio = busy_runners / total_active_runners * 100 + + metrics_data.append({ + 'MetricName': 'BusyRunnersRatio', + 'Value': busy_ratio, + 'Unit': 'Percent', + }) + + client.put_metric_data(Namespace=namespace, MetricData=metrics_data) + +def delete_runner(access_token, runner): + headers = { + "Authorization": f"token {access_token}", + "Accept": "application/vnd.github.v3+json", + } + + response = requests.delete(f"https://api.github.com/orgs/ClickHouse/actions/runners/{runner.id}", headers=headers) + response.raise_for_status() + print(f"Response code deleting {runner.name} is {response.status_code}") + return response.status_code == 204 + +def main(github_secret_key, github_app_id, push_to_cloudwatch, delete_offline_runners): + payload = { + "iat": int(time.time()) - 60, + "exp": int(time.time()) + (10 * 60), + "iss": github_app_id, + } + + encoded_jwt = jwt.encode(payload, github_secret_key, algorithm="RS256") + installation_id = get_installation_id(encoded_jwt) + access_token = get_access_token(encoded_jwt, installation_id) + runners = list_runners(access_token) + grouped_runners = group_runners_by_tag(runners) + for group, group_runners in grouped_runners.items(): + if push_to_cloudwatch: + push_metrics_to_cloudwatch(group_runners, 'RunnersMetrics/' + group) + else: + print(group, f"({len(group_runners)})") + for runner in group_runners: + print('\t', runner) + + if delete_offline_runners: + print("Going to delete offline runners") + for runner in runners: + if runner.offline: + print("Deleting runner", runner) + delete_runner(access_token, runner) + + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description='Get list of runners and their states') + parser.add_argument('-p', '--private-key-path', help='Path to file with private key') + parser.add_argument('-k', '--private-key', help='Private key') + parser.add_argument('-a', '--app-id', type=int, help='GitHub application ID', required=True) + parser.add_argument('--push-to-cloudwatch', action='store_true', help='Store received token in parameter store') + parser.add_argument('--delete-offline', action='store_true', help='Remove offline runners') + + args = parser.parse_args() + + if not args.private_key_path and not args.private_key: + print("Either --private-key-path or --private-key must be specified", file=sys.stderr) + + if args.private_key_path and args.private_key: + print("Either --private-key-path or --private-key must be specified", file=sys.stderr) + + if 
args.private_key: + private_key = args.private_key + else: + with open(args.private_key_path, 'r') as key_file: + private_key = key_file.read() + + main(private_key, args.app_id, args.push_to_cloudwatch, args.delete_offline) diff --git a/tests/ci/metrics_lambda/requirements.txt b/tests/ci/metrics_lambda/requirements.txt new file mode 100644 index 000000000000..c0dcf4a4dde7 --- /dev/null +++ b/tests/ci/metrics_lambda/requirements.txt @@ -0,0 +1,3 @@ +requests +PyJWT +cryptography diff --git a/tests/ci/pr_info.py b/tests/ci/pr_info.py new file mode 100644 index 000000000000..37fc17d52d80 --- /dev/null +++ b/tests/ci/pr_info.py @@ -0,0 +1,103 @@ +#!/usr/bin/env python3 +import os +import urllib + +import requests +from unidiff import PatchSet + + +DIFF_IN_DOCUMENTATION_EXT = [".html", ".md", ".yml", ".txt", ".css", ".js", ".xml", ".ico", ".conf", ".svg", ".png", ".jpg", ".py", ".sh"] + +class PRInfo: + def __init__(self, github_event, need_orgs=False, need_changed_files=False): + if 'pull_request' in github_event: # pull request and other similar events + self.number = github_event['number'] + if 'after' in github_event: + self.sha = github_event['after'] + else: + self.sha = github_event['pull_request']['head']['sha'] + + repo_prefix = f"{os.getenv('GITHUB_SERVER_URL', 'https://github.com')}/{os.getenv('GITHUB_REPOSITORY', 'ClickHouse/ClickHouse')}" + self.task_url = f"{repo_prefix}/actions/runs/{os.getenv('GITHUB_RUN_ID')}" + + self.repo_full_name = os.getenv('GITHUB_REPOSITORY', 'ClickHouse/ClickHouse') + self.commit_html_url = f"{repo_prefix}/commits/{self.sha}" + self.pr_html_url = f"{repo_prefix}/pull/{self.number}" + + self.base_ref = github_event['pull_request']['base']['ref'] + self.base_name = github_event['pull_request']['base']['repo']['full_name'] + self.head_ref = github_event['pull_request']['head']['ref'] + self.head_name = github_event['pull_request']['head']['repo']['full_name'] + + self.labels = { l['name'] for l in github_event['pull_request']['labels'] } + self.user_login = github_event['pull_request']['user']['login'] + self.user_orgs = set([]) + if need_orgs: + user_orgs_response = requests.get(github_event['pull_request']['user']['organizations_url']) + if user_orgs_response.ok: + response_json = user_orgs_response.json() + self.user_orgs = set(org['id'] for org in response_json) + + self.changed_files = set([]) + if need_changed_files: + diff_url = github_event['pull_request']['diff_url'] + diff = urllib.request.urlopen(diff_url) + diff_object = PatchSet(diff, diff.headers.get_charsets()[0]) + self.changed_files = { f.path for f in diff_object } + + elif 'commits' in github_event: + self.number = 0 + self.sha = github_event['after'] + self.labels = {} + self.repo_full_name = os.getenv('GITHUB_REPOSITORY', 'ClickHouse/ClickHouse') + repo_prefix = f"{os.getenv('GITHUB_SERVER_URL', 'https://github.com')}/{os.getenv('GITHUB_REPOSITORY', 'ClickHouse/ClickHouse')}" + self.task_url = f"{repo_prefix}/actions/runs/{os.getenv('GITHUB_RUN_ID')}" + self.commit_html_url = f"{repo_prefix}/commits/{self.sha}" + self.pr_html_url = f"{repo_prefix}/commits/master" + self.base_ref = "master" + self.base_name = self.repo_full_name + self.head_ref = "master" + self.head_name = self.repo_full_name + + if need_changed_files: + commit_before = github_event['before'] + response = requests.get(f"{os.getenv('GITHUB_SERVER_URL')}/repos/{os.getenv('GITHUB_REPOSITORY')}/compare/{commit_before}...{self.sha}") + response.raise_for_status() + diff = response.json() + + if 'files' in diff: + 
self.changed_files = [f['filename'] for f in diff['files']] + else: + self.changed_files = set([]) + else: + self.changed_files = set([]) + else: + raise Exception("Cannot detect type of event") + + + def get_dict(self): + return { + 'sha': self.sha, + 'number': self.number, + 'labels': self.labels, + 'user_login': self.user_login, + 'user_orgs': self.user_orgs, + } + + def has_changes_in_documentation(self): + # If the list wasn't built yet the best we can do is to + # assume that there were changes. + if self.changed_files is None or not self.changed_files: + return True + + for f in self.changed_files: + _, ext = os.path.splitext(f) + if ext in DIFF_IN_DOCUMENTATION_EXT or 'Dockerfile' in f: + return True + return False + + +class FakePRInfo: + def __init__(self): + self.number = 11111 + self.sha = "xxxxxxxxxxxxxxxxxx" diff --git a/tests/ci/pvs_check.py b/tests/ci/pvs_check.py new file mode 100644 index 000000000000..c55ef4dd5694 --- /dev/null +++ b/tests/ci/pvs_check.py @@ -0,0 +1,113 @@ +#!/usr/bin/env python3 + +# pylint: disable=line-too-long + +import subprocess +import os +import json +import logging +import sys +from github import Github +from s3_helper import S3Helper +from pr_info import PRInfo +from get_robot_token import get_best_robot_token, get_parameter_from_ssm +from upload_result_helper import upload_results +from commit_status_helper import get_commit +from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse +from stopwatch import Stopwatch + +NAME = 'PVS Studio (actions)' +LICENCE_NAME = 'Free license: ClickHouse, Yandex' +HTML_REPORT_FOLDER = 'pvs-studio-html-report' +TXT_REPORT_NAME = 'pvs-studio-task-report.txt' + +def _process_txt_report(path): + warnings = [] + errors = [] + with open(path, 'r') as report_file: + for line in report_file: + if 'viva64' in line: + continue + + if 'warn' in line: + warnings.append(':'.join(line.split('\t')[0:2])) + elif 'err' in line: + errors.append(':'.join(line.split('\t')[0:2])) + + return warnings, errors + +if __name__ == "__main__": + logging.basicConfig(level=logging.INFO) + + stopwatch = Stopwatch() + + repo_path = os.path.join(os.getenv("REPO_COPY", os.path.abspath("../../"))) + temp_path = os.path.join(os.getenv("TEMP_PATH")) + + with open(os.getenv('GITHUB_EVENT_PATH'), 'r') as event_file: + event = json.load(event_file) + pr_info = PRInfo(event) + # this check modify repository so copy it to the temp directory + logging.info("Repo copy path %s", repo_path) + + gh = Github(get_best_robot_token()) + + images_path = os.path.join(temp_path, 'changed_images.json') + docker_image = 'clickhouse/pvs-test' + if os.path.exists(images_path): + logging.info("Images file exists") + with open(images_path, 'r') as images_fd: + images = json.load(images_fd) + logging.info("Got images %s", images) + if 'clickhouse/pvs-test' in images: + docker_image += ':' + images['clickhouse/pvs-test'] + + logging.info("Got docker image %s", docker_image) + + s3_helper = S3Helper('https://s3.amazonaws.com') + + licence_key = get_parameter_from_ssm('pvs_studio_key') + cmd = f"docker run -u $(id -u ${{USER}}):$(id -g ${{USER}}) --volume={repo_path}:/repo_folder --volume={temp_path}:/test_output -e LICENCE_NAME='{LICENCE_NAME}' -e LICENCE_KEY='{licence_key}' {docker_image}" + commit = get_commit(gh, pr_info.sha) + + try: + subprocess.check_output(cmd, shell=True) + except: + commit.create_status(context=NAME, description='PVS report failed to build', state='failure', 
target_url=f"https://github.com/ClickHouse/ClickHouse/actions/runs/{os.getenv('GITHUB_RUN_ID')}") + sys.exit(1) + + try: + s3_path_prefix = str(pr_info.number) + "/" + pr_info.sha + "/" + NAME.lower().replace(' ', '_') + html_urls = s3_helper.upload_test_folder_to_s3(os.path.join(temp_path, HTML_REPORT_FOLDER), s3_path_prefix) + index_html = None + + for url in html_urls: + if 'index.html' in url: + index_html = 'HTML report'.format(url) + break + + if not index_html: + commit.create_status(context=NAME, description='PVS report failed to build', state='failure', + target_url=f"{os.getenv('GITHUB_SERVER_URL')}/{os.getenv('GITHUB_REPOSITORY')}/actions/runs/{os.getenv('GITHUB_RUN_ID')}") + sys.exit(1) + + txt_report = os.path.join(temp_path, TXT_REPORT_NAME) + warnings, errors = _process_txt_report(txt_report) + errors = errors + warnings + + status = 'success' + test_results = [(index_html, "Look at the report"), ("Errors count not checked", "OK")] + description = "Total errors {}".format(len(errors)) + additional_logs = [txt_report, os.path.join(temp_path, 'pvs-studio.log')] + report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, additional_logs, NAME) + + print("::notice ::Report url: {}".format(report_url)) + commit = get_commit(gh, pr_info.sha) + commit.create_status(context=NAME, description=description, state=status, target_url=report_url) + + ch_helper = ClickHouseHelper() + prepared_events = prepare_tests_results_for_clickhouse(pr_info, test_results, status, stopwatch.duration_seconds, stopwatch.start_time_str, report_url, NAME) + ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events) + except Exception as ex: + print("Got an exception", ex) + sys.exit(1) diff --git a/tests/ci/report.py b/tests/ci/report.py new file mode 100644 index 000000000000..156e6096605b --- /dev/null +++ b/tests/ci/report.py @@ -0,0 +1,324 @@ +# -*- coding: utf-8 -*- +import os +import datetime + +### FIXME: BEST FRONTEND PRACTICIES BELOW + +HTML_BASE_TEST_TEMPLATE = """ + + + + {title} + + +
+ +

{header}

+ +{test_part} + + + +""" + +HTML_TEST_PART = """ + + +{headers} + +{rows} +
+""" + +BASE_HEADERS = ['Test name', 'Test status'] + + +def _format_header(header, branch_name, branch_url=None): + result = ' '.join([w.capitalize() for w in header.split(' ')]) + result = result.replace("Clickhouse", "ClickHouse") + result = result.replace("clickhouse", "ClickHouse") + if 'ClickHouse' not in result: + result = 'ClickHouse ' + result + result += ' for ' + if branch_url: + result += '{name}'.format(url=branch_url, name=branch_name) + else: + result += branch_name + return result + + +def _get_status_style(status): + style = "font-weight: bold;" + if status in ('OK', 'success', 'PASSED'): + style += 'color: #0A0;' + elif status in ('FAIL', 'failure', 'error', 'FAILED', 'Timeout'): + style += 'color: #F00;' + else: + style += 'color: #FFB400;' + return style + + +def _get_html_url(url): + if isinstance(url, str): + return '{name}'.format(url=url, name=os.path.basename(url).replace('%2B', '+').replace('%20', ' ')) + if isinstance(url, tuple): + return '{name}'.format(url=url[0], name=url[1].replace('%2B', '+').replace('%20', ' ')) + return '' + + +def create_test_html_report(header, test_result, raw_log_url, task_url, branch_url, branch_name, commit_url, additional_urls=None, with_raw_logs=False): + if additional_urls is None: + additional_urls = [] + + if test_result: + rows_part = "" + num_fails = 0 + has_test_time = False + has_test_logs = False + for result in test_result: + test_name = result[0] + test_status = result[1] + + test_logs = None + test_time = None + if len(result) > 2: + test_time = result[2] + has_test_time = True + + if len(result) > 3: + test_logs = result[3] + has_test_logs = True + + row = "" + is_fail = test_status in ('FAIL', 'FLAKY') + if is_fail and with_raw_logs and test_logs is not None: + row = "" + row += "" + test_name + "" + style = _get_status_style(test_status) + + # Allow to quickly scroll to the first failure. + is_fail_id = "" + if is_fail: + num_fails = num_fails + 1 + is_fail_id = 'id="fail' + str(num_fails) + '" ' + + row += ''.format(style) + test_status + "" + + if test_time is not None: + row += "" + test_time + "" + + if test_logs is not None and not with_raw_logs: + test_logs_html = "
".join([_get_html_url(url) for url in test_logs]) + row += "" + test_logs_html + "" + + row += "" + rows_part += row + if test_logs is not None and with_raw_logs: + row = "" + # TODO: compute colspan too + row += "
" + test_logs + "
" + row += "" + rows_part += row + + headers = BASE_HEADERS + if has_test_time: + headers.append('Test time, sec.') + if has_test_logs and not with_raw_logs: + headers.append('Logs') + + headers = ''.join(['' + h + '' for h in headers]) + test_part = HTML_TEST_PART.format(headers=headers, rows=rows_part) + else: + test_part = "" + + additional_html_urls = "" + for url in additional_urls: + additional_html_urls += ' ' + _get_html_url(url) + + result = HTML_BASE_TEST_TEMPLATE.format( + title=_format_header(header, branch_name), + header=_format_header(header, branch_name, branch_url), + raw_log_name=os.path.basename(raw_log_url), + raw_log_url=raw_log_url, + task_url=task_url, + test_part=test_part, + branch_name=branch_name, + commit_url=commit_url, + additional_urls=additional_html_urls + ) + return result + + +HTML_BASE_BUILD_TEMPLATE = """ + + + + +{title} + + +
+

{header}

+ + + + + + + + + + + + +{rows} +
CompilerBuild typeSanitizerBundledSplittedStatusBuild logBuild timeArtifacts
+ + + +""" + +LINK_TEMPLATE = '{text}' + + +def create_build_html_report(header, build_results, build_logs_urls, artifact_urls_list, task_url, branch_url, branch_name, commit_url): + rows = "" + for (build_result, build_log_url, artifact_urls) in zip(build_results, build_logs_urls, artifact_urls_list): + row = "" + row += "{}".format(build_result.compiler) + if build_result.build_type: + row += "{}".format(build_result.build_type) + else: + row += "{}".format("relwithdebuginfo") + if build_result.sanitizer: + row += "{}".format(build_result.sanitizer) + else: + row += "{}".format("none") + + row += "{}".format(build_result.bundled) + row += "{}".format(build_result.splitted) + + if build_result.status: + style = _get_status_style(build_result.status) + row += '{}'.format(style, build_result.status) + else: + style = _get_status_style("error") + row += '{}'.format(style, "error") + + row += 'link'.format(build_log_url) + + if build_result.elapsed_seconds: + delta = datetime.timedelta(seconds=build_result.elapsed_seconds) + else: + delta = 'unknown' + + row += '{}'.format(str(delta)) + + links = "" + link_separator = "
" + if artifact_urls: + for artifact_url in artifact_urls: + links += LINK_TEMPLATE.format(text=os.path.basename(artifact_url.replace('%2B', '+').replace('%20', ' ')), url=artifact_url) + links += link_separator + if links: + links = links[:-len(link_separator)] + row += "{}".format(links) + + row += "" + rows += row + return HTML_BASE_BUILD_TEMPLATE.format( + title=_format_header(header, branch_name), + header=_format_header(header, branch_name, branch_url), + rows=rows, + task_url=task_url, + branch_name=branch_name, + commit_url=commit_url) diff --git a/tests/ci/run_check.py b/tests/ci/run_check.py new file mode 100644 index 000000000000..99a99ad30630 --- /dev/null +++ b/tests/ci/run_check.py @@ -0,0 +1,126 @@ +#!/usr/bin/env python3 +import os +import json +import sys +import logging +from github import Github +from pr_info import PRInfo +from get_robot_token import get_best_robot_token +from commit_status_helper import get_commit + +NAME = 'Run Check (actions)' + +TRUSTED_ORG_IDS = { + 7409213, # yandex + 28471076, # altinity + 54801242, # clickhouse +} + +OK_TEST_LABEL = set(["can be tested", "release", "pr-documentation", "pr-doc-fix"]) +DO_NOT_TEST_LABEL = "do not test" + +# Individual trusted contirbutors who are not in any trusted organization. +# Can be changed in runtime: we will append users that we learned to be in +# a trusted org, to save GitHub API calls. +TRUSTED_CONTRIBUTORS = { + "achimbab", + "adevyatova ", # DOCSUP + "Algunenano", # Raúl Marín, Tinybird + "AnaUvarova", # DOCSUP + "anauvarova", # technical writer, Yandex + "annvsh", # technical writer, Yandex + "atereh", # DOCSUP + "azat", + "bharatnc", # Newbie, but already with many contributions. + "bobrik", # Seasoned contributor, CloundFlare + "BohuTANG", + "codyrobert", # Flickerbox engineer + "damozhaeva", # DOCSUP + "den-crane", + "gyuton", # DOCSUP + "gyuton", # technical writer, Yandex + "hagen1778", # Roman Khavronenko, seasoned contributor + "hczhcz", + "hexiaoting", # Seasoned contributor + "ildus", # adjust, ex-pgpro + "javisantana", # a Spanish ClickHouse enthusiast, ex-Carto + "ka1bi4", # DOCSUP + "kirillikoff", # DOCSUP + "kitaisreal", # Seasoned contributor + "kreuzerkrieg", + "lehasm", # DOCSUP + "michon470", # DOCSUP + "MyroTk", # Tester in Altinity + "myrrc", # Michael Kot, Altinity + "nikvas0", + "nvartolomei", + "olgarev", # DOCSUP + "otrazhenia", # Yandex docs contractor + "pdv-ru", # DOCSUP + "podshumok", # cmake expert from QRator Labs + "s-mx", # Maxim Sabyanin, former employee, present contributor + "sevirov", # technical writer, Yandex + "spongedu", # Seasoned contributor + "ucasFL", # Amos Bird's friend + "vdimir", # Employee + "vzakaznikov", + "YiuRULE", + "zlobober" # Developer of YT +} + + +def pr_is_by_trusted_user(pr_user_login, pr_user_orgs): + if pr_user_login in TRUSTED_CONTRIBUTORS: + logging.info("User '%s' is trusted", pr_user_login) + return True + + logging.info("User '%s' is not trusted", pr_user_login) + + for org_id in pr_user_orgs: + if org_id in TRUSTED_ORG_IDS: + logging.info("Org '%s' is trusted; will mark user %s as trusted", org_id, pr_user_login) + return True + logging.info("Org '%s' is not trusted", org_id) + + return False + +# Returns whether we should look into individual checks for this PR. If not, it +# can be skipped entirely. +def should_run_checks_for_pr(pr_info): + # Consider the labels and whether the user is trusted. 
+ force_labels = set(['force tests']).intersection(pr_info.labels) + if force_labels: + return True, "Labeled '{}'".format(', '.join(force_labels)) + + if 'do not test' in pr_info.labels: + return False, "Labeled 'do not test'" + + if 'can be tested' not in pr_info.labels and not pr_is_by_trusted_user(pr_info.user_login, pr_info.user_orgs): + return False, "Needs 'can be tested' label" + + if 'release' in pr_info.labels or 'pr-backport' in pr_info.labels or 'pr-cherrypick' in pr_info.labels: + return False, "Don't try new checks for release/backports/cherry-picks" + + return True, "No special conditions apply" + +if __name__ == "__main__": + logging.basicConfig(level=logging.INFO) + with open(os.getenv('GITHUB_EVENT_PATH'), 'r') as event_file: + event = json.load(event_file) + + pr_info = PRInfo(event, need_orgs=True) + can_run, description = should_run_checks_for_pr(pr_info) + gh = Github(get_best_robot_token()) + commit = get_commit(gh, pr_info.sha) + url = f"{os.getenv('GITHUB_SERVER_URL')}/{os.getenv('GITHUB_REPOSITORY')}/actions/runs/{os.getenv('GITHUB_RUN_ID')}" + if not can_run: + print("::notice ::Cannot run") + commit.create_status(context=NAME, description=description, state="failure", target_url=url) + sys.exit(1) + else: + if 'pr-documentation' in pr_info.labels or 'pr-doc-fix' in pr_info.labels: + commit.create_status(context=NAME, description="Skipping checks for documentation", state="success", target_url=url) + print("::notice ::Can run, but it's documentation PR, skipping") + else: + print("::notice ::Can run") + commit.create_status(context=NAME, description=description, state="pending", target_url=url) diff --git a/tests/ci/s3_helper.py b/tests/ci/s3_helper.py new file mode 100644 index 000000000000..82791234f1a7 --- /dev/null +++ b/tests/ci/s3_helper.py @@ -0,0 +1,114 @@ +# -*- coding: utf-8 -*- +import hashlib +import logging +import os +from multiprocessing.dummy import Pool +import boto3 +from compress_files import compress_file_fast + +def _md5(fname): + hash_md5 = hashlib.md5() + with open(fname, "rb") as f: + for chunk in iter(lambda: f.read(4096), b""): + hash_md5.update(chunk) + logging.debug("MD5 for %s is %s", fname, hash_md5.hexdigest()) + return hash_md5.hexdigest() + + +def _flatten_list(lst): + result = [] + for elem in lst: + if isinstance(elem, list): + result += _flatten_list(elem) + else: + result.append(elem) + return result + + +class S3Helper(): + def __init__(self, host): + self.session = boto3.session.Session(region_name='us-east-1') + self.client = self.session.client('s3', endpoint_url=host) + + def _upload_file_to_s3(self, bucket_name, file_path, s3_path): + logging.debug("Start uploading %s to bucket=%s path=%s", file_path, bucket_name, s3_path) + metadata = {} + if os.path.getsize(file_path) < 64 * 1024 * 1024: + if s3_path.endswith("txt") or s3_path.endswith("log") or s3_path.endswith("err") or s3_path.endswith("out"): + metadata['ContentType'] = "text/plain; charset=utf-8" + logging.info("Content type %s for file path %s", "text/plain; charset=utf-8", file_path) + elif s3_path.endswith("html"): + metadata['ContentType'] = "text/html; charset=utf-8" + logging.info("Content type %s for file path %s", "text/html; charset=utf-8", file_path) + elif s3_path.endswith("css"): + metadata['ContentType'] = "text/css; charset=utf-8" + logging.info("Content type %s for file path %s", "text/css; charset=utf-8", file_path) + elif s3_path.endswith("js"): + metadata['ContentType'] = "text/javascript; charset=utf-8" + logging.info("Content type %s for file 
path %s", "text/css; charset=utf-8", file_path) + else: + logging.info("No content type provied for %s", file_path) + else: + if s3_path.endswith("txt") or s3_path.endswith("log") or s3_path.endswith("err") or s3_path.endswith("out"): + logging.info("Going to compress file log file %s to %s", file_path, file_path + ".gz") + compress_file_fast(file_path, file_path + ".gz") + file_path += ".gz" + s3_path += ".gz" + else: + logging.info("Processing file without compression") + logging.info("File is too large, do not provide content type") + + self.client.upload_file(file_path, bucket_name, s3_path, ExtraArgs=metadata) + logging.info("Upload %s to %s. Meta: %s", file_path, s3_path, metadata) + # last two replacements are specifics of AWS urls: https://jamesd3142.wordpress.com/2018/02/28/amazon-s3-and-the-plus-symbol/ + return "https://s3.amazonaws.com/{bucket}/{path}".format(bucket=bucket_name, path=s3_path).replace('+', '%2B').replace(' ', '%20') + + def upload_test_report_to_s3(self, file_path, s3_path): + return self._upload_file_to_s3('clickhouse-test-reports', file_path, s3_path) + + def upload_build_file_to_s3(self, file_path, s3_path): + return self._upload_file_to_s3('clickhouse-builds', file_path, s3_path) + + def _upload_folder_to_s3(self, folder_path, s3_folder_path, bucket_name, keep_dirs_in_s3_path, upload_symlinks): + logging.info("Upload folder '%s' to bucket=%s of s3 folder '%s'", folder_path, bucket_name, s3_folder_path) + if not os.path.exists(folder_path): + return [] + files = os.listdir(folder_path) + if not files: + return [] + + p = Pool(min(len(files), 5)) + + def task(file_name): + full_fs_path = os.path.join(folder_path, file_name) + if keep_dirs_in_s3_path: + full_s3_path = s3_folder_path + "/" + os.path.basename(folder_path) + else: + full_s3_path = s3_folder_path + + if os.path.isdir(full_fs_path): + return self._upload_folder_to_s3(full_fs_path, full_s3_path, bucket_name, keep_dirs_in_s3_path, upload_symlinks) + + if os.path.islink(full_fs_path): + if upload_symlinks: + return self._upload_file_to_s3(bucket_name, full_fs_path, full_s3_path + "/" + file_name) + return [] + + return self._upload_file_to_s3(bucket_name, full_fs_path, full_s3_path + "/" + file_name) + + return sorted(_flatten_list(list(p.map(task, files)))) + + def upload_build_folder_to_s3(self, folder_path, s3_folder_path, keep_dirs_in_s3_path=True, upload_symlinks=True): + return self._upload_folder_to_s3(folder_path, s3_folder_path, 'clickhouse-builds', keep_dirs_in_s3_path, upload_symlinks) + + def upload_test_folder_to_s3(self, folder_path, s3_folder_path): + return self._upload_folder_to_s3(folder_path, s3_folder_path, 'clickhouse-test-reports', True, True) + + def list_prefix(self, s3_prefix_path, bucket='clickhouse-builds'): + objects = self.client.list_objects_v2(Bucket=bucket, Prefix=s3_prefix_path) + result = [] + if 'Contents' in objects: + for obj in objects['Contents']: + result.append(obj['Key']) + + return result diff --git a/tests/ci/split_build_smoke_check.py b/tests/ci/split_build_smoke_check.py new file mode 100644 index 000000000000..28eb554d90e2 --- /dev/null +++ b/tests/ci/split_build_smoke_check.py @@ -0,0 +1,118 @@ +#!/usr/bin/env python3 + +import os +import logging +import json +import subprocess + +from github import Github + +from s3_helper import S3Helper +from get_robot_token import get_best_robot_token +from pr_info import PRInfo +from build_download_helper import download_shared_build +from upload_result_helper import upload_results +from docker_pull_helper import 
get_image_with_version +from commit_status_helper import post_commit_status +from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse +from stopwatch import Stopwatch + + +DOCKER_IMAGE = "clickhouse/split-build-smoke-test" +DOWNLOAD_RETRIES_COUNT = 5 +RESULT_LOG_NAME = "run.log" +CHECK_NAME = 'Split build smoke test (actions)' + +def process_result(result_folder, server_log_folder): + status = "success" + description = 'Server started and responded' + summary = [("Smoke test", "OK")] + with open(os.path.join(result_folder, RESULT_LOG_NAME), 'r') as run_log: + lines = run_log.read().split('\n') + if not lines or lines[0].strip() != 'OK': + status = "failure" + logging.info("Lines is not ok: %s", str('\n'.join(lines))) + summary = [("Smoke test", "FAIL")] + description = 'Server failed to respond, see result in logs' + + result_logs = [] + server_log_path = os.path.join(server_log_folder, "clickhouse-server.log") + stderr_log_path = os.path.join(result_folder, "stderr.log") + client_stderr_log_path = os.path.join(result_folder, "clientstderr.log") + run_log_path = os.path.join(result_folder, RESULT_LOG_NAME) + + for path in [server_log_path, stderr_log_path, client_stderr_log_path, run_log_path]: + if os.path.exists(path): + result_logs.append(path) + + return status, description, summary, result_logs + +def get_run_command(build_path, result_folder, server_log_folder, docker_image): + return f"docker run --network=host --volume={build_path}:/package_folder" \ + f" --volume={server_log_folder}:/var/log/clickhouse-server" \ + f" --volume={result_folder}:/test_output" \ + f" {docker_image} >{result_folder}/{RESULT_LOG_NAME}" + + +if __name__ == "__main__": + logging.basicConfig(level=logging.INFO) + + stopwatch = Stopwatch() + + temp_path = os.getenv("TEMP_PATH", os.path.abspath(".")) + repo_path = os.getenv("REPO_COPY", os.path.abspath("../../")) + reports_path = os.getenv("REPORTS_PATH", "./reports") + + with open(os.getenv('GITHUB_EVENT_PATH'), 'r', encoding='utf-8') as event_file: + event = json.load(event_file) + + pr_info = PRInfo(event) + + gh = Github(get_best_robot_token()) + + for root, _, files in os.walk(reports_path): + for f in files: + if f == 'changed_images.json': + images_path = os.path.join(root, 'changed_images.json') + break + + docker_image = get_image_with_version(reports_path, DOCKER_IMAGE) + + packages_path = os.path.join(temp_path, "packages") + if not os.path.exists(packages_path): + os.makedirs(packages_path) + + download_shared_build(CHECK_NAME, reports_path, packages_path) + + server_log_path = os.path.join(temp_path, "server_log") + if not os.path.exists(server_log_path): + os.makedirs(server_log_path) + + result_path = os.path.join(temp_path, "result_path") + if not os.path.exists(result_path): + os.makedirs(result_path) + + run_command = get_run_command(packages_path, result_path, server_log_path, docker_image) + + logging.info("Going to run command %s", run_command) + with subprocess.Popen(run_command, shell=True) as process: + retcode = process.wait() + if retcode == 0: + logging.info("Run successfully") + else: + logging.info("Run failed") + + subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True) + print("Result path", os.listdir(result_path)) + print("Server log path", os.listdir(server_log_path)) + + state, description, test_results, additional_logs = process_result(result_path, server_log_path) + + ch_helper = ClickHouseHelper() + s3_helper = S3Helper('https://s3.amazonaws.com') + report_url = 
upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, additional_logs, CHECK_NAME) + print(f"::notice ::Report url: {report_url}") + post_commit_status(gh, pr_info.sha, CHECK_NAME, description, state, report_url) + + prepared_events = prepare_tests_results_for_clickhouse(pr_info, test_results, state, stopwatch.duration_seconds, stopwatch.start_time_str, report_url, CHECK_NAME) + ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events) diff --git a/tests/ci/ssh.py b/tests/ci/ssh.py new file mode 100644 index 000000000000..1c0515364a82 --- /dev/null +++ b/tests/ci/ssh.py @@ -0,0 +1,116 @@ +#!/usr/bin/env python3 + +import shutil +import os +import subprocess +import tempfile +import logging +import signal + + +class SSHAgent: + def __init__(self): + self._env = {} + self._env_backup = {} + self._keys = {} + self.start() + + @property + def pid(self): + return int(self._env["SSH_AGENT_PID"]) + + def start(self): + if shutil.which("ssh-agent") is None: + raise Exception("ssh-agent binary is not available") + + self._env_backup["SSH_AUTH_SOCK"] = os.environ.get("SSH_AUTH_SOCK") + self._env_backup["SSH_OPTIONS"] = os.environ.get("SSH_OPTIONS") + + # set ENV from stdout of ssh-agent + for line in self._run(['ssh-agent']).splitlines(): + name, _, value = line.partition(b"=") + if _ == b"=": + value = value.split(b";", 1)[0] + self._env[name.decode()] = value.decode() + os.environ[name.decode()] = value.decode() + + ssh_options = "," + os.environ["SSH_OPTIONS"] if os.environ.get("SSH_OPTIONS") else "" + os.environ["SSH_OPTIONS"] = f"{ssh_options}UserKnownHostsFile=/dev/null,StrictHostKeyChecking=no" + + def add(self, key): + key_pub = self._key_pub(key) + + if key_pub in self._keys: + self._keys[key_pub] += 1 + else: + self._run(["ssh-add", "-"], stdin=key.encode()) + self._keys[key_pub] = 1 + + return key_pub + + def remove(self, key_pub): + if key_pub not in self._keys: + raise Exception(f"Private key not found, public part: {key_pub}") + + if self._keys[key_pub] > 1: + self._keys[key_pub] -= 1 + else: + with tempfile.NamedTemporaryFile() as f: + f.write(key_pub) + f.flush() + self._run(["ssh-add", "-d", f.name]) + self._keys.pop(key_pub) + + def print_keys(self): + keys = self._run(["ssh-add", "-l"]).splitlines() + if keys: + logging.info("ssh-agent keys:") + for key in keys: + logging.info("%s", key) + else: + logging.info("ssh-agent (pid %d) is empty", self.pid) + + def kill(self): + for k, v in self._env.items(): + os.environ.pop(k, None) + + for k, v in self._env_backup.items(): + if v is not None: + os.environ[k] = v + + os.kill(self.pid, signal.SIGTERM) + + def _key_pub(self, key): + with tempfile.NamedTemporaryFile() as f: + f.write(key.encode()) + f.flush() + return self._run(["ssh-keygen", "-y", "-f", f.name]) + + @staticmethod + def _run(cmd, stdin=None): + shell = isinstance(cmd, str) + with subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE if stdin else None, shell=shell) as p: + stdout, stderr = p.communicate(stdin) + + if stdout.strip().decode() == "The agent has no identities.": + return "" + + if p.returncode: + message = stderr.strip() + b"\n" + stdout.strip() + raise Exception(message.strip().decode()) + + return stdout + +class SSHKey: + def __init__(self, key_name): + self.key = os.getenv(key_name) + self._key_pub = None + self._ssh_agent = SSHAgent() + + def __enter__(self): + self._key_pub = self._ssh_agent.add(self.key) + self._ssh_agent.print_keys() + + def __exit__(self, exc_type, exc_val, 
exc_tb): + self._ssh_agent.remove(self._key_pub) + self._ssh_agent.print_keys() diff --git a/tests/ci/stopwatch.py b/tests/ci/stopwatch.py new file mode 100644 index 000000000000..b6ae8674df11 --- /dev/null +++ b/tests/ci/stopwatch.py @@ -0,0 +1,16 @@ +#!/usr/bin/env python3 + +import datetime + +class Stopwatch(): + def __init__(self): + self.start_time = datetime.datetime.utcnow() + self.start_time_str_value = self.start_time.strftime("%Y-%m-%d %H:%M:%S") + + @property + def duration_seconds(self): + return (datetime.datetime.utcnow() - self.start_time).total_seconds() + + @property + def start_time_str(self): + return self.start_time_str_value diff --git a/tests/ci/stress_check.py b/tests/ci/stress_check.py new file mode 100644 index 000000000000..4b3adfad23f3 --- /dev/null +++ b/tests/ci/stress_check.py @@ -0,0 +1,130 @@ +#!/usr/bin/env python3 + +import csv +import logging +import subprocess +import os +import json +import sys + +from github import Github + +from s3_helper import S3Helper +from get_robot_token import get_best_robot_token +from pr_info import PRInfo +from build_download_helper import download_all_deb_packages +from upload_result_helper import upload_results +from docker_pull_helper import get_image_with_version +from commit_status_helper import post_commit_status +from clickhouse_helper import ClickHouseHelper, mark_flaky_tests, prepare_tests_results_for_clickhouse +from stopwatch import Stopwatch + + +def get_run_command(build_path, result_folder, server_log_folder, image): + cmd = "docker run -e S3_URL='https://clickhouse-datasets.s3.amazonaws.com' " + \ + f"--volume={build_path}:/package_folder " \ + f"--volume={result_folder}:/test_output " \ + f"--volume={server_log_folder}:/var/log/clickhouse-server {image}" + + return cmd + +def process_results(result_folder, server_log_path, run_log_path): + test_results = [] + additional_files = [] + # Just upload all files from result_folder. + # If task provides processed results, then it's responsible for content of result_folder. 
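+ # Expected layout (as read below): check_status.tsv with a single "<state>\t<description>" row and test_results.tsv with one result per row; everything else found here or in the server log folder is attached to the report as an additional file.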
+ if os.path.exists(result_folder): + test_files = [f for f in os.listdir(result_folder) if os.path.isfile(os.path.join(result_folder, f))] + additional_files = [os.path.join(result_folder, f) for f in test_files] + + if os.path.exists(server_log_path): + server_log_files = [f for f in os.listdir(server_log_path) if os.path.isfile(os.path.join(server_log_path, f))] + additional_files = additional_files + [os.path.join(server_log_path, f) for f in server_log_files] + + additional_files.append(run_log_path) + + status_path = os.path.join(result_folder, "check_status.tsv") + if not os.path.exists(status_path): + return "failure", "check_status.tsv doesn't exist", test_results, additional_files + + logging.info("Found check_status.tsv") + with open(status_path, 'r', encoding='utf-8') as status_file: + status = list(csv.reader(status_file, delimiter='\t')) + + if len(status) != 1 or len(status[0]) != 2: + return "error", "Invalid check_status.tsv", test_results, additional_files + state, description = status[0][0], status[0][1] + + results_path = os.path.join(result_folder, "test_results.tsv") + with open(results_path, 'r', encoding='utf-8') as results_file: + test_results = list(csv.reader(results_file, delimiter='\t')) + if len(test_results) == 0: + raise Exception("Empty results") + + return state, description, test_results, additional_files + + +if __name__ == "__main__": + logging.basicConfig(level=logging.INFO) + + stopwatch = Stopwatch() + temp_path = os.getenv("TEMP_PATH", os.path.abspath(".")) + repo_path = os.getenv("REPO_COPY", os.path.abspath("../../")) + reports_path = os.getenv("REPORTS_PATH", "./reports") + + check_name = sys.argv[1] + + if not os.path.exists(temp_path): + os.makedirs(temp_path) + + with open(os.getenv('GITHUB_EVENT_PATH'), 'r', encoding='utf-8') as event_file: + event = json.load(event_file) + + pr_info = PRInfo(event) + + gh = Github(get_best_robot_token()) + + docker_image = get_image_with_version(reports_path, 'clickhouse/stress-test') + + packages_path = os.path.join(temp_path, "packages") + if not os.path.exists(packages_path): + os.makedirs(packages_path) + + download_all_deb_packages(check_name, reports_path, packages_path) + + server_log_path = os.path.join(temp_path, "server_log") + if not os.path.exists(server_log_path): + os.makedirs(server_log_path) + + result_path = os.path.join(temp_path, "result_path") + if not os.path.exists(result_path): + os.makedirs(result_path) + + run_log_path = os.path.join(temp_path, "runlog.log") + + run_command = get_run_command(packages_path, result_path, server_log_path, docker_image) + logging.info("Going to run stress test: %s", run_command) + + with open(run_log_path, 'w', encoding='utf-8') as log: + with subprocess.Popen(run_command, shell=True, stderr=log, stdout=log) as process: + retcode = process.wait() + if retcode == 0: + logging.info("Run successfully") + else: + logging.info("Run failed") + + subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True) + + + s3_helper = S3Helper('https://s3.amazonaws.com') + state, description, test_results, additional_logs = process_results(result_path, server_log_path, run_log_path) + ch_helper = ClickHouseHelper() + mark_flaky_tests(ch_helper, check_name, test_results) + + report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, [run_log_path] + additional_logs, check_name) + print(f"::notice ::Report url: {report_url}") + + post_commit_status(gh, pr_info.sha, check_name, description, state, report_url) + + prepared_events = 
prepare_tests_results_for_clickhouse(pr_info, test_results, state, stopwatch.duration_seconds, stopwatch.start_time_str, report_url, check_name) + ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events) diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py new file mode 100644 index 000000000000..8e11b2958277 --- /dev/null +++ b/tests/ci/style_check.py @@ -0,0 +1,79 @@ +#!/usr/bin/env python3 +import logging +import subprocess +import os +import csv +import json +from github import Github +from s3_helper import S3Helper +from pr_info import PRInfo +from get_robot_token import get_best_robot_token +from upload_result_helper import upload_results +from docker_pull_helper import get_image_with_version +from commit_status_helper import post_commit_status +from clickhouse_helper import ClickHouseHelper, mark_flaky_tests, prepare_tests_results_for_clickhouse +from stopwatch import Stopwatch + + +NAME = "Style Check (actions)" + + +def process_result(result_folder): + test_results = [] + additional_files = [] + # Just upload all files from result_folder. + # If task provides processed results, then it's responsible for content of result_folder. + if os.path.exists(result_folder): + test_files = [f for f in os.listdir(result_folder) if os.path.isfile(os.path.join(result_folder, f))] + additional_files = [os.path.join(result_folder, f) for f in test_files] + + status_path = os.path.join(result_folder, "check_status.tsv") + logging.info("Found test_results.tsv") + status = list(csv.reader(open(status_path, 'r'), delimiter='\t')) + if len(status) != 1 or len(status[0]) != 2: + return "error", "Invalid check_status.tsv", test_results, additional_files + state, description = status[0][0], status[0][1] + + try: + results_path = os.path.join(result_folder, "test_results.tsv") + test_results = list(csv.reader(open(results_path, 'r'), delimiter='\t')) + if len(test_results) == 0: + raise Exception("Empty results") + + return state, description, test_results, additional_files + except Exception: + if state == "success": + state, description = "error", "Failed to read test_results.tsv" + return state, description, test_results, additional_files + +if __name__ == "__main__": + logging.basicConfig(level=logging.INFO) + + stopwatch = Stopwatch() + + repo_path = os.path.join(os.getenv("GITHUB_WORKSPACE", os.path.abspath("../../"))) + temp_path = os.path.join(os.getenv("RUNNER_TEMP", os.path.abspath("./temp")), 'style_check') + + with open(os.getenv('GITHUB_EVENT_PATH'), 'r') as event_file: + event = json.load(event_file) + pr_info = PRInfo(event) + + if not os.path.exists(temp_path): + os.makedirs(temp_path) + + gh = Github(get_best_robot_token()) + + docker_image = get_image_with_version(temp_path, 'clickhouse/style-test') + s3_helper = S3Helper('https://s3.amazonaws.com') + + subprocess.check_output(f"docker run -u $(id -u ${{USER}}):$(id -g ${{USER}}) --cap-add=SYS_PTRACE --volume={repo_path}:/ClickHouse --volume={temp_path}:/test_output {docker_image}", shell=True) + state, description, test_results, additional_files = process_result(temp_path) + ch_helper = ClickHouseHelper() + mark_flaky_tests(ch_helper, NAME, test_results) + + report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, additional_files, NAME) + print("::notice ::Report url: {}".format(report_url)) + post_commit_status(gh, pr_info.sha, NAME, description, state, report_url) + + prepared_events = prepare_tests_results_for_clickhouse(pr_info, test_results, state, 
stopwatch.duration_seconds, stopwatch.start_time_str, report_url, NAME) + ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events) diff --git a/tests/ci/termination_lambda/Dockerfile b/tests/ci/termination_lambda/Dockerfile new file mode 100644 index 000000000000..f53be71a8931 --- /dev/null +++ b/tests/ci/termination_lambda/Dockerfile @@ -0,0 +1,13 @@ +FROM public.ecr.aws/lambda/python:3.9 + +# Copy function code +COPY app.py ${LAMBDA_TASK_ROOT} + +# Install the function's dependencies using file requirements.txt +# from your project folder. + +COPY requirements.txt . +RUN pip3 install -r requirements.txt --target "${LAMBDA_TASK_ROOT}" + +# Set the CMD to your handler (could also be done as a parameter override outside of the Dockerfile) +CMD [ "app.handler" ] diff --git a/tests/ci/termination_lambda/app.py b/tests/ci/termination_lambda/app.py new file mode 100644 index 000000000000..cd7d51ae8eb5 --- /dev/null +++ b/tests/ci/termination_lambda/app.py @@ -0,0 +1,283 @@ +#!/usr/bin/env python3 + +import requests +import argparse +import jwt +import sys +import json +import time +from collections import namedtuple + +def get_key_and_app_from_aws(): + import boto3 + secret_name = "clickhouse_github_secret_key" + session = boto3.session.Session() + client = session.client( + service_name='secretsmanager', + ) + get_secret_value_response = client.get_secret_value( + SecretId=secret_name + ) + data = json.loads(get_secret_value_response['SecretString']) + return data['clickhouse-app-key'], int(data['clickhouse-app-id']) + +def get_installation_id(jwt_token): + headers = { + "Authorization": f"Bearer {jwt_token}", + "Accept": "application/vnd.github.v3+json", + } + response = requests.get("https://api.github.com/app/installations", headers=headers) + response.raise_for_status() + data = response.json() + return data[0]['id'] + +def get_access_token(jwt_token, installation_id): + headers = { + "Authorization": f"Bearer {jwt_token}", + "Accept": "application/vnd.github.v3+json", + } + response = requests.post(f"https://api.github.com/app/installations/{installation_id}/access_tokens", headers=headers) + response.raise_for_status() + data = response.json() + return data['token'] + + +RunnerDescription = namedtuple('RunnerDescription', ['id', 'name', 'tags', 'offline', 'busy']) + +def list_runners(access_token): + headers = { + "Authorization": f"token {access_token}", + "Accept": "application/vnd.github.v3+json", + } + response = requests.get("https://api.github.com/orgs/ClickHouse/actions/runners?per_page=100", headers=headers) + response.raise_for_status() + data = response.json() + total_runners = data['total_count'] + runners = data['runners'] + + total_pages = int(total_runners / 100 + 1) + for i in range(2, total_pages + 1): + response = requests.get(f"https://api.github.com/orgs/ClickHouse/actions/runners?page={i}&per_page=100", headers=headers) + response.raise_for_status() + data = response.json() + runners += data['runners'] + + print("Total runners", len(runners)) + result = [] + for runner in runners: + tags = [tag['name'] for tag in runner['labels']] + desc = RunnerDescription(id=runner['id'], name=runner['name'], tags=tags, + offline=runner['status']=='offline', busy=runner['busy']) + result.append(desc) + return result + +def push_metrics_to_cloudwatch(listed_runners, namespace): + import boto3 + client = boto3.client('cloudwatch') + metrics_data = [] + busy_runners = sum(1 for runner in listed_runners if runner.busy) + metrics_data.append({ + 'MetricName': 
'BusyRunners', + 'Value': busy_runners, + 'Unit': 'Count', + }) + total_active_runners = sum(1 for runner in listed_runners if not runner.offline) + metrics_data.append({ + 'MetricName': 'ActiveRunners', + 'Value': total_active_runners, + 'Unit': 'Count', + }) + total_runners = len(listed_runners) + metrics_data.append({ + 'MetricName': 'TotalRunners', + 'Value': total_runners, + 'Unit': 'Count', + }) + if total_active_runners == 0: + busy_ratio = 100 + else: + busy_ratio = busy_runners / total_active_runners * 100 + + metrics_data.append({ + 'MetricName': 'BusyRunnersRatio', + 'Value': busy_ratio, + 'Unit': 'Percent', + }) + + client.put_metric_data(Namespace='RunnersMetrics', MetricData=metrics_data) + + +def how_many_instances_to_kill(event_data): + data_array = event_data['CapacityToTerminate'] + to_kill_by_zone = {} + for av_zone in data_array: + zone_name = av_zone['AvailabilityZone'] + to_kill = av_zone['Capacity'] + if zone_name not in to_kill_by_zone: + to_kill_by_zone[zone_name] = 0 + + to_kill_by_zone[zone_name] += to_kill + return to_kill_by_zone + +def get_candidates_to_be_killed(event_data): + data_array = event_data['Instances'] + instances_by_zone = {} + for instance in data_array: + zone_name = instance['AvailabilityZone'] + instance_id = instance['InstanceId'] + if zone_name not in instances_by_zone: + instances_by_zone[zone_name] = [] + instances_by_zone[zone_name].append(instance_id) + + return instances_by_zone + +def delete_runner(access_token, runner): + headers = { + "Authorization": f"token {access_token}", + "Accept": "application/vnd.github.v3+json", + } + + response = requests.delete(f"https://api.github.com/orgs/ClickHouse/actions/runners/{runner.id}", headers=headers) + response.raise_for_status() + print(f"Response code deleting {runner.name} is {response.status_code}") + return response.status_code == 204 + + +def main(github_secret_key, github_app_id, event): + print("Got event", json.dumps(event, sort_keys=True, indent=4)) + to_kill_by_zone = how_many_instances_to_kill(event) + instances_by_zone = get_candidates_to_be_killed(event) + + payload = { + "iat": int(time.time()) - 60, + "exp": int(time.time()) + (10 * 60), + "iss": github_app_id, + } + + encoded_jwt = jwt.encode(payload, github_secret_key, algorithm="RS256") + installation_id = get_installation_id(encoded_jwt) + access_token = get_access_token(encoded_jwt, installation_id) + + runners = list_runners(access_token) + + to_delete_runners = [] + instances_to_kill = [] + for zone in to_kill_by_zone: + num_to_kill = to_kill_by_zone[zone] + candidates = instances_by_zone[zone] + if num_to_kill > len(candidates): + raise Exception(f"Required to kill {num_to_kill}, but have only {len(candidates)} candidates in AV {zone}") + + delete_for_av = [] + for candidate in candidates: + if candidate not in set([runner.name for runner in runners]): + print(f"Candidate {candidate} was not in runners list, simply delete it") + instances_to_kill.append(candidate) + + for candidate in candidates: + if len(delete_for_av) + len(instances_to_kill) == num_to_kill: + break + if candidate in instances_to_kill: + continue + + for runner in runners: + if runner.name == candidate: + if not runner.busy: + print(f"Runner {runner.name} is not busy and can be deleted from AV {zone}") + delete_for_av.append(runner) + else: + print(f"Runner {runner.name} is busy, not going to delete it") + break + + if len(delete_for_av) < num_to_kill: + print(f"Checked all candidates for av {zone}, get to delete {len(delete_for_av)}, but still 
cannot get required {num_to_kill}") + to_delete_runners += delete_for_av + + print("Got instances to kill: ", ', '.join(instances_to_kill)) + print("Going to delete runners:", ', '.join([runner.name for runner in to_delete_runners])) + for runner in to_delete_runners: + if delete_runner(access_token, runner): + print(f"Runner {runner.name} successfuly deleted from github") + instances_to_kill.append(runner.name) + else: + print(f"Cannot delete {runner.name} from github") + + ## push metrics + #runners = list_runners(access_token) + #push_metrics_to_cloudwatch(runners, 'RunnersMetrics') + + response = { + "InstanceIDs": instances_to_kill + } + print(response) + return response + +def handler(event, context): + private_key, app_id = get_key_and_app_from_aws() + return main(private_key, app_id, event) + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description='Get list of runners and their states') + parser.add_argument('-p', '--private-key-path', help='Path to file with private key') + parser.add_argument('-k', '--private-key', help='Private key') + parser.add_argument('-a', '--app-id', type=int, help='GitHub application ID', required=True) + + args = parser.parse_args() + + if not args.private_key_path and not args.private_key: + print("Either --private-key-path or --private-key must be specified", file=sys.stderr) + + if args.private_key_path and args.private_key: + print("Either --private-key-path or --private-key must be specified", file=sys.stderr) + + if args.private_key: + private_key = args.private_key + else: + with open(args.private_key_path, 'r') as key_file: + private_key = key_file.read() + + sample_event = { + "AutoScalingGroupARN": "arn:aws:autoscaling:us-east-1::autoScalingGroup:d4738357-2d40-4038-ae7e-b00ae0227003:autoScalingGroupName/my-asg", + "AutoScalingGroupName": "my-asg", + "CapacityToTerminate": [ + { + "AvailabilityZone": "us-east-1b", + "Capacity": 1, + "InstanceMarketOption": "OnDemand" + }, + { + "AvailabilityZone": "us-east-1c", + "Capacity": 2, + "InstanceMarketOption": "OnDemand" + } + ], + "Instances": [ + { + "AvailabilityZone": "us-east-1b", + "InstanceId": "i-08d0b3c1a137e02a5", + "InstanceType": "t2.nano", + "InstanceMarketOption": "OnDemand" + }, + { + "AvailabilityZone": "us-east-1c", + "InstanceId": "ip-172-31-45-253.eu-west-1.compute.internal", + "InstanceType": "t2.nano", + "InstanceMarketOption": "OnDemand" + }, + { + "AvailabilityZone": "us-east-1c", + "InstanceId": "ip-172-31-27-227.eu-west-1.compute.internal", + "InstanceType": "t2.nano", + "InstanceMarketOption": "OnDemand" + }, + { + "AvailabilityZone": "us-east-1c", + "InstanceId": "ip-172-31-45-253.eu-west-1.compute.internal", + "InstanceType": "t2.nano", + "InstanceMarketOption": "OnDemand" + } + ], + "Cause": "SCALE_IN" + } + + main(private_key, args.app_id, sample_event) diff --git a/tests/ci/termination_lambda/requirements.txt b/tests/ci/termination_lambda/requirements.txt new file mode 100644 index 000000000000..c0dcf4a4dde7 --- /dev/null +++ b/tests/ci/termination_lambda/requirements.txt @@ -0,0 +1,3 @@ +requests +PyJWT +cryptography diff --git a/tests/ci/token_lambda/Dockerfile b/tests/ci/token_lambda/Dockerfile new file mode 100644 index 000000000000..f53be71a8931 --- /dev/null +++ b/tests/ci/token_lambda/Dockerfile @@ -0,0 +1,13 @@ +FROM public.ecr.aws/lambda/python:3.9 + +# Copy function code +COPY app.py ${LAMBDA_TASK_ROOT} + +# Install the function's dependencies using file requirements.txt +# from your project folder. + +COPY requirements.txt . 
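+# requirements.txt (requests, PyJWT, cryptography) provides the HTTP client and the RS256 signing support that app.py uses to authenticate as a GitHub App.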
+RUN pip3 install -r requirements.txt --target "${LAMBDA_TASK_ROOT}" + +# Set the CMD to your handler (could also be done as a parameter override outside of the Dockerfile) +CMD [ "app.handler" ] diff --git a/tests/ci/token_lambda/app.py b/tests/ci/token_lambda/app.py new file mode 100644 index 000000000000..731d6c040de1 --- /dev/null +++ b/tests/ci/token_lambda/app.py @@ -0,0 +1,106 @@ +#!/usr/bin/env python3 + +import requests +import argparse +import jwt +import sys +import json +import time + +def get_installation_id(jwt_token): + headers = { + "Authorization": f"Bearer {jwt_token}", + "Accept": "application/vnd.github.v3+json", + } + response = requests.get("https://api.github.com/app/installations", headers=headers) + response.raise_for_status() + data = response.json() + return data[0]['id'] + +def get_access_token(jwt_token, installation_id): + headers = { + "Authorization": f"Bearer {jwt_token}", + "Accept": "application/vnd.github.v3+json", + } + response = requests.post(f"https://api.github.com/app/installations/{installation_id}/access_tokens", headers=headers) + response.raise_for_status() + data = response.json() + return data['token'] + +def get_runner_registration_token(access_token): + headers = { + "Authorization": f"token {access_token}", + "Accept": "application/vnd.github.v3+json", + } + response = requests.post("https://api.github.com/orgs/ClickHouse/actions/runners/registration-token", headers=headers) + response.raise_for_status() + data = response.json() + return data['token'] + +def get_key_and_app_from_aws(): + import boto3 + secret_name = "clickhouse_github_secret_key" + session = boto3.session.Session() + client = session.client( + service_name='secretsmanager', + ) + get_secret_value_response = client.get_secret_value( + SecretId=secret_name + ) + data = json.loads(get_secret_value_response['SecretString']) + return data['clickhouse-app-key'], int(data['clickhouse-app-id']) + + +def main(github_secret_key, github_app_id, push_to_ssm, ssm_parameter_name): + payload = { + "iat": int(time.time()) - 60, + "exp": int(time.time()) + (10 * 60), + "iss": github_app_id, + } + + encoded_jwt = jwt.encode(payload, github_secret_key, algorithm="RS256") + installation_id = get_installation_id(encoded_jwt) + access_token = get_access_token(encoded_jwt, installation_id) + runner_registration_token = get_runner_registration_token(access_token) + + if push_to_ssm: + import boto3 + + print("Trying to put params into ssm manager") + client = boto3.client('ssm') + client.put_parameter( + Name=ssm_parameter_name, + Value=runner_registration_token, + Type='SecureString', + Overwrite=True) + else: + print("Not push token to AWS Parameter Store, just print:", runner_registration_token) + + +def handler(event, context): + private_key, app_id = get_key_and_app_from_aws() + main(private_key, app_id, True, 'github_runner_registration_token') + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description='Get new token from github to add runners') + parser.add_argument('-p', '--private-key-path', help='Path to file with private key') + parser.add_argument('-k', '--private-key', help='Private key') + parser.add_argument('-a', '--app-id', type=int, help='GitHub application ID', required=True) + parser.add_argument('--push-to-ssm', action='store_true', help='Store received token in parameter store') + parser.add_argument('--ssm-parameter-name', default='github_runner_registration_token', help='AWS paramater store parameter name') + + args = parser.parse_args() + + if not 
args.private_key_path and not args.private_key: + print("Either --private-key-path or --private-key must be specified", file=sys.stderr) + + if args.private_key_path and args.private_key: + print("Either --private-key-path or --private-key must be specified", file=sys.stderr) + + if args.private_key: + private_key = args.private_key + else: + with open(args.private_key_path, 'r') as key_file: + private_key = key_file.read() + + main(private_key, args.app_id, args.push_to_ssm, args.ssm_parameter_name) diff --git a/tests/ci/token_lambda/requirements.txt b/tests/ci/token_lambda/requirements.txt new file mode 100644 index 000000000000..c0dcf4a4dde7 --- /dev/null +++ b/tests/ci/token_lambda/requirements.txt @@ -0,0 +1,3 @@ +requests +PyJWT +cryptography diff --git a/tests/ci/unit_tests_check.py b/tests/ci/unit_tests_check.py new file mode 100644 index 000000000000..21aa63e3b19c --- /dev/null +++ b/tests/ci/unit_tests_check.py @@ -0,0 +1,150 @@ +#!/usr/bin/env python3 + +import logging +import os +import sys +import subprocess +import json + +from github import Github + +from s3_helper import S3Helper +from get_robot_token import get_best_robot_token +from pr_info import PRInfo +from build_download_helper import download_unit_tests +from upload_result_helper import upload_results +from docker_pull_helper import get_image_with_version +from commit_status_helper import post_commit_status +from clickhouse_helper import ClickHouseHelper, mark_flaky_tests, prepare_tests_results_for_clickhouse +from stopwatch import Stopwatch + + +IMAGE_NAME = 'clickhouse/unit-test' + +def get_test_name(line): + elements = reversed(line.split(' ')) + for element in elements: + if '(' not in element and ')' not in element: + return element + raise Exception(f"No test name in line '{line}'") + +def process_result(result_folder): + OK_SIGN = 'OK ]' + FAILED_SIGN = 'FAILED ]' + SEGFAULT = 'Segmentation fault' + SIGNAL = 'received signal SIG' + PASSED = 'PASSED' + + summary = [] + total_counter = 0 + failed_counter = 0 + result_log_path = f'{result_folder}/test_result.txt' + if not os.path.exists(result_log_path): + logging.info("No output log on path %s", result_log_path) + return "error", "No output log", summary, [] + + status = "success" + description = "" + passed = False + with open(result_log_path, 'r', encoding='utf-8') as test_result: + for line in test_result: + if OK_SIGN in line: + logging.info("Found ok line: '%s'", line) + test_name = get_test_name(line.strip()) + logging.info("Test name: '%s'", test_name) + summary.append((test_name, "OK")) + total_counter += 1 + elif FAILED_SIGN in line and 'listed below' not in line and 'ms)' in line: + logging.info("Found fail line: '%s'", line) + test_name = get_test_name(line.strip()) + logging.info("Test name: '%s'", test_name) + summary.append((test_name, "FAIL")) + total_counter += 1 + failed_counter += 1 + elif SEGFAULT in line: + logging.info("Found segfault line: '%s'", line) + status = "failure" + description += "Segmentation fault. " + break + elif SIGNAL in line: + logging.info("Received signal line: '%s'", line) + status = "failure" + description += "Exit on signal. " + break + elif PASSED in line: + logging.info("PASSED record found: '%s'", line) + passed = True + + if not passed: + status = "failure" + description += "PASSED record not found. 
" + + if failed_counter != 0: + status = "failure" + + if not description: + description += f"fail: {failed_counter}, passed: {total_counter - failed_counter}" + + return status, description, summary, [result_log_path] + + +if __name__ == "__main__": + logging.basicConfig(level=logging.INFO) + + stopwatch = Stopwatch() + + temp_path = os.getenv("TEMP_PATH", os.path.abspath(".")) + repo_path = os.getenv("REPO_COPY", os.path.abspath("../../")) + reports_path = os.getenv("REPORTS_PATH", "./reports") + + check_name = sys.argv[1] + + if not os.path.exists(temp_path): + os.makedirs(temp_path) + + with open(os.getenv('GITHUB_EVENT_PATH'), 'r', encoding='utf-8') as event_file: + event = json.load(event_file) + + pr_info = PRInfo(event) + + gh = Github(get_best_robot_token()) + + docker_image = get_image_with_version(reports_path, IMAGE_NAME) + + download_unit_tests(check_name, reports_path, temp_path) + + tests_binary_path = os.path.join(temp_path, "unit_tests_dbms") + os.chmod(tests_binary_path, 0o777) + + test_output = os.path.join(temp_path, "test_output") + if not os.path.exists(test_output): + os.makedirs(test_output) + + run_command = f"docker run --cap-add=SYS_PTRACE --volume={tests_binary_path}:/unit_tests_dbms --volume={test_output}:/test_output {docker_image}" + + run_log_path = os.path.join(test_output, "runlog.log") + + logging.info("Going to run func tests: %s", run_command) + + with open(run_log_path, 'w', encoding='utf-8') as log: + with subprocess.Popen(run_command, shell=True, stderr=log, stdout=log) as process: + retcode = process.wait() + if retcode == 0: + logging.info("Run successfully") + else: + logging.info("Run failed") + + subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True) + + s3_helper = S3Helper('https://s3.amazonaws.com') + state, description, test_results, additional_logs = process_result(test_output) + + ch_helper = ClickHouseHelper() + mark_flaky_tests(ch_helper, check_name, test_results) + + report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, [run_log_path] + additional_logs, check_name) + print(f"::notice ::Report url: {report_url}") + post_commit_status(gh, pr_info.sha, check_name, description, state, report_url) + + prepared_events = prepare_tests_results_for_clickhouse(pr_info, test_results, state, stopwatch.duration_seconds, stopwatch.start_time_str, report_url, check_name) + ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events) diff --git a/tests/ci/upload_result_helper.py b/tests/ci/upload_result_helper.py new file mode 100644 index 000000000000..d0705372c44f --- /dev/null +++ b/tests/ci/upload_result_helper.py @@ -0,0 +1,64 @@ +import os +import logging +import ast + +from report import create_test_html_report + +def process_logs(s3_client, additional_logs, s3_path_prefix, test_results, with_raw_logs): + proccessed_logs = {} + # Firstly convert paths of logs from test_results to urls to s3. + for test_result in test_results: + if len(test_result) <= 3 or with_raw_logs: + continue + + # Convert from string repr of list to list. 
+ test_log_paths = ast.literal_eval(test_result[3]) + test_log_urls = [] + for log_path in test_log_paths: + if log_path in proccessed_logs: + test_log_urls.append(proccessed_logs[log_path]) + elif log_path: + url = s3_client.upload_test_report_to_s3( + log_path, + s3_path_prefix + "/" + os.path.basename(log_path)) + test_log_urls.append(url) + proccessed_logs[log_path] = url + + test_result[3] = test_log_urls + + additional_urls = [] + for log_path in additional_logs: + if log_path: + additional_urls.append( + s3_client.upload_test_report_to_s3( + log_path, + s3_path_prefix + "/" + os.path.basename(log_path))) + + return additional_urls + +def upload_results(s3_client, pr_number, commit_sha, test_results, additional_files, check_name, with_raw_logs=True): + s3_path_prefix = f"{pr_number}/{commit_sha}/" + check_name.lower().replace(' ', '_').replace('(', '_').replace(')', '_').replace(',', '_') + additional_urls = process_logs(s3_client, additional_files, s3_path_prefix, test_results, with_raw_logs) + + branch_url = f"{os.getenv('GITHUB_SERVER_URL')}/{os.getenv('GITHUB_REPOSITORY')}/commits/master" + branch_name = "master" + if pr_number != 0: + branch_name = f"PR #{pr_number}" + branch_url = f"{os.getenv('GITHUB_SERVER_URL')}/{os.getenv('GITHUB_REPOSITORY')}/pull/{pr_number}" + commit_url = f"{os.getenv('GITHUB_SERVER_URL')}/{os.getenv('GITHUB_REPOSITORY')}/commit/{commit_sha}" + + task_url = f"{os.getenv('GITHUB_SERVER_URL')}/{os.getenv('GITHUB_REPOSITORY')}/actions/runs/{os.getenv('GITHUB_RUN_ID')}" + + if additional_urls: + raw_log_url = additional_urls[0] + additional_urls.pop(0) + else: + raw_log_url = task_url + + html_report = create_test_html_report(check_name, test_results, raw_log_url, task_url, branch_url, branch_name, commit_url, additional_urls, with_raw_logs) + with open('report.html', 'w', encoding='utf-8') as f: + f.write(html_report) + + url = s3_client.upload_test_report_to_s3('report.html', s3_path_prefix + ".html") + logging.info("Search result in url %s", url) + return url diff --git a/tests/ci/version_helper.py b/tests/ci/version_helper.py new file mode 100644 index 000000000000..dd3845eae660 --- /dev/null +++ b/tests/ci/version_helper.py @@ -0,0 +1,139 @@ +#!/usr/bin/env python3 +import os +import subprocess +import datetime + +FILE_WITH_VERSION_PATH = "cmake/autogenerated_versions.txt" +CHANGELOG_IN_PATH = "debian/changelog.in" +CHANGELOG_PATH = "debian/changelog" +CONTRIBUTORS_SCRIPT_DIR = "src/Storages/System/" + + +class ClickHouseVersion(): + def __init__(self, major, minor, patch, tweak, revision): + self.major = major + self.minor = minor + self.patch = patch + self.tweak = tweak + self.revision = revision + + def minor_update(self): + return ClickHouseVersion( + self.major, + self.minor + 1, + 1, + 1, + self.revision + 1) + + def patch_update(self): + return ClickHouseVersion( + self.major, + self.minor, + self.patch + 1, + 1, + self.revision) + + def tweak_update(self): + return ClickHouseVersion( + self.major, + self.minor, + self.patch, + self.tweak + 1, + self.revision) + + def get_version_string(self): + return '.'.join([ + str(self.major), + str(self.minor), + str(self.patch), + str(self.tweak) + ]) + + def as_tuple(self): + return (self.major, self.minor, self.patch, self.tweak) + + +class VersionType(): + STABLE = "stable" + TESTING = "testing" + + +def build_version_description(version, version_type): + return "v" + version.get_version_string() + "-" + version_type + + +def _get_version_from_line(line): + _, ver_with_bracket = line.strip().split(' 
') + return ver_with_bracket[:-1] + + +def get_version_from_repo(repo_path): + path_to_file = os.path.join(repo_path, FILE_WITH_VERSION_PATH) + major = 0 + minor = 0 + patch = 0 + tweak = 0 + version_revision = 0 + with open(path_to_file, 'r') as ver_file: + for line in ver_file: + if "VERSION_MAJOR" in line and "math" not in line and "SET" in line: + major = _get_version_from_line(line) + elif "VERSION_MINOR" in line and "math" not in line and "SET" in line: + minor = _get_version_from_line(line) + elif "VERSION_PATCH" in line and "math" not in line and "SET" in line: + patch = _get_version_from_line(line) + elif "VERSION_REVISION" in line and "math" not in line: + version_revision = _get_version_from_line(line) + return ClickHouseVersion(major, minor, patch, tweak, version_revision) + + +def _update_cmake_version(repo_path, version, sha, version_type): + cmd = """sed -i --follow-symlinks -e "s/SET(VERSION_REVISION [^) ]*/SET(VERSION_REVISION {revision}/g;" \ + -e "s/SET(VERSION_DESCRIBE [^) ]*/SET(VERSION_DESCRIBE {version_desc}/g;" \ + -e "s/SET(VERSION_GITHASH [^) ]*/SET(VERSION_GITHASH {sha}/g;" \ + -e "s/SET(VERSION_MAJOR [^) ]*/SET(VERSION_MAJOR {major}/g;" \ + -e "s/SET(VERSION_MINOR [^) ]*/SET(VERSION_MINOR {minor}/g;" \ + -e "s/SET(VERSION_PATCH [^) ]*/SET(VERSION_PATCH {patch}/g;" \ + -e "s/SET(VERSION_STRING [^) ]*/SET(VERSION_STRING {version_string}/g;" \ + {path}""".format( + revision=version.revision, + version_desc=build_version_description(version, version_type), + sha=sha, + major=version.major, + minor=version.minor, + patch=version.patch, + version_string=version.get_version_string(), + path=os.path.join(repo_path, FILE_WITH_VERSION_PATH), + ) + subprocess.check_call(cmd, shell=True) + + +def _update_changelog(repo_path, version): + cmd = """sed \ + -e "s/[@]VERSION_STRING[@]/{version_str}/g" \ + -e "s/[@]DATE[@]/{date}/g" \ + -e "s/[@]AUTHOR[@]/clickhouse-release/g" \ + -e "s/[@]EMAIL[@]/clickhouse-release@yandex-team.ru/g" \ + < {in_path} > {changelog_path} + """.format( + version_str=version.get_version_string(), + date=datetime.datetime.now().strftime("%a, %d %b %Y %H:%M:%S") + " +0300", + in_path=os.path.join(repo_path, CHANGELOG_IN_PATH), + changelog_path=os.path.join(repo_path, CHANGELOG_PATH) + ) + subprocess.check_call(cmd, shell=True) + +def _update_contributors(repo_path): + cmd = "cd {} && ./StorageSystemContributors.sh".format(os.path.join(repo_path, CONTRIBUTORS_SCRIPT_DIR)) + subprocess.check_call(cmd, shell=True) + +def _update_dockerfile(repo_path, version): + version_str_for_docker = '.'.join([str(version.major), str(version.minor), str(version.patch), '*']) + cmd = "ls -1 {path}/docker/*/Dockerfile | xargs sed -i -r -e 's/ARG version=.+$/ARG version='{ver}'/'".format(path=repo_path, ver=version_str_for_docker) + subprocess.check_call(cmd, shell=True) + +def update_version_local(repo_path, sha, version, version_type="testing"): + _update_contributors(repo_path) + _update_cmake_version(repo_path, version, sha, version_type) + _update_changelog(repo_path, version) + _update_dockerfile(repo_path, version) diff --git a/tests/ci/worker/init_builder.sh b/tests/ci/worker/init_builder.sh new file mode 100644 index 000000000000..dc3f777bccaa --- /dev/null +++ b/tests/ci/worker/init_builder.sh @@ -0,0 +1,20 @@ +#!/usr/bin/env bash +set -euo pipefail + +echo "Running init script" +export DEBIAN_FRONTEND=noninteractive +export RUNNER_HOME=/home/ubuntu/actions-runner + +echo "Receiving token" +export RUNNER_TOKEN=`/usr/local/bin/aws ssm get-parameter --name 
github_runner_registration_token --with-decryption --output text --query Parameter.Value` +export RUNNER_URL="https://github.com/ClickHouse" +# Funny fact, but metadata service has fixed IP +export INSTANCE_ID=`curl -s http://169.254.169.254/latest/meta-data/instance-id` + +cd $RUNNER_HOME + +echo "Going to configure runner" +sudo -u ubuntu ./config.sh --url $RUNNER_URL --token $RUNNER_TOKEN --name $INSTANCE_ID --runnergroup Default --labels 'self-hosted,Linux,X64,builder' --work _work + +echo "Run" +sudo -u ubuntu ./run.sh diff --git a/tests/ci/worker/init_func_tester.sh b/tests/ci/worker/init_func_tester.sh new file mode 100644 index 000000000000..b117f11556d7 --- /dev/null +++ b/tests/ci/worker/init_func_tester.sh @@ -0,0 +1,20 @@ +#!/usr/bin/env bash +set -euo pipefail + +echo "Running init script" +export DEBIAN_FRONTEND=noninteractive +export RUNNER_HOME=/home/ubuntu/actions-runner + +echo "Receiving token" +export RUNNER_TOKEN=`/usr/local/bin/aws ssm get-parameter --name github_runner_registration_token --with-decryption --output text --query Parameter.Value` +export RUNNER_URL="https://github.com/ClickHouse" +# Funny fact, but metadata service has fixed IP +export INSTANCE_ID=`curl -s http://169.254.169.254/latest/meta-data/instance-id` + +cd $RUNNER_HOME + +echo "Going to configure runner" +sudo -u ubuntu ./config.sh --url $RUNNER_URL --token $RUNNER_TOKEN --name $INSTANCE_ID --runnergroup Default --labels 'self-hosted,Linux,X64,func-tester' --work _work + +echo "Run" +sudo -u ubuntu ./run.sh diff --git a/tests/ci/worker/init_stress_tester.sh b/tests/ci/worker/init_stress_tester.sh new file mode 100644 index 000000000000..54ed944b2749 --- /dev/null +++ b/tests/ci/worker/init_stress_tester.sh @@ -0,0 +1,20 @@ +#!/usr/bin/env bash +set -euo pipefail + +echo "Running init script" +export DEBIAN_FRONTEND=noninteractive +export RUNNER_HOME=/home/ubuntu/actions-runner + +echo "Receiving token" +export RUNNER_TOKEN=`/usr/local/bin/aws ssm get-parameter --name github_runner_registration_token --with-decryption --output text --query Parameter.Value` +export RUNNER_URL="https://github.com/ClickHouse" +# Funny fact, but metadata service has fixed IP +export INSTANCE_ID=`curl -s http://169.254.169.254/latest/meta-data/instance-id` + +cd $RUNNER_HOME + +echo "Going to configure runner" +sudo -u ubuntu ./config.sh --url $RUNNER_URL --token $RUNNER_TOKEN --name $INSTANCE_ID --runnergroup Default --labels 'self-hosted,Linux,X64,stress-tester' --work _work + +echo "Run" +sudo -u ubuntu ./run.sh diff --git a/tests/ci/worker/init_style_checker.sh b/tests/ci/worker/init_style_checker.sh new file mode 100644 index 000000000000..77cf66b5262e --- /dev/null +++ b/tests/ci/worker/init_style_checker.sh @@ -0,0 +1,20 @@ +#!/usr/bin/bash +set -euo pipefail + +echo "Running init script" +export DEBIAN_FRONTEND=noninteractive +export RUNNER_HOME=/home/ubuntu/actions-runner + +echo "Receiving token" +export RUNNER_TOKEN=`/usr/local/bin/aws ssm get-parameter --name github_runner_registration_token --with-decryption --output text --query Parameter.Value` +export RUNNER_URL="https://github.com/ClickHouse" +# Funny fact, but metadata service has fixed IP +export INSTANCE_ID=`curl -s http://169.254.169.254/latest/meta-data/instance-id` + +cd $RUNNER_HOME + +echo "Going to configure runner" +sudo -u ubuntu ./config.sh --url $RUNNER_URL --token $RUNNER_TOKEN --name $INSTANCE_ID --runnergroup Default --labels 'self-hosted,Linux,X64,style-checker' --work _work + +echo "Run" +sudo -u ubuntu ./run.sh diff --git 
a/tests/ci/worker/ubuntu_style_check.sh b/tests/ci/worker/ubuntu_style_check.sh new file mode 100644 index 000000000000..bf5c6057bed7 --- /dev/null +++ b/tests/ci/worker/ubuntu_style_check.sh @@ -0,0 +1,57 @@ +#!/usr/bin/env bash +set -euo pipefail + +echo "Running prepare script" +export DEBIAN_FRONTEND=noninteractive +export RUNNER_VERSION=2.283.1 +export RUNNER_HOME=/home/ubuntu/actions-runner + +apt-get update + +apt-get install --yes --no-install-recommends \ + apt-transport-https \ + ca-certificates \ + curl \ + gnupg \ + lsb-release \ + python3-pip \ + unzip + +curl -fsSL https://download.docker.com/linux/ubuntu/gpg | gpg --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg + +echo "deb [arch=amd64 signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" | tee /etc/apt/sources.list.d/docker.list > /dev/null + +apt-get update + +apt-get install --yes --no-install-recommends docker-ce docker-ce-cli containerd.io + +usermod -aG docker ubuntu + +# enable ipv6 in containers (fixed-cidr-v6 is some random network mask) +cat < /etc/docker/daemon.json +{ + "ipv6": true, + "fixed-cidr-v6": "2001:db8:1::/64" +} +EOT + +systemctl restart docker + +pip install boto3 pygithub requests urllib3 unidiff + +mkdir -p $RUNNER_HOME && cd $RUNNER_HOME + +curl -O -L https://github.com/actions/runner/releases/download/v$RUNNER_VERSION/actions-runner-linux-x64-$RUNNER_VERSION.tar.gz + +tar xzf ./actions-runner-linux-x64-$RUNNER_VERSION.tar.gz +rm -f ./actions-runner-linux-x64-$RUNNER_VERSION.tar.gz +./bin/installdependencies.sh + +chown -R ubuntu:ubuntu $RUNNER_HOME + +cd /home/ubuntu +curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" +unzip awscliv2.zip +./aws/install + +rm -rf /home/ubuntu/awscliv2.zip /home/ubuntu/aws From 516b5cd247acb86ef39f5ad86e55a1e65fb95a1c Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 22 Nov 2021 14:55:54 +0300 Subject: [PATCH 176/472] Revert old CI config --- tests/ci/ci_config.json | 144 +++++++++++++++++----------------------- 1 file changed, 61 insertions(+), 83 deletions(-) diff --git a/tests/ci/ci_config.json b/tests/ci/ci_config.json index 4feae56b93cb..52a101728eaa 100644 --- a/tests/ci/ci_config.json +++ b/tests/ci/ci_config.json @@ -1,7 +1,7 @@ { "build_config": [ { - "compiler": "clang-13", + "compiler": "clang-11", "build-type": "", "sanitizer": "", "package-type": "deb", @@ -12,7 +12,7 @@ "with_coverage": false }, { - "compiler": "clang-13", + "compiler": "clang-11", "build-type": "", "sanitizer": "", "package-type": "performance", @@ -22,7 +22,7 @@ "with_coverage": false }, { - "compiler": "gcc-11", + "compiler": "gcc-10", "build-type": "", "sanitizer": "", "package-type": "binary", @@ -32,7 +32,7 @@ "with_coverage": false }, { - "compiler": "clang-13", + "compiler": "clang-11", "build-type": "", "sanitizer": "address", "package-type": "deb", @@ -42,7 +42,7 @@ "with_coverage": false }, { - "compiler": "clang-13", + "compiler": "clang-11", "build-type": "", "sanitizer": "undefined", "package-type": "deb", @@ -52,7 +52,7 @@ "with_coverage": false }, { - "compiler": "clang-13", + "compiler": "clang-11", "build-type": "", "sanitizer": "thread", "package-type": "deb", @@ -62,7 +62,7 @@ "with_coverage": false }, { - "compiler": "clang-13", + "compiler": "clang-11", "build-type": "", "sanitizer": "memory", "package-type": "deb", @@ -72,7 +72,7 @@ "with_coverage": false }, { - "compiler": "clang-13", + "compiler": "clang-11", "build-type": "debug", "sanitizer": 
"", "package-type": "deb", @@ -82,7 +82,7 @@ "with_coverage": false }, { - "compiler": "gcc-11", + "compiler": "gcc-10", "build-type": "", "sanitizer": "", "package-type": "deb", @@ -92,7 +92,7 @@ "with_coverage": false }, { - "compiler": "clang-13", + "compiler": "clang-11", "build-type": "", "sanitizer": "", "package-type": "binary", @@ -104,7 +104,7 @@ ], "special_build_config": [ { - "compiler": "clang-13", + "compiler": "clang-11", "build-type": "debug", "sanitizer": "", "package-type": "deb", @@ -114,7 +114,7 @@ "with_coverage": false }, { - "compiler": "clang-13", + "compiler": "clang-11", "build-type": "", "sanitizer": "", "package-type": "binary", @@ -124,7 +124,7 @@ "with_coverage": false }, { - "compiler": "clang-13-darwin", + "compiler": "clang-11-darwin", "build-type": "", "sanitizer": "", "package-type": "binary", @@ -134,7 +134,7 @@ "with_coverage": false }, { - "compiler": "clang-13-aarch64", + "compiler": "clang-11-aarch64", "build-type": "", "sanitizer": "", "package-type": "binary", @@ -144,7 +144,7 @@ "with_coverage": false }, { - "compiler": "clang-13-freebsd", + "compiler": "clang-11-freebsd", "build-type": "", "sanitizer": "", "package-type": "binary", @@ -154,17 +154,7 @@ "with_coverage": false }, { - "compiler": "clang-13-darwin-aarch64", - "build-type": "", - "sanitizer": "", - "package-type": "binary", - "bundled": "bundled", - "splitted": "unsplitted", - "tidy": "disable", - "with_coverage": false - }, - { - "compiler": "clang-13-ppc64le", + "compiler": "clang-11-darwin-aarch64", "build-type": "", "sanitizer": "", "package-type": "binary", @@ -177,7 +167,7 @@ "tests_config": { "Functional stateful tests (address)": { "required_build_properties": { - "compiler": "clang-13", + "compiler": "clang-11", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "address", @@ -189,7 +179,7 @@ }, "Functional stateful tests (thread)": { "required_build_properties": { - "compiler": "clang-13", + "compiler": "clang-11", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "thread", @@ -201,7 +191,7 @@ }, "Functional stateful tests (memory)": { "required_build_properties": { - "compiler": "clang-13", + "compiler": "clang-11", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "memory", @@ -213,7 +203,7 @@ }, "Functional stateful tests (ubsan)": { "required_build_properties": { - "compiler": "clang-13", + "compiler": "clang-11", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "undefined", @@ -225,7 +215,7 @@ }, "Functional stateful tests (debug)": { "required_build_properties": { - "compiler": "clang-13", + "compiler": "clang-11", "package_type": "deb", "build_type": "debug", "sanitizer": "none", @@ -237,7 +227,7 @@ }, "Functional stateful tests (release)": { "required_build_properties": { - "compiler": "clang-13", + "compiler": "clang-11", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "none", @@ -249,7 +239,7 @@ }, "Functional stateful tests (release, DatabaseOrdinary)": { "required_build_properties": { - "compiler": "clang-13", + "compiler": "clang-11", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "none", @@ -261,7 +251,7 @@ }, "Functional stateful tests (release, DatabaseReplicated)": { "required_build_properties": { - "compiler": "clang-13", + "compiler": "clang-11", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "none", @@ -273,7 +263,7 @@ }, "Functional stateless tests (address)": { "required_build_properties": { - "compiler": 
"clang-13", + "compiler": "clang-11", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "address", @@ -285,7 +275,7 @@ }, "Functional stateless tests (thread)": { "required_build_properties": { - "compiler": "clang-13", + "compiler": "clang-11", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "thread", @@ -297,7 +287,7 @@ }, "Functional stateless tests (memory)": { "required_build_properties": { - "compiler": "clang-13", + "compiler": "clang-11", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "memory", @@ -309,7 +299,7 @@ }, "Functional stateless tests (ubsan)": { "required_build_properties": { - "compiler": "clang-13", + "compiler": "clang-11", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "undefined", @@ -321,7 +311,7 @@ }, "Functional stateless tests (debug)": { "required_build_properties": { - "compiler": "clang-13", + "compiler": "clang-11", "package_type": "deb", "build_type": "debug", "sanitizer": "none", @@ -333,7 +323,7 @@ }, "Functional stateless tests (release)": { "required_build_properties": { - "compiler": "clang-13", + "compiler": "clang-11", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "none", @@ -345,7 +335,7 @@ }, "Functional stateless tests (pytest)": { "required_build_properties": { - "compiler": "clang-13", + "compiler": "clang-11", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "none", @@ -357,7 +347,7 @@ }, "Functional stateless tests (unbundled)": { "required_build_properties": { - "compiler": "gcc-11", + "compiler": "gcc-10", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "none", @@ -369,7 +359,7 @@ }, "Functional stateless tests (release, wide parts enabled)": { "required_build_properties": { - "compiler": "clang-13", + "compiler": "clang-11", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "none", @@ -381,7 +371,7 @@ }, "Functional stateless tests (release, DatabaseOrdinary)": { "required_build_properties": { - "compiler": "clang-13", + "compiler": "clang-11", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "none", @@ -393,7 +383,7 @@ }, "Functional stateless tests (release, DatabaseReplicated)": { "required_build_properties": { - "compiler": "clang-13", + "compiler": "clang-11", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "none", @@ -405,7 +395,7 @@ }, "Stress test (address)": { "required_build_properties": { - "compiler": "clang-13", + "compiler": "clang-11", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "address", @@ -417,7 +407,7 @@ }, "Stress test (thread)": { "required_build_properties": { - "compiler": "clang-13", + "compiler": "clang-11", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "thread", @@ -429,7 +419,7 @@ }, "Stress test (undefined)": { "required_build_properties": { - "compiler": "clang-13", + "compiler": "clang-11", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "undefined", @@ -441,7 +431,7 @@ }, "Stress test (memory)": { "required_build_properties": { - "compiler": "clang-13", + "compiler": "clang-11", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "memory", @@ -453,7 +443,7 @@ }, "Stress test (debug)": { "required_build_properties": { - "compiler": "clang-13", + "compiler": "clang-11", "package_type": "deb", "build_type": "debug", "sanitizer": "none", @@ -465,7 +455,7 @@ }, "Integration tests (asan)": { 
"required_build_properties": { - "compiler": "clang-13", + "compiler": "clang-11", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "address", @@ -477,7 +467,7 @@ }, "Integration tests (thread)": { "required_build_properties": { - "compiler": "clang-13", + "compiler": "clang-11", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "thread", @@ -489,7 +479,7 @@ }, "Integration tests (release)": { "required_build_properties": { - "compiler": "clang-13", + "compiler": "clang-11", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "none", @@ -501,7 +491,7 @@ }, "Integration tests (memory)": { "required_build_properties": { - "compiler": "clang-13", + "compiler": "clang-11", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "memory", @@ -513,7 +503,7 @@ }, "Integration tests flaky check (asan)": { "required_build_properties": { - "compiler": "clang-13", + "compiler": "clang-11", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "address", @@ -525,7 +515,7 @@ }, "Compatibility check": { "required_build_properties": { - "compiler": "clang-13", + "compiler": "clang-11", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "none", @@ -537,7 +527,7 @@ }, "Split build smoke test": { "required_build_properties": { - "compiler": "clang-13", + "compiler": "clang-11", "package_type": "binary", "build_type": "relwithdebuginfo", "sanitizer": "none", @@ -549,7 +539,7 @@ }, "Testflows check": { "required_build_properties": { - "compiler": "clang-13", + "compiler": "clang-11", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "none", @@ -561,7 +551,7 @@ }, "Unit tests release gcc": { "required_build_properties": { - "compiler": "gcc-11", + "compiler": "gcc-10", "package_type": "binary", "build_type": "relwithdebuginfo", "sanitizer": "none", @@ -573,7 +563,7 @@ }, "Unit tests release clang": { "required_build_properties": { - "compiler": "clang-13", + "compiler": "clang-11", "package_type": "binary", "build_type": "relwithdebuginfo", "sanitizer": "none", @@ -585,7 +575,7 @@ }, "Unit tests ASAN": { "required_build_properties": { - "compiler": "clang-13", + "compiler": "clang-11", "package_type": "binary", "build_type": "relwithdebuginfo", "sanitizer": "address", @@ -597,7 +587,7 @@ }, "Unit tests MSAN": { "required_build_properties": { - "compiler": "clang-13", + "compiler": "clang-11", "package_type": "binary", "build_type": "relwithdebuginfo", "sanitizer": "memory", @@ -609,7 +599,7 @@ }, "Unit tests TSAN": { "required_build_properties": { - "compiler": "clang-13", + "compiler": "clang-11", "package_type": "binary", "build_type": "relwithdebuginfo", "sanitizer": "thread", @@ -621,7 +611,7 @@ }, "Unit tests UBSAN": { "required_build_properties": { - "compiler": "clang-13", + "compiler": "clang-11", "package_type": "binary", "build_type": "relwithdebuginfo", "sanitizer": "thread", @@ -633,7 +623,7 @@ }, "AST fuzzer (debug)": { "required_build_properties": { - "compiler": "clang-13", + "compiler": "clang-11", "package_type": "binary", "build_type": "debug", "sanitizer": "none", @@ -645,7 +635,7 @@ }, "AST fuzzer (ASan)": { "required_build_properties": { - "compiler": "clang-13", + "compiler": "clang-11", "package_type": "binary", "build_type": "relwithdebuginfo", "sanitizer": "address", @@ -657,7 +647,7 @@ }, "AST fuzzer (MSan)": { "required_build_properties": { - "compiler": "clang-13", + "compiler": "clang-11", "package_type": "binary", "build_type": "relwithdebuginfo", 
"sanitizer": "memory", @@ -669,7 +659,7 @@ }, "AST fuzzer (TSan)": { "required_build_properties": { - "compiler": "clang-13", + "compiler": "clang-11", "package_type": "binary", "build_type": "relwithdebuginfo", "sanitizer": "thread", @@ -681,7 +671,7 @@ }, "AST fuzzer (UBSan)": { "required_build_properties": { - "compiler": "clang-13", + "compiler": "clang-11", "package_type": "binary", "build_type": "relwithdebuginfo", "sanitizer": "undefined", @@ -693,7 +683,7 @@ }, "Release": { "required_build_properties": { - "compiler": "clang-13", + "compiler": "clang-11", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "none", @@ -705,7 +695,7 @@ }, "Functional stateless tests flaky check (address)": { "required_build_properties": { - "compiler": "clang-13", + "compiler": "clang-11", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "address", @@ -714,18 +704,6 @@ "clang-tidy": "disable", "with_coverage": false } - }, - "ClickHouse Keeper Jepsen": { - "required_build_properties": { - "compiler": "clang-13", - "package_type": "binary", - "build_type": "relwithdebuginfo", - "sanitizer": "none", - "bundled": "bundled", - "splitted": "unsplitted", - "clang-tidy": "disable", - "with_coverage": false - } } } } From 551832a3e58a4c04f970d3c0f79da990c59fe17b Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 22 Nov 2021 15:00:19 +0300 Subject: [PATCH 177/472] Fix new ci compiler --- tests/ci/ci_config.py | 124 +++++++++++++++++++++--------------------- 1 file changed, 62 insertions(+), 62 deletions(-) diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index 7924a726a2e0..64a2b4d5a2ab 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -3,7 +3,7 @@ CI_CONFIG = { "build_config": [ { - "compiler": "clang-13", + "compiler": "clang-11", "build_type": "", "sanitizer": "", "package_type": "deb", @@ -14,7 +14,7 @@ "with_coverage": False }, { - "compiler": "clang-13", + "compiler": "clang-11", "build_type": "", "sanitizer": "", "package_type": "performance", @@ -24,7 +24,7 @@ "with_coverage": False }, { - "compiler": "gcc-11", + "compiler": "gcc-10", "build_type": "", "sanitizer": "", "package_type": "binary", @@ -34,7 +34,7 @@ "with_coverage": False }, { - "compiler": "clang-13", + "compiler": "clang-11", "build_type": "", "sanitizer": "address", "package_type": "deb", @@ -44,7 +44,7 @@ "with_coverage": False }, { - "compiler": "clang-13", + "compiler": "clang-11", "build_type": "", "sanitizer": "undefined", "package_type": "deb", @@ -54,7 +54,7 @@ "with_coverage": False }, { - "compiler": "clang-13", + "compiler": "clang-11", "build_type": "", "sanitizer": "thread", "package_type": "deb", @@ -64,7 +64,7 @@ "with_coverage": False }, { - "compiler": "clang-13", + "compiler": "clang-11", "build_type": "", "sanitizer": "memory", "package_type": "deb", @@ -74,7 +74,7 @@ "with_coverage": False }, { - "compiler": "clang-13", + "compiler": "clang-11", "build_type": "debug", "sanitizer": "", "package_type": "deb", @@ -84,7 +84,7 @@ "with_coverage": False }, { - "compiler": "gcc-11", + "compiler": "gcc-10", "build_type": "", "sanitizer": "", "package_type": "deb", @@ -94,7 +94,7 @@ "with_coverage": False }, { - "compiler": "clang-13", + "compiler": "clang-11", "build_type": "", "sanitizer": "", "package_type": "binary", @@ -106,7 +106,7 @@ ], "special_build_config": [ { - "compiler": "clang-13", + "compiler": "clang-11", "build_type": "debug", "sanitizer": "", "package_type": "deb", @@ -116,7 +116,7 @@ "with_coverage": False }, { - "compiler": "clang-13", + 
"compiler": "clang-11", "build_type": "", "sanitizer": "", "package_type": "binary", @@ -126,7 +126,7 @@ "with_coverage": False }, { - "compiler": "clang-13-darwin", + "compiler": "clang-11-darwin", "build_type": "", "sanitizer": "", "package_type": "binary", @@ -136,7 +136,7 @@ "with_coverage": False }, { - "compiler": "clang-13-aarch64", + "compiler": "clang-11-aarch64", "build_type": "", "sanitizer": "", "package_type": "binary", @@ -146,7 +146,7 @@ "with_coverage": False }, { - "compiler": "clang-13-freebsd", + "compiler": "clang-11-freebsd", "build_type": "", "sanitizer": "", "package_type": "binary", @@ -156,7 +156,7 @@ "with_coverage": False }, { - "compiler": "clang-13-darwin-aarch64", + "compiler": "clang-11-darwin-aarch64", "build_type": "", "sanitizer": "", "package_type": "binary", @@ -166,7 +166,7 @@ "with_coverage": False }, { - "compiler": "clang-13-ppc64le", + "compiler": "clang-11-ppc64le", "build_type": "", "sanitizer": "", "package_type": "binary", @@ -179,7 +179,7 @@ "tests_config": { "Stateful tests (address, actions)": { "required_build_properties": { - "compiler": "clang-13", + "compiler": "clang-11", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "address", @@ -191,7 +191,7 @@ }, "Stateful tests (thread, actions)": { "required_build_properties": { - "compiler": "clang-13", + "compiler": "clang-11", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "thread", @@ -203,7 +203,7 @@ }, "Stateful tests (memory, actions)": { "required_build_properties": { - "compiler": "clang-13", + "compiler": "clang-11", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "memory", @@ -215,7 +215,7 @@ }, "Stateful tests (ubsan, actions)": { "required_build_properties": { - "compiler": "clang-13", + "compiler": "clang-11", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "undefined", @@ -227,7 +227,7 @@ }, "Stateful tests (debug, actions)": { "required_build_properties": { - "compiler": "clang-13", + "compiler": "clang-11", "package_type": "deb", "build_type": "debug", "sanitizer": "none", @@ -239,7 +239,7 @@ }, "Stateful tests (release, actions)": { "required_build_properties": { - "compiler": "clang-13", + "compiler": "clang-11", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "none", @@ -251,7 +251,7 @@ }, "Stateful tests (release, DatabaseOrdinary, actions)": { "required_build_properties": { - "compiler": "clang-13", + "compiler": "clang-11", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "none", @@ -263,7 +263,7 @@ }, "Stateful tests (release, DatabaseReplicated, actions)": { "required_build_properties": { - "compiler": "clang-13", + "compiler": "clang-11", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "none", @@ -275,7 +275,7 @@ }, "Stateless tests (address, actions)": { "required_build_properties": { - "compiler": "clang-13", + "compiler": "clang-11", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "address", @@ -287,7 +287,7 @@ }, "Stateless tests (thread, actions)": { "required_build_properties": { - "compiler": "clang-13", + "compiler": "clang-11", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "thread", @@ -299,7 +299,7 @@ }, "Stateless tests (memory, actions)": { "required_build_properties": { - "compiler": "clang-13", + "compiler": "clang-11", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "memory", @@ -311,7 +311,7 @@ }, "Stateless tests (ubsan, actions)": { 
"required_build_properties": { - "compiler": "clang-13", + "compiler": "clang-11", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "undefined", @@ -323,7 +323,7 @@ }, "Stateless tests (debug, actions)": { "required_build_properties": { - "compiler": "clang-13", + "compiler": "clang-11", "package_type": "deb", "build_type": "debug", "sanitizer": "none", @@ -335,7 +335,7 @@ }, "Stateless tests (release, actions)": { "required_build_properties": { - "compiler": "clang-13", + "compiler": "clang-11", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "none", @@ -347,7 +347,7 @@ }, "Stateless tests (unbundled, actions)": { "required_build_properties": { - "compiler": "gcc-11", + "compiler": "gcc-10", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "none", @@ -359,7 +359,7 @@ }, "Stateless tests (release, wide parts enabled, actions)": { "required_build_properties": { - "compiler": "clang-13", + "compiler": "clang-11", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "none", @@ -371,7 +371,7 @@ }, "Stateless tests (release, DatabaseOrdinary, actions)": { "required_build_properties": { - "compiler": "clang-13", + "compiler": "clang-11", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "none", @@ -383,7 +383,7 @@ }, "Stateless tests (release, DatabaseReplicated, actions)": { "required_build_properties": { - "compiler": "clang-13", + "compiler": "clang-11", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "none", @@ -395,7 +395,7 @@ }, "Stress test (address, actions)": { "required_build_properties": { - "compiler": "clang-13", + "compiler": "clang-11", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "address", @@ -407,7 +407,7 @@ }, "Stress test (thread, actions)": { "required_build_properties": { - "compiler": "clang-13", + "compiler": "clang-11", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "thread", @@ -419,7 +419,7 @@ }, "Stress test (undefined, actions)": { "required_build_properties": { - "compiler": "clang-13", + "compiler": "clang-11", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "undefined", @@ -431,7 +431,7 @@ }, "Stress test (memory, actions)": { "required_build_properties": { - "compiler": "clang-13", + "compiler": "clang-11", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "memory", @@ -443,7 +443,7 @@ }, "Stress test (debug, actions)": { "required_build_properties": { - "compiler": "clang-13", + "compiler": "clang-11", "package_type": "deb", "build_type": "debug", "sanitizer": "none", @@ -455,7 +455,7 @@ }, "Integration tests (asan, actions)": { "required_build_properties": { - "compiler": "clang-13", + "compiler": "clang-11", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "address", @@ -467,7 +467,7 @@ }, "Integration tests (thread, actions)": { "required_build_properties": { - "compiler": "clang-13", + "compiler": "clang-11", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "thread", @@ -479,7 +479,7 @@ }, "Integration tests (release, actions)": { "required_build_properties": { - "compiler": "clang-13", + "compiler": "clang-11", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "none", @@ -491,7 +491,7 @@ }, "Integration tests (memory, actions)": { "required_build_properties": { - "compiler": "clang-13", + "compiler": "clang-11", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": 
"memory", @@ -503,7 +503,7 @@ }, "Integration tests flaky check (asan, actions)": { "required_build_properties": { - "compiler": "clang-13", + "compiler": "clang-11", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "address", @@ -515,7 +515,7 @@ }, "Compatibility check (actions)": { "required_build_properties": { - "compiler": "clang-13", + "compiler": "clang-11", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "none", @@ -527,7 +527,7 @@ }, "Split build smoke test (actions)": { "required_build_properties": { - "compiler": "clang-13", + "compiler": "clang-11", "package_type": "binary", "build_type": "relwithdebuginfo", "sanitizer": "none", @@ -539,7 +539,7 @@ }, "Testflows check (actions)": { "required_build_properties": { - "compiler": "clang-13", + "compiler": "clang-11", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "none", @@ -551,7 +551,7 @@ }, "Unit tests (release-gcc, actions)": { "required_build_properties": { - "compiler": "gcc-11", + "compiler": "gcc-10", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "none", @@ -563,7 +563,7 @@ }, "Unit tests (release-clang, actions)": { "required_build_properties": { - "compiler": "clang-13", + "compiler": "clang-11", "package_type": "binary", "build_type": "relwithdebuginfo", "sanitizer": "none", @@ -575,7 +575,7 @@ }, "Unit tests (asan, actions)": { "required_build_properties": { - "compiler": "clang-13", + "compiler": "clang-11", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "address", @@ -587,7 +587,7 @@ }, "Unit tests (msan, actions)": { "required_build_properties": { - "compiler": "clang-13", + "compiler": "clang-11", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "memory", @@ -599,7 +599,7 @@ }, "Unit tests (tsan, actions)": { "required_build_properties": { - "compiler": "clang-13", + "compiler": "clang-11", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "thread", @@ -611,7 +611,7 @@ }, "Unit tests (ubsan, actions)": { "required_build_properties": { - "compiler": "clang-13", + "compiler": "clang-11", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "thread", @@ -623,7 +623,7 @@ }, "AST fuzzer (debug, actions)": { "required_build_properties": { - "compiler": "clang-13", + "compiler": "clang-11", "package_type": "deb", "build_type": "debug", "sanitizer": "none", @@ -635,7 +635,7 @@ }, "AST fuzzer (ASan, actions)": { "required_build_properties": { - "compiler": "clang-13", + "compiler": "clang-11", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "address", @@ -647,7 +647,7 @@ }, "AST fuzzer (MSan, actions)": { "required_build_properties": { - "compiler": "clang-13", + "compiler": "clang-11", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "memory", @@ -659,7 +659,7 @@ }, "AST fuzzer (TSan, actions)": { "required_build_properties": { - "compiler": "clang-13", + "compiler": "clang-11", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "thread", @@ -671,7 +671,7 @@ }, "AST fuzzer (UBSan, actions)": { "required_build_properties": { - "compiler": "clang-13", + "compiler": "clang-11", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "undefined", @@ -683,7 +683,7 @@ }, "Release (actions)": { "required_build_properties": { - "compiler": "clang-13", + "compiler": "clang-11", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "none", @@ -695,7 +695,7 @@ }, "Stateless 
tests flaky check (address, actions)": { "required_build_properties": { - "compiler": "clang-13", + "compiler": "clang-11", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "address", @@ -707,7 +707,7 @@ }, "ClickHouse Keeper Jepsen (actions)": { "required_build_properties": { - "compiler": "clang-13", + "compiler": "clang-11", "package_type": "binary", "build_type": "relwithdebuginfo", "sanitizer": "none", From 9c9bb47d14a0527486c688a1abe63209522a4a08 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 23 Nov 2021 12:18:28 +0300 Subject: [PATCH 178/472] Add workflow for release branches --- .github/workflows/release_branches.yml | 933 +++++++++++++++++++++++++ 1 file changed, 933 insertions(+) create mode 100644 .github/workflows/release_branches.yml diff --git a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml new file mode 100644 index 000000000000..e279ae915888 --- /dev/null +++ b/.github/workflows/release_branches.yml @@ -0,0 +1,933 @@ +name: ReleaseCI +on: # yamllint disable-line rule:truthy + push: + branches: + - '21.**' + - '22.**' + - '23.**' + - '24.**' + - 'backport/**' +jobs: + DockerHubPush: + runs-on: [self-hosted, style-checker] + steps: + - name: Check out repository code + uses: actions/checkout@v2 + - name: Images check + run: | + cd $GITHUB_WORKSPACE/tests/ci + python3 docker_images_check.py + - name: Upload images files to artifacts + uses: actions/upload-artifact@v2 + with: + name: changed_images + path: ${{ runner.temp }}/docker_images_check/changed_images.json + CompatibilityCheck: + needs: [BuilderDebRelease] + runs-on: [self-hosted, style-checker] + steps: + - name: Check out repository code + uses: actions/checkout@v2 + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: CompatibilityCheck + env: + TEMP_PATH: ${{runner.temp}}/compatibility_check + REPO_COPY: ${{runner.temp}}/compatibility_check/ClickHouse + REPORTS_PATH: ${{runner.temp}}/reports_dir + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci && python3 compatibility_check.py 0 + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH +######################################################################################### +#################################### ORDINARY BUILDS #################################### +######################################################################################### + BuilderDebRelease: + needs: [DockerHubPush] + runs-on: [self-hosted, builder] + steps: + - name: Download changed images + uses: actions/download-artifact@v2 + with: + name: changed_images + path: ${{ runner.temp }}/images_path + - name: Check out repository code + uses: actions/checkout@v2 + with: + submodules: 'recursive' + fetch-depth: 0 # otherwise we will have no info about contributors + - name: Build + env: + TEMP_PATH: ${{runner.temp}}/build_check + IMAGES_PATH: ${{runner.temp}}/images_path + REPO_COPY: ${{runner.temp}}/build_check/ClickHouse + CACHES_PATH: ${{runner.temp}}/../ccaches + CHECK_NAME: 'ClickHouse build check (actions)' + BUILD_NUMBER: 0 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NUMBER + - name: Upload build URLs to artifacts + uses: actions/upload-artifact@v2 + with: + name: ${{ env.BUILD_NAME }} + 
path: ${{ runner.temp }}/build_check/${{ env.BUILD_NAME }}.json + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + BuilderDebAsan: + needs: [DockerHubPush] + runs-on: [self-hosted, builder] + steps: + - name: Download changed images + uses: actions/download-artifact@v2 + with: + name: changed_images + path: ${{ runner.temp }}/images_path + - name: Check out repository code + uses: actions/checkout@v2 + with: + submodules: 'recursive' + fetch-depth: 0 # otherwise we will have no info about contributors + - name: Build + env: + TEMP_PATH: ${{runner.temp}}/build_check + IMAGES_PATH: ${{runner.temp}}/images_path + REPO_COPY: ${{runner.temp}}/build_check/ClickHouse + CACHES_PATH: ${{runner.temp}}/../ccaches + CHECK_NAME: 'ClickHouse build check (actions)' + BUILD_NUMBER: 3 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NUMBER + - name: Upload build URLs to artifacts + uses: actions/upload-artifact@v2 + with: + name: ${{ env.BUILD_NAME }} + path: ${{ runner.temp }}/build_check/${{ env.BUILD_NAME }}.json + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + BuilderDebUBsan: + needs: [DockerHubPush] + runs-on: [self-hosted, builder] + steps: + - name: Download changed images + uses: actions/download-artifact@v2 + with: + name: changed_images + path: ${{ runner.temp }}/images_path + - name: Check out repository code + uses: actions/checkout@v2 + with: + submodules: 'recursive' + fetch-depth: 0 # otherwise we will have no info about contributors + - name: Build + env: + TEMP_PATH: ${{runner.temp}}/build_check + IMAGES_PATH: ${{runner.temp}}/images_path + REPO_COPY: ${{runner.temp}}/build_check/ClickHouse + CACHES_PATH: ${{runner.temp}}/../ccaches + CHECK_NAME: 'ClickHouse build check (actions)' + BUILD_NUMBER: 4 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NUMBER + - name: Upload build URLs to artifacts + uses: actions/upload-artifact@v2 + with: + name: ${{ env.BUILD_NAME }} + path: ${{ runner.temp }}/build_check/${{ env.BUILD_NAME }}.json + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + BuilderDebTsan: + needs: [DockerHubPush] + runs-on: [self-hosted, builder] + steps: + - name: Download changed images + uses: actions/download-artifact@v2 + with: + name: changed_images + path: ${{ runner.temp }}/images_path + - name: Check out repository code + uses: actions/checkout@v2 + with: + submodules: 'recursive' + fetch-depth: 0 # otherwise we will have no info about contributors + - name: Build + env: + TEMP_PATH: ${{runner.temp}}/build_check + IMAGES_PATH: ${{runner.temp}}/images_path + REPO_COPY: ${{runner.temp}}/build_check/ClickHouse + CACHES_PATH: ${{runner.temp}}/../ccaches + CHECK_NAME: 'ClickHouse build check (actions)' + BUILD_NUMBER: 5 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NUMBER + - name: Upload build URLs to artifacts + uses: actions/upload-artifact@v2 + with: + name: ${{ env.BUILD_NAME }} + path: ${{ runner.temp }}/build_check/${{ env.BUILD_NAME }}.json + - name: Cleanup 
+ if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + BuilderDebMsan: + needs: [DockerHubPush] + runs-on: [self-hosted, builder] + steps: + - name: Download changed images + uses: actions/download-artifact@v2 + with: + name: changed_images + path: ${{ runner.temp }}/images_path + - name: Check out repository code + uses: actions/checkout@v2 + with: + submodules: 'recursive' + fetch-depth: 0 # otherwise we will have no info about contributors + - name: Build + env: + TEMP_PATH: ${{runner.temp}}/build_check + IMAGES_PATH: ${{runner.temp}}/images_path + REPO_COPY: ${{runner.temp}}/build_check/ClickHouse + CACHES_PATH: ${{runner.temp}}/../ccaches + CHECK_NAME: 'ClickHouse build check (actions)' + BUILD_NUMBER: 6 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NUMBER + - name: Upload build URLs to artifacts + uses: actions/upload-artifact@v2 + with: + name: ${{ env.BUILD_NAME }} + path: ${{ runner.temp }}/build_check/${{ env.BUILD_NAME }}.json + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + BuilderDebDebug: + needs: [DockerHubPush] + runs-on: [self-hosted, builder] + steps: + - name: Download changed images + uses: actions/download-artifact@v2 + with: + name: changed_images + path: ${{ runner.temp }}/images_path + - name: Check out repository code + uses: actions/checkout@v2 + with: + submodules: 'recursive' + fetch-depth: 0 # otherwise we will have no info about contributors + - name: Build + env: + TEMP_PATH: ${{runner.temp}}/build_check + IMAGES_PATH: ${{runner.temp}}/images_path + REPO_COPY: ${{runner.temp}}/build_check/ClickHouse + CACHES_PATH: ${{runner.temp}}/../ccaches + CHECK_NAME: 'ClickHouse build check (actions)' + BUILD_NUMBER: 7 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NUMBER + - name: Upload build URLs to artifacts + uses: actions/upload-artifact@v2 + with: + name: ${{ env.BUILD_NAME }} + path: ${{ runner.temp }}/build_check/${{ env.BUILD_NAME }}.json + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH +############################################################################################ +##################################### BUILD REPORTER ####################################### +############################################################################################ + BuilderReport: + needs: + - BuilderDebRelease + - BuilderDebAsan + - BuilderDebTsan + - BuilderDebUBsan + - BuilderDebMsan + - BuilderDebDebug + runs-on: [self-hosted, style-checker] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Report Builder + env: + TEMP_PATH: ${{runner.temp}}/report_check + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'ClickHouse build check (actions)' + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cd $GITHUB_WORKSPACE/tests/ci + python3 build_report_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH 
+############################################################################################## +########################### FUNCTIONAl STATELESS TESTS ####################################### +############################################################################################## + FunctionalStatelessTestRelease: + needs: [BuilderDebRelease] + runs-on: [self-hosted, func-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + env: + TEMP_PATH: ${{runner.temp}}/stateless_debug + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Stateless tests (release, actions)' + REPO_COPY: ${{runner.temp}}/stateless_debug/ClickHouse + KILL_TIMEOUT: 10800 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + FunctionalStatelessTestAsan: + needs: [BuilderDebAsan] + runs-on: [self-hosted, func-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + env: + TEMP_PATH: ${{runner.temp}}/stateless_debug + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Stateless tests (address, actions)' + REPO_COPY: ${{runner.temp}}/stateless_debug/ClickHouse + KILL_TIMEOUT: 10800 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + FunctionalStatelessTestTsan: + needs: [BuilderDebTsan] + runs-on: [self-hosted, func-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + env: + TEMP_PATH: ${{runner.temp}}/stateless_tsan + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Stateless tests (thread, actions)' + REPO_COPY: ${{runner.temp}}/stateless_tsan/ClickHouse + KILL_TIMEOUT: 10800 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + FunctionalStatelessTestUBsan: + needs: [BuilderDebUBsan] + runs-on: [self-hosted, func-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + env: + TEMP_PATH: ${{runner.temp}}/stateless_ubsan + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Stateless tests (ubsan, actions)' + REPO_COPY: ${{runner.temp}}/stateless_ubsan/ClickHouse + KILL_TIMEOUT: 10800 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 functional_test_check.py 
"$CHECK_NAME" $KILL_TIMEOUT + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + FunctionalStatelessTestMsan: + needs: [BuilderDebMsan] + runs-on: [self-hosted, func-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + env: + TEMP_PATH: ${{runner.temp}}/stateless_memory + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Stateless tests (memory, actions)' + REPO_COPY: ${{runner.temp}}/stateless_memory/ClickHouse + KILL_TIMEOUT: 10800 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + FunctionalStatelessTestDebug: + needs: [BuilderDebDebug] + runs-on: [self-hosted, func-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + env: + TEMP_PATH: ${{runner.temp}}/stateless_debug + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Stateless tests (debug, actions)' + REPO_COPY: ${{runner.temp}}/stateless_debug/ClickHouse + KILL_TIMEOUT: 10800 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH +############################################################################################## +############################ FUNCTIONAl STATEFUL TESTS ####################################### +############################################################################################## + FunctionalStatefulTestRelease: + needs: [BuilderDebRelease] + runs-on: [self-hosted, func-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + env: + TEMP_PATH: ${{runner.temp}}/stateful_debug + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Stateful tests (release, actions)' + REPO_COPY: ${{runner.temp}}/stateful_debug/ClickHouse + KILL_TIMEOUT: 3600 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + FunctionalStatefulTestAsan: + needs: [BuilderDebAsan] + runs-on: [self-hosted, func-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + env: + TEMP_PATH: ${{runner.temp}}/stateful_debug + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Stateful tests (address, actions)' + REPO_COPY: ${{runner.temp}}/stateful_debug/ClickHouse + KILL_TIMEOUT: 
3600 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + FunctionalStatefulTestTsan: + needs: [BuilderDebTsan] + runs-on: [self-hosted, func-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + env: + TEMP_PATH: ${{runner.temp}}/stateful_tsan + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Stateful tests (thread, actions)' + REPO_COPY: ${{runner.temp}}/stateful_tsan/ClickHouse + KILL_TIMEOUT: 3600 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + FunctionalStatefulTestMsan: + needs: [BuilderDebMsan] + runs-on: [self-hosted, func-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + env: + TEMP_PATH: ${{runner.temp}}/stateful_msan + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Stateful tests (memory, actions)' + REPO_COPY: ${{runner.temp}}/stateful_msan/ClickHouse + KILL_TIMEOUT: 3600 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + FunctionalStatefulTestUBsan: + needs: [BuilderDebUBsan] + runs-on: [self-hosted, func-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + env: + TEMP_PATH: ${{runner.temp}}/stateful_ubsan + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Stateful tests (ubsan, actions)' + REPO_COPY: ${{runner.temp}}/stateful_ubsan/ClickHouse + KILL_TIMEOUT: 3600 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + FunctionalStatefulTestDebug: + needs: [BuilderDebDebug] + runs-on: [self-hosted, func-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + env: + TEMP_PATH: ${{runner.temp}}/stateful_debug + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Stateful tests (debug, actions)' + REPO_COPY: ${{runner.temp}}/stateful_debug/ClickHouse + KILL_TIMEOUT: 3600 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 
functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH +############################################################################################## +######################################### STRESS TESTS ####################################### +############################################################################################## + StressTestAsan: + needs: [BuilderDebAsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Stress test + env: + TEMP_PATH: ${{runner.temp}}/stress_thread + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Stress test (address, actions)' + REPO_COPY: ${{runner.temp}}/stress_thread/ClickHouse + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 stress_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + StressTestTsan: + needs: [BuilderDebTsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Stress test + env: + TEMP_PATH: ${{runner.temp}}/stress_thread + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Stress test (thread, actions)' + REPO_COPY: ${{runner.temp}}/stress_thread/ClickHouse + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 stress_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + StressTestMsan: + needs: [BuilderDebMsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Stress test + env: + TEMP_PATH: ${{runner.temp}}/stress_memory + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Stress test (memory, actions)' + REPO_COPY: ${{runner.temp}}/stress_memory/ClickHouse + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 stress_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + StressTestUBsan: + needs: [BuilderDebUBsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Stress test + env: + TEMP_PATH: ${{runner.temp}}/stress_undefined + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Stress test (undefined, actions)' + REPO_COPY: ${{runner.temp}}/stress_undefined/ClickHouse + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 stress_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker 
kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + StressTestDebug: + needs: [BuilderDebDebug] + runs-on: [self-hosted, stress-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Stress test + env: + TEMP_PATH: ${{runner.temp}}/stress_debug + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Stress test (debug, actions)' + REPO_COPY: ${{runner.temp}}/stress_debug/ClickHouse + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 stress_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH +############################################################################################# +############################# INTEGRATION TESTS ############################################# +############################################################################################# + IntegrationTestsAsan: + needs: [BuilderDebAsan, FunctionalStatelessTestAsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Integration test + env: + TEMP_PATH: ${{runner.temp}}/integration_tests_asan + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Integration tests (asan, actions)' + REPO_COPY: ${{runner.temp}}/integration_tests_asan/ClickHouse + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + IntegrationTestsTsan: + needs: [BuilderDebTsan, FunctionalStatelessTestTsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Integration test + env: + TEMP_PATH: ${{runner.temp}}/integration_tests_tsan + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Integration tests (thread, actions)' + REPO_COPY: ${{runner.temp}}/integration_tests_tsan/ClickHouse + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + IntegrationTestsRelease: + needs: [BuilderDebRelease, FunctionalStatelessTestRelease] + runs-on: [self-hosted, stress-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Integration test + env: + TEMP_PATH: ${{runner.temp}}/integration_tests_release + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Integration tests (release, actions)' + REPO_COPY: ${{runner.temp}}/integration_tests_release/ClickHouse + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd 
$REPO_COPY/tests/ci + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + FinishCheck: + needs: + - DockerHubPush + - BuilderReport + - FunctionalStatelessTestDebug + - FunctionalStatelessTestRelease + - FunctionalStatelessTestAsan + - FunctionalStatelessTestTsan + - FunctionalStatelessTestMsan + - FunctionalStatelessTestUBsan + - FunctionalStatefulTestDebug + - FunctionalStatefulTestRelease + - FunctionalStatefulTestAsan + - FunctionalStatefulTestTsan + - FunctionalStatefulTestMsan + - FunctionalStatefulTestUBsan + - StressTestDebug + - StressTestAsan + - StressTestTsan + - StressTestMsan + - StressTestUBsan + - IntegrationTestsAsan + - IntegrationTestsRelease + - IntegrationTestsTsan + - CompatibilityCheck + runs-on: [self-hosted, style-checker] + steps: + - name: Check out repository code + uses: actions/checkout@v2 + - name: Finish label + run: | + cd $GITHUB_WORKSPACE/tests/ci + python3 finish_check.py From cff4e9a0658d2410792e7eb8773898159fad1546 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 23 Nov 2021 12:27:39 +0300 Subject: [PATCH 179/472] Fix pr info --- tests/ci/pr_info.py | 48 ++++++++++++++++++++++++++++++++++++--------- 1 file changed, 39 insertions(+), 9 deletions(-) diff --git a/tests/ci/pr_info.py b/tests/ci/pr_info.py index 37fc17d52d80..1fdc34ccdf12 100644 --- a/tests/ci/pr_info.py +++ b/tests/ci/pr_info.py @@ -8,6 +8,26 @@ DIFF_IN_DOCUMENTATION_EXT = [".html", ".md", ".yml", ".txt", ".css", ".js", ".xml", ".ico", ".conf", ".svg", ".png", ".jpg", ".py", ".sh"] +def get_pr_for_commit(sha, ref): + try_get_pr_url = f"https://api.github.com/repos/{os.getenv('GITHUB_REPOSITORY', 'ClickHouse/ClickHouse')}/commits/{sha}/pulls" + try: + response = requests.get(try_get_pr_url) + response.raise_for_status() + data = response.json() + if len(data) > 1: + print("Got more than one pr for commit", sha) + for pr in data: + # refs for pushes looks like refs/head/XX + # refs for RPs looks like XX + if pr['head']['ref'] in ref: + return pr + print ("Cannot find PR with required ref", ref, "returning first one") + first_pr = data[0] + return first_pr + except Exception as ex: + print("Cannot fetch PR info from commit", ex) + return None + class PRInfo: def __init__(self, github_event, need_orgs=False, need_changed_files=False): if 'pull_request' in github_event: # pull request and other similar events @@ -46,22 +66,32 @@ def __init__(self, github_event, need_orgs=False, need_changed_files=False): self.changed_files = { f.path for f in diff_object } elif 'commits' in github_event: - self.number = 0 self.sha = github_event['after'] - self.labels = {} - self.repo_full_name = os.getenv('GITHUB_REPOSITORY', 'ClickHouse/ClickHouse') + pull_request = get_pr_for_commit(self.sha, github_event['ref']) repo_prefix = f"{os.getenv('GITHUB_SERVER_URL', 'https://github.com')}/{os.getenv('GITHUB_REPOSITORY', 'ClickHouse/ClickHouse')}" self.task_url = f"{repo_prefix}/actions/runs/{os.getenv('GITHUB_RUN_ID')}" self.commit_html_url = f"{repo_prefix}/commits/{self.sha}" - self.pr_html_url = f"{repo_prefix}/commits/master" - self.base_ref = "master" - self.base_name = self.repo_full_name - self.head_ref = "master" - self.head_name = self.repo_full_name + self.repo_full_name = os.getenv('GITHUB_REPOSITORY', 'ClickHouse/ClickHouse') + if pull_request is None or pull_request['state'] == 'closed': # it's merged PR to master + self.number = 0 + self.labels = {} + 
self.pr_html_url = f"{repo_prefix}/commits/master" + self.base_ref = "master" + self.base_name = self.repo_full_name + self.head_ref = "master" + self.head_name = self.repo_full_name + else: + self.number = pull_request['number'] + self.labels = { l['name'] for l in pull_request['labels'] } + self.base_ref = pull_request['base']['ref'] + self.base_name = pull_request['base']['repo']['full_name'] + self.head_ref = pull_request['head']['ref'] + self.head_name = pull_request['head']['repo']['full_name'] + self.pr_html_url = pull_request['html_url'] if need_changed_files: commit_before = github_event['before'] - response = requests.get(f"{os.getenv('GITHUB_SERVER_URL')}/repos/{os.getenv('GITHUB_REPOSITORY')}/compare/{commit_before}...{self.sha}") + response = requests.get(f"https://api.github.com/repos/{os.getenv('GITHUB_REPOSITORY')}/compare/{commit_before}...{self.sha}") response.raise_for_status() diff = response.json() From 986187bd0208f6ad28db04ac4389b70de59d4997 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 23 Nov 2021 12:33:30 +0300 Subject: [PATCH 180/472] Fix images --- tests/ci/pr_info.py | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/tests/ci/pr_info.py b/tests/ci/pr_info.py index 1fdc34ccdf12..1a09646b01c4 100644 --- a/tests/ci/pr_info.py +++ b/tests/ci/pr_info.py @@ -30,6 +30,7 @@ def get_pr_for_commit(sha, ref): class PRInfo: def __init__(self, github_event, need_orgs=False, need_changed_files=False): + print("EVENT", github_event) if 'pull_request' in github_event: # pull request and other similar events self.number = github_event['number'] if 'after' in github_event: @@ -90,15 +91,21 @@ def __init__(self, github_event, need_orgs=False, need_changed_files=False): self.pr_html_url = pull_request['html_url'] if need_changed_files: - commit_before = github_event['before'] - response = requests.get(f"https://api.github.com/repos/{os.getenv('GITHUB_REPOSITORY')}/compare/{commit_before}...{self.sha}") - response.raise_for_status() - diff = response.json() - - if 'files' in diff: - self.changed_files = [f['filename'] for f in diff['files']] + if self.number == 0: + commit_before = github_event['before'] + response = requests.get(f"https://api.github.com/repos/{os.getenv('GITHUB_REPOSITORY')}/compare/{commit_before}...{self.sha}") + response.raise_for_status() + diff = response.json() + + if 'files' in diff: + self.changed_files = [f['filename'] for f in diff['files']] + else: + self.changed_files = set([]) else: - self.changed_files = set([]) + diff_url = pull_request['diff_url'] + diff = urllib.request.urlopen(diff_url) + diff_object = PatchSet(diff, diff.headers.get_charsets()[0]) + self.changed_files = { f.path for f in diff_object } else: self.changed_files = set([]) else: From f0ce5f362c899d6cceb3940b6bcd96f865876050 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Tue, 23 Nov 2021 02:39:21 +0300 Subject: [PATCH 181/472] Merge pull request #31637 from pkit/fix_kerberized_hadoop fix kerberized_hadoop image (cherry picked from commit 2a7ceca14af807050923eb01716aee769069f3ba) --- docker/images.json | 4 ---- .../integration/kerberized_hadoop/Dockerfile | 17 ++++++++--------- 2 files changed, 8 insertions(+), 13 deletions(-) diff --git a/docker/images.json b/docker/images.json index e2e224685969..c6b03fe2de50 100644 --- a/docker/images.json +++ b/docker/images.json @@ -13,10 +13,6 @@ "docker/test/codebrowser" ] }, - "docker/packager/unbundled": { - "name": "yandex/clickhouse-unbundled-builder", - "dependent": [] - }, 
"docker/test/compatibility/centos": { "name": "yandex/clickhouse-test-old-centos", "dependent": [] diff --git a/docker/test/integration/kerberized_hadoop/Dockerfile b/docker/test/integration/kerberized_hadoop/Dockerfile index 5f256e350a6b..ccd5f38dec3e 100644 --- a/docker/test/integration/kerberized_hadoop/Dockerfile +++ b/docker/test/integration/kerberized_hadoop/Dockerfile @@ -2,18 +2,17 @@ FROM sequenceiq/hadoop-docker:2.7.0 -RUN sed -i -e 's/^\#baseurl/baseurl/' /etc/yum.repos.d/CentOS-Base.repo && \ - sed -i -e 's/^mirrorlist/#mirrorlist/' /etc/yum.repos.d/CentOS-Base.repo && \ - sed -i -e 's#http://mirror.centos.org/#http://vault.centos.org/#' /etc/yum.repos.d/CentOS-Base.repo - # https://community.letsencrypt.org/t/rhel-centos-6-openssl-client-compatibility-after-dst-root-ca-x3-expiration/161032/81 RUN sed -i s/xMDkzMDE0MDExNVow/0MDkzMDE4MTQwM1ow/ /etc/pki/tls/certs/ca-bundle.crt -RUN yum clean all && \ - rpm --rebuilddb && \ - yum -y update && \ - yum -y install yum-plugin-ovl && \ - yum --quiet -y install krb5-workstation.x86_64 + +RUN curl -o krb5-libs-1.10.3-65.el6.x86_64.rpm ftp://ftp.pbone.net/mirror/vault.centos.org/6.10/os/x86_64/Packages/krb5-libs-1.10.3-65.el6.x86_64.rpm && \ + curl -o krb5-workstation-1.10.3-65.el6.x86_64.rpm ftp://ftp.pbone.net/mirror/vault.centos.org/6.9/os/x86_64/Packages/krb5-workstation-1.10.3-65.el6.x86_64.rpm && \ + curl -o libkadm5-1.10.3-65.el6.x86_64.rpm ftp://ftp.pbone.net/mirror/vault.centos.org/6.10/os/x86_64/Packages/libkadm5-1.10.3-65.el6.x86_64.rpm && \ + curl -o libss-1.41.12-24.el6.x86_64.rpm ftp://ftp.pbone.net/mirror/vault.centos.org/6.9/cr/x86_64/Packages/libss-1.41.12-24.el6.x86_64.rpm && \ + curl -o libcom_err-1.41.12-24.el6.x86_64.rpm ftp://ftp.pbone.net/mirror/vault.centos.org/6.9/cr/x86_64/Packages/libcom_err-1.41.12-24.el6.x86_64.rpm && \ + rpm -Uvh libkadm5-1.10.3-65.el6.x86_64.rpm libss-1.41.12-24.el6.x86_64.rpm krb5-libs-1.10.3-65.el6.x86_64.rpm krb5-workstation-1.10.3-65.el6.x86_64.rpm libcom_err-1.41.12-24.el6.x86_64.rpm && \ + rm -fr *.rpm RUN cd /tmp && \ curl http://archive.apache.org/dist/commons/daemon/source/commons-daemon-1.0.15-src.tar.gz -o commons-daemon-1.0.15-src.tar.gz && \ From 2a42a5be316d7a2fa3563adae99d7442031fc3b9 Mon Sep 17 00:00:00 2001 From: Neng Liu Date: Wed, 24 Nov 2021 10:30:43 +0800 Subject: [PATCH 182/472] add jni --- .../Formats/Impl/ParquetBlockInputFormat.cpp | 4 + .../Formats/Impl/ParquetBlockInputFormat.h | 2 + .../Builder/SerializedFunctionBuilder.cpp | 103 +++++++ .../Builder/SerializedFunctionBuilder.h | 34 +++ .../Builder/SerializedPlanBuilder.cpp | 95 +++++++ .../Builder/SerializedPlanBuilder.h | 39 ++- utils/local-engine/CMakeLists.txt | 98 ++++++- utils/local-engine/Parser/CMakeLists.txt | 0 .../Parser/SerializedPlanParser.cpp | 145 ++++++++++ .../Parser/SerializedPlanParser.h | 73 +++++ utils/local-engine/java/pom.xml | 144 ++++++++++ .../java/io/kyligence/jni/engine/Chunk.java | 53 ++++ .../io/kyligence/jni/engine/LocalEngine.java | 13 + utils/local-engine/local_engine.cpp | 166 ++++++----- utils/local-engine/local_engine_jni.cpp | 263 ++++++++++++++++++ 15 files changed, 1132 insertions(+), 100 deletions(-) create mode 100644 utils/local-engine/Builder/SerializedFunctionBuilder.cpp create mode 100644 utils/local-engine/Builder/SerializedFunctionBuilder.h create mode 100644 utils/local-engine/Parser/CMakeLists.txt create mode 100644 utils/local-engine/Parser/SerializedPlanParser.cpp create mode 100644 utils/local-engine/Parser/SerializedPlanParser.h create mode 100644 
utils/local-engine/java/pom.xml create mode 100644 utils/local-engine/java/src/main/java/io/kyligence/jni/engine/Chunk.java create mode 100644 utils/local-engine/java/src/main/java/io/kyligence/jni/engine/LocalEngine.java create mode 100644 utils/local-engine/local_engine_jni.cpp diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp index 07a0e15cb6b4..e82c24736d93 100644 --- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp @@ -115,6 +115,10 @@ void ParquetBlockInputFormat::prepareReader() index += indexes_count; } } +ProcessorPtr ParquetBlockInputFormat::getParquetFormat(ReadBuffer & in_, Block header_) +{ + return std::make_shared(in_, header_); +} void registerInputFormatProcessorParquet(FormatFactory &factory) { diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.h b/src/Processors/Formats/Impl/ParquetBlockInputFormat.h index b68f97c005a1..bd67214a0a90 100644 --- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.h +++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.h @@ -17,6 +17,8 @@ class ArrowColumnToCHColumn; class ParquetBlockInputFormat : public IInputFormat { public: + static ProcessorPtr getParquetFormat(ReadBuffer & in_, Block header_); + ParquetBlockInputFormat(ReadBuffer & in_, Block header_); void resetParser() override; diff --git a/utils/local-engine/Builder/SerializedFunctionBuilder.cpp b/utils/local-engine/Builder/SerializedFunctionBuilder.cpp new file mode 100644 index 000000000000..185ac4cc0ece --- /dev/null +++ b/utils/local-engine/Builder/SerializedFunctionBuilder.cpp @@ -0,0 +1,103 @@ +#include "SerializedFunctionBuilder.h" +dbms::SerializedScalarFunctionBuilder::SerializedScalarFunctionBuilder( + int functionId, + const DB::NamesAndTypesList & args, + const std::string & func_name, + bool is_deterministic, + const DB::DataTypePtr & outputType) + : function_id(functionId), arguments(args), name(func_name), deterministic(is_deterministic), output_type(outputType) +{ +} +std::unique_ptr dbms::SerializedScalarFunctionBuilder::build() +{ + this->function = std::make_unique(); + function->mutable_name()->Add(std::move(this->name)); + function->mutable_id()->set_id(this->function_id); + function->set_deterministic(this->deterministic); + convertDataTypeToDerivationExpression(function->mutable_output_type(), this->output_type); + function->mutable_normal(); + for (const auto &arg : this->arguments) { + auto *s_arg = function->mutable_arguments()->Add(); + convertNameAndTypeToArgument(s_arg, arg); + } + return std::move(function); +} +void dbms::convertDataTypeToDerivationExpression(io::substrait::DerivationExpression * expression, DB::DataTypePtr type) +{ + DB::WhichDataType which(type); + if (which.isDate()) + { + auto * date = expression->mutable_date(); + date->set_nullability(io::substrait::Type_Nullability_REQUIRED); + } + else if (which.isInt32()) + { + auto * int_32 = expression->mutable_i32(); + int_32->set_nullability(io::substrait::Type_Nullability_REQUIRED); + } + else if (which.isInt64()) + { + auto * int_64 = expression->mutable_i64(); + int_64->set_nullability(io::substrait::Type_Nullability_REQUIRED); + } + else if (which.isFloat32()) + { + auto * float_32 = expression->mutable_fp32(); + float_32->set_nullability(io::substrait::Type_Nullability_REQUIRED); + } + else if (which.isFloat64()) + { + auto * float_64 = expression->mutable_fp64(); + 
float_64->set_nullability(io::substrait::Type_Nullability_REQUIRED); + } + else if (which.isInt8()) + { + auto * boolean = expression->mutable_bool_(); + boolean->set_nullability(io::substrait::Type_Nullability_REQUIRED); + } + else + { + throw std::runtime_error("unsupported data type " + std::string(type->getFamilyName())); + } +} + +void dbms::convertNameAndTypeToArgument(io::substrait::FunctionSignature_Argument *argument, DB::NameAndTypePair arg) +{ + argument->set_name(arg.name); + DB::WhichDataType which(arg.type); + auto * p_type = argument->mutable_type()->mutable_type(); + if (which.isDate()) + { + auto * date = p_type->mutable_date(); + date->set_nullability(io::substrait::Type_Nullability_REQUIRED); + } + else if (which.isInt32()) + { + auto * int_32 = p_type->mutable_i32(); + int_32->set_nullability(io::substrait::Type_Nullability_REQUIRED); + } + else if (which.isInt64()) + { + auto * int_64 = p_type->mutable_i64(); + int_64->set_nullability(io::substrait::Type_Nullability_REQUIRED); + } + else if (which.isFloat32()) + { + auto * float_32 = p_type->mutable_fp32(); + float_32->set_nullability(io::substrait::Type_Nullability_REQUIRED); + } + else if (which.isFloat64()) + { + auto * float_64 = p_type->mutable_fp64(); + float_64->set_nullability(io::substrait::Type_Nullability_REQUIRED); + } + else if (which.isInt8()) + { + auto * boolean = p_type->mutable_bool_(); + boolean->set_nullability(io::substrait::Type_Nullability_REQUIRED); + } + else + { + throw std::runtime_error("unsupported data type " + std::string(arg.type->getFamilyName())); + } +} diff --git a/utils/local-engine/Builder/SerializedFunctionBuilder.h b/utils/local-engine/Builder/SerializedFunctionBuilder.h new file mode 100644 index 000000000000..c260c47778a1 --- /dev/null +++ b/utils/local-engine/Builder/SerializedFunctionBuilder.h @@ -0,0 +1,34 @@ +#pragma once +#include +#include + +namespace dbms +{ + +void convertDataTypeToDerivationExpression(io::substrait::DerivationExpression* expression, DB::DataTypePtr type); + +void convertNameAndTypeToArgument(io::substrait::FunctionSignature_Argument* argument, DB::NameAndTypePair args); + +class SerializedFunctionBuilder +{ +}; + +class SerializedScalarFunctionBuilder +{ +public: + SerializedScalarFunctionBuilder( + int functionId, + const DB::NamesAndTypesList & args, + const std::string & func_name, + bool is_deterministic, + const DB::DataTypePtr & outputType); + std::unique_ptr build(); +private: + int function_id; + DB::NamesAndTypesList arguments; + std::string name; + bool deterministic; + DB::DataTypePtr output_type; + std::unique_ptr function; +}; +} diff --git a/utils/local-engine/Builder/SerializedPlanBuilder.cpp b/utils/local-engine/Builder/SerializedPlanBuilder.cpp index 11cd00611936..742fa8548159 100644 --- a/utils/local-engine/Builder/SerializedPlanBuilder.cpp +++ b/utils/local-engine/Builder/SerializedPlanBuilder.cpp @@ -1 +1,96 @@ #include "SerializedPlanBuilder.h" + +namespace dbms +{ +std::unique_ptr SerializedSchemaBuilder::build() +{ + for (const auto & [name, type] : this->type_map) + { + this->schema->add_names(name); + auto *type_struct = this->schema->mutable_struct_(); + if (type == "I8") + { + auto *t = type_struct->mutable_types()->Add(); + t->mutable_i8()->set_nullability( + this->nullability_map[name] ? io::substrait::Type_Nullability_NULLABLE : io::substrait::Type_Nullability_REQUIRED); + } + else if (type == "I32") + { + auto *t = type_struct->mutable_types()->Add(); + t->mutable_i32()->set_nullability( + this->nullability_map[name] ? 
io::substrait::Type_Nullability_NULLABLE : io::substrait::Type_Nullability_REQUIRED); + } + else if (type == "Boolean") + { + auto *t = type_struct->mutable_types()->Add(); + t->mutable_bool_()->set_nullability( + this->nullability_map[name] ? io::substrait::Type_Nullability_NULLABLE : io::substrait::Type_Nullability_REQUIRED); + } + else if (type == "I16") + { + auto *t = type_struct->mutable_types()->Add(); + t->mutable_i16()->set_nullability( + this->nullability_map[name] ? io::substrait::Type_Nullability_NULLABLE : io::substrait::Type_Nullability_REQUIRED); + } + else if (type == "String") + { + auto *t = type_struct->mutable_types()->Add(); + t->mutable_string()->set_nullability( + this->nullability_map[name] ? io::substrait::Type_Nullability_NULLABLE : io::substrait::Type_Nullability_REQUIRED); + } + else { + throw "doesn't support type "+ type; + } + } + return std::move(this->schema); +} +SerializedSchemaBuilder & SerializedSchemaBuilder::column(std::string name, std::string type, bool nullable) +{ + this->type_map.emplace(name, type); + this->nullability_map.emplace(name, nullable); + return *this; +} +SerializedSchemaBuilder::SerializedSchemaBuilder():schema(std::make_unique()) +{ +} +SerializedPlanBuilder & SerializedPlanBuilder::filter(std::string lhs, CompareOperator compareOperator, int value) +{ + this->filters.push_back(std::make_tuple(lhs, compareOperator, value)); + return *this; +} +SerializedPlanBuilder & SerializedPlanBuilder::files(std::string path, SchemaPtr schema) +{ + this->source = path; + this->data_schema = std::move(schema); + return *this; +} +std::unique_ptr SerializedPlanBuilder::build() +{ +// for (const auto & [lhs, compareOperator, value] : this->filters) +// { +// auto filter_rel = std::make_shared(); +// auto *function = filter_rel->mutable_condition()->mutable_scalar_function(); +// function->mutable_id()->set_id(1); +// auto *args = function->mutable_args(); +// +// auto arg1 = io::substrait::Expression(); +// arg1.literal().i32(); +// args->Add(std::move(arg1)); +// +// auto arg2 = io::substrait::Expression();co +// arg2.literal().i8() +// } +// +// filter_rel->mutable_input()->set_allocated_read(read_rel.get()) + auto *rel = this->plan->mutable_relations()->Add(); + auto *read_rel = rel->mutable_read(); + auto *local_files = read_rel->mutable_local_files(); + auto *file = local_files->mutable_items()->Add(); + file->set_uri_path(this->source); + read_rel->mutable_base_schema()->CopyFrom(*this->data_schema); + return std::move(this->plan); +} +SerializedPlanBuilder::SerializedPlanBuilder():plan(std::make_unique()) +{ +} +} diff --git a/utils/local-engine/Builder/SerializedPlanBuilder.h b/utils/local-engine/Builder/SerializedPlanBuilder.h index 545906a603d0..a45242703b3d 100644 --- a/utils/local-engine/Builder/SerializedPlanBuilder.h +++ b/utils/local-engine/Builder/SerializedPlanBuilder.h @@ -1,21 +1,37 @@ #pragma once -#include +#include namespace dbms { + +enum CompareOperator { + LESS, + EQUAL, + GREATER +}; +using SchemaPtr = std::unique_ptr; +using Filter = std::tuple; + class SerializedPlanBuilder { public: - SerializedPlanBuilder& filter(); - SerializedPlanBuilder& aggregate(); - SerializedPlanBuilder& project(); - io::substrait::Plan build(); -public: - static SerializedPlanBuilder& read(); + SerializedPlanBuilder(); + SerializedPlanBuilder& filter(std::string lhs, CompareOperator compareOperator, int value); + SerializedPlanBuilder& files(std::string path, SchemaPtr schema); +// SerializedPlanBuilder& aggregate(); +// SerializedPlanBuilder& 
project(); + std::unique_ptr build(); + + std::vector filters; + std::string source; + SchemaPtr data_schema; + std::unique_ptr plan; }; + +using Type = io::substrait::Type; /** * build a schema, need define column name and column. * 1. column name @@ -24,13 +40,12 @@ class SerializedPlanBuilder */ class SerializedSchemaBuilder { public: - io::substrait::Type_NamedStruct build(); - SerializedPlanBuilder& column(std::string name, std::string type, bool nullable = false); -public: - static SerializedSchemaBuilder& builder(); - + SerializedSchemaBuilder(); + std::unique_ptr build(); + SerializedSchemaBuilder& column(std::string name, std::string type, bool nullable = false); private: std::map type_map; std::map nullability_map; + std::unique_ptr schema; }; } diff --git a/utils/local-engine/CMakeLists.txt b/utils/local-engine/CMakeLists.txt index 40106276a68a..368e5d07e721 100644 --- a/utils/local-engine/CMakeLists.txt +++ b/utils/local-engine/CMakeLists.txt @@ -1,32 +1,100 @@ +set(ARROW_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/src") +set(CLICKHOUSE_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/src") +set(USE_INTERNAL_PARQUET_LIBRARY 1) set(RAPIDJSON_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/rapidjson/include") + +cmake_minimum_required(VERSION 3.11) +if(CMAKE_VERSION VERSION_LESS 3.11) + message(FATAL_ERROR "Building local engine JNI bindings requires CMake version >= 3.11") +endif() + +# Find java/jni +include(FindJava) +include(UseJava) +include(FindJNI) +message("JNI_INCLUDE_DIRS ${JNI_INCLUDE_DIRS}") +include_directories(${JNI_INCLUDE_DIRS}) + +get_filename_component(JAVA_MAIN_CLASS_PATH + ${PROJECT_SOURCE_DIR}/utils/local-engine/java/src/main/java + ABSOLUTE) +set(JNI_NATIVE_SOURCES + local_engine_jni.cpp) + +set(JAVA_MAIN_CLASSES + ${JAVA_MAIN_CLASS_PATH}/io/kyligence/jni/engine/LocalEngine.java + ${JAVA_MAIN_CLASS_PATH}/io/kyligence/jni/engine/Chunk.java + ) +# Create the jni header file (from the java class). +set(JNI_HEADERS_DIR ${PROJECT_SOURCE_DIR}/utils/local-engine/include) +file(MAKE_DIRECTORY ${JNI_HEADERS_DIR}) +if(${Java_VERSION_MAJOR} VERSION_GREATER_EQUAL "10" AND ${CMAKE_VERSION} VERSION_LESS "3.11.4") + # Java 10 and newer don't have javah, but the alternative GENERATE_NATIVE_HEADERS requires CMake 3.11.4 or newer + message(FATAL_ERROR "Detected Java 10 or newer (${Java_VERSION_STRING}), to build with CMake please upgrade CMake to 3.11.4 or newer") + +elseif(${CMAKE_VERSION} VERSION_LESS "3.11.4" OR (${Java_VERSION_MINOR} STREQUAL "7" AND ${Java_VERSION_MAJOR} STREQUAL "1")) + # Old CMake or Java 1.7 prepare the JAR... + message(FATAL_ERROR "Don't support old JDK ${Java_VERSION_STRING}") + +else () + # Java 1.8 or newer prepare the JAR... 
+ message("Preparing Jar for JDK ${Java_VERSION_STRING}") + add_jar( + local_engine + SOURCES ${JAVA_MAIN_CLASSES} + GENERATE_NATIVE_HEADERS local_engine_headers DESTINATION ${JNI_HEADERS_DIR} + ) + message("generating headers to ${JNI_HEADERS_DIR}") +endif() + + function(add_cxx_compile_options option) add_compile_options("$<$,CXX>:${option}>") endfunction() add_cxx_compile_options(-Wzero-as-null-pointer-constant) -add_subdirectory(Substrait) -add_subdirectory(Builder) -add_headers_and_sources(builder Builder) -include_directories(${CMAKE_CURRENT_BINARY_DIR}) -add_executable (local_engine - local_engine.cpp - ${builder_headers} - ${builder_sources} - ) -target_include_directories(local_engine PRIVATE +#add_subdirectory(Substrait) +#add_subdirectory(Builder) +#add_headers_and_sources(builder Builder) +#add_headers_and_sources(parser Parser) +#include (../../cmake/find/parquet.cmake) +#include_directories(${CMAKE_CURRENT_BINARY_DIR}) +include_directories(${ClickHouse_SOURCE_DIR}/utils/local-engine) +#add_executable (local_engine +# local_engine.cpp +# ${builder_headers} +# ${builder_sources} +# ${parser_headers} +# ${parser_sources} +# ) +#target_include_directories(local_engine PRIVATE +# ${RAPIDJSON_INCLUDE_DIR} +# ${SUBSTRAIT_HEADERS} +# ${ARROW_INCLUDE_DIR} +# ) + +include_directories( ${RAPIDJSON_INCLUDE_DIR} ${SUBSTRAIT_HEADERS} + ${ARROW_INCLUDE_DIR} ) set (CLICKHOUSE_SERVER_LINK - PRIVATE - dbms + +# dbms clickhouse_aggregate_functions clickhouse_common_io clickhouse_functions - clickhouse_storages_system - substrait +# clickhouse_storages_system +# substrait ) -target_link_libraries(local_engine ${CLICKHOUSE_SERVER_LINK} ) +#target_link_libraries(local_engine ${CLICKHOUSE_SERVER_LINK} ) +#create_javah() + +set(LOCALENGINE_SHARED_LIB local_engine_jni) +add_library(${LOCALENGINE_SHARED_LIB} SHARED ${JNI_NATIVE_SOURCES}) +#add_executable(${LOCALENGINE_SHARED_LIB} ${JNI_NATIVE_SOURCES}) +add_dependencies(${LOCALENGINE_SHARED_LIB} local_engine_headers) +target_link_libraries(${LOCALENGINE_SHARED_LIB} ${CLICKHOUSE_SERVER_LINK} ) diff --git a/utils/local-engine/Parser/CMakeLists.txt b/utils/local-engine/Parser/CMakeLists.txt new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/utils/local-engine/Parser/SerializedPlanParser.cpp b/utils/local-engine/Parser/SerializedPlanParser.cpp new file mode 100644 index 000000000000..ec4c4e0aa2fd --- /dev/null +++ b/utils/local-engine/Parser/SerializedPlanParser.cpp @@ -0,0 +1,145 @@ +#include "SerializedPlanParser.h" +#include +#include +#include +#include +#include +#include +#include + +DB::BatchParquetFileSourcePtr dbms::SerializedPlanParser::parseReadRealWithLocalFile(const io::substrait::ReadRel& rel) +{ + assert(rel.has_local_files()); + assert(rel.has_base_schema()); + auto files_info = std::make_shared(); + for (const auto &item : rel.local_files().items()) + { + files_info->files.push_back(item.uri_path()); + } + return std::make_shared(files_info, parseNameStruct(rel.base_schema())); +} + +DB::Block dbms::SerializedPlanParser::parseNameStruct(const io::substrait::Type_NamedStruct & struct_) +{ + auto internal_cols = std::make_unique>(); + internal_cols->reserve(struct_.names_size()); + for (int i = 0; i < struct_.names_size(); ++i) + { + const auto& name = struct_.names(i); + const auto& type = struct_.struct_().types(i); + auto data_type = parseType(type); + internal_cols->push_back(DB::ColumnWithTypeAndName(data_type->createColumn(), data_type, name)); + } + return DB::Block(*std::move(internal_cols)); +} +DB::DataTypePtr 
dbms::SerializedPlanParser::parseType(const io::substrait::Type& type) +{ + auto & factory = DB::DataTypeFactory::instance(); + if (type.has_bool_() || type.has_i8()) + { + return factory.get("UInt8"); + } + else if (type.has_i16()) + { + return factory.get("UInt16"); + } + else if (type.has_i32()) + { + return factory.get("UInt32"); + } + else if (type.has_string()) + { + return factory.get("String"); + } + else + { + throw std::runtime_error("doesn't support type " + type.DebugString()); + } +} +DB::QueryPlanPtr dbms::SerializedPlanParser::parse(std::unique_ptr plan) +{ + auto query_plan = std::make_unique(); + if (plan->relations().Capacity() == 1) + { + auto rel = plan->relations().at(0); + if (rel.has_read()) { + std::shared_ptr source = std::dynamic_pointer_cast(SerializedPlanParser::parseReadRealWithLocalFile(rel.read())); + auto source_step = std::make_unique(Pipe(source), "Parquet"); + query_plan->addStep(std::move(source_step)); + } + else + { + throw std::runtime_error("unsupported relation"); + } + } + else + { + throw std::runtime_error("unsupported relation"); + } + return query_plan; +} +DB::Chunk DB::BatchParquetFileSource::generate() +{ + while (!finished_generate) + { + /// Open file lazily on first read. This is needed to avoid too many open files from different streams. + if (!reader) + { + auto current_file = files_info->next_file_to_read.fetch_add(1); + if (current_file >= files_info->files.size()) + return {}; + + current_path = files_info->files[current_file]; + std::unique_ptr nested_buffer; + + struct stat file_stat{}; + + /// Check if file descriptor allows random reads (and reading it twice). + if (0 != stat(current_path.c_str(), &file_stat)) + throw std::runtime_error("Cannot stat file " + current_path); + + if (S_ISREG(file_stat.st_mode)) + nested_buffer = std::make_unique(current_path); + else + nested_buffer = std::make_unique(current_path); + + + read_buf = std::move(nested_buffer); + auto format = DB::ParquetBlockInputFormat::getParquetFormat(*read_buf, header); + + pipeline = std::make_unique(); + pipeline->init(Pipe(format)); + + reader = std::make_unique(*pipeline); + } + + Chunk chunk; + if (reader->pull(chunk)) + { + return chunk; + } + + finished_generate = true; + + /// Close file prematurely if stream was ended. 
+ reader.reset(); + pipeline.reset(); + read_buf.reset(); + } + + return {}; +} +DB::BatchParquetFileSource::BatchParquetFileSource( + FilesInfoPtr files, const DB::Block & sample) + : SourceWithProgress(sample), files_info(files), header(sample) +{ +} +void dbms::LocalExecutor::execute(DB::QueryPlanPtr query_plan) +{ + QueryPlanOptimizationSettings optimization_settings{.optimize_plan = false}; + auto query_pipeline = query_plan->buildQueryPipeline(optimization_settings, BuildQueryPipelineSettings()); + auto executor = DB::PullingPipelineExecutor(*query_pipeline); + DB::Chunk chunk; + // TODO pull chunk +} + diff --git a/utils/local-engine/Parser/SerializedPlanParser.h b/utils/local-engine/Parser/SerializedPlanParser.h new file mode 100644 index 000000000000..603a2cf926db --- /dev/null +++ b/utils/local-engine/Parser/SerializedPlanParser.h @@ -0,0 +1,73 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +struct FilesInfo +{ + std::vector files; + std::atomic next_file_to_read = 0; +}; +using FilesInfoPtr = std::shared_ptr; + +class BatchParquetFileSource : DB::SourceWithProgress +{ +public: + BatchParquetFileSource(FilesInfoPtr files, const Block & header); + +private: + String getName() const override + { + return "BatchParquetFileSource"; + } + +protected: + Chunk generate() override; + +private: + FilesInfoPtr files_info; + std::unique_ptr read_buf; + std::unique_ptr pipeline; + std::unique_ptr reader; + bool finished_generate = false; + std::string current_path; + Block header; +}; + +using BatchParquetFileSourcePtr = std::shared_ptr; +} + + +namespace dbms +{ +using namespace DB; + + +class SerializedPlanParser +{ +public: + static DB::QueryPlanPtr parse(std::unique_ptr plan); + static DB::BatchParquetFileSourcePtr parseReadRealWithLocalFile(const io::substrait::ReadRel& rel); + static DB::Block parseNameStruct(const io::substrait::Type_NamedStruct& struct_); + static DB::DataTypePtr parseType(const io::substrait::Type& type); +}; + + +class LocalExecutor +{ + static void execute(QueryPlanPtr query_plan); +}; +} + + diff --git a/utils/local-engine/java/pom.xml b/utils/local-engine/java/pom.xml new file mode 100644 index 000000000000..7680287e1e09 --- /dev/null +++ b/utils/local-engine/java/pom.xml @@ -0,0 +1,144 @@ + + + io.kylingence.jni + 0.1.0-SNAPSHOT + 4.0.0 + local-engine + ClickHouse Local Engine + + + 1.8 + 1.8 + 1.8 + UTF-8 + 1.7.25 + 2.4.4 + 2.11.12 + 2.11 + 2.6.5 + compile + 5.6.1 + 1.6.1 + 2.6.5 + 0.9.2-kylin-r3 + 20.0 + + + + + + + + + net.alchim31.maven + scala-maven-plugin + 3.2.2 + + + + add-source + + + + scala-compile-first + process-resources + + compile + + + + scala-test-compile + process-test-resources + + testCompile + + + + + + + org.scalatest + scalatest-maven-plugin + 2.0.0 + + ${project.build.directory}/surefire-reports + + . 
+ WDF TestSuite.txt + + + + test + + test + + + + + + + org.apache.maven.plugins + maven-surefire-plugin + 2.18.1 + + + false + true + + **/*Test.* + **/*Suite.* + + + + + org.apache.maven.plugins + maven-shade-plugin + 3.2.0 + + + org.apache.maven.plugins + maven-compiler-plugin + 3.5.1 + + ${maven.compiler.source} + ${maven.compiler.target} + + + + maven-clean-plugin + 2.5 + + + maven-deploy-plugin + 2.8.1 + + + maven-install-plugin + 2.5.1 + + + maven-jar-plugin + 2.4 + + + maven-javadoc-plugin + 2.9.1 + + + maven-resources-plugin + 2.6 + + + maven-site-plugin + 3.3 + + + maven-source-plugin + 2.2.1 + + + + + diff --git a/utils/local-engine/java/src/main/java/io/kyligence/jni/engine/Chunk.java b/utils/local-engine/java/src/main/java/io/kyligence/jni/engine/Chunk.java new file mode 100644 index 000000000000..4f9d5deae74b --- /dev/null +++ b/utils/local-engine/java/src/main/java/io/kyligence/jni/engine/Chunk.java @@ -0,0 +1,53 @@ +package io.kyligence.jni.engine; + +public class Chunk { + public static class ColumnInfo { + // The data stored in these two allocations need to maintain binary compatible. We can + // directly pass this buffer to external components. + private long nulls; + private long data; + + // Only set if type is Array or Map. + private long lengthData; + private long offsetData; + + public ColumnInfo(long nulls, long data, long lengthData, long offsetData) { + this.nulls = nulls; + this.data = data; + this.lengthData = lengthData; + this.offsetData = offsetData; + } + + public long getNulls() { + return nulls; + } + + public long getData() { + return data; + } + + public long getLengthData() { + return lengthData; + } + + public long getOffsetData() { + return offsetData; + } + } + + private final ColumnInfo[] columns; + private final long rowCount; + + public Chunk(ColumnInfo[] columns, long rowCount) { + this.columns = columns; + this.rowCount = rowCount; + } + + public ColumnInfo[] getColumns() { + return columns; + } + + public long getRowCount() { + return rowCount; + } +} diff --git a/utils/local-engine/java/src/main/java/io/kyligence/jni/engine/LocalEngine.java b/utils/local-engine/java/src/main/java/io/kyligence/jni/engine/LocalEngine.java new file mode 100644 index 000000000000..ea9bf1b07fc5 --- /dev/null +++ b/utils/local-engine/java/src/main/java/io/kyligence/jni/engine/LocalEngine.java @@ -0,0 +1,13 @@ +package io.kyligence.jni.engine; + +public class LocalEngine { + public static native long test(int a, int b); + + public static void main(String[] args) throws InterruptedException { + System.out.println("start load library"); + System.load("/Users/neng.liu/Documents/GitHub/ClickHouse/cmake-build-debug/utils/local-engine/liblocal_engine_jnid.dylib"); + System.out.println("start in java"); + long result = test(1, 2); + System.out.println(result); + } +} diff --git a/utils/local-engine/local_engine.cpp b/utils/local-engine/local_engine.cpp index 125881a355ee..8035ad51d3f9 100644 --- a/utils/local-engine/local_engine.cpp +++ b/utils/local-engine/local_engine.cpp @@ -1,38 +1,38 @@ -#include "Poco/Logger.h" +#include +#include #include #include -#include -#include #include #include -#include -#include +#include +#include +#include "Poco/Logger.h" +#include +#include #include +#include #include #include -#include -#include #include -#include +#include +#include #include -#include -#include -#include -#include -#include #include -#include +#include +#include +#include #include -#include +#include +#include #include #include +#include #include #include #include 
-#include -#include +#include using namespace DB; using namespace rapidjson; @@ -61,9 +61,10 @@ Block getTableHeader(std::map & cols) return Block(*internalCols); } -std::shared_ptr getSource(ReadBuffer & buf, Block &header) { +std::shared_ptr getSource(ReadBuffer & buf, Block & header) +{ FormatSettings settings; - return std::make_shared(header, buf, RowInputFormatParams{.max_block_size=100}, false, settings); + return std::make_shared(header, buf, RowInputFormatParams{.max_block_size = 100}, false, settings); } @@ -91,23 +92,25 @@ void registerAllFunctions() FunctionOverloadResolverPtr getFunction(const std::string & name, ContextPtr context) { - auto & factory = FunctionFactory::instance(); return factory.get(name, context); } -AggregateFunctionPtr getAggregateFunction(const std::string & name, DataTypes arg_types) { +AggregateFunctionPtr getAggregateFunction(const std::string & name, DataTypes arg_types) +{ auto & factory = AggregateFunctionFactory::instance(); AggregateFunctionProperties properties; return factory.get(name, arg_types, Array{}, properties); } -ActionsDAG::NodeRawConstPtrs getArguments(ActionsDAG::NodeRawConstPtrs nodes, std::vector& args) { +ActionsDAG::NodeRawConstPtrs getArguments(ActionsDAG::NodeRawConstPtrs nodes, std::vector & args) +{ ActionsDAG::NodeRawConstPtrs result; result.reserve(args.size()); - for (const auto &item : nodes) + for (const auto & item : nodes) { - if (std::find(args.begin(), args.end(), item->result_name) != args.end()) { + if (std::find(args.begin(), args.end(), item->result_name) != args.end()) + { result.emplace_back(item); } } @@ -117,7 +120,7 @@ ActionsDAG::NodeRawConstPtrs getArguments(ActionsDAG::NodeRawConstPtrs nodes, st NamesAndTypesList blockToNameAndTypeList(Block & header) { NamesAndTypesList types; - for (const auto &name : header.getNames()) + for (const auto & name : header.getNames()) { auto column = header.findByName(name); types.push_back(NameAndTypePair(column->name, column->type)); @@ -128,19 +131,20 @@ NamesAndTypesList blockToNameAndTypeList(Block & header) QueryPlanStepPtr buildFilter(Block & header, ContextPtr context) { auto actions_dag = std::make_shared(std::move(blockToNameAndTypeList(header))); -// auto int_type = std::make_shared(); -// auto const_node = actions_dag->addInput(ColumnWithTypeAndName(int_type->createColumnConst(1, 4), int_type, "_1")); -// actions_dag->addOrReplaceInIndex(const_node); + // auto int_type = std::make_shared(); + // auto const_node = actions_dag->addInput(ColumnWithTypeAndName(int_type->createColumnConst(1, 4), int_type, "_1")); + // actions_dag->addOrReplaceInIndex(const_node); std::string empty_string; std::vector args = {"x1", "x2"}; - const auto & filter_node = actions_dag->addFunction(std::move(getFunction("less", context)), getArguments(actions_dag->getIndex(), args), std::move(empty_string)); + const auto & filter_node = actions_dag->addFunction( + std::move(getFunction("less", context)), getArguments(actions_dag->getIndex(), args), std::move(empty_string)); actions_dag->getIndex().push_back(&filter_node); - DataStream input_stream = DataStream{.header=header}; + DataStream input_stream = DataStream{.header = header}; auto filter = std::make_unique(input_stream, actions_dag, std::move(filter_node.result_name), true); return std::move(filter); } -void buildAgg(Block & header, QueryPlan& query_plan, ContextPtr context) +void buildAgg(Block & header, QueryPlan & query_plan, ContextPtr context) { auto aggregates = AggregateDescriptions(); auto count = AggregateDescription(); @@ 
-190,50 +194,66 @@ void buildAgg(Block & header, QueryPlan& query_plan, ContextPtr context) query_plan.addStep(std::move(aggregating_step)); } -int main(int, char **) +void generateSubStraitPlan() { - auto plan = io::substrait::Plan(); - plan.add_relations()->read(); - auto table = plan.mutable_relations(0); - auto local_files = table->mutable_read()->mutable_local_files(); - auto file = io::substrait::ReadRel_LocalFiles_FileOrFiles(); - file.set_uri_path("test.txt"); - local_files->mutable_items()->Add(std::move(file)); - std::cout << plan.SerializeAsString(); + dbms::SerializedSchemaBuilder schema_builder; + auto schema = schema_builder.column("x1", "I8").column("x2", "String").build(); + std::cout << schema->SerializeAsString(); + dbms::SerializedPlanBuilder plan_builder; + auto plan = plan_builder.files("/test/test.csv", std::move(schema)).build(); + std::cout << plan->SerializeAsString(); + std::ofstream output; + output.open("/Users/neng.liu/Documents/GitHub/ClickHouse/plan.txt", std::fstream::in | std::fstream::out | std::fstream::trunc); + // output << plan->SerializeAsString(); + plan->SerializeToOstream(&output); + output.flush(); + output.close(); +} -// auto shared_context = Context::createShared(); -// auto global_context = Context::createGlobal(shared_context.get()); -// registerAllFunctions(); -// auto & factory = FunctionFactory::instance(); -// std::ifstream ifs("/Users/neng.liu/Documents/GitHub/ClickHouse/utils/local-engine/table.json"); -// IStreamWrapper isw(ifs); -// -// Document d; -// d.ParseStream(isw); -// auto cols = getColumns(d); -// auto header = getTableHeader(*cols); -// -// QueryPlan query_plan; -// auto file = "/Users/neng.liu/Documents/GitHub/ClickHouse/utils/local-engine/table.csv"; -// auto buf = std::make_unique(file); -// -// auto source = getSource(*buf, header); -// -// std::unique_ptr query_pipelines = std::make_unique(); -// auto source_step = std::make_unique(Pipe(source), "CSV"); -// query_plan.addStep(std::move(source_step)); -// -// auto filter = buildFilter(header, global_context); -// query_plan.addStep(std::move(filter)); -// buildAgg(header, query_plan, global_context); -// QueryPlanOptimizationSettings optimization_settings{.optimize_plan=false}; -// auto query_pipline = query_plan.buildQueryPipeline(optimization_settings, BuildQueryPipelineSettings()); -// -// auto buffer = WriteBufferFromFile("/Users/neng.liu/Documents/GitHub/ClickHouse/output.txt"); -// auto output = std::make_shared(buffer, query_pipline->getHeader(), true, RowOutputFormatParams(), FormatSettings()); -// query_pipline->setOutputFormat(output); -// auto executor = query_pipline->execute(); -// executor->execute(1); +void runSamplePipeline() +{ + auto shared_context = Context::createShared(); + auto global_context = Context::createGlobal(shared_context.get()); + registerAllFunctions(); + auto & factory = FunctionFactory::instance(); + std::ifstream ifs("/Users/neng.liu/Documents/GitHub/ClickHouse/utils/local-engine/table.json"); + IStreamWrapper isw(ifs); + + Document d; + d.ParseStream(isw); + auto cols = getColumns(d); + auto header = getTableHeader(*cols); + + QueryPlan query_plan; + auto file = "/Users/neng.liu/Documents/GitHub/ClickHouse/utils/local-engine/table.csv"; + auto buf = std::make_unique(file); + + auto source = getSource(*buf, header); + + std::unique_ptr query_pipelines = std::make_unique(); + auto source_step = std::make_unique(Pipe(source), "CSV"); + query_plan.addStep(std::move(source_step)); + + auto filter = buildFilter(header, global_context); + 
query_plan.addStep(std::move(filter)); + buildAgg(header, query_plan, global_context); + QueryPlanOptimizationSettings optimization_settings{.optimize_plan = false}; + auto query_pipline = query_plan.buildQueryPipeline(optimization_settings, BuildQueryPipelineSettings()); + + auto buffer = WriteBufferFromFile("/Users/neng.liu/Documents/GitHub/ClickHouse/output.txt"); + auto output = std::make_shared(buffer, query_pipline->getHeader(), true, RowOutputFormatParams(), FormatSettings()); + query_pipline->setOutputFormat(output); + auto executor = query_pipline->execute(); + executor->execute(1); +} + +void generateFunctions() { +} + +int main(int, char **) +{ +// generateSubStraitPlan(); + runSamplePipeline(); } // auto col = ColumnUInt8::create(1, 1); diff --git a/utils/local-engine/local_engine_jni.cpp b/utils/local-engine/local_engine_jni.cpp new file mode 100644 index 000000000000..d354ebc562c7 --- /dev/null +++ b/utils/local-engine/local_engine_jni.cpp @@ -0,0 +1,263 @@ +#include "include/io_kyligence_jni_engine_LocalEngine.h" +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +//#include + +using namespace DB; +using namespace rapidjson; + +/** + * SQL example:++ + * + * SELECT min(x1),max(x2),sum(x3),count(x4),avg(x5) FROM table1 WHERE x6=* GROUP BY x7 + * + * table defination + * SQL columns: + * project + * filter + * aggregate + */ +Block getTableHeader(std::map & cols) +{ + auto internalCols = std::make_shared>(); + internalCols->reserve(cols.size()); + for (const auto & [key, value] : cols) + { + ColumnWithTypeAndName col; + auto & data_type_factory = DataTypeFactory::instance(); + auto type = data_type_factory.get(value); + internalCols->push_back(ColumnWithTypeAndName(type->createColumn(), type, key)); + } + return Block(*internalCols); +} + +std::shared_ptr getSource(ReadBuffer & buf, Block & header) +{ + FormatSettings settings; + return std::make_shared(header, buf, RowInputFormatParams{.max_block_size = 100}, false, settings); +} + + +std::shared_ptr> getColumns(Document & config) +{ + auto columns = std::make_shared>(); + auto cols = config["columns"].GetArray(); + for (auto * it = cols.Begin(); it != cols.End(); it++) + { + auto col = it->GetObject(); + if (columns->contains(col["name"].GetString())) + { + throw std::logic_error("duplicate column"); + } + columns->emplace(col["name"].GetString(), col["type"].GetString()); + } + return columns; +} + +void registerAllFunctions() +{ + registerFunctions(); + registerAggregateFunctions(); +} + +FunctionOverloadResolverPtr getFunction(const std::string & name, ContextPtr context) +{ + auto & factory = FunctionFactory::instance(); + return factory.get(name, context); +} + +AggregateFunctionPtr getAggregateFunction(const std::string & name, DataTypes arg_types) +{ + auto & factory = AggregateFunctionFactory::instance(); + AggregateFunctionProperties properties; + return factory.get(name, arg_types, Array{}, properties); +} + +ActionsDAG::NodeRawConstPtrs getArguments(ActionsDAG::NodeRawConstPtrs nodes, std::vector & args) +{ + ActionsDAG::NodeRawConstPtrs result; + result.reserve(args.size()); + for (const auto & item : nodes) + { + if (std::find(args.begin(), args.end(), item->result_name) != args.end()) + { + result.emplace_back(item); + } + } + return result; +} + 
+NamesAndTypesList blockToNameAndTypeList(Block & header) +{ + NamesAndTypesList types; + for (const auto & name : header.getNames()) + { + auto column = header.findByName(name); + types.push_back(NameAndTypePair(column->name, column->type)); + } + return types; +} + +QueryPlanStepPtr buildFilter(Block & header, ContextPtr context) +{ + auto actions_dag = std::make_shared(std::move(blockToNameAndTypeList(header))); + // auto int_type = std::make_shared(); + // auto const_node = actions_dag->addInput(ColumnWithTypeAndName(int_type->createColumnConst(1, 4), int_type, "_1")); + // actions_dag->addOrReplaceInIndex(const_node); + std::string empty_string; + std::vector args = {"x1", "x2"}; + const auto & filter_node = actions_dag->addFunction( + std::move(getFunction("less", context)), getArguments(actions_dag->getIndex(), args), std::move(empty_string)); + actions_dag->getIndex().push_back(&filter_node); + DataStream input_stream = DataStream{.header = header}; + auto filter = std::make_unique(input_stream, actions_dag, std::move(filter_node.result_name), true); + return std::move(filter); +} + +void buildAgg(Block & header, QueryPlan & query_plan, ContextPtr context) +{ + auto aggregates = AggregateDescriptions(); + auto count = AggregateDescription(); + count.column_name = "count(x2)"; + count.arguments = ColumnNumbers{1}; + count.argument_names = Names{"x2"}; + auto int_type = std::make_shared(); + count.function = getAggregateFunction("count", {int_type}); + aggregates.push_back(count); + Settings settings; + Aggregator::Params params( + header, + ColumnNumbers{0}, + aggregates, + false, + settings.max_rows_to_group_by, + settings.group_by_overflow_mode, + settings.group_by_two_level_threshold, + settings.group_by_two_level_threshold_bytes, + settings.max_bytes_before_external_group_by, + settings.empty_result_for_aggregation_by_empty_set, + context->getTemporaryVolume(), + settings.max_threads, + settings.min_free_disk_space_for_temporary_data, + settings.compile_aggregate_expressions, + settings.min_count_to_compile_aggregate_expression); + + SortDescription group_by_sort_description; + + auto merge_threads = 1; + auto temporary_data_merge_threads = settings.aggregation_memory_efficient_merge_threads + ? 
static_cast(settings.aggregation_memory_efficient_merge_threads) + : static_cast(settings.max_threads); + + + auto aggregating_step = std::make_unique( + query_plan.getCurrentDataStream(), + params, + true, + settings.max_block_size, + merge_threads, + temporary_data_merge_threads, + false, + nullptr, + std::move(group_by_sort_description)); + + query_plan.addStep(std::move(aggregating_step)); +} + +bool inside_main = false; + +void runSamplePipeline() +{ + auto shared_context = Context::createShared(); + auto global_context = Context::createGlobal(shared_context.get()); + registerAllFunctions(); + auto & factory = FunctionFactory::instance(); + std::ifstream ifs("/Users/neng.liu/Documents/GitHub/ClickHouse/utils/local-engine/table.json"); + IStreamWrapper isw(ifs); + + Document d; + d.ParseStream(isw); + auto cols = getColumns(d); + auto header = getTableHeader(*cols); + + QueryPlan query_plan; + auto file = "/Users/neng.liu/Documents/GitHub/ClickHouse/utils/local-engine/table.csv"; + auto buf = std::make_unique(file); + + auto source = getSource(*buf, header); + + std::unique_ptr query_pipelines = std::make_unique(); + auto source_step = std::make_unique(Pipe(source), "CSV"); + query_plan.addStep(std::move(source_step)); + + auto filter = buildFilter(header, global_context); + query_plan.addStep(std::move(filter)); + buildAgg(header, query_plan, global_context); + QueryPlanOptimizationSettings optimization_settings{.optimize_plan = false}; + auto query_pipline = query_plan.buildQueryPipeline(optimization_settings, BuildQueryPipelineSettings()); + + auto buffer = WriteBufferFromFile("/Users/neng.liu/Documents/GitHub/ClickHouse/output.txt"); + auto output = std::make_shared(buffer, query_pipline->getHeader(), true, RowOutputFormatParams(), FormatSettings()); + query_pipline->setOutputFormat(output); + auto executor = query_pipline->execute(); + executor->execute(1); +} + +int main(int, char **) +{ + inside_main = true; + runSamplePipeline(); + return 0; +} + +JNIEXPORT jlong JNICALL Java_io_kyligence_jni_engine_LocalEngine_test + (JNIEnv *env, jclass, jint a, jint b) +{ +// inside_main = true; +// std::cout << "start run pipeline." << std::endl; +// try +// { +// runSamplePipeline(); +// } +// catch (Poco::Exception e) +// { +// std::cout << e.message() << std::endl; +// std::cout << e.displayText() << std::endl; +// e.rethrow(); +// } +// std::cout << "run pipeline success." 
<< std::endl; + std::cout < Date: Wed, 24 Nov 2021 07:08:41 +0000 Subject: [PATCH 183/472] run in ubuntu success --- utils/local-engine/CMakeLists.txt | 1 + utils/local-engine/Substrait/CMakeLists.txt | 2 +- .../io/kyligence/jni/engine/LocalEngine.java | 2 +- utils/local-engine/local_engine_jni.cpp | 32 +++++++++---------- 4 files changed, 19 insertions(+), 18 deletions(-) diff --git a/utils/local-engine/CMakeLists.txt b/utils/local-engine/CMakeLists.txt index 368e5d07e721..1d6173d8ea0e 100644 --- a/utils/local-engine/CMakeLists.txt +++ b/utils/local-engine/CMakeLists.txt @@ -95,6 +95,7 @@ set (CLICKHOUSE_SERVER_LINK set(LOCALENGINE_SHARED_LIB local_engine_jni) add_library(${LOCALENGINE_SHARED_LIB} SHARED ${JNI_NATIVE_SOURCES}) +set_property(TARGET ${LOCALENGINE_SHARED_LIB} PROPERTY POSITION_INDEPENDENT_CODE ON) #add_executable(${LOCALENGINE_SHARED_LIB} ${JNI_NATIVE_SOURCES}) add_dependencies(${LOCALENGINE_SHARED_LIB} local_engine_headers) target_link_libraries(${LOCALENGINE_SHARED_LIB} ${CLICKHOUSE_SERVER_LINK} ) diff --git a/utils/local-engine/Substrait/CMakeLists.txt b/utils/local-engine/Substrait/CMakeLists.txt index 1b22913a8bae..cd49e02fe041 100644 --- a/utils/local-engine/Substrait/CMakeLists.txt +++ b/utils/local-engine/Substrait/CMakeLists.txt @@ -1,5 +1,5 @@ set(protobuf_generate_PROTOC_OUT_DIR "${ClickHouse_SOURCE_DIR}/utils/local-engine/Substrait") -file(GLOB PROTOBUF_DEFINITION_FILES "/Users/neng.liu/Documents/GitHub/substrait/binary/*.proto") +file(GLOB PROTOBUF_DEFINITION_FILES "/home/kyligence/Documents/code/substrait/binary/*.proto") include_directories(${Protobuf_INCLUDE_DIRS}) include_directories(${CMAKE_CURRENT_BINARY_DIR}) PROTOBUF_GENERATE_CPP(SUBSTRAIT_SRCS SUBSTRAIT_HEADERS ${PROTOBUF_DEFINITION_FILES}) diff --git a/utils/local-engine/java/src/main/java/io/kyligence/jni/engine/LocalEngine.java b/utils/local-engine/java/src/main/java/io/kyligence/jni/engine/LocalEngine.java index ea9bf1b07fc5..1a545cafd9b1 100644 --- a/utils/local-engine/java/src/main/java/io/kyligence/jni/engine/LocalEngine.java +++ b/utils/local-engine/java/src/main/java/io/kyligence/jni/engine/LocalEngine.java @@ -5,7 +5,7 @@ public class LocalEngine { public static void main(String[] args) throws InterruptedException { System.out.println("start load library"); - System.load("/Users/neng.liu/Documents/GitHub/ClickHouse/cmake-build-debug/utils/local-engine/liblocal_engine_jnid.dylib"); + System.load("/home/kyligence/Documents/code/ClickHouse/cmake-build-debug/utils/local-engine/liblocal_engine_jnid.so"); System.out.println("start in java"); long result = test(1, 2); System.out.println(result); diff --git a/utils/local-engine/local_engine_jni.cpp b/utils/local-engine/local_engine_jni.cpp index d354ebc562c7..8fad2fb446db 100644 --- a/utils/local-engine/local_engine_jni.cpp +++ b/utils/local-engine/local_engine_jni.cpp @@ -204,7 +204,7 @@ void runSamplePipeline() auto global_context = Context::createGlobal(shared_context.get()); registerAllFunctions(); auto & factory = FunctionFactory::instance(); - std::ifstream ifs("/Users/neng.liu/Documents/GitHub/ClickHouse/utils/local-engine/table.json"); + std::ifstream ifs("/home/kyligence/Documents/code/ClickHouse/utils/local-engine/table.json"); IStreamWrapper isw(ifs); Document d; @@ -213,7 +213,7 @@ void runSamplePipeline() auto header = getTableHeader(*cols); QueryPlan query_plan; - auto file = "/Users/neng.liu/Documents/GitHub/ClickHouse/utils/local-engine/table.csv"; + auto file = "/home/kyligence/Documents/code/ClickHouse/utils/local-engine/table.csv"; 
auto buf = std::make_unique(file); auto source = getSource(*buf, header); @@ -228,7 +228,7 @@ void runSamplePipeline() QueryPlanOptimizationSettings optimization_settings{.optimize_plan = false}; auto query_pipline = query_plan.buildQueryPipeline(optimization_settings, BuildQueryPipelineSettings()); - auto buffer = WriteBufferFromFile("/Users/neng.liu/Documents/GitHub/ClickHouse/output.txt"); + auto buffer = WriteBufferFromFile("/home/kyligence/Documents/code/ClickHouse/output.txt"); auto output = std::make_shared(buffer, query_pipline->getHeader(), true, RowOutputFormatParams(), FormatSettings()); query_pipline->setOutputFormat(output); auto executor = query_pipline->execute(); @@ -245,19 +245,19 @@ int main(int, char **) JNIEXPORT jlong JNICALL Java_io_kyligence_jni_engine_LocalEngine_test (JNIEnv *env, jclass, jint a, jint b) { -// inside_main = true; -// std::cout << "start run pipeline." << std::endl; -// try -// { -// runSamplePipeline(); -// } -// catch (Poco::Exception e) -// { -// std::cout << e.message() << std::endl; -// std::cout << e.displayText() << std::endl; -// e.rethrow(); -// } -// std::cout << "run pipeline success." << std::endl; + inside_main = true; + std::cout << "start run pipeline." << std::endl; + try + { + runSamplePipeline(); + } + catch (Poco::Exception e) + { + std::cout << e.message() << std::endl; + std::cout << e.displayText() << std::endl; + e.rethrow(); + } + std::cout << "run pipeline success." << std::endl; std::cout < Date: Fri, 10 Sep 2021 10:24:21 +0300 Subject: [PATCH 184/472] Merge pull request #28656 from ClickHouse/move_docker_images_to_separate_repo Move docker images to a separate repo (cherry picked from commit 6243b64ff5f580c2a763700196ffe28a4bf5aa7d) (cherry picked from commit 28077e4a68d20658a0b0de0caf45b1b50d257f87) --- docker/images.json | 73 +++++++++---------- docker/packager/binary/Dockerfile | 2 +- docker/packager/deb/Dockerfile | 2 +- docker/packager/packager | 6 +- docker/packager/unbundled/Dockerfile | 4 +- docker/test/base/Dockerfile | 2 +- docker/test/codebrowser/Dockerfile | 6 +- docker/test/compatibility/centos/Dockerfile | 2 +- docker/test/compatibility/ubuntu/Dockerfile | 2 +- docker/test/coverage/Dockerfile | 4 +- docker/test/fasttest/Dockerfile | 2 +- docker/test/fuzzer/Dockerfile | 6 +- docker/test/integration/base/Dockerfile | 4 +- .../integration/helper_container/Dockerfile | 2 +- .../integration/kerberized_hadoop/Dockerfile | 2 +- .../test/integration/kerberos_kdc/Dockerfile | 6 +- .../mysql_golang_client/Dockerfile | 2 +- .../integration/mysql_java_client/Dockerfile | 2 +- .../integration/mysql_js_client/Dockerfile | 2 +- .../integration/mysql_php_client/Dockerfile | 2 +- .../postgresql_java_client/Dockerfile | 2 +- docker/test/integration/resolver/Dockerfile | 2 +- docker/test/integration/runner/Dockerfile | 2 +- .../compose/docker_compose_jdbc_bridge.yml | 2 +- .../runner/compose/docker_compose_keeper.yml | 6 +- .../docker_compose_kerberized_hdfs.yml | 4 +- .../docker_compose_kerberized_kafka.yml | 2 +- .../runner/compose/docker_compose_minio.yml | 6 +- .../docker_compose_mysql_golang_client.yml | 2 +- .../docker_compose_mysql_java_client.yml | 2 +- .../docker_compose_mysql_js_client.yml | 2 +- .../docker_compose_mysql_php_client.yml | 2 +- .../docker_compose_postgresql_java_client.yml | 2 +- docker/test/integration/s3_proxy/Dockerfile | 2 +- docker/test/keeper-jepsen/Dockerfile | 4 +- docker/test/performance-comparison/Dockerfile | 4 +- docker/test/performance-comparison/README.md | 2 +- docker/test/pvs/Dockerfile 
| 4 +- docker/test/split_build_smoke_test/Dockerfile | 4 +- docker/test/sqlancer/Dockerfile | 2 +- docker/test/stateful/Dockerfile | 4 +- docker/test/stateless/Dockerfile | 4 +- docker/test/stateless_pytest/Dockerfile | 4 +- docker/test/stateless_unbundled/Dockerfile | 4 +- docker/test/stress/Dockerfile | 4 +- docker/test/stress/README.md | 2 +- docker/test/style/Dockerfile | 2 +- docker/test/test_runner.sh | 2 +- docker/test/testflows/runner/Dockerfile | 2 +- docker/test/unit/Dockerfile | 4 +- tests/integration/README.md | 4 +- tests/integration/ci-runner.py | 12 +-- tests/integration/helpers/cluster.py | 6 +- tests/integration/helpers/network.py | 8 +- tests/integration/runner | 16 ++-- .../aes_encryption_env/clickhouse-service.yml | 2 +- .../clickhouse-service.yml | 2 +- .../example_env/clickhouse-service.yml | 2 +- .../clickhouse-service.yml | 2 +- .../kerberos_env/clickhouse-service.yml | 2 +- .../clickhouse-service.yml | 2 +- .../clickhouse-service.yml | 2 +- .../clickhouse-service.yml | 2 +- .../map_type_env/clickhouse-service.yml | 2 +- .../rbac/rbac_env/clickhouse-service.yml | 2 +- tests/testflows/runner | 2 +- .../clickhouse-service.yml | 2 +- 67 files changed, 143 insertions(+), 150 deletions(-) diff --git a/docker/images.json b/docker/images.json index c6b03fe2de50..2fc828e29b2e 100644 --- a/docker/images.json +++ b/docker/images.json @@ -1,12 +1,12 @@ { "docker/packager/deb": { - "name": "yandex/clickhouse-deb-builder", + "name": "clickhouse/deb-builder", "dependent": [ "docker/packager/unbundled" ] }, "docker/packager/binary": { - "name": "yandex/clickhouse-binary-builder", + "name": "clickhouse/binary-builder", "dependent": [ "docker/test/split_build_smoke_test", "docker/test/pvs", @@ -14,151 +14,146 @@ ] }, "docker/test/compatibility/centos": { - "name": "yandex/clickhouse-test-old-centos", + "name": "clickhouse/test-old-centos", "dependent": [] }, "docker/test/compatibility/ubuntu": { - "name": "yandex/clickhouse-test-old-ubuntu", + "name": "clickhouse/test-old-ubuntu", "dependent": [] }, "docker/test/integration/base": { - "name": "yandex/clickhouse-integration-test", + "name": "clickhouse/integration-test", "dependent": [] }, "docker/test/fuzzer": { - "name": "yandex/clickhouse-fuzzer", + "name": "clickhouse/fuzzer", "dependent": [] }, "docker/test/performance-comparison": { - "name": "yandex/clickhouse-performance-comparison", + "name": "clickhouse/performance-comparison", "dependent": [] }, "docker/test/pvs": { - "name": "yandex/clickhouse-pvs-test", + "name": "clickhouse/pvs-test", "dependent": [] }, "docker/test/stateless": { - "name": "yandex/clickhouse-stateless-test", + "name": "clickhouse/stateless-test", "dependent": [ "docker/test/stateful", "docker/test/coverage", "docker/test/unit" ] }, - "docker/test/stateless_pytest": { - "name": "yandex/clickhouse-stateless-pytest", - "dependent": [] - }, "docker/test/stateful": { - "name": "yandex/clickhouse-stateful-test", + "name": "clickhouse/stateful-test", "dependent": [ "docker/test/stress" ] }, "docker/test/coverage": { - "name": "yandex/clickhouse-test-coverage", + "name": "clickhouse/test-coverage", "dependent": [] }, "docker/test/unit": { - "name": "yandex/clickhouse-unit-test", + "name": "clickhouse/unit-test", "dependent": [] }, "docker/test/stress": { - "name": "yandex/clickhouse-stress-test", + "name": "clickhouse/stress-test", "dependent": [] }, "docker/test/split_build_smoke_test": { - "name": "yandex/clickhouse-split-build-smoke-test", + "name": "clickhouse/split-build-smoke-test", "dependent": [] }, 
"docker/test/codebrowser": { - "name": "yandex/clickhouse-codebrowser", + "name": "clickhouse/codebrowser", "dependent": [] }, "docker/test/integration/runner": { - "name": "yandex/clickhouse-integration-tests-runner", + "name": "clickhouse/integration-tests-runner", "dependent": [] }, "docker/test/testflows/runner": { - "name": "yandex/clickhouse-testflows-runner", + "name": "clickhouse/testflows-runner", "dependent": [] }, "docker/test/fasttest": { - "name": "yandex/clickhouse-fasttest", + "name": "clickhouse/fasttest", "dependent": [] }, "docker/test/style": { - "name": "yandex/clickhouse-style-test", + "name": "clickhouse/style-test", "dependent": [] }, "docker/test/integration/s3_proxy": { - "name": "yandex/clickhouse-s3-proxy", + "name": "clickhouse/s3-proxy", "dependent": [] }, "docker/test/integration/resolver": { - "name": "yandex/clickhouse-python-bottle", + "name": "clickhouse/python-bottle", "dependent": [] }, "docker/test/integration/helper_container": { - "name": "yandex/clickhouse-integration-helper", + "name": "clickhouse/integration-helper", "dependent": [] }, "docker/test/integration/mysql_golang_client": { - "name": "yandex/clickhouse-mysql-golang-client", + "name": "clickhouse/mysql-golang-client", "dependent": [] }, "docker/test/integration/mysql_java_client": { - "name": "yandex/clickhouse-mysql-java-client", + "name": "clickhouse/mysql-java-client", "dependent": [] }, "docker/test/integration/mysql_js_client": { - "name": "yandex/clickhouse-mysql-js-client", + "name": "clickhouse/mysql-js-client", "dependent": [] }, "docker/test/integration/mysql_php_client": { - "name": "yandex/clickhouse-mysql-php-client", + "name": "clickhouse/mysql-php-client", "dependent": [] }, "docker/test/integration/postgresql_java_client": { - "name": "yandex/clickhouse-postgresql-java-client", + "name": "clickhouse/postgresql-java-client", "dependent": [] }, "docker/test/integration/kerberos_kdc": { - "name": "yandex/clickhouse-kerberos-kdc", + "name": "clickhouse/kerberos-kdc", "dependent": [] }, "docker/test/base": { - "name": "yandex/clickhouse-test-base", + "name": "clickhouse/test-base", "dependent": [ "docker/test/stateless", "docker/test/stateless_unbundled", - "docker/test/stateless_pytest", "docker/test/integration/base", "docker/test/fuzzer", "docker/test/keeper-jepsen" ] }, "docker/packager/unbundled": { - "name": "yandex/clickhouse-unbundled-builder", + "name": "clickhouse/unbundled-builder", "dependent": [ "docker/test/stateless_unbundled" ] }, "docker/test/stateless_unbundled": { - "name": "yandex/clickhouse-stateless-unbundled-test", + "name": "clickhouse/stateless-unbundled-test", "dependent": [ ] }, "docker/test/integration/kerberized_hadoop": { - "name": "yandex/clickhouse-kerberized-hadoop", + "name": "clickhouse/kerberized-hadoop", "dependent": [] }, "docker/test/sqlancer": { - "name": "yandex/clickhouse-sqlancer-test", + "name": "clickhouse/sqlancer-test", "dependent": [] }, "docker/test/keeper-jepsen": { - "name": "yandex/clickhouse-keeper-jepsen-test", + "name": "clickhouse/keeper-jepsen-test", "dependent": [] } } diff --git a/docker/packager/binary/Dockerfile b/docker/packager/binary/Dockerfile index 413ed4af3933..045d66162489 100644 --- a/docker/packager/binary/Dockerfile +++ b/docker/packager/binary/Dockerfile @@ -1,4 +1,4 @@ -# docker build -t yandex/clickhouse-binary-builder . +# docker build -t clickhouse/binary-builder . 
FROM ubuntu:20.04 ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=11 diff --git a/docker/packager/deb/Dockerfile b/docker/packager/deb/Dockerfile index 294c86454554..74f3a4635b30 100644 --- a/docker/packager/deb/Dockerfile +++ b/docker/packager/deb/Dockerfile @@ -1,4 +1,4 @@ -# docker build -t yandex/clickhouse-deb-builder . +# docker build -t clickhouse/deb-builder . FROM ubuntu:20.04 ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=11 diff --git a/docker/packager/packager b/docker/packager/packager index 95b7fcd85681..e337e7b8f51c 100755 --- a/docker/packager/packager +++ b/docker/packager/packager @@ -9,9 +9,9 @@ import sys SCRIPT_PATH = os.path.realpath(__file__) IMAGE_MAP = { - "deb": "yandex/clickhouse-deb-builder", - "binary": "yandex/clickhouse-binary-builder", - "unbundled": "yandex/clickhouse-unbundled-builder" + "deb": "clickhouse/deb-builder", + "binary": "clickhouse/binary-builder", + "unbundled": "clickhouse/unbundled-builder" } def check_image_exists_locally(image_name): diff --git a/docker/packager/unbundled/Dockerfile b/docker/packager/unbundled/Dockerfile index b2d9f555f193..3527c1057837 100644 --- a/docker/packager/unbundled/Dockerfile +++ b/docker/packager/unbundled/Dockerfile @@ -1,5 +1,5 @@ -# docker build -t yandex/clickhouse-unbundled-builder . -FROM yandex/clickhouse-deb-builder +# docker build -t clickhouse/unbundled-builder . +FROM clickhouse/deb-builder RUN export CODENAME="$(lsb_release --codename --short | tr 'A-Z' 'a-z')" \ && wget -nv -O /tmp/arrow-keyring.deb "https://apache.jfrog.io/artifactory/arrow/ubuntu/apache-arrow-apt-source-latest-${CODENAME}.deb" \ diff --git a/docker/test/base/Dockerfile b/docker/test/base/Dockerfile index 611ef6b7702f..fbbc902f6b62 100644 --- a/docker/test/base/Dockerfile +++ b/docker/test/base/Dockerfile @@ -1,4 +1,4 @@ -# docker build -t yandex/clickhouse-test-base . +# docker build -t clickhouse/test-base . FROM ubuntu:20.04 ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=11 diff --git a/docker/test/codebrowser/Dockerfile b/docker/test/codebrowser/Dockerfile index 33173ab90f9d..6ca5c891388b 100644 --- a/docker/test/codebrowser/Dockerfile +++ b/docker/test/codebrowser/Dockerfile @@ -1,6 +1,6 @@ -# docker build --network=host -t yandex/clickhouse-codebrowser . -# docker run --volume=path_to_repo:/repo_folder --volume=path_to_result:/test_output yandex/clickhouse-codebrowser -FROM yandex/clickhouse-binary-builder +# docker build --network=host -t clickhouse/codebrowser . +# docker run --volume=path_to_repo:/repo_folder --volume=path_to_result:/test_output clickhouse/codebrowser +FROM clickhouse/binary-builder RUN sed -i 's|http://archive|http://ru.archive|g' /etc/apt/sources.list diff --git a/docker/test/compatibility/centos/Dockerfile b/docker/test/compatibility/centos/Dockerfile index 0ef119d1bb17..628609e374f6 100644 --- a/docker/test/compatibility/centos/Dockerfile +++ b/docker/test/compatibility/centos/Dockerfile @@ -1,4 +1,4 @@ -# docker build -t yandex/clickhouse-test-old-centos . +# docker build -t clickhouse/test-old-centos . FROM centos:5 CMD /bin/sh -c "/clickhouse server --config /config/config.xml > /var/log/clickhouse-server/stderr.log 2>&1 & \ diff --git a/docker/test/compatibility/ubuntu/Dockerfile b/docker/test/compatibility/ubuntu/Dockerfile index 28f89e47b954..ddd0a76bd446 100644 --- a/docker/test/compatibility/ubuntu/Dockerfile +++ b/docker/test/compatibility/ubuntu/Dockerfile @@ -1,4 +1,4 @@ -# docker build -t yandex/clickhouse-test-old-ubuntu . +# docker build -t clickhouse/test-old-ubuntu . 
FROM ubuntu:12.04 CMD /bin/sh -c "/clickhouse server --config /config/config.xml > /var/log/clickhouse-server/stderr.log 2>&1 & \ diff --git a/docker/test/coverage/Dockerfile b/docker/test/coverage/Dockerfile index 681f65e0f6f6..ccf0bbc7c83d 100644 --- a/docker/test/coverage/Dockerfile +++ b/docker/test/coverage/Dockerfile @@ -1,5 +1,5 @@ -# docker build -t yandex/clickhouse-test-coverage . -FROM yandex/clickhouse-stateless-test +# docker build -t clickhouse/test-coverage . +FROM clickhouse/stateless-test RUN apt-get update -y \ && env DEBIAN_FRONTEND=noninteractive \ diff --git a/docker/test/fasttest/Dockerfile b/docker/test/fasttest/Dockerfile index 2e0bbcd350f4..ab62ceeb2c04 100644 --- a/docker/test/fasttest/Dockerfile +++ b/docker/test/fasttest/Dockerfile @@ -1,4 +1,4 @@ -# docker build -t yandex/clickhouse-fasttest . +# docker build -t clickhouse/fasttest . FROM ubuntu:20.04 ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=11 diff --git a/docker/test/fuzzer/Dockerfile b/docker/test/fuzzer/Dockerfile index 18684145636b..892eb5ab5931 100644 --- a/docker/test/fuzzer/Dockerfile +++ b/docker/test/fuzzer/Dockerfile @@ -1,5 +1,5 @@ -# docker build -t yandex/clickhouse-fuzzer . -FROM yandex/clickhouse-test-base +# docker build -t clickhouse/fuzzer . +FROM clickhouse/test-base ENV LANG=C.UTF-8 ENV TZ=Europe/Moscow @@ -32,5 +32,5 @@ CMD set -o pipefail \ && cd /workspace \ && /run-fuzzer.sh 2>&1 | ts "$(printf '%%Y-%%m-%%d %%H:%%M:%%S\t')" | tee main.log -# docker run --network=host --volume :/workspace -e PR_TO_TEST=<> -e SHA_TO_TEST=<> yandex/clickhouse-fuzzer +# docker run --network=host --volume :/workspace -e PR_TO_TEST=<> -e SHA_TO_TEST=<> clickhouse/fuzzer diff --git a/docker/test/integration/base/Dockerfile b/docker/test/integration/base/Dockerfile index 344c1b9a6981..519c64297e55 100644 --- a/docker/test/integration/base/Dockerfile +++ b/docker/test/integration/base/Dockerfile @@ -1,5 +1,5 @@ -# docker build -t yandex/clickhouse-integration-test . -FROM yandex/clickhouse-test-base +# docker build -t clickhouse/integration-test . +FROM clickhouse/test-base SHELL ["/bin/bash", "-c"] diff --git a/docker/test/integration/helper_container/Dockerfile b/docker/test/integration/helper_container/Dockerfile index 922eb2c6f22e..6a093081bf2c 100644 --- a/docker/test/integration/helper_container/Dockerfile +++ b/docker/test/integration/helper_container/Dockerfile @@ -1,4 +1,4 @@ -# docker build -t yandex/clickhouse-integration-helper . +# docker build -t clickhouse/integration-helper . # Helper docker container to run iptables without sudo FROM alpine diff --git a/docker/test/integration/kerberized_hadoop/Dockerfile b/docker/test/integration/kerberized_hadoop/Dockerfile index ccd5f38dec3e..025f4b27fde1 100644 --- a/docker/test/integration/kerberized_hadoop/Dockerfile +++ b/docker/test/integration/kerberized_hadoop/Dockerfile @@ -1,4 +1,4 @@ -# docker build -t yandex/clickhouse-kerberized-hadoop . +# docker build -t clickhouse/kerberized-hadoop . FROM sequenceiq/hadoop-docker:2.7.0 diff --git a/docker/test/integration/kerberos_kdc/Dockerfile b/docker/test/integration/kerberos_kdc/Dockerfile index 7391e7df77cc..a203c33a3313 100644 --- a/docker/test/integration/kerberos_kdc/Dockerfile +++ b/docker/test/integration/kerberos_kdc/Dockerfile @@ -1,11 +1,9 @@ -# docker build -t yandex/clickhouse-kerberos-kdc . - +# docker build -t clickhouse/kerberos-kdc . 
FROM centos:6 -# old OS to make is faster and smaller RUN sed -i '/^mirrorlist/s/^/#/;/^#baseurl/{s/#//;s/mirror.centos.org\/centos\/$releasever/vault.centos.org\/6.10/}' /etc/yum.repos.d/*B* -RUN yum install -y krb5-server krb5-libs krb5-auth-dialog krb5-workstation +RUN yum install -y ca-certificates krb5-server krb5-libs krb5-auth-dialog krb5-workstation EXPOSE 88 749 diff --git a/docker/test/integration/mysql_golang_client/Dockerfile b/docker/test/integration/mysql_golang_client/Dockerfile index 767494fb5763..68b0aaab42c7 100644 --- a/docker/test/integration/mysql_golang_client/Dockerfile +++ b/docker/test/integration/mysql_golang_client/Dockerfile @@ -1,4 +1,4 @@ -# docker build -t yandex/clickhouse-mysql-golang-client . +# docker build -t clickhouse/mysql-golang-client . # MySQL golang client docker container FROM golang:1.13 diff --git a/docker/test/integration/mysql_java_client/Dockerfile b/docker/test/integration/mysql_java_client/Dockerfile index fcb6a39f33b7..0abf50cd4937 100644 --- a/docker/test/integration/mysql_java_client/Dockerfile +++ b/docker/test/integration/mysql_java_client/Dockerfile @@ -1,4 +1,4 @@ -# docker build -t yandex/clickhouse-mysql-java-client . +# docker build -t clickhouse/mysql-java-client . # MySQL Java client docker container FROM ubuntu:18.04 diff --git a/docker/test/integration/mysql_js_client/Dockerfile b/docker/test/integration/mysql_js_client/Dockerfile index 4f12de004acc..b1397b40d383 100644 --- a/docker/test/integration/mysql_js_client/Dockerfile +++ b/docker/test/integration/mysql_js_client/Dockerfile @@ -1,4 +1,4 @@ -# docker build -t yandex/clickhouse-mysql-js-client . +# docker build -t clickhouse/mysql-js-client . # MySQL JavaScript client docker container FROM node:8 diff --git a/docker/test/integration/mysql_php_client/Dockerfile b/docker/test/integration/mysql_php_client/Dockerfile index e2ceb62f44f8..0fb77bf8ffb7 100644 --- a/docker/test/integration/mysql_php_client/Dockerfile +++ b/docker/test/integration/mysql_php_client/Dockerfile @@ -1,4 +1,4 @@ -# docker build -t yandex/clickhouse-mysql-php-client . +# docker build -t clickhouse/mysql-php-client . # MySQL PHP client docker container FROM php:7.3-cli diff --git a/docker/test/integration/postgresql_java_client/Dockerfile b/docker/test/integration/postgresql_java_client/Dockerfile index eab236c95906..f5484028ec9e 100644 --- a/docker/test/integration/postgresql_java_client/Dockerfile +++ b/docker/test/integration/postgresql_java_client/Dockerfile @@ -1,4 +1,4 @@ -# docker build -t yandex/clickhouse-postgresql-java-client . +# docker build -t clickhouse/postgresql-java-client . # PostgreSQL Java client docker container FROM ubuntu:18.04 diff --git a/docker/test/integration/resolver/Dockerfile b/docker/test/integration/resolver/Dockerfile index b0efb4b46d5e..01b9b7776142 100644 --- a/docker/test/integration/resolver/Dockerfile +++ b/docker/test/integration/resolver/Dockerfile @@ -1,4 +1,4 @@ -# docker build -t yandex/clickhouse-python-bottle . +# docker build -t clickhouse/python-bottle . # Helper docker container to run python bottle apps FROM python:3 diff --git a/docker/test/integration/runner/Dockerfile b/docker/test/integration/runner/Dockerfile index cb69a00fc63d..5b77248427b2 100644 --- a/docker/test/integration/runner/Dockerfile +++ b/docker/test/integration/runner/Dockerfile @@ -1,4 +1,4 @@ -# docker build -t yandex/clickhouse-integration-tests-runner . +# docker build -t clickhouse/integration-tests-runner . 
FROM ubuntu:20.04 RUN sed -i 's|http://archive|http://ru.archive|g' /etc/apt/sources.list diff --git a/docker/test/integration/runner/compose/docker_compose_jdbc_bridge.yml b/docker/test/integration/runner/compose/docker_compose_jdbc_bridge.yml index a65ef629df69..b3686adc21c4 100644 --- a/docker/test/integration/runner/compose/docker_compose_jdbc_bridge.yml +++ b/docker/test/integration/runner/compose/docker_compose_jdbc_bridge.yml @@ -1,7 +1,7 @@ version: '2.3' services: bridge1: - image: yandex/clickhouse-jdbc-bridge + image: clickhouse/jdbc-bridge command: | /bin/bash -c 'cat << EOF > config/datasources/self.json { diff --git a/docker/test/integration/runner/compose/docker_compose_keeper.yml b/docker/test/integration/runner/compose/docker_compose_keeper.yml index e11a13e6eabf..134ffbff1f74 100644 --- a/docker/test/integration/runner/compose/docker_compose_keeper.yml +++ b/docker/test/integration/runner/compose/docker_compose_keeper.yml @@ -1,7 +1,7 @@ version: '2.3' services: zoo1: - image: ${image:-yandex/clickhouse-integration-test} + image: ${image:-clickhouse/integration-test} restart: always user: ${user:-} volumes: @@ -31,7 +31,7 @@ services: - inet6 - rotate zoo2: - image: ${image:-yandex/clickhouse-integration-test} + image: ${image:-clickhouse/integration-test} restart: always user: ${user:-} volumes: @@ -61,7 +61,7 @@ services: - inet6 - rotate zoo3: - image: ${image:-yandex/clickhouse-integration-test} + image: ${image:-clickhouse/integration-test} restart: always user: ${user:-} volumes: diff --git a/docker/test/integration/runner/compose/docker_compose_kerberized_hdfs.yml b/docker/test/integration/runner/compose/docker_compose_kerberized_hdfs.yml index b09e75a85157..88be3e45085f 100644 --- a/docker/test/integration/runner/compose/docker_compose_kerberized_hdfs.yml +++ b/docker/test/integration/runner/compose/docker_compose_kerberized_hdfs.yml @@ -4,7 +4,7 @@ services: kerberizedhdfs1: cap_add: - DAC_READ_SEARCH - image: yandex/clickhouse-kerberized-hadoop:16621 + image: clickhouse/kerberized-hadoop hostname: kerberizedhdfs1 restart: always volumes: @@ -22,7 +22,7 @@ services: entrypoint: /etc/bootstrap.sh -d hdfskerberos: - image: yandex/clickhouse-kerberos-kdc:${DOCKER_KERBEROS_KDC_TAG:-latest} + image: clickhouse/kerberos-kdc:${DOCKER_KERBEROS_KDC_TAG:-latest} hostname: hdfskerberos volumes: - ${KERBERIZED_HDFS_DIR}/secrets:/tmp/keytab diff --git a/docker/test/integration/runner/compose/docker_compose_kerberized_kafka.yml b/docker/test/integration/runner/compose/docker_compose_kerberized_kafka.yml index 081b90c4f278..d57e4e4d5bea 100644 --- a/docker/test/integration/runner/compose/docker_compose_kerberized_kafka.yml +++ b/docker/test/integration/runner/compose/docker_compose_kerberized_kafka.yml @@ -50,7 +50,7 @@ services: - label:disable kafka_kerberos: - image: yandex/clickhouse-kerberos-kdc:${DOCKER_KERBEROS_KDC_TAG:-latest} + image: clickhouse/kerberos-kdc:${DOCKER_KERBEROS_KDC_TAG:-latest} hostname: kafka_kerberos volumes: - ${KERBERIZED_KAFKA_DIR}/secrets:/tmp/keytab diff --git a/docker/test/integration/runner/compose/docker_compose_minio.yml b/docker/test/integration/runner/compose/docker_compose_minio.yml index 33c656e83348..6e8c826b2346 100644 --- a/docker/test/integration/runner/compose/docker_compose_minio.yml +++ b/docker/test/integration/runner/compose/docker_compose_minio.yml @@ -21,14 +21,14 @@ services: # HTTP proxies for Minio. 
proxy1: - image: yandex/clickhouse-s3-proxy + image: clickhouse/s3-proxy expose: - "8080" # Redirect proxy port - "80" # Reverse proxy port - "443" # Reverse proxy port (secure) proxy2: - image: yandex/clickhouse-s3-proxy + image: clickhouse/s3-proxy expose: - "8080" - "80" @@ -36,7 +36,7 @@ services: # Empty container to run proxy resolver. resolver: - image: yandex/clickhouse-python-bottle + image: clickhouse/python-bottle expose: - "8080" tty: true diff --git a/docker/test/integration/runner/compose/docker_compose_mysql_golang_client.yml b/docker/test/integration/runner/compose/docker_compose_mysql_golang_client.yml index a6a338eb6a86..56cc04105740 100644 --- a/docker/test/integration/runner/compose/docker_compose_mysql_golang_client.yml +++ b/docker/test/integration/runner/compose/docker_compose_mysql_golang_client.yml @@ -1,6 +1,6 @@ version: '2.3' services: golang1: - image: yandex/clickhouse-mysql-golang-client:${DOCKER_MYSQL_GOLANG_CLIENT_TAG:-latest} + image: clickhouse/mysql-golang-client:${DOCKER_MYSQL_GOLANG_CLIENT_TAG:-latest} # to keep container running command: sleep infinity diff --git a/docker/test/integration/runner/compose/docker_compose_mysql_java_client.yml b/docker/test/integration/runner/compose/docker_compose_mysql_java_client.yml index 21d927df82c8..eb5ffb01baa2 100644 --- a/docker/test/integration/runner/compose/docker_compose_mysql_java_client.yml +++ b/docker/test/integration/runner/compose/docker_compose_mysql_java_client.yml @@ -1,6 +1,6 @@ version: '2.3' services: java1: - image: yandex/clickhouse-mysql-java-client:${DOCKER_MYSQL_JAVA_CLIENT_TAG:-latest} + image: clickhouse/mysql-java-client:${DOCKER_MYSQL_JAVA_CLIENT_TAG:-latest} # to keep container running command: sleep infinity diff --git a/docker/test/integration/runner/compose/docker_compose_mysql_js_client.yml b/docker/test/integration/runner/compose/docker_compose_mysql_js_client.yml index dbd85cf23822..90939449c5f3 100644 --- a/docker/test/integration/runner/compose/docker_compose_mysql_js_client.yml +++ b/docker/test/integration/runner/compose/docker_compose_mysql_js_client.yml @@ -1,6 +1,6 @@ version: '2.3' services: mysqljs1: - image: yandex/clickhouse-mysql-js-client:${DOCKER_MYSQL_JS_CLIENT_TAG:-latest} + image: clickhouse/mysql-js-client:${DOCKER_MYSQL_JS_CLIENT_TAG:-latest} # to keep container running command: sleep infinity diff --git a/docker/test/integration/runner/compose/docker_compose_mysql_php_client.yml b/docker/test/integration/runner/compose/docker_compose_mysql_php_client.yml index f24f5337a7ec..408b8ff089a9 100644 --- a/docker/test/integration/runner/compose/docker_compose_mysql_php_client.yml +++ b/docker/test/integration/runner/compose/docker_compose_mysql_php_client.yml @@ -1,6 +1,6 @@ version: '2.3' services: php1: - image: yandex/clickhouse-mysql-php-client:${DOCKER_MYSQL_PHP_CLIENT_TAG:-latest} + image: clickhouse/mysql-php-client:${DOCKER_MYSQL_PHP_CLIENT_TAG:-latest} # to keep container running command: sleep infinity diff --git a/docker/test/integration/runner/compose/docker_compose_postgresql_java_client.yml b/docker/test/integration/runner/compose/docker_compose_postgresql_java_client.yml index 38191f1bdd6e..904bfffdfd5b 100644 --- a/docker/test/integration/runner/compose/docker_compose_postgresql_java_client.yml +++ b/docker/test/integration/runner/compose/docker_compose_postgresql_java_client.yml @@ -1,6 +1,6 @@ version: '2.2' services: java: - image: yandex/clickhouse-postgresql-java-client:${DOCKER_POSTGRESQL_JAVA_CLIENT_TAG:-latest} + image: 
clickhouse/postgresql-java-client:${DOCKER_POSTGRESQL_JAVA_CLIENT_TAG:-latest} # to keep container running command: sleep infinity diff --git a/docker/test/integration/s3_proxy/Dockerfile b/docker/test/integration/s3_proxy/Dockerfile index d8b1754fa71d..5858218e4e4c 100644 --- a/docker/test/integration/s3_proxy/Dockerfile +++ b/docker/test/integration/s3_proxy/Dockerfile @@ -1,4 +1,4 @@ -# docker build -t yandex/clickhouse-s3-proxy . +# docker build -t clickhouse/s3-proxy . FROM nginx:alpine COPY run.sh /run.sh diff --git a/docker/test/keeper-jepsen/Dockerfile b/docker/test/keeper-jepsen/Dockerfile index 1a62d5e793fc..5bb7f9433c2e 100644 --- a/docker/test/keeper-jepsen/Dockerfile +++ b/docker/test/keeper-jepsen/Dockerfile @@ -1,5 +1,5 @@ -# docker build -t yandex/clickhouse-keeper-jepsen-test . -FROM yandex/clickhouse-test-base +# docker build -t clickhouse/keeper-jepsen-test . +FROM clickhouse/test-base ENV DEBIAN_FRONTEND=noninteractive ENV CLOJURE_VERSION=1.10.3.814 diff --git a/docker/test/performance-comparison/Dockerfile b/docker/test/performance-comparison/Dockerfile index 1a61c4b274ae..88b66d42ecbc 100644 --- a/docker/test/performance-comparison/Dockerfile +++ b/docker/test/performance-comparison/Dockerfile @@ -1,4 +1,4 @@ -# docker build -t yandex/clickhouse-performance-comparison . +# docker build -t clickhouse/performance-comparison . FROM ubuntu:18.04 ENV LANG=C.UTF-8 @@ -54,4 +54,4 @@ COPY * / # it gives '/bin/sh: 1: [bash,: not found' otherwise. CMD ["bash", "-c", "node=$((RANDOM % $(numactl --hardware | sed -n 's/^.*available:\\(.*\\)nodes.*$/\\1/p'))); echo Will bind to NUMA node $node; numactl --cpunodebind=$node --membind=$node /entrypoint.sh"] -# docker run --network=host --volume :/workspace --volume=:/output -e PR_TO_TEST=<> -e SHA_TO_TEST=<> yandex/clickhouse-performance-comparison +# docker run --network=host --volume :/workspace --volume=:/output -e PR_TO_TEST=<> -e SHA_TO_TEST=<> clickhouse/performance-comparison diff --git a/docker/test/performance-comparison/README.md b/docker/test/performance-comparison/README.md index 782644a81dd4..75213fad0779 100644 --- a/docker/test/performance-comparison/README.md +++ b/docker/test/performance-comparison/README.md @@ -116,7 +116,7 @@ pull requests (0 for master) manually. docker run --network=host --volume=$(pwd)/workspace:/workspace --volume=$(pwd)/output:/output [-e REF_PR={} -e REF_SHA={}] -e PR_TO_TEST={} -e SHA_TO_TEST={} - yandex/clickhouse-performance-comparison + clickhouse/performance-comparison ``` Then see the `report.html` in the `output` directory. diff --git a/docker/test/pvs/Dockerfile b/docker/test/pvs/Dockerfile index 35e07748845d..513a1bf9a99e 100644 --- a/docker/test/pvs/Dockerfile +++ b/docker/test/pvs/Dockerfile @@ -1,6 +1,6 @@ -# docker build -t yandex/clickhouse-pvs-test . +# docker build -t clickhouse/pvs-test . -FROM yandex/clickhouse-binary-builder +FROM clickhouse/binary-builder RUN apt-get update --yes \ && apt-get install \ diff --git a/docker/test/split_build_smoke_test/Dockerfile b/docker/test/split_build_smoke_test/Dockerfile index 54a9eb17868c..3cc2f26a5076 100644 --- a/docker/test/split_build_smoke_test/Dockerfile +++ b/docker/test/split_build_smoke_test/Dockerfile @@ -1,5 +1,5 @@ -# docker build -t yandex/clickhouse-split-build-smoke-test . -FROM yandex/clickhouse-binary-builder +# docker build -t clickhouse/split-build-smoke-test . 
+FROM clickhouse/binary-builder COPY run.sh /run.sh COPY process_split_build_smoke_test_result.py / diff --git a/docker/test/sqlancer/Dockerfile b/docker/test/sqlancer/Dockerfile index 3a0e489d1a39..e73fd03fb6da 100644 --- a/docker/test/sqlancer/Dockerfile +++ b/docker/test/sqlancer/Dockerfile @@ -1,4 +1,4 @@ -# docker build -t yandex/clickhouse-sqlancer-test . +# docker build -t clickhouse/sqlancer-test . FROM ubuntu:20.04 RUN sed -i 's|http://archive|http://ru.archive|g' /etc/apt/sources.list diff --git a/docker/test/stateful/Dockerfile b/docker/test/stateful/Dockerfile index 07aad75a2eae..c237a712f524 100644 --- a/docker/test/stateful/Dockerfile +++ b/docker/test/stateful/Dockerfile @@ -1,5 +1,5 @@ -# docker build -t yandex/clickhouse-stateful-test . -FROM yandex/clickhouse-stateless-test +# docker build -t clickhouse/stateful-test . +FROM clickhouse/stateless-test RUN apt-get update -y \ && env DEBIAN_FRONTEND=noninteractive \ diff --git a/docker/test/stateless/Dockerfile b/docker/test/stateless/Dockerfile index 39c8a2e53580..7812e11962d3 100644 --- a/docker/test/stateless/Dockerfile +++ b/docker/test/stateless/Dockerfile @@ -1,5 +1,5 @@ -# docker build -t yandex/clickhouse-stateless-test . -FROM yandex/clickhouse-test-base +# docker build -t clickhouse/stateless-test . +FROM clickhouse/test-base ARG odbc_driver_url="https://github.com/ClickHouse/clickhouse-odbc/releases/download/v1.1.4.20200302/clickhouse-odbc-1.1.4-Linux.tar.gz" diff --git a/docker/test/stateless_pytest/Dockerfile b/docker/test/stateless_pytest/Dockerfile index 947a70426d62..c1e47523f6d1 100644 --- a/docker/test/stateless_pytest/Dockerfile +++ b/docker/test/stateless_pytest/Dockerfile @@ -1,5 +1,5 @@ -# docker build -t yandex/clickhouse-stateless-pytest . -FROM yandex/clickhouse-test-base +# docker build -t clickhouse/stateless-pytest . +FROM clickhouse/test-base RUN apt-get update -y && \ apt-get install -y --no-install-recommends \ diff --git a/docker/test/stateless_unbundled/Dockerfile b/docker/test/stateless_unbundled/Dockerfile index 53857a90ac79..dfe441e08a62 100644 --- a/docker/test/stateless_unbundled/Dockerfile +++ b/docker/test/stateless_unbundled/Dockerfile @@ -1,5 +1,5 @@ -# docker build -t yandex/clickhouse-stateless-unbundled-test . -FROM yandex/clickhouse-test-base +# docker build -t clickhouse/stateless-unbundled-test . +FROM clickhouse/test-base ARG odbc_driver_url="https://github.com/ClickHouse/clickhouse-odbc/releases/download/v1.1.4.20200302/clickhouse-odbc-1.1.4-Linux.tar.gz" diff --git a/docker/test/stress/Dockerfile b/docker/test/stress/Dockerfile index e1df32ec3d72..3fe1b790d5a8 100644 --- a/docker/test/stress/Dockerfile +++ b/docker/test/stress/Dockerfile @@ -1,5 +1,5 @@ -# docker build -t yandex/clickhouse-stress-test . -FROM yandex/clickhouse-stateful-test +# docker build -t clickhouse/stress-test . 
+FROM clickhouse/stateful-test RUN apt-get update -y \ && env DEBIAN_FRONTEND=noninteractive \ diff --git a/docker/test/stress/README.md b/docker/test/stress/README.md index f747996fa2d5..b1519e7968d1 100644 --- a/docker/test/stress/README.md +++ b/docker/test/stress/README.md @@ -6,7 +6,7 @@ Usage: ``` $ ls $HOME/someclickhouse clickhouse-client_18.14.9_all.deb clickhouse-common-static_18.14.9_amd64.deb clickhouse-server_18.14.9_all.deb clickhouse-test_18.14.9_all.deb -$ docker run --volume=$HOME/someclickhouse:/package_folder --volume=$HOME/test_output:/test_output yandex/clickhouse-stress-test +$ docker run --volume=$HOME/someclickhouse:/package_folder --volume=$HOME/test_output:/test_output clickhouse/stress-test Selecting previously unselected package clickhouse-common-static. (Reading database ... 14442 files and directories currently installed.) ... diff --git a/docker/test/style/Dockerfile b/docker/test/style/Dockerfile index c0b3b0102cfe..33cdb9db57a9 100644 --- a/docker/test/style/Dockerfile +++ b/docker/test/style/Dockerfile @@ -1,4 +1,4 @@ -# docker build -t yandex/clickhouse-style-test . +# docker build -t clickhouse/style-test . FROM ubuntu:20.04 RUN sed -i 's|http://archive|http://ru.archive|g' /etc/apt/sources.list diff --git a/docker/test/test_runner.sh b/docker/test/test_runner.sh index cd6367b29646..0c99c8c2b323 100755 --- a/docker/test/test_runner.sh +++ b/docker/test/test_runner.sh @@ -49,7 +49,7 @@ fi # Build server image (optional) from local packages if [ -z "${CLICKHOUSE_SERVER_IMAGE}" ]; then - CLICKHOUSE_SERVER_IMAGE="yandex/clickhouse-server:local" + CLICKHOUSE_SERVER_IMAGE="clickhouse/server:local" if [ "${CLICKHOUSE_PACKAGES_ARG}" != "${NO_REBUILD_FLAG}" ]; then docker build --network=host \ diff --git a/docker/test/testflows/runner/Dockerfile b/docker/test/testflows/runner/Dockerfile index 81d431635b76..91d0eb844d9e 100644 --- a/docker/test/testflows/runner/Dockerfile +++ b/docker/test/testflows/runner/Dockerfile @@ -1,4 +1,4 @@ -# docker build -t yandex/clickhouse-testflows-runner . +# docker build -t clickhouse/testflows-runner . FROM ubuntu:20.04 RUN sed -i 's|http://archive|http://ru.archive|g' /etc/apt/sources.list diff --git a/docker/test/unit/Dockerfile b/docker/test/unit/Dockerfile index e111611eecd6..20d677733637 100644 --- a/docker/test/unit/Dockerfile +++ b/docker/test/unit/Dockerfile @@ -1,5 +1,5 @@ -# docker build -t yandex/clickhouse-unit-test . -FROM yandex/clickhouse-stateless-test +# docker build -t clickhouse/unit-test . +FROM clickhouse/stateless-test RUN apt-get install gdb diff --git a/tests/integration/README.md b/tests/integration/README.md index ed96eafdef8e..1b1e7f3e052b 100644 --- a/tests/integration/README.md +++ b/tests/integration/README.md @@ -66,7 +66,7 @@ For tests that use common docker compose files you may need to set up their path ### Running with runner script The only requirement is fresh configured docker and -docker pull yandex/clickhouse-integration-tests-runner +docker pull clickhouse/integration-tests-runner Notes: * If you want to run integration tests without `sudo` you have to add your user to docker group `sudo usermod -aG docker $USER`. [More information](https://docs.docker.com/install/linux/linux-postinstall/) about docker configuration. @@ -122,7 +122,7 @@ You can just open shell inside a container by overwritting the command: The main container used for integration tests lives in `docker/test/integration/Dockerfile`. 
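For example, with the new namespace the prebuilt images can be pulled instead of rebuilt. The sketch below is illustrative only: the image names come from this patch, while the flag spellings, paths and the test selector are assumptions based on the `tests/integration/runner` argument handling shown later in this patch, not something the patch itself adds.

```bash
# Hedged sketch: image names follow this patch; the flags, the binary path and
# the pytest selector are placeholders/assumptions, not part of the patch.
docker pull clickhouse/integration-tests-runner
docker pull clickhouse/integration-test
cd tests/integration
./runner --binary "$HOME/ClickHouse/build/programs/clickhouse" \
         --docker-compose-images-tags clickhouse/integration-test:latest \
         '<pytest arguments>'
```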
Rebuild it with ``` cd docker/test/integration -docker build -t yandex/clickhouse-integration-test . +docker build -t clickhouse/integration-test . ``` The helper container used by the `runner` script is in `docker/test/integration/runner/Dockerfile`. diff --git a/tests/integration/ci-runner.py b/tests/integration/ci-runner.py index bf7549a83c44..f17eb84e5f32 100755 --- a/tests/integration/ci-runner.py +++ b/tests/integration/ci-runner.py @@ -207,11 +207,11 @@ def shuffle_test_groups(self): @staticmethod def get_images_names(): - return ["yandex/clickhouse-integration-tests-runner", "yandex/clickhouse-mysql-golang-client", - "yandex/clickhouse-mysql-java-client", "yandex/clickhouse-mysql-js-client", - "yandex/clickhouse-mysql-php-client", "yandex/clickhouse-postgresql-java-client", - "yandex/clickhouse-integration-test", "yandex/clickhouse-kerberos-kdc", - "yandex/clickhouse-integration-helper", ] + return ["clickhouse/integration-tests-runner", "clickhouse/mysql-golang-client", + "clickhouse/mysql-java-client", "clickhouse/mysql-js-client", + "clickhouse/mysql-php-client", "clickhouse/postgresql-java-client", + "clickhouse/integration-test", "clickhouse/kerberos-kdc", + "clickhouse/integration-helper", ] def _can_run_with(self, path, opt): @@ -343,7 +343,7 @@ def _get_runner_image_cmd(self, repo_path): image_cmd = '' if self._can_run_with(os.path.join(repo_path, "tests/integration", "runner"), '--docker-image-version'): for img in self.get_images_names(): - if img == "yandex/clickhouse-integration-tests-runner": + if img == "clickhouse/integration-tests-runner": runner_version = self.get_single_image_version() logging.info("Can run with custom docker image version %s", runner_version) image_cmd += ' --docker-image-version={} '.format(runner_version) diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 3316c94abca0..7d36fa1ab8fb 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -521,7 +521,7 @@ def setup_keeper_cmd(self, instance, env_variables, docker_compose_yml_dir): binary_path = binary_path[:-len('-server')] env_variables['keeper_binary'] = binary_path - env_variables['image'] = "yandex/clickhouse-integration-test:" + self.docker_base_tag + env_variables['image'] = "clickhouse/integration-test:" + self.docker_base_tag env_variables['user'] = str(os.getuid()) env_variables['keeper_fs'] = 'bind' for i in range(1, 4): @@ -738,7 +738,7 @@ def add_instance(self, name, base_config_dir=None, main_configs=None, user_confi with_odbc_drivers=False, with_postgres=False, with_postgres_cluster=False, with_hdfs=False, with_kerberized_hdfs=False, with_mongo=False, with_mongo_secure=False, with_redis=False, with_minio=False, with_cassandra=False, with_jdbc_bridge=False, - hostname=None, env_variables=None, image="yandex/clickhouse-integration-test", tag=None, + hostname=None, env_variables=None, image="clickhouse/integration-test", tag=None, stay_alive=False, ipv4_address=None, ipv6_address=None, with_installed_binary=False, tmpfs=None, zookeeper_docker_compose_path=None, minio_certs_dir=None, use_keeper=True, main_config_name="config.xml", users_config_name="users.xml", copy_common_configs=True): @@ -1757,7 +1757,7 @@ def __init__( clickhouse_start_command=CLICKHOUSE_START_COMMAND, main_config_name="config.xml", users_config_name="users.xml", copy_common_configs=True, hostname=None, env_variables=None, - image="yandex/clickhouse-integration-test", tag="latest", + image="clickhouse/integration-test", tag="latest", 
stay_alive=False, ipv4_address=None, ipv6_address=None, with_installed_binary=False, tmpfs=None): self.name = name diff --git a/tests/integration/helpers/network.py b/tests/integration/helpers/network.py index 7d9906ae663a..ac3571a7cf14 100644 --- a/tests/integration/helpers/network.py +++ b/tests/integration/helpers/network.py @@ -195,21 +195,21 @@ def _ensure_container(self): print("Error removing network blocade container, will try again", str(ex)) time.sleep(i) - image = subprocess.check_output("docker images -q yandex/clickhouse-integration-helper 2>/dev/null", shell=True) + image = subprocess.check_output("docker images -q clickhouse/integration-helper 2>/dev/null", shell=True) if not image.strip(): print("No network image helper, will try download") # for some reason docker api may hang if image doesn't exist, so we download it # before running for i in range(5): try: - subprocess.check_call("docker pull yandex/clickhouse-integration-helper", shell=True) # STYLE_CHECK_ALLOW_SUBPROCESS_CHECK_CALL + subprocess.check_call("docker pull clickhouse/integration-helper", shell=True) # STYLE_CHECK_ALLOW_SUBPROCESS_CHECK_CALL break except: time.sleep(i) else: - raise Exception("Cannot pull yandex/clickhouse-integration-helper image") + raise Exception("Cannot pull clickhouse/integration-helper image") - self._container = self._docker_client.containers.run('yandex/clickhouse-integration-helper', + self._container = self._docker_client.containers.run('clickhouse/integration-helper', auto_remove=True, command=('sleep %s' % self.container_exit_timeout), detach=True, network_mode='host') diff --git a/tests/integration/runner b/tests/integration/runner index 2143d7ebf296..3ff982a9913a 100755 --- a/tests/integration/runner +++ b/tests/integration/runner @@ -19,7 +19,7 @@ CONFIG_DIR_IN_REPO = "programs/server" INTERGATION_DIR_IN_REPO = "tests/integration" SRC_DIR_IN_REPO = "src" -DIND_INTEGRATION_TESTS_IMAGE_NAME = "yandex/clickhouse-integration-tests-runner" +DIND_INTEGRATION_TESTS_IMAGE_NAME = "clickhouse/integration-tests-runner" def check_args_and_update_paths(args): if args.clickhouse_root: @@ -225,19 +225,19 @@ if __name__ == "__main__": if args.docker_compose_images_tags is not None: for img_tag in args.docker_compose_images_tags: [image, tag] = img_tag.split(":") - if image == "yandex/clickhouse-mysql-golang-client": + if image == "clickhouse/mysql-golang-client": env_tags += "-e {}={} ".format("DOCKER_MYSQL_GOLANG_CLIENT_TAG", tag) - elif image == "yandex/clickhouse-mysql-java-client": + elif image == "clickhouse/mysql-java-client": env_tags += "-e {}={} ".format("DOCKER_MYSQL_JAVA_CLIENT_TAG", tag) - elif image == "yandex/clickhouse-mysql-js-client": + elif image == "clickhouse/mysql-js-client": env_tags += "-e {}={} ".format("DOCKER_MYSQL_JS_CLIENT_TAG", tag) - elif image == "yandex/clickhouse-mysql-php-client": + elif image == "clickhouse/mysql-php-client": env_tags += "-e {}={} ".format("DOCKER_MYSQL_PHP_CLIENT_TAG", tag) - elif image == "yandex/clickhouse-postgresql-java-client": + elif image == "clickhouse/postgresql-java-client": env_tags += "-e {}={} ".format("DOCKER_POSTGRESQL_JAVA_CLIENT_TAG", tag) - elif image == "yandex/clickhouse-integration-test": + elif image == "clickhouse/integration-test": env_tags += "-e {}={} ".format("DOCKER_BASE_TAG", tag) - elif image == "yandex/clickhouse-kerberos-kdc": + elif image == "clickhouse/kerberos-kdc": env_tags += "-e {}={}".format("DOCKER_KERBEROS_KDC_TAG", tag) else: logging.info("Unknown image %s" % (image)) diff --git 
a/tests/testflows/aes_encryption/aes_encryption_env/clickhouse-service.yml b/tests/testflows/aes_encryption/aes_encryption_env/clickhouse-service.yml index 0789decf022f..0c9352dbc0b6 100644 --- a/tests/testflows/aes_encryption/aes_encryption_env/clickhouse-service.yml +++ b/tests/testflows/aes_encryption/aes_encryption_env/clickhouse-service.yml @@ -2,7 +2,7 @@ version: '2.3' services: clickhouse: - image: yandex/clickhouse-integration-test + image: clickhouse/integration-test expose: - "9000" - "9009" diff --git a/tests/testflows/datetime64_extended_range/datetime64_extended_range_env/clickhouse-service.yml b/tests/testflows/datetime64_extended_range/datetime64_extended_range_env/clickhouse-service.yml index 0789decf022f..0c9352dbc0b6 100644 --- a/tests/testflows/datetime64_extended_range/datetime64_extended_range_env/clickhouse-service.yml +++ b/tests/testflows/datetime64_extended_range/datetime64_extended_range_env/clickhouse-service.yml @@ -2,7 +2,7 @@ version: '2.3' services: clickhouse: - image: yandex/clickhouse-integration-test + image: clickhouse/integration-test expose: - "9000" - "9009" diff --git a/tests/testflows/example/example_env/clickhouse-service.yml b/tests/testflows/example/example_env/clickhouse-service.yml index 0789decf022f..0c9352dbc0b6 100644 --- a/tests/testflows/example/example_env/clickhouse-service.yml +++ b/tests/testflows/example/example_env/clickhouse-service.yml @@ -2,7 +2,7 @@ version: '2.3' services: clickhouse: - image: yandex/clickhouse-integration-test + image: clickhouse/integration-test expose: - "9000" - "9009" diff --git a/tests/testflows/extended_precision_data_types/extended-precision-data-type_env/clickhouse-service.yml b/tests/testflows/extended_precision_data_types/extended-precision-data-type_env/clickhouse-service.yml index fdd4a8057a90..afb31f77c94c 100644 --- a/tests/testflows/extended_precision_data_types/extended-precision-data-type_env/clickhouse-service.yml +++ b/tests/testflows/extended_precision_data_types/extended-precision-data-type_env/clickhouse-service.yml @@ -2,7 +2,7 @@ version: '2.3' services: clickhouse: - image: yandex/clickhouse-integration-test + image: clickhouse/integration-test expose: - "9000" - "9009" diff --git a/tests/testflows/kerberos/kerberos_env/clickhouse-service.yml b/tests/testflows/kerberos/kerberos_env/clickhouse-service.yml index 14736a264b86..9f30ca3039a3 100644 --- a/tests/testflows/kerberos/kerberos_env/clickhouse-service.yml +++ b/tests/testflows/kerberos/kerberos_env/clickhouse-service.yml @@ -2,7 +2,7 @@ version: '2.3' services: clickhouse: - image: yandex/clickhouse-integration-test:21454 + image: clickhouse/integration-test:21454 expose: - "9000" - "9009" diff --git a/tests/testflows/ldap/authentication/ldap_authentication_env/clickhouse-service.yml b/tests/testflows/ldap/authentication/ldap_authentication_env/clickhouse-service.yml index 0789decf022f..0c9352dbc0b6 100644 --- a/tests/testflows/ldap/authentication/ldap_authentication_env/clickhouse-service.yml +++ b/tests/testflows/ldap/authentication/ldap_authentication_env/clickhouse-service.yml @@ -2,7 +2,7 @@ version: '2.3' services: clickhouse: - image: yandex/clickhouse-integration-test + image: clickhouse/integration-test expose: - "9000" - "9009" diff --git a/tests/testflows/ldap/external_user_directory/ldap_external_user_directory_env/clickhouse-service.yml b/tests/testflows/ldap/external_user_directory/ldap_external_user_directory_env/clickhouse-service.yml index 0789decf022f..0c9352dbc0b6 100644 --- 
a/tests/testflows/ldap/external_user_directory/ldap_external_user_directory_env/clickhouse-service.yml +++ b/tests/testflows/ldap/external_user_directory/ldap_external_user_directory_env/clickhouse-service.yml @@ -2,7 +2,7 @@ version: '2.3' services: clickhouse: - image: yandex/clickhouse-integration-test + image: clickhouse/integration-test expose: - "9000" - "9009" diff --git a/tests/testflows/ldap/role_mapping/ldap_role_mapping_env/clickhouse-service.yml b/tests/testflows/ldap/role_mapping/ldap_role_mapping_env/clickhouse-service.yml index 0789decf022f..0c9352dbc0b6 100644 --- a/tests/testflows/ldap/role_mapping/ldap_role_mapping_env/clickhouse-service.yml +++ b/tests/testflows/ldap/role_mapping/ldap_role_mapping_env/clickhouse-service.yml @@ -2,7 +2,7 @@ version: '2.3' services: clickhouse: - image: yandex/clickhouse-integration-test + image: clickhouse/integration-test expose: - "9000" - "9009" diff --git a/tests/testflows/map_type/map_type_env/clickhouse-service.yml b/tests/testflows/map_type/map_type_env/clickhouse-service.yml index fdd4a8057a90..afb31f77c94c 100755 --- a/tests/testflows/map_type/map_type_env/clickhouse-service.yml +++ b/tests/testflows/map_type/map_type_env/clickhouse-service.yml @@ -2,7 +2,7 @@ version: '2.3' services: clickhouse: - image: yandex/clickhouse-integration-test + image: clickhouse/integration-test expose: - "9000" - "9009" diff --git a/tests/testflows/rbac/rbac_env/clickhouse-service.yml b/tests/testflows/rbac/rbac_env/clickhouse-service.yml index 2d79443dcbbd..ac52e3b83eb7 100755 --- a/tests/testflows/rbac/rbac_env/clickhouse-service.yml +++ b/tests/testflows/rbac/rbac_env/clickhouse-service.yml @@ -2,7 +2,7 @@ version: '2.3' services: clickhouse: - image: yandex/clickhouse-integration-test + image: clickhouse/integration-test expose: - "9000" - "9009" diff --git a/tests/testflows/runner b/tests/testflows/runner index 772a4d01a844..0208512762ce 100755 --- a/tests/testflows/runner +++ b/tests/testflows/runner @@ -14,7 +14,7 @@ DEFAULT_CLICKHOUSE_ROOT = os.path.abspath(os.path.join(CUR_FILE_DIR, "../../")) CURRENT_WORK_DIR = os.getcwd() CONTAINER_NAME = "clickhouse_testflows_tests" -DIND_TESTFLOWS_TESTS_IMAGE_NAME = "yandex/clickhouse-testflows-runner" +DIND_TESTFLOWS_TESTS_IMAGE_NAME = "clickhouse/testflows-runner" def check_args_and_update_paths(args): if not os.path.isabs(args.binary): diff --git a/tests/testflows/window_functions/window_functions_env/clickhouse-service.yml b/tests/testflows/window_functions/window_functions_env/clickhouse-service.yml index fdd4a8057a90..afb31f77c94c 100755 --- a/tests/testflows/window_functions/window_functions_env/clickhouse-service.yml +++ b/tests/testflows/window_functions/window_functions_env/clickhouse-service.yml @@ -2,7 +2,7 @@ version: '2.3' services: clickhouse: - image: yandex/clickhouse-integration-test + image: clickhouse/integration-test expose: - "9000" - "9009" From b0a4f2e0c995ea0ebeff8763f59f49888945ceb5 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 23 Nov 2021 12:05:20 +0300 Subject: [PATCH 185/472] Fix ci runner (cherry picked from commit c2e59cb324f6e2a98dbddc56d7ed2d89b75d30d0) --- tests/integration/ci-runner.py | 83 +++++++++++++++++++--------------- 1 file changed, 46 insertions(+), 37 deletions(-) diff --git a/tests/integration/ci-runner.py b/tests/integration/ci-runner.py index f17eb84e5f32..25d09a8c4c53 100755 --- a/tests/integration/ci-runner.py +++ b/tests/integration/ci-runner.py @@ -16,9 +16,9 @@ NUM_WORKERS = 5 SLEEP_BETWEEN_RETRIES = 5 PARALLEL_GROUP_SIZE = 100 
-CLICKHOUSE_BINARY_PATH = "/usr/bin/clickhouse" -CLICKHOUSE_ODBC_BRIDGE_BINARY_PATH = "/usr/bin/clickhouse-odbc-bridge" -CLICKHOUSE_LIBRARY_BRIDGE_BINARY_PATH = "/usr/bin/clickhouse-library-bridge" +CLICKHOUSE_BINARY_PATH = "usr/bin/clickhouse" +CLICKHOUSE_ODBC_BRIDGE_BINARY_PATH = "usr/bin/clickhouse-odbc-bridge" +CLICKHOUSE_LIBRARY_BRIDGE_BINARY_PATH = "usr/bin/clickhouse-library-bridge" TRIES_COUNT = 10 MAX_TIME_SECONDS = 3600 @@ -122,7 +122,7 @@ def get_test_times(output): def clear_ip_tables_and_restart_daemons(): - logging.info("Dump iptables after run %s", subprocess.check_output("iptables -L", shell=True)) + logging.info("Dump iptables after run %s", subprocess.check_output("sudo iptables -L", shell=True)) try: logging.info("Killing all alive docker containers") subprocess.check_output("timeout -s 9 10m docker kill $(docker ps -q)", shell=True) @@ -135,33 +135,35 @@ def clear_ip_tables_and_restart_daemons(): except subprocess.CalledProcessError as err: logging.info("docker rm excepted: " + str(err)) - try: - logging.info("Stopping docker daemon") - subprocess.check_output("service docker stop", shell=True) - except subprocess.CalledProcessError as err: - logging.info("docker stop excepted: " + str(err)) + # don't restart docker if it's disabled + if os.environ.get("CLICKHOUSE_TESTS_RUNNER_RESTART_DOCKER", '1') == '1': + try: + logging.info("Stopping docker daemon") + subprocess.check_output("service docker stop", shell=True) + except subprocess.CalledProcessError as err: + logging.info("docker stop excepted: " + str(err)) - try: - for i in range(200): - try: - logging.info("Restarting docker %s", i) - subprocess.check_output("service docker start", shell=True) - subprocess.check_output("docker ps", shell=True) - break - except subprocess.CalledProcessError as err: - time.sleep(0.5) - logging.info("Waiting docker to start, current %s", str(err)) - else: - raise Exception("Docker daemon doesn't responding") - except subprocess.CalledProcessError as err: - logging.info("Can't reload docker: " + str(err)) + try: + for i in range(200): + try: + logging.info("Restarting docker %s", i) + subprocess.check_output("service docker start", shell=True) + subprocess.check_output("docker ps", shell=True) + break + except subprocess.CalledProcessError as err: + time.sleep(0.5) + logging.info("Waiting docker to start, current %s", str(err)) + else: + raise Exception("Docker daemon doesn't responding") + except subprocess.CalledProcessError as err: + logging.info("Can't reload docker: " + str(err)) iptables_iter = 0 try: for i in range(1000): iptables_iter = i # when rules will be empty, it will raise exception - subprocess.check_output("iptables -D DOCKER-USER 1", shell=True) + subprocess.check_output("sudo iptables -D DOCKER-USER 1", shell=True) except subprocess.CalledProcessError as err: logging.info("All iptables rules cleared, " + str(iptables_iter) + "iterations, last error: " + str(err)) @@ -175,6 +177,9 @@ def __init__(self, result_path, params): self.image_versions = self.params['docker_images_with_versions'] self.shuffle_groups = self.params['shuffle_test_groups'] self.flaky_check = 'flaky check' in self.params['context_name'] + # if use_tmpfs is not set we assume it to be true, otherwise check + self.use_tmpfs = 'use_tmpfs' not in self.params or self.params['use_tmpfs'] + self.disable_net_host = 'disable_net_host' in self.params and self.params['disable_net_host'] self.start_time = time.time() self.soft_deadline_time = self.start_time + (TASK_TIMEOUT - MAX_TIME_IN_SANDBOX) @@ -231,7 
+236,7 @@ def _install_clickhouse(self, debs_path): log_name = "install_" + f + ".log" log_path = os.path.join(str(self.path()), log_name) with open(log_path, 'w') as log: - cmd = "dpkg -i {}".format(full_path) + cmd = "dpkg -x {} .".format(full_path) logging.info("Executing installation cmd %s", cmd) retcode = subprocess.Popen(cmd, shell=True, stderr=log, stdout=log).wait() if retcode == 0: @@ -248,26 +253,30 @@ def _install_clickhouse(self, debs_path): os.chmod(CLICKHOUSE_BINARY_PATH, 0o777) os.chmod(CLICKHOUSE_ODBC_BRIDGE_BINARY_PATH, 0o777) os.chmod(CLICKHOUSE_LIBRARY_BRIDGE_BINARY_PATH, 0o777) - result_path_bin = os.path.join(str(self.base_path()), "clickhouse") - result_path_odbc_bridge = os.path.join(str(self.base_path()), "clickhouse-odbc-bridge") - result_path_library_bridge = os.path.join(str(self.base_path()), "clickhouse-library-bridge") - shutil.copy(CLICKHOUSE_BINARY_PATH, result_path_bin) - shutil.copy(CLICKHOUSE_ODBC_BRIDGE_BINARY_PATH, result_path_odbc_bridge) - shutil.copy(CLICKHOUSE_LIBRARY_BRIDGE_BINARY_PATH, result_path_library_bridge) - return None, None + shutil.copy(CLICKHOUSE_BINARY_PATH, os.getenv("CLICKHOUSE_TESTS_SERVER_BIN_PATH")) + shutil.copy(CLICKHOUSE_ODBC_BRIDGE_BINARY_PATH, os.getenv("CLICKHOUSE_TESTS_ODBC_BRIDGE_BIN_PATH")) + shutil.copy(CLICKHOUSE_LIBRARY_BRIDGE_BINARY_PATH, os.getenv("CLICKHOUSE_TESTS_LIBRARY_BRIDGE_BIN_PATH")) def _compress_logs(self, dir, relpaths, result_path): subprocess.check_call("tar czf {} -C {} {}".format(result_path, dir, ' '.join(relpaths)), shell=True) # STYLE_CHECK_ALLOW_SUBPROCESS_CHECK_CALL + def _get_runner_opts(self): + result = [] + if self.use_tmpfs: + result.append("--tmpfs") + if self.disable_net_host: + result.append("--disable-net-host") + return " ".join(result) + def _get_all_tests(self, repo_path): image_cmd = self._get_runner_image_cmd(repo_path) out_file = "all_tests.txt" out_file_full = "all_tests_full.txt" cmd = "cd {repo_path}/tests/integration && " \ - "timeout -s 9 1h ./runner --tmpfs {image_cmd} ' --setup-plan' " \ + "timeout -s 9 1h ./runner {runner_opts} {image_cmd} ' --setup-plan' " \ "| tee {out_file_full} | grep '::' | sed 's/ (fixtures used:.*//g' | sed 's/^ *//g' | sed 's/ *$//g' " \ "| grep -v 'SKIPPED' | sort -u > {out_file}".format( - repo_path=repo_path, image_cmd=image_cmd, out_file=out_file, out_file_full=out_file_full) + repo_path=repo_path, runner_opts=self._get_runner_opts(), image_cmd=image_cmd, out_file=out_file, out_file_full=out_file_full) logging.info("Getting all tests with cmd '%s'", cmd) subprocess.check_call(cmd, shell=True) # STYLE_CHECK_ALLOW_SUBPROCESS_CHECK_CALL @@ -437,8 +446,8 @@ def run_test_group(self, repo_path, test_group, tests_in_group, num_tries, num_w test_cmd = ' '.join([test for test in sorted(test_names)]) parallel_cmd = " --parallel {} ".format(num_workers) if num_workers > 0 else "" - cmd = "cd {}/tests/integration && timeout -s 9 1h ./runner --tmpfs {} -t {} {} '-rfEp --run-id={} --color=no --durations=0 {}' | tee {}".format( - repo_path, image_cmd, test_cmd, parallel_cmd, i, _get_deselect_option(self.should_skip_tests()), info_path) + cmd = "cd {}/tests/integration && timeout -s 9 1h ./runner {} {} -t {} {} '-rfEp --run-id={} --color=no --durations=0 {}' | tee {}".format( + repo_path, self._get_runner_opts(), image_cmd, test_cmd, parallel_cmd, i, _get_deselect_option(self.should_skip_tests()), info_path) log_basename = test_group_str + "_" + str(i) + ".log" log_path = os.path.join(repo_path, "tests/integration", log_basename) @@ -565,7 +574,7 @@ def 
run_impl(self, repo_path, build_path): return self.run_flaky_check(repo_path, build_path) self._install_clickhouse(build_path) - logging.info("Dump iptables before run %s", subprocess.check_output("iptables -L", shell=True)) + logging.info("Dump iptables before run %s", subprocess.check_output("sudo iptables -L", shell=True)) all_tests = self._get_all_tests(repo_path) parallel_skip_tests = self._get_parallel_tests_skip_list(repo_path) logging.info("Found %s tests first 3 %s", len(all_tests), ' '.join(all_tests[:3])) From 9cf7dd1e32189576e4287fd12858f18be15f2179 Mon Sep 17 00:00:00 2001 From: "neng.liu" Date: Wed, 24 Nov 2021 11:22:27 +0000 Subject: [PATCH 186/472] add executor --- utils/local-engine/CMakeLists.txt | 18 ++++++---- .../Parser/SerializedPlanParser.cpp | 33 ++++++++++++++++--- .../Parser/SerializedPlanParser.h | 13 +++++++- .../io/kyligence/jni/engine/LocalEngine.java | 14 ++++++++ 4 files changed, 66 insertions(+), 12 deletions(-) diff --git a/utils/local-engine/CMakeLists.txt b/utils/local-engine/CMakeLists.txt index 1d6173d8ea0e..7aae21f49a6d 100644 --- a/utils/local-engine/CMakeLists.txt +++ b/utils/local-engine/CMakeLists.txt @@ -52,12 +52,12 @@ function(add_cxx_compile_options option) add_compile_options("$<$,CXX>:${option}>") endfunction() add_cxx_compile_options(-Wzero-as-null-pointer-constant) -#add_subdirectory(Substrait) -#add_subdirectory(Builder) -#add_headers_and_sources(builder Builder) -#add_headers_and_sources(parser Parser) +add_subdirectory(Substrait) +add_subdirectory(Builder) +add_headers_and_sources(builder Builder) +add_headers_and_sources(parser Parser) #include (../../cmake/find/parquet.cmake) -#include_directories(${CMAKE_CURRENT_BINARY_DIR}) +include_directories(${CMAKE_CURRENT_BINARY_DIR}) include_directories(${ClickHouse_SOURCE_DIR}/utils/local-engine) #add_executable (local_engine # local_engine.cpp @@ -94,7 +94,13 @@ set (CLICKHOUSE_SERVER_LINK set(LOCALENGINE_SHARED_LIB local_engine_jni) -add_library(${LOCALENGINE_SHARED_LIB} SHARED ${JNI_NATIVE_SOURCES}) +add_library(${LOCALENGINE_SHARED_LIB} SHARED + ${JNI_NATIVE_SOURCES} + ${builder_headers} + ${builder_sources} + ${parser_headers} + ${parser_sources} + ) set_property(TARGET ${LOCALENGINE_SHARED_LIB} PROPERTY POSITION_INDEPENDENT_CODE ON) #add_executable(${LOCALENGINE_SHARED_LIB} ${JNI_NATIVE_SOURCES}) add_dependencies(${LOCALENGINE_SHARED_LIB} local_engine_headers) diff --git a/utils/local-engine/Parser/SerializedPlanParser.cpp b/utils/local-engine/Parser/SerializedPlanParser.cpp index ec4c4e0aa2fd..c8205b0eb1db 100644 --- a/utils/local-engine/Parser/SerializedPlanParser.cpp +++ b/utils/local-engine/Parser/SerializedPlanParser.cpp @@ -1,10 +1,12 @@ #include "SerializedPlanParser.h" #include +#include #include -#include +#include #include #include #include +#include #include DB::BatchParquetFileSourcePtr dbms::SerializedPlanParser::parseReadRealWithLocalFile(const io::substrait::ReadRel& rel) @@ -138,8 +140,29 @@ void dbms::LocalExecutor::execute(DB::QueryPlanPtr query_plan) { QueryPlanOptimizationSettings optimization_settings{.optimize_plan = false}; auto query_pipeline = query_plan->buildQueryPipeline(optimization_settings, BuildQueryPipelineSettings()); - auto executor = DB::PullingPipelineExecutor(*query_pipeline); - DB::Chunk chunk; - // TODO pull chunk + this->executor = std::make_unique(*query_pipeline); + this->header = query_plan->getCurrentDataStream().header; + this->ch_column_to_arrow_column = std::make_unique(header, "Arrow", false); +} +void 
dbms::LocalExecutor::writeChunkToArrowString(DB::Chunk &chunk, std::string & arrowChunk) +{ + std::shared_ptr arrow_table; + ch_column_to_arrow_column->chChunkToArrowTable(arrow_table, chunk, chunk.getNumColumns()); + DB::WriteBufferFromString buf(arrowChunk); + auto out_stream = std::make_shared(buf); + arrow::Result> writer_status; + writer_status = arrow::ipc::MakeFileWriter(out_stream.get(), arrow_table->schema()); + if (!writer_status.ok()) + throw std::runtime_error("Error while opening a table writer"); + auto writer = *writer_status; + auto write_status = writer->WriteTable(*arrow_table, 1000000); + if (writer_status.ok()) + { + throw std::runtime_error("Error while writing a table"); + } + auto close_status = writer->Close(); + if (close_status.ok()) + { + throw std::runtime_error("Error while close a table"); + } } - diff --git a/utils/local-engine/Parser/SerializedPlanParser.h b/utils/local-engine/Parser/SerializedPlanParser.h index 603a2cf926db..cde348ec7206 100644 --- a/utils/local-engine/Parser/SerializedPlanParser.h +++ b/utils/local-engine/Parser/SerializedPlanParser.h @@ -10,6 +10,8 @@ #include #include #include +#include +#include namespace DB { @@ -66,7 +68,16 @@ class SerializedPlanParser class LocalExecutor { - static void execute(QueryPlanPtr query_plan); +public: + void execute(QueryPlanPtr query_plan); + std::string next(); + bool hasNext(); + +private: + void writeChunkToArrowString(Chunk& chunk, std::string &arrowChunk); + std::unique_ptr executor; + Block header; + std::unique_ptr ch_column_to_arrow_column; }; } diff --git a/utils/local-engine/java/src/main/java/io/kyligence/jni/engine/LocalEngine.java b/utils/local-engine/java/src/main/java/io/kyligence/jni/engine/LocalEngine.java index 1a545cafd9b1..1a81a8b10f42 100644 --- a/utils/local-engine/java/src/main/java/io/kyligence/jni/engine/LocalEngine.java +++ b/utils/local-engine/java/src/main/java/io/kyligence/jni/engine/LocalEngine.java @@ -3,6 +3,8 @@ public class LocalEngine { public static native long test(int a, int b); + public static native void initEngineEnv(); + public static void main(String[] args) throws InterruptedException { System.out.println("start load library"); System.load("/home/kyligence/Documents/code/ClickHouse/cmake-build-debug/utils/local-engine/liblocal_engine_jnid.so"); @@ -10,4 +12,16 @@ public static void main(String[] args) throws InterruptedException { long result = test(1, 2); System.out.println(result); } + + private byte[] plan; + + public void execute() {} + + public boolean hasNext() { + return true; + } + + public byte[] next() { + return null; + } } From 5cf73d5c0375d1917aa8c589586235240a01f2ce Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 24 Nov 2021 19:05:16 +0000 Subject: [PATCH 187/472] Backport #31673 to 21.9: Remove outstandingly wrong code with LIVE VIEW --- src/IO/WriteBufferValidUTF8.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/IO/WriteBufferValidUTF8.cpp b/src/IO/WriteBufferValidUTF8.cpp index ecdf38eae340..270d825cff83 100644 --- a/src/IO/WriteBufferValidUTF8.cpp +++ b/src/IO/WriteBufferValidUTF8.cpp @@ -118,9 +118,6 @@ void WriteBufferValidUTF8::nextImpl() memory[i] = p[i]; working_buffer = Buffer(&memory[cnt], memory.data() + memory.size()); - - /// Propagate next() to the output buffer - output_buffer.next(); } From 662abe9068f1ba19eccf69b35288cbc8946192d5 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 24 Nov 2021 22:00:00 +0000 Subject: [PATCH 188/472] Backport #31638 to 21.9: Fix rename dictionary --- 
src/Databases/DatabaseAtomic.cpp | 13 ++---- src/Databases/DatabaseOnDisk.cpp | 9 ++-- src/Storages/StorageDictionary.cpp | 36 ++++++++++++---- ...55_rename_move_materialized_view.reference | 23 +++++++--- .../01155_rename_move_materialized_view.sql | 42 ++++++++++++++----- .../01191_rename_dictionary.reference | 8 +++- .../0_stateless/01191_rename_dictionary.sql | 13 ++++-- 7 files changed, 103 insertions(+), 41 deletions(-) diff --git a/src/Databases/DatabaseAtomic.cpp b/src/Databases/DatabaseAtomic.cpp index b69b74451c76..f6d6674553b8 100644 --- a/src/Databases/DatabaseAtomic.cpp +++ b/src/Databases/DatabaseAtomic.cpp @@ -226,15 +226,8 @@ void DatabaseAtomic::renameTable(ContextPtr local_context, const String & table_ StoragePtr table = getTableUnlocked(table_name, db_lock); - if (table->isDictionary() && !dictionary) - { - if (exchange) - throw Exception(ErrorCodes::INCORRECT_QUERY, - "Use EXCHANGE DICTIONARIES for dictionaries and EXCHANGE TABLES for tables."); - else - throw Exception(ErrorCodes::INCORRECT_QUERY, - "Use RENAME DICTIONARY for dictionaries and RENAME TABLE for tables."); - } + if (dictionary && !table->isDictionary()) + throw Exception(ErrorCodes::INCORRECT_QUERY, "Use RENAME/EXCHANGE TABLE (instead of RENAME/EXCHANGE DICTIONARY) for tables"); table->checkTableCanBeRenamed(); assert_can_move_mat_view(table); @@ -242,6 +235,8 @@ void DatabaseAtomic::renameTable(ContextPtr local_context, const String & table_ if (exchange) { other_table = other_db.getTableUnlocked(to_table_name, other_db_lock); + if (dictionary && !other_table->isDictionary()) + throw Exception(ErrorCodes::INCORRECT_QUERY, "Use RENAME/EXCHANGE TABLE (instead of RENAME/EXCHANGE DICTIONARY) for tables"); other_table->checkTableCanBeRenamed(); assert_can_move_mat_view(other_table); } diff --git a/src/Databases/DatabaseOnDisk.cpp b/src/Databases/DatabaseOnDisk.cpp index 6facbbb03d94..f800ddcbe284 100644 --- a/src/Databases/DatabaseOnDisk.cpp +++ b/src/Databases/DatabaseOnDisk.cpp @@ -39,6 +39,7 @@ namespace ErrorCodes extern const int TABLE_ALREADY_EXISTS; extern const int EMPTY_LIST_OF_COLUMNS_PASSED; extern const int DATABASE_NOT_EMPTY; + extern const int INCORRECT_QUERY; } @@ -403,8 +404,6 @@ void DatabaseOnDisk::renameTable( { if (exchange) throw Exception("Tables can be exchanged only in Atomic databases", ErrorCodes::NOT_IMPLEMENTED); - if (dictionary) - throw Exception("Dictionaries can be renamed only in Atomic databases", ErrorCodes::NOT_IMPLEMENTED); bool from_ordinary_to_atomic = false; bool from_atomic_to_ordinary = false; @@ -425,8 +424,12 @@ void DatabaseOnDisk::renameTable( String table_metadata_path; ASTPtr attach_query; /// DatabaseLazy::detachTable may return nullptr even if table exists, so we need tryGetTable for this case. 
- StoragePtr table = tryGetTable(table_name, getContext()); + StoragePtr table = tryGetTable(table_name, local_context); + if (dictionary && table && !table->isDictionary()) + throw Exception("Use RENAME/EXCHANGE TABLE (instead of RENAME/EXCHANGE DICTIONARY) for tables", ErrorCodes::INCORRECT_QUERY); + detachTable(table_name); + UUID prev_uuid = UUIDHelpers::Nil; try { diff --git a/src/Storages/StorageDictionary.cpp b/src/Storages/StorageDictionary.cpp index e4d34c9a24bf..da90ea624c6d 100644 --- a/src/Storages/StorageDictionary.cpp +++ b/src/Storages/StorageDictionary.cpp @@ -217,19 +217,39 @@ void StorageDictionary::renameInMemory(const StorageID & new_table_id) auto old_table_id = getStorageID(); IStorage::renameInMemory(new_table_id); - bool has_configuration = false; + assert((location == Location::SameDatabaseAndNameAsDictionary) == (getConfiguration().get() != nullptr)); + if (location != Location::SameDatabaseAndNameAsDictionary) + return; + + /// It's DDL dictionary, need to update configuration and reload + + bool move_to_atomic = old_table_id.uuid == UUIDHelpers::Nil && new_table_id.uuid != UUIDHelpers::Nil; + bool move_to_ordinary = old_table_id.uuid != UUIDHelpers::Nil && new_table_id.uuid == UUIDHelpers::Nil; + assert(old_table_id.uuid == new_table_id.uuid || move_to_atomic || move_to_ordinary); + { std::lock_guard lock(dictionary_config_mutex); - if (configuration) - { - has_configuration = true; - configuration->setString("dictionary.database", new_table_id.database_name); - configuration->setString("dictionary.name", new_table_id.table_name); - } + configuration->setString("dictionary.database", new_table_id.database_name); + configuration->setString("dictionary.name", new_table_id.table_name); + if (move_to_atomic) + configuration->setString("dictionary.uuid", toString(new_table_id.uuid)); + else if (move_to_ordinary) + configuration->remove("dictionary.uuid"); } - if (has_configuration) + /// Dictionary is moving between databases of different engines or is renaming inside Ordinary database + bool recreate_dictionary = old_table_id.uuid == UUIDHelpers::Nil || new_table_id.uuid == UUIDHelpers::Nil; + + if (recreate_dictionary) + { + /// It's too hard to update both name and uuid, better to reload dictionary with new name + removeDictionaryConfigurationFromRepository(); + auto repository = std::make_unique(*this); + remove_repository_callback = getContext()->getExternalDictionariesLoader().addConfigRepository(std::move(repository)); + /// Dictionary will be reloaded lazily to avoid exceptions in the middle of renaming + } + else { const auto & external_dictionaries_loader = getContext()->getExternalDictionariesLoader(); auto result = external_dictionaries_loader.getLoadResult(old_table_id.getInternalDictionaryName()); diff --git a/tests/queries/0_stateless/01155_rename_move_materialized_view.reference b/tests/queries/0_stateless/01155_rename_move_materialized_view.reference index 635fd16620d7..452ffe7bae44 100644 --- a/tests/queries/0_stateless/01155_rename_move_materialized_view.reference +++ b/tests/queries/0_stateless/01155_rename_move_materialized_view.reference @@ -4,8 +4,13 @@ 1 mv1 before moving tablesmv1 1 mv2 before moving tablesmv2 1 src before moving tables +asdf +asdf +test_01155_ordinary dict1 00000000-0000-0000-0000-000000000000 +asdf ordinary: .inner.mv1 +dict dist dst mv1 @@ -14,6 +19,7 @@ src ordinary after rename: atomic after rename: .inner_id. 
+dict dist dst mv1 @@ -33,12 +39,14 @@ src 3 src after moving tables 3 src after renaming database 3 src before moving tables -.inner_id. -dist -dst -mv1 -mv2 -src +asdf +test_01155_ordinary .inner_id. +test_01155_ordinary dict +test_01155_ordinary dist +test_01155_ordinary dst +test_01155_ordinary mv1 +test_01155_ordinary mv2 +test_01155_ordinary src CREATE DATABASE test_01155_atomic\nENGINE = Atomic 4 .inner.mv1 after renaming databasemv1 4 .inner.mv1 after renaming tablesmv1 @@ -60,8 +68,11 @@ CREATE DATABASE test_01155_atomic\nENGINE = Atomic 4 src after renaming database 4 src after renaming tables 4 src before moving tables +asdf +test_01155_ordinary dict 00000000-0000-0000-0000-000000000000 test_01155_ordinary: .inner.mv1 +dict dist dst mv1 diff --git a/tests/queries/0_stateless/01155_rename_move_materialized_view.sql b/tests/queries/0_stateless/01155_rename_move_materialized_view.sql index 882be2702d81..5863117dbc44 100644 --- a/tests/queries/0_stateless/01155_rename_move_materialized_view.sql +++ b/tests/queries/0_stateless/01155_rename_move_materialized_view.sql @@ -5,15 +5,27 @@ CREATE DATABASE test_01155_ordinary ENGINE=Ordinary; CREATE DATABASE test_01155_atomic ENGINE=Atomic; USE test_01155_ordinary; -CREATE TABLE src (s String) ENGINE=MergeTree() PARTITION BY tuple() ORDER BY s; -CREATE MATERIALIZED VIEW mv1 (s String) ENGINE=MergeTree() PARTITION BY tuple() ORDER BY s AS SELECT (*,).1 || 'mv1' as s FROM src; -CREATE TABLE dst (s String) ENGINE=MergeTree() PARTITION BY tuple() ORDER BY s; -CREATE MATERIALIZED VIEW mv2 TO dst (s String) AS SELECT (*,).1 || 'mv2' as s FROM src; -CREATE TABLE dist (s String) Engine=Distributed(test_shard_localhost, test_01155_ordinary, src); -INSERT INTO dist VALUES ('before moving tables'); +CREATE TABLE src (s String, x String DEFAULT 'a') ENGINE=MergeTree() PARTITION BY tuple() ORDER BY s; +CREATE MATERIALIZED VIEW mv1 (s String, x String DEFAULT 'b') ENGINE=MergeTree() PARTITION BY tuple() ORDER BY s AS SELECT (*,).1 || 'mv1' as s FROM src; +CREATE TABLE dst (s String, x String DEFAULT 'c') ENGINE=MergeTree() PARTITION BY tuple() ORDER BY s; +CREATE MATERIALIZED VIEW mv2 TO dst (s String, x String DEFAULT 'd') AS SELECT (*,).1 || 'mv2' as s FROM src; +CREATE TABLE dist (s String, x String DEFAULT 'asdf') ENGINE=Distributed(test_shard_localhost, test_01155_ordinary, src); +INSERT INTO dist(s) VALUES ('before moving tables'); SYSTEM FLUSH DISTRIBUTED dist; + +CREATE DICTIONARY dict (s String, x String DEFAULT 'qwerty') PRIMARY KEY s +SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'dist' DB 'test_01155_ordinary')) +LIFETIME(MIN 0 MAX 2) LAYOUT(COMPLEX_KEY_CACHE(SIZE_IN_CELLS 123)); + -- FIXME Cannot convert column `1` because it is non constant in source stream but must be constant in result SELECT materialize(1), substr(_table, 1, 10), s FROM merge('test_01155_ordinary', '') ORDER BY _table, s; +SELECT dictGet('test_01155_ordinary.dict', 'x', 'before moving tables'); + +RENAME DICTIONARY test_01155_ordinary.dict TO test_01155_ordinary.dict1; +SELECT dictGet('test_01155_ordinary.dict1', 'x', 'before moving tables'); +SELECT database, name, uuid FROM system.dictionaries WHERE database='test_01155_ordinary'; +RENAME TABLE test_01155_ordinary.dict1 TO test_01155_ordinary.dict; +SELECT dictGet('test_01155_ordinary.dict', 'x', 'before moving tables'); -- Move tables with materialized views from Ordinary to Atomic SELECT 'ordinary:'; @@ -22,7 +34,10 @@ RENAME TABLE test_01155_ordinary.mv1 TO test_01155_atomic.mv1; RENAME 
TABLE test_01155_ordinary.mv2 TO test_01155_atomic.mv2; RENAME TABLE test_01155_ordinary.dst TO test_01155_atomic.dst; RENAME TABLE test_01155_ordinary.src TO test_01155_atomic.src; +SET check_table_dependencies=0; RENAME TABLE test_01155_ordinary.dist TO test_01155_atomic.dist; +SET check_table_dependencies=1; +RENAME DICTIONARY test_01155_ordinary.dict TO test_01155_atomic.dict; SELECT 'ordinary after rename:'; SELECT substr(name, 1, 10) FROM system.tables WHERE database='test_01155_ordinary'; SELECT 'atomic after rename:'; @@ -30,17 +45,19 @@ SELECT substr(name, 1, 10) FROM system.tables WHERE database='test_01155_atomic' DROP DATABASE test_01155_ordinary; USE default; -INSERT INTO test_01155_atomic.src VALUES ('after moving tables'); -SELECT materialize(2), substr(_table, 1, 10), s FROM merge('test_01155_atomic', '') ORDER BY _table, s; -- { serverError 81 } +INSERT INTO test_01155_atomic.src(s) VALUES ('after moving tables'); +--SELECT materialize(2), substr(_table, 1, 10), s FROM merge('test_01155_atomic', '') ORDER BY _table, s; -- { serverError 81 } +--SELECT dictGet('test_01155_ordinary.dict', 'x', 'after moving tables'); -- { serverError 36 } RENAME DATABASE test_01155_atomic TO test_01155_ordinary; USE test_01155_ordinary; -INSERT INTO dist VALUES ('after renaming database'); +INSERT INTO dist(s) VALUES ('after renaming database'); SYSTEM FLUSH DISTRIBUTED dist; SELECT materialize(3), substr(_table, 1, 10), s FROM merge('test_01155_ordinary', '') ORDER BY _table, s; +SELECT dictGet('test_01155_ordinary.dict', 'x', 'after renaming database'); -SELECT substr(name, 1, 10) FROM system.tables WHERE database='test_01155_ordinary'; +SELECT database, substr(name, 1, 10) FROM system.tables WHERE database like 'test_01155_%'; -- Move tables back RENAME DATABASE test_01155_ordinary TO test_01155_atomic; @@ -53,10 +70,13 @@ RENAME TABLE test_01155_atomic.mv2 TO test_01155_ordinary.mv2; RENAME TABLE test_01155_atomic.dst TO test_01155_ordinary.dst; RENAME TABLE test_01155_atomic.src TO test_01155_ordinary.src; RENAME TABLE test_01155_atomic.dist TO test_01155_ordinary.dist; +RENAME DICTIONARY test_01155_atomic.dict TO test_01155_ordinary.dict; -INSERT INTO dist VALUES ('after renaming tables'); +INSERT INTO dist(s) VALUES ('after renaming tables'); SYSTEM FLUSH DISTRIBUTED dist; SELECT materialize(4), substr(_table, 1, 10), s FROM merge('test_01155_ordinary', '') ORDER BY _table, s; +SELECT dictGet('test_01155_ordinary.dict', 'x', 'after renaming tables'); +SELECT database, name, uuid FROM system.dictionaries WHERE database='test_01155_ordinary'; SELECT 'test_01155_ordinary:'; SHOW TABLES FROM test_01155_ordinary; SELECT 'test_01155_atomic:'; diff --git a/tests/queries/0_stateless/01191_rename_dictionary.reference b/tests/queries/0_stateless/01191_rename_dictionary.reference index 7b6ac0526888..8548815735cb 100644 --- a/tests/queries/0_stateless/01191_rename_dictionary.reference +++ b/tests/queries/0_stateless/01191_rename_dictionary.reference @@ -1,7 +1,13 @@ dict NOT_LOADED _ Memory dict Dictionary -dict1 NOT_LOADED +t Memory +t NOT_LOADED +_ Memory +dict Memory +t Dictionary +test +dict1 LOADED _ Memory dict1 Dictionary test diff --git a/tests/queries/0_stateless/01191_rename_dictionary.sql b/tests/queries/0_stateless/01191_rename_dictionary.sql index 264c527ccca9..74c937232f74 100644 --- a/tests/queries/0_stateless/01191_rename_dictionary.sql +++ b/tests/queries/0_stateless/01191_rename_dictionary.sql @@ -2,6 +2,7 @@ DROP DATABASE IF EXISTS test_01191; CREATE DATABASE test_01191 
ENGINE=Atomic; CREATE TABLE test_01191._ (n UInt64, s String) ENGINE = Memory(); +CREATE TABLE test_01191.t (n UInt64, s String) ENGINE = Memory(); CREATE DICTIONARY test_01191.dict (n UInt64, s String) PRIMARY KEY n @@ -14,9 +15,15 @@ SELECT name, status FROM system.dictionaries WHERE database='test_01191'; SELECT name, engine FROM system.tables WHERE database='test_01191' ORDER BY name; RENAME DICTIONARY test_01191.table TO test_01191.table1; -- {serverError 60} -EXCHANGE TABLES test_01191.table AND test_01191.dict; -- {serverError 60} -EXCHANGE TABLES test_01191.dict AND test_01191.table; -- {serverError 80} -RENAME TABLE test_01191.dict TO test_01191.dict1; -- {serverError 80} +EXCHANGE DICTIONARIES test_01191._ AND test_01191.dict; -- {serverError 80} +EXCHANGE TABLES test_01191.t AND test_01191.dict; +SELECT name, status FROM system.dictionaries WHERE database='test_01191'; +SELECT name, engine FROM system.tables WHERE database='test_01191' ORDER BY name; +SELECT dictGet(test_01191.t, 's', toUInt64(42)); +EXCHANGE TABLES test_01191.dict AND test_01191.t; +RENAME DICTIONARY test_01191.t TO test_01191.dict1; -- {serverError 80} +DROP DICTIONARY test_01191.t; -- {serverError 80} +DROP TABLE test_01191.t; CREATE DATABASE dummy_db ENGINE=Atomic; RENAME DICTIONARY test_01191.dict TO dummy_db.dict1; From b815acc3dd128c3bf1142dfd937f83f4568ce50e Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 25 Nov 2021 07:00:54 +0000 Subject: [PATCH 189/472] Backport #31742 to 21.9: Fix usage of `Buffer` table engine with type `Map` --- src/Columns/ColumnMap.cpp | 2 +- .../02124_buffer_with_type_map_long.reference | 1 + .../02124_buffer_with_type_map_long.sh | 35 +++++++++++++++++++ 3 files changed, 37 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/02124_buffer_with_type_map_long.reference create mode 100755 tests/queries/0_stateless/02124_buffer_with_type_map_long.sh diff --git a/src/Columns/ColumnMap.cpp b/src/Columns/ColumnMap.cpp index 2e50d95826c6..2d01a8d13598 100644 --- a/src/Columns/ColumnMap.cpp +++ b/src/Columns/ColumnMap.cpp @@ -263,7 +263,7 @@ void ColumnMap::getExtremes(Field & min, Field & max) const void ColumnMap::forEachSubcolumn(ColumnCallback callback) { - nested->forEachSubcolumn(callback); + callback(nested); } bool ColumnMap::structureEquals(const IColumn & rhs) const diff --git a/tests/queries/0_stateless/02124_buffer_with_type_map_long.reference b/tests/queries/0_stateless/02124_buffer_with_type_map_long.reference new file mode 100644 index 000000000000..d86bac9de59a --- /dev/null +++ b/tests/queries/0_stateless/02124_buffer_with_type_map_long.reference @@ -0,0 +1 @@ +OK diff --git a/tests/queries/0_stateless/02124_buffer_with_type_map_long.sh b/tests/queries/0_stateless/02124_buffer_with_type_map_long.sh new file mode 100755 index 000000000000..1b2197ef9431 --- /dev/null +++ b/tests/queries/0_stateless/02124_buffer_with_type_map_long.sh @@ -0,0 +1,35 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS t_buffer_map" +$CLICKHOUSE_CLIENT -q "CREATE TABLE t_buffer_map(m1 Map(String, UInt64), m2 Map(String, String)) ENGINE = Buffer('', '', 1, 1, 1, 1000000000000, 1000000000000, 1000000000000, 1000000000000)" + +function insert1 +{ + while true; do + $CLICKHOUSE_CLIENT -q "INSERT INTO t_buffer_map SELECT (range(10), range(10)), (range(10), range(10)) from numbers(100)" + done +} + +function select1 +{ + while true; do + $CLICKHOUSE_CLIENT -q "SELECT * FROM t_buffer_map" 2> /dev/null > /dev/null + done +} + +TIMEOUT=10 + +export -f insert1 +export -f select1 + +timeout $TIMEOUT bash -c insert1 & +timeout $TIMEOUT bash -c select1 & + +wait + +echo "OK" +$CLICKHOUSE_CLIENT -q "DROP TABLE t_buffer_map" From 9272de0465028c9e59f71d16aca595a16afd7149 Mon Sep 17 00:00:00 2001 From: "neng.liu" Date: Thu, 25 Nov 2021 07:40:55 +0000 Subject: [PATCH 190/472] first jni --- utils/local-engine/CMakeLists.txt | 6 +- .../Parser/SerializedPlanParser.cpp | 28 +++++++++- .../Parser/SerializedPlanParser.h | 6 +- .../java/io/kyligence/jni/engine/Chunk.java | 53 ------------------ .../io/kyligence/jni/engine/LocalEngine.java | 23 +++++--- utils/local-engine/local_engine_jni.cpp | 55 +++++++++++++++++-- 6 files changed, 98 insertions(+), 73 deletions(-) delete mode 100644 utils/local-engine/java/src/main/java/io/kyligence/jni/engine/Chunk.java diff --git a/utils/local-engine/CMakeLists.txt b/utils/local-engine/CMakeLists.txt index 7aae21f49a6d..8a12f8822e8d 100644 --- a/utils/local-engine/CMakeLists.txt +++ b/utils/local-engine/CMakeLists.txt @@ -23,7 +23,6 @@ set(JNI_NATIVE_SOURCES set(JAVA_MAIN_CLASSES ${JAVA_MAIN_CLASS_PATH}/io/kyligence/jni/engine/LocalEngine.java - ${JAVA_MAIN_CLASS_PATH}/io/kyligence/jni/engine/Chunk.java ) # Create the jni header file (from the java class). 
set(JNI_HEADERS_DIR ${PROJECT_SOURCE_DIR}/utils/local-engine/include) @@ -85,8 +84,9 @@ set (CLICKHOUSE_SERVER_LINK clickhouse_aggregate_functions clickhouse_common_io clickhouse_functions -# clickhouse_storages_system -# substrait + clickhouse_storages_system + arrow_shared + substrait ) #target_link_libraries(local_engine ${CLICKHOUSE_SERVER_LINK} ) diff --git a/utils/local-engine/Parser/SerializedPlanParser.cpp b/utils/local-engine/Parser/SerializedPlanParser.cpp index c8205b0eb1db..8894d78c6907 100644 --- a/utils/local-engine/Parser/SerializedPlanParser.cpp +++ b/utils/local-engine/Parser/SerializedPlanParser.cpp @@ -80,6 +80,12 @@ DB::QueryPlanPtr dbms::SerializedPlanParser::parse(std::unique_ptr(); + plan_ptr->ParseFromString(plan); + return parse(std::move(plan_ptr)); +} DB::Chunk DB::BatchParquetFileSource::generate() { while (!finished_generate) @@ -144,11 +150,11 @@ void dbms::LocalExecutor::execute(DB::QueryPlanPtr query_plan) this->header = query_plan->getCurrentDataStream().header; this->ch_column_to_arrow_column = std::make_unique(header, "Arrow", false); } -void dbms::LocalExecutor::writeChunkToArrowString(DB::Chunk &chunk, std::string & arrowChunk) +void dbms::LocalExecutor::writeChunkToArrowString(DB::Chunk &chunk, std::string & arrow_chunk) { std::shared_ptr arrow_table; ch_column_to_arrow_column->chChunkToArrowTable(arrow_table, chunk, chunk.getNumColumns()); - DB::WriteBufferFromString buf(arrowChunk); + DB::WriteBufferFromString buf(arrow_chunk); auto out_stream = std::make_shared(buf); arrow::Result> writer_status; writer_status = arrow::ipc::MakeFileWriter(out_stream.get(), arrow_table->schema()); @@ -166,3 +172,21 @@ void dbms::LocalExecutor::writeChunkToArrowString(DB::Chunk &chunk, std::string throw std::runtime_error("Error while close a table"); } } +bool dbms::LocalExecutor::hasNext() +{ + bool has_next; + if (this->current_chunk->empty()) + { + this->current_chunk = std::make_unique(); + has_next = this->executor->pull(*this->current_chunk); + } else { + has_next = true; + } + return has_next; +} +std::string dbms::LocalExecutor::next() +{ + std::string arrow_chunk; + writeChunkToArrowString(*this->current_chunk, arrow_chunk); + return arrow_chunk; +} diff --git a/utils/local-engine/Parser/SerializedPlanParser.h b/utils/local-engine/Parser/SerializedPlanParser.h index cde348ec7206..1211492b6c01 100644 --- a/utils/local-engine/Parser/SerializedPlanParser.h +++ b/utils/local-engine/Parser/SerializedPlanParser.h @@ -23,7 +23,7 @@ struct FilesInfo }; using FilesInfoPtr = std::shared_ptr; -class BatchParquetFileSource : DB::SourceWithProgress +class BatchParquetFileSource : public DB::SourceWithProgress { public: BatchParquetFileSource(FilesInfoPtr files, const Block & header); @@ -59,6 +59,7 @@ using namespace DB; class SerializedPlanParser { public: + static DB::QueryPlanPtr parse(std::string& plan); static DB::QueryPlanPtr parse(std::unique_ptr plan); static DB::BatchParquetFileSourcePtr parseReadRealWithLocalFile(const io::substrait::ReadRel& rel); static DB::Block parseNameStruct(const io::substrait::Type_NamedStruct& struct_); @@ -74,10 +75,11 @@ class LocalExecutor bool hasNext(); private: - void writeChunkToArrowString(Chunk& chunk, std::string &arrowChunk); + void writeChunkToArrowString(Chunk& chunk, std::string & arrow_chunk); std::unique_ptr executor; Block header; std::unique_ptr ch_column_to_arrow_column; + std::unique_ptr current_chunk; }; } diff --git a/utils/local-engine/java/src/main/java/io/kyligence/jni/engine/Chunk.java 
b/utils/local-engine/java/src/main/java/io/kyligence/jni/engine/Chunk.java deleted file mode 100644 index 4f9d5deae74b..000000000000 --- a/utils/local-engine/java/src/main/java/io/kyligence/jni/engine/Chunk.java +++ /dev/null @@ -1,53 +0,0 @@ -package io.kyligence.jni.engine; - -public class Chunk { - public static class ColumnInfo { - // The data stored in these two allocations need to maintain binary compatible. We can - // directly pass this buffer to external components. - private long nulls; - private long data; - - // Only set if type is Array or Map. - private long lengthData; - private long offsetData; - - public ColumnInfo(long nulls, long data, long lengthData, long offsetData) { - this.nulls = nulls; - this.data = data; - this.lengthData = lengthData; - this.offsetData = offsetData; - } - - public long getNulls() { - return nulls; - } - - public long getData() { - return data; - } - - public long getLengthData() { - return lengthData; - } - - public long getOffsetData() { - return offsetData; - } - } - - private final ColumnInfo[] columns; - private final long rowCount; - - public Chunk(ColumnInfo[] columns, long rowCount) { - this.columns = columns; - this.rowCount = rowCount; - } - - public ColumnInfo[] getColumns() { - return columns; - } - - public long getRowCount() { - return rowCount; - } -} diff --git a/utils/local-engine/java/src/main/java/io/kyligence/jni/engine/LocalEngine.java b/utils/local-engine/java/src/main/java/io/kyligence/jni/engine/LocalEngine.java index 1a81a8b10f42..0a11b1f955fe 100644 --- a/utils/local-engine/java/src/main/java/io/kyligence/jni/engine/LocalEngine.java +++ b/utils/local-engine/java/src/main/java/io/kyligence/jni/engine/LocalEngine.java @@ -1,6 +1,9 @@ package io.kyligence.jni.engine; -public class LocalEngine { +import java.io.Closeable; +import java.io.IOException; + +public class LocalEngine implements Closeable { public static native long test(int a, int b); public static native void initEngineEnv(); @@ -13,15 +16,19 @@ public static void main(String[] args) throws InterruptedException { System.out.println(result); } + private long nativeExecutor; private byte[] plan; - public void execute() {} - - public boolean hasNext() { - return true; + public LocalEngine(byte[] plan) { + this.plan = plan; } - public byte[] next() { - return null; - } + public native void execute(); + + public native boolean hasNext(); + + public native byte[] next(); + + @Override + public native void close() throws IOException; } diff --git a/utils/local-engine/local_engine_jni.cpp b/utils/local-engine/local_engine_jni.cpp index 8fad2fb446db..4d5c2b257262 100644 --- a/utils/local-engine/local_engine_jni.cpp +++ b/utils/local-engine/local_engine_jni.cpp @@ -8,7 +8,6 @@ #include #include #include -#include #include #include @@ -21,19 +20,17 @@ #include #include #include -#include #include #include #include #include #include +#include #include -#include #include #include #include -//#include using namespace DB; using namespace rapidjson; @@ -260,4 +257,52 @@ JNIEXPORT jlong JNICALL Java_io_kyligence_jni_engine_LocalEngine_test std::cout << "run pipeline success." 
<< std::endl; std::cout <GetObjectClass(obj); + jfieldID plan_field_id = env->GetFieldID(this_class, "plan", "[B"); + jobject plan_data = env->GetObjectField(obj, plan_field_id); + jbyteArray *plan = reinterpret_cast(&plan_data); + jsize plan_size = env->GetArrayLength(*plan); + jbyte *plan_address = env->GetByteArrayElements(*plan, nullptr); + std::string plan_string; + plan_string.assign(reinterpret_cast(plan_address), plan_size); + auto query_plan = dbms::SerializedPlanParser::parse(plan_string); + dbms::LocalExecutor* executor = new dbms::LocalExecutor(); + executor->execute(std::move(query_plan)); + + jfieldID executor_field_id = env->GetFieldID(this_class, "nativeExecutor", "L"); + env->SetLongField(obj, executor_field_id, reinterpret_cast(executor)); +} +jboolean Java_io_kyligence_jni_engine_LocalEngine_hasNext(JNIEnv *env, jobject obj) +{ + jclass this_class = env->GetObjectClass(obj); + jfieldID executor_field_id = env->GetFieldID(this_class, "nativeExecutor", "L"); + jlong executor_address = env->GetLongField(obj, executor_field_id); + dbms::LocalExecutor* executor = reinterpret_cast(executor_address); + return executor->hasNext(); +} +jbyteArray Java_io_kyligence_jni_engine_LocalEngine_next(JNIEnv *env, jobject obj) +{ + jclass this_class = env->GetObjectClass(obj); + jfieldID executor_field_id = env->GetFieldID(this_class, "nativeExecutor", "L"); + jlong executor_address = env->GetLongField(obj, executor_field_id); + dbms::LocalExecutor* executor = reinterpret_cast(executor_address); + std::string arrow_batch = executor->next(); + jbyteArray result = env->NewByteArray(arrow_batch.size()); + env->SetByteArrayRegion(result, 0, arrow_batch.size(), reinterpret_cast(arrow_batch.data())); + return result; +} +void Java_io_kyligence_jni_engine_LocalEngine_close(JNIEnv *env, jobject obj) +{ + jclass this_class = env->GetObjectClass(obj); + jfieldID executor_field_id = env->GetFieldID(this_class, "nativeExecutor", "L"); + jlong executor_address = env->GetLongField(obj, executor_field_id); + dbms::LocalExecutor* executor = reinterpret_cast(executor_address); + delete executor; +} From 0614d685a713a65550a6ae973034859d762a87f7 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 25 Nov 2021 11:19:22 +0300 Subject: [PATCH 191/472] Trying to fix pr info for backport (cherry picked from commit 811e0be749bba7239f3c56474910db4026060181) --- tests/ci/pr_info.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/ci/pr_info.py b/tests/ci/pr_info.py index 1a09646b01c4..3df2b0909ef4 100644 --- a/tests/ci/pr_info.py +++ b/tests/ci/pr_info.py @@ -102,7 +102,11 @@ def __init__(self, github_event, need_orgs=False, need_changed_files=False): else: self.changed_files = set([]) else: - diff_url = pull_request['diff_url'] + if 'pr-backport' in self.labels: + diff_url = f"https://github.com/{os.getenv('GITHUB_REPOSITORY')}/compare/master...{self.head_ref}.diff" + else: + diff_url = pull_request['diff_url'] + diff = urllib.request.urlopen(diff_url) diff_object = PatchSet(diff, diff.headers.get_charsets()[0]) self.changed_files = { f.path for f in diff_object } From f50b856a75540f7c8b6c4490d2443b0d765903cd Mon Sep 17 00:00:00 2001 From: tavplubix Date: Thu, 25 Nov 2021 12:09:07 +0300 Subject: [PATCH 192/472] Update 01155_rename_move_materialized_view.sql --- .../0_stateless/01155_rename_move_materialized_view.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/01155_rename_move_materialized_view.sql 
b/tests/queries/0_stateless/01155_rename_move_materialized_view.sql index 5863117dbc44..e4607f8879b3 100644 --- a/tests/queries/0_stateless/01155_rename_move_materialized_view.sql +++ b/tests/queries/0_stateless/01155_rename_move_materialized_view.sql @@ -34,9 +34,9 @@ RENAME TABLE test_01155_ordinary.mv1 TO test_01155_atomic.mv1; RENAME TABLE test_01155_ordinary.mv2 TO test_01155_atomic.mv2; RENAME TABLE test_01155_ordinary.dst TO test_01155_atomic.dst; RENAME TABLE test_01155_ordinary.src TO test_01155_atomic.src; -SET check_table_dependencies=0; + RENAME TABLE test_01155_ordinary.dist TO test_01155_atomic.dist; -SET check_table_dependencies=1; + RENAME DICTIONARY test_01155_ordinary.dict TO test_01155_atomic.dict; SELECT 'ordinary after rename:'; SELECT substr(name, 1, 10) FROM system.tables WHERE database='test_01155_ordinary'; From e1106ae9da8a03f66973f7b47519958fd5ecb146 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 25 Nov 2021 13:06:06 +0000 Subject: [PATCH 193/472] Backport #31697 to 21.9: Fix parsing of domain data types --- src/Processors/Formats/IRowInputFormat.cpp | 4 +++- tests/queries/0_stateless/00418_input_format_allow_errors.sh | 2 ++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/Processors/Formats/IRowInputFormat.cpp b/src/Processors/Formats/IRowInputFormat.cpp index 52e64a9d90d7..a72e8b9ae577 100644 --- a/src/Processors/Formats/IRowInputFormat.cpp +++ b/src/Processors/Formats/IRowInputFormat.cpp @@ -21,6 +21,7 @@ namespace ErrorCodes extern const int INCORRECT_NUMBER_OF_COLUMNS; extern const int ARGUMENT_OUT_OF_BOUND; extern const int INCORRECT_DATA; + extern const int CANNOT_PARSE_DOMAIN_VALUE_FROM_STRING; } @@ -36,7 +37,8 @@ bool isParseError(int code) || code == ErrorCodes::CANNOT_READ_ALL_DATA || code == ErrorCodes::TOO_LARGE_STRING_SIZE || code == ErrorCodes::ARGUMENT_OUT_OF_BOUND /// For Decimals - || code == ErrorCodes::INCORRECT_DATA; /// For some ReadHelpers + || code == ErrorCodes::INCORRECT_DATA /// For some ReadHelpers + || code == ErrorCodes::CANNOT_PARSE_DOMAIN_VALUE_FROM_STRING; } IRowInputFormat::IRowInputFormat(Block header, ReadBuffer & in_, Params params_) diff --git a/tests/queries/0_stateless/00418_input_format_allow_errors.sh b/tests/queries/0_stateless/00418_input_format_allow_errors.sh index b27c6f3fe294..4f0ee77a85d5 100755 --- a/tests/queries/0_stateless/00418_input_format_allow_errors.sh +++ b/tests/queries/0_stateless/00418_input_format_allow_errors.sh @@ -26,3 +26,5 @@ echo -ne 'x=1\ts=TSKV\nx=minus2\ts=trash1\ns=trash2\tx=-3\ns=TSKV Ok\tx=4\ns=tra $CLICKHOUSE_CLIENT --query="SELECT * FROM formats_test ORDER BY x, s" $CLICKHOUSE_CLIENT --query="DROP TABLE formats_test" + +echo '::' | $CLICKHOUSE_LOCAL --structure 'i IPv4' --query='SELECT * FROM table' --input_format_allow_errors_num=1 From 6d9369d78815c95869d87c5056678e2ca0467ae5 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 25 Nov 2021 16:06:41 +0000 Subject: [PATCH 194/472] Backport #31736 to 21.9: Fix race in JSONEachRowWithProgress output format --- ...JSONEachRowWithProgressRowOutputFormat.cpp | 39 ++++++++++++++++--- .../JSONEachRowWithProgressRowOutputFormat.h | 10 +++++ ...2124_json_each_row_with_progress.reference | 1 + .../02124_json_each_row_with_progress.sh | 11 ++++++ 4 files changed, 56 insertions(+), 5 deletions(-) create mode 100644 tests/queries/0_stateless/02124_json_each_row_with_progress.reference create mode 100755 tests/queries/0_stateless/02124_json_each_row_with_progress.sh diff --git 
a/src/Processors/Formats/Impl/JSONEachRowWithProgressRowOutputFormat.cpp b/src/Processors/Formats/Impl/JSONEachRowWithProgressRowOutputFormat.cpp index 4612ce99f053..c63014939d84 100644 --- a/src/Processors/Formats/Impl/JSONEachRowWithProgressRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONEachRowWithProgressRowOutputFormat.cpp @@ -1,15 +1,16 @@ #include -#include +#include #include #include - namespace DB { void JSONEachRowWithProgressRowOutputFormat::writeRowStartDelimiter() { + if (has_progress) + writeProgress(); writeCString("{\"row\":{", out); } @@ -22,11 +23,39 @@ void JSONEachRowWithProgressRowOutputFormat::writeRowEndDelimiter() void JSONEachRowWithProgressRowOutputFormat::onProgress(const Progress & value) { progress.incrementPiecewiseAtomically(value); - writeCString("{\"progress\":", out); - progress.writeJSON(out); - writeCString("}\n", out); + String progress_line; + WriteBufferFromString ostr(progress_line); + writeCString("{\"progress\":", ostr); + progress.writeJSON(ostr); + writeCString("}\n", ostr); + ostr.finalize(); + std::lock_guard lock(progress_lines_mutex); + progress_lines.emplace_back(std::move(progress_line)); + has_progress = true; } +void JSONEachRowWithProgressRowOutputFormat::flush() +{ + if (has_progress) + writeProgress(); + IOutputFormat::flush(); +} + +void JSONEachRowWithProgressRowOutputFormat::writeSuffix() +{ + if (has_progress) + writeProgress(); + JSONEachRowRowOutputFormat::writeSuffix(); +} + +void JSONEachRowWithProgressRowOutputFormat::writeProgress() +{ + std::lock_guard lock(progress_lines_mutex); + for (const auto & progress_line : progress_lines) + writeString(progress_line, out); + progress_lines.clear(); + has_progress = false; +} void registerOutputFormatProcessorJSONEachRowWithProgress(FormatFactory & factory) { diff --git a/src/Processors/Formats/Impl/JSONEachRowWithProgressRowOutputFormat.h b/src/Processors/Formats/Impl/JSONEachRowWithProgressRowOutputFormat.h index 3062d6641998..020da4bf7456 100644 --- a/src/Processors/Formats/Impl/JSONEachRowWithProgressRowOutputFormat.h +++ b/src/Processors/Formats/Impl/JSONEachRowWithProgressRowOutputFormat.h @@ -1,5 +1,6 @@ #pragma once #include +#include namespace DB { @@ -11,10 +12,19 @@ class JSONEachRowWithProgressRowOutputFormat : public JSONEachRowRowOutputFormat void writeRowStartDelimiter() override; void writeRowEndDelimiter() override; + void writeSuffix() override; void onProgress(const Progress & value) override; + void flush() override; + + void writeProgress(); private: Progress progress; + std::vector progress_lines; + std::mutex progress_lines_mutex; + /// To not lock mutex and check progress_lines every row, + /// we will use atomic flag that progress_lines is not empty. + std::atomic_bool has_progress = false; }; } diff --git a/tests/queries/0_stateless/02124_json_each_row_with_progress.reference b/tests/queries/0_stateless/02124_json_each_row_with_progress.reference new file mode 100644 index 000000000000..87766d889a3b --- /dev/null +++ b/tests/queries/0_stateless/02124_json_each_row_with_progress.reference @@ -0,0 +1 @@ +200000 diff --git a/tests/queries/0_stateless/02124_json_each_row_with_progress.sh b/tests/queries/0_stateless/02124_json_each_row_with_progress.sh new file mode 100755 index 000000000000..9319f76dfcd7 --- /dev/null +++ b/tests/queries/0_stateless/02124_json_each_row_with_progress.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +${CLICKHOUSE_CURL} -sS ${CLICKHOUSE_URL} -d "drop table if exists test_progress" +${CLICKHOUSE_CURL} -sS ${CLICKHOUSE_URL} -d "create table test_progress (x UInt64, y UInt64, d Date, a Array(UInt64), s String) engine=MergeTree() order by x" +${CLICKHOUSE_CURL} -sS ${CLICKHOUSE_URL} -d "insert into test_progress select number as x, number + 1 as y, toDate(number) as d, range(number % 10) as a, repeat(toString(number), 10) as s from numbers(200000)" +${CLICKHOUSE_CURL} -sS ${CLICKHOUSE_URL} -d "SELECT * from test_progress FORMAT JSONEachRowWithProgress" | grep -v --text "progress" | wc -l +${CLICKHOUSE_CURL} -sS ${CLICKHOUSE_URL} -d "drop table test_progress"; From 5e2f59f6981c9c9f4fca3e34e843f3d0d1bcbcfe Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 25 Nov 2021 19:00:41 +0000 Subject: [PATCH 195/472] Backport #31723 to 21.9: Fix 'there are no such cluster here' --- src/Interpreters/Context.cpp | 15 +++++++-------- src/Interpreters/DDLTask.cpp | 2 +- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 108c2ef7712c..395b69f952d3 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -1824,21 +1824,20 @@ std::optional Context::getTCPPortSecure() const std::shared_ptr Context::getCluster(const std::string & cluster_name) const { - auto res = getClusters()->getCluster(cluster_name); - if (res) + if (auto res = tryGetCluster(cluster_name)) return res; - if (!cluster_name.empty()) - res = tryGetReplicatedDatabaseCluster(cluster_name); - if (res) - return res; - throw Exception("Requested cluster '" + cluster_name + "' not found", ErrorCodes::BAD_GET); } std::shared_ptr Context::tryGetCluster(const std::string & cluster_name) const { - return getClusters()->getCluster(cluster_name); + auto res = getClusters()->getCluster(cluster_name); + if (res) + return res; + if (!cluster_name.empty()) + res = tryGetReplicatedDatabaseCluster(cluster_name); + return res; } diff --git a/src/Interpreters/DDLTask.cpp b/src/Interpreters/DDLTask.cpp index 0391e76e7634..561c299e8c8b 100644 --- a/src/Interpreters/DDLTask.cpp +++ b/src/Interpreters/DDLTask.cpp @@ -201,7 +201,7 @@ void DDLTask::setClusterInfo(ContextPtr context, Poco::Logger * log) if (!cluster) throw Exception(ErrorCodes::INCONSISTENT_CLUSTER_DEFINITION, - "DDL task {} contains current host {} in cluster {}, but there are no such cluster here.", + "DDL task {} contains current host {} in cluster {}, but there is no such cluster here.", entry_name, host_id.readableString(), cluster_name); /// Try to find host from task host list in cluster From eda0a66be9c25ecba3663b537087ab70e1af3770 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 25 Nov 2021 17:45:35 +0300 Subject: [PATCH 196/472] Merge pull request #31782 from ClickHouse/use_git_version_number_for_builds Use version from git describe in builds (cherry picked from commit 8890dc675e8aca37d9ab2dc37e8097e43e5a610a) --- tests/ci/build_check.py | 11 +++++++++-- tests/ci/version_helper.py | 16 +++++++++++++++- 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/tests/ci/build_check.py b/tests/ci/build_check.py index 1ba5589965c9..35737e5fa713 100644 --- a/tests/ci/build_check.py +++ b/tests/ci/build_check.py @@ -119,8 +119,15 @@ def build_clickhouse(packager_cmd, logs_path): image_version = docker_image.version version = get_version_from_repo(repo_path) - version.tweak_update() - update_version_local(repo_path, pr_info.sha, version) + logging.info("Got 
version from repo %s", version.get_version_string()) + + version_type = 'testing' + if 'release' in pr_info.labels or 'release-lts' in pr_info.labels: + version_type = 'stable' + + update_version_local(repo_path, pr_info.sha, version, version_type) + + logging.info("Updated local files with version") build_name = build_config_to_string(build_config) logging.info("Build short name %s", build_name) diff --git a/tests/ci/version_helper.py b/tests/ci/version_helper.py index dd3845eae660..e207dac4671e 100644 --- a/tests/ci/version_helper.py +++ b/tests/ci/version_helper.py @@ -66,13 +66,27 @@ def _get_version_from_line(line): _, ver_with_bracket = line.strip().split(' ') return ver_with_bracket[:-1] +def get_tweak_from_git_describe(repo_path): + # something like v21.12.1.8816-testing-358-g81942b8128 + # or v21.11.4.14-stable-31-gd6aab025e0 + output = subprocess.check_output(f"cd {repo_path} && git describe --long", shell=True).decode('utf-8') + commits_number = int(output.split('-')[-2]) + # for testing releases we have to also add fourth number of + # the previous tag + if 'testing' in output: + previous_version = output.split('-')[0] + previous_version_commits = int(previous_version.split('.')[3]) + commits_number += previous_version_commits + + return commits_number + def get_version_from_repo(repo_path): path_to_file = os.path.join(repo_path, FILE_WITH_VERSION_PATH) major = 0 minor = 0 patch = 0 - tweak = 0 + tweak = get_tweak_from_git_describe(repo_path) version_revision = 0 with open(path_to_file, 'r') as ver_file: for line in ver_file: From 4936a8a575adb681b79f6c82ee970e595178e749 Mon Sep 17 00:00:00 2001 From: "neng.liu" Date: Fri, 26 Nov 2021 10:15:09 +0000 Subject: [PATCH 197/472] test success --- .../Builder/SerializedPlanBuilder.cpp | 12 + utils/local-engine/CMakeLists.txt | 24 +- .../Parser/SerializedPlanParser.cpp | 21 +- .../Parser/SerializedPlanParser.h | 1 + utils/local-engine/Substrait/CMakeLists.txt | 2 +- utils/local-engine/Substrait/expression.proto | 226 ++++++++++++++++++ utils/local-engine/Substrait/extensions.proto | 70 ++++++ utils/local-engine/Substrait/function.proto | 152 ++++++++++++ .../Substrait/parameterized_types.proto | 116 +++++++++ utils/local-engine/Substrait/plan.proto | 20 ++ utils/local-engine/Substrait/relations.proto | 175 ++++++++++++++ utils/local-engine/Substrait/selection.proto | 114 +++++++++ utils/local-engine/Substrait/type.proto | 181 ++++++++++++++ .../Substrait/type_expressions.proto | 138 +++++++++++ utils/local-engine/java/pom.xml | 14 +- .../io/kyligence/jni/engine/LocalEngine.java | 4 +- .../kyligence/jni/engine/LocalEngineTest.java | 31 +++ utils/local-engine/local_engine_jni.cpp | 10 +- utils/local-engine/tests/CMakeLists.txt | 36 +++ utils/local-engine/tests/data/iris.parquet | Bin 0 -> 5012 bytes .../local-engine/tests/gtest_local_engine.cpp | 47 ++++ utils/local-engine/tests/testConfig.h.in | 1 + 22 files changed, 1369 insertions(+), 26 deletions(-) create mode 100644 utils/local-engine/Substrait/expression.proto create mode 100644 utils/local-engine/Substrait/extensions.proto create mode 100644 utils/local-engine/Substrait/function.proto create mode 100644 utils/local-engine/Substrait/parameterized_types.proto create mode 100644 utils/local-engine/Substrait/plan.proto create mode 100644 utils/local-engine/Substrait/relations.proto create mode 100644 utils/local-engine/Substrait/selection.proto create mode 100644 utils/local-engine/Substrait/type.proto create mode 100644 utils/local-engine/Substrait/type_expressions.proto create mode 
100644 utils/local-engine/java/src/test/java/io/kyligence/jni/engine/LocalEngineTest.java create mode 100644 utils/local-engine/tests/CMakeLists.txt create mode 100644 utils/local-engine/tests/data/iris.parquet create mode 100644 utils/local-engine/tests/gtest_local_engine.cpp create mode 100644 utils/local-engine/tests/testConfig.h.in diff --git a/utils/local-engine/Builder/SerializedPlanBuilder.cpp b/utils/local-engine/Builder/SerializedPlanBuilder.cpp index 742fa8548159..f8778496d426 100644 --- a/utils/local-engine/Builder/SerializedPlanBuilder.cpp +++ b/utils/local-engine/Builder/SerializedPlanBuilder.cpp @@ -38,6 +38,18 @@ std::unique_ptr SerializedSchemaBuilder::build( t->mutable_string()->set_nullability( this->nullability_map[name] ? io::substrait::Type_Nullability_NULLABLE : io::substrait::Type_Nullability_REQUIRED); } + else if (type == "FP32") + { + auto *t = type_struct->mutable_types()->Add(); + t->mutable_fp32()->set_nullability( + this->nullability_map[name] ? io::substrait::Type_Nullability_NULLABLE : io::substrait::Type_Nullability_REQUIRED); + } + else if (type == "FP64") + { + auto *t = type_struct->mutable_types()->Add(); + t->mutable_fp64()->set_nullability( + this->nullability_map[name] ? io::substrait::Type_Nullability_NULLABLE : io::substrait::Type_Nullability_REQUIRED); + } else { throw "doesn't support type "+ type; } diff --git a/utils/local-engine/CMakeLists.txt b/utils/local-engine/CMakeLists.txt index 8a12f8822e8d..ae4a5e4da186 100644 --- a/utils/local-engine/CMakeLists.txt +++ b/utils/local-engine/CMakeLists.txt @@ -46,6 +46,16 @@ else () message("generating headers to ${JNI_HEADERS_DIR}") endif() +set (CLICKHOUSE_SERVER_LINK + PRIVATE + # dbms + clickhouse_aggregate_functions + clickhouse_common_io + clickhouse_functions + clickhouse_storages_system + arrow_shared + substrait + ) function(add_cxx_compile_options option) add_compile_options("$<$,CXX>:${option}>") @@ -78,16 +88,6 @@ include_directories( ) -set (CLICKHOUSE_SERVER_LINK - -# dbms - clickhouse_aggregate_functions - clickhouse_common_io - clickhouse_functions - clickhouse_storages_system - arrow_shared - substrait - ) #target_link_libraries(local_engine ${CLICKHOUSE_SERVER_LINK} ) #create_javah() @@ -105,3 +105,7 @@ set_property(TARGET ${LOCALENGINE_SHARED_LIB} PROPERTY POSITION_INDEPENDENT_CODE #add_executable(${LOCALENGINE_SHARED_LIB} ${JNI_NATIVE_SOURCES}) add_dependencies(${LOCALENGINE_SHARED_LIB} local_engine_headers) target_link_libraries(${LOCALENGINE_SHARED_LIB} ${CLICKHOUSE_SERVER_LINK} ) + +if (ENABLE_TESTS) + add_subdirectory(tests) +endif () diff --git a/utils/local-engine/Parser/SerializedPlanParser.cpp b/utils/local-engine/Parser/SerializedPlanParser.cpp index 8894d78c6907..92599f0ab3bc 100644 --- a/utils/local-engine/Parser/SerializedPlanParser.cpp +++ b/utils/local-engine/Parser/SerializedPlanParser.cpp @@ -53,6 +53,14 @@ DB::DataTypePtr dbms::SerializedPlanParser::parseType(const io::substrait::Type& { return factory.get("String"); } + else if (type.has_fp32()) + { + return factory.get("Float32"); + } + else if (type.has_fp64()) + { + return factory.get("Float64"); + } else { throw std::runtime_error("doesn't support type " + type.DebugString()); @@ -61,7 +69,7 @@ DB::DataTypePtr dbms::SerializedPlanParser::parseType(const io::substrait::Type& DB::QueryPlanPtr dbms::SerializedPlanParser::parse(std::unique_ptr plan) { auto query_plan = std::make_unique(); - if (plan->relations().Capacity() == 1) + if (plan->relations_size() == 1) { auto rel = plan->relations().at(0); if 
(rel.has_read()) { @@ -76,7 +84,7 @@ DB::QueryPlanPtr dbms::SerializedPlanParser::parse(std::unique_ptrbuildQueryPipeline(optimization_settings, BuildQueryPipelineSettings()); + this->query_pipeline = query_plan->buildQueryPipeline(optimization_settings, BuildQueryPipelineSettings()); this->executor = std::make_unique(*query_pipeline); this->header = query_plan->getCurrentDataStream().header; this->ch_column_to_arrow_column = std::make_unique(header, "Arrow", false); @@ -162,12 +170,12 @@ void dbms::LocalExecutor::writeChunkToArrowString(DB::Chunk &chunk, std::string throw std::runtime_error("Error while opening a table writer"); auto writer = *writer_status; auto write_status = writer->WriteTable(*arrow_table, 1000000); - if (writer_status.ok()) + if (!write_status.ok()) { throw std::runtime_error("Error while writing a table"); } auto close_status = writer->Close(); - if (close_status.ok()) + if (!close_status.ok()) { throw std::runtime_error("Error while close a table"); } @@ -175,7 +183,7 @@ void dbms::LocalExecutor::writeChunkToArrowString(DB::Chunk &chunk, std::string bool dbms::LocalExecutor::hasNext() { bool has_next; - if (this->current_chunk->empty()) + if (!this->current_chunk || this->current_chunk->empty()) { this->current_chunk = std::make_unique(); has_next = this->executor->pull(*this->current_chunk); @@ -188,5 +196,6 @@ std::string dbms::LocalExecutor::next() { std::string arrow_chunk; writeChunkToArrowString(*this->current_chunk, arrow_chunk); + this->current_chunk.reset(); return arrow_chunk; } diff --git a/utils/local-engine/Parser/SerializedPlanParser.h b/utils/local-engine/Parser/SerializedPlanParser.h index 1211492b6c01..4bac707088d9 100644 --- a/utils/local-engine/Parser/SerializedPlanParser.h +++ b/utils/local-engine/Parser/SerializedPlanParser.h @@ -76,6 +76,7 @@ class LocalExecutor private: void writeChunkToArrowString(Chunk& chunk, std::string & arrow_chunk); + QueryPipelinePtr query_pipeline; std::unique_ptr executor; Block header; std::unique_ptr ch_column_to_arrow_column; diff --git a/utils/local-engine/Substrait/CMakeLists.txt b/utils/local-engine/Substrait/CMakeLists.txt index cd49e02fe041..69fe6d8d85f2 100644 --- a/utils/local-engine/Substrait/CMakeLists.txt +++ b/utils/local-engine/Substrait/CMakeLists.txt @@ -1,5 +1,5 @@ set(protobuf_generate_PROTOC_OUT_DIR "${ClickHouse_SOURCE_DIR}/utils/local-engine/Substrait") -file(GLOB PROTOBUF_DEFINITION_FILES "/home/kyligence/Documents/code/substrait/binary/*.proto") +file(GLOB PROTOBUF_DEFINITION_FILES "${ClickHouse_SOURCE_DIR}/utils/local-engine/Substrait/*.proto") include_directories(${Protobuf_INCLUDE_DIRS}) include_directories(${CMAKE_CURRENT_BINARY_DIR}) PROTOBUF_GENERATE_CPP(SUBSTRAIT_SRCS SUBSTRAIT_HEADERS ${PROTOBUF_DEFINITION_FILES}) diff --git a/utils/local-engine/Substrait/expression.proto b/utils/local-engine/Substrait/expression.proto new file mode 100644 index 000000000000..2744991a9b82 --- /dev/null +++ b/utils/local-engine/Substrait/expression.proto @@ -0,0 +1,226 @@ +syntax = "proto3"; + +package io.substrait; + +import "type.proto"; +import "selection.proto"; +import "extensions.proto"; + +option java_multiple_files = true; + +message Expression { + oneof rex_type { + Literal literal = 1; + FieldReference selection = 2; + ScalarFunction scalar_function = 3; + WindowFunction window_function = 5; + IfThen if_then = 6; + SwitchExpression switch_expression = 7; + SingularOrList singular_or_list = 8; + MultiOrList multi_or_list = 9; + Enum enum = 10; + } + + message Enum { + oneof enum_kind { + 
string specified = 1; + Empty unspecified = 2; + } + + message Empty {} + } + + message Literal { + oneof literal_type { + bool boolean = 1; + int32 i8 = 2; + int32 i16 = 3; + int32 i32 = 5; + int64 i64 = 7; + float fp32 = 10; + double fp64 = 11; + string string = 12; + bytes binary = 13; + fixed64 timestamp = 14; + fixed32 date = 16; + uint64 time = 17; + IntervalYearToMonth interval_year_to_month = 19; + IntervalDayToSecond interval_day_to_second = 20; + string fixed_char = 21; + string var_char = 22; + bytes fixed_binary = 23; + bytes decimal = 24; + Struct struct = 25; + Map map = 26; + fixed64 timestamp_tz = 27; + bytes uuid = 28; + Type null = 29; // a typed null literal + List list = 30; + } + + message Map { + message KeyValue { + Literal key = 1; + Literal value = 2; + } + + repeated KeyValue key_values = 1; + } + + message IntervalYearToMonth { + int32 years = 1; + int32 months = 2; + } + + message IntervalDayToSecond { + int32 days = 1; + int32 seconds = 2; + } + + message Struct { + // A possibly heterogeneously typed list of literals + repeated Literal fields = 1; + } + + message List { + // A homogeneously typed list of literals + repeated Literal values = 1; + } + } + + message ScalarFunction { + Extensions.FunctionId id = 1; + repeated Expression args = 2; + Type output_type = 3; + } + + message AggregateFunction { + Extensions.FunctionId id = 1; + repeated Expression args = 2; + repeated SortField sorts = 3; + AggregationPhase phase = 4; + Type output_type = 5; + } + + enum AggregationPhase { + UNKNOWN = 0; + INITIAL_TO_INTERMEDIATE = 1; + INTERMEDIATE_TO_INTERMEDIATE = 2; + INITIAL_TO_RESULT = 3; + INTERMEDIATE_TO_RESULT = 4; + } + + + message WindowFunction { + Extensions.FunctionId id = 1; + repeated Expression partitions = 2; + repeated SortField sorts = 3; + Bound upper_bound = 4; + Bound lower_bound = 5; + AggregationPhase phase = 6; + Type output_type = 7; + repeated Expression args = 8; + + message Bound { + + message Preceding { + int64 offset = 1; + } + + message Following { + int64 offset = 1; + } + + message CurrentRow {} + + message Unbounded {} + + oneof kind { + Preceding preceding = 1; + Following following = 2; + CurrentRow current_row = 3; + Unbounded unbounded = 4; + } + + } + + } + + message SortField { + Expression expr = 1; + + oneof sort_kind { + SortType formal = 2; + Extensions.FunctionId comparison_function = 3; + } + enum SortType { + UNKNOWN = 0; + ASC_NULLS_FIRST = 1; + ASC_NULLS_LAST = 2; + DESC_NULLS_FIRST = 3; + DESC_NULLS_LAST = 4; + CLUSTERED = 5; + } + + } + + message IfThen { + + repeated IfClause ifs = 1; + Expression else = 2; + + message IfClause { + Expression if = 1; + Expression then = 2; + } + + } + + message SwitchExpression { + repeated IfValue ifs = 1; + Expression else = 2; + + message IfValue { + Expression if = 1; + Expression then = 2; + } + } + + message SingularOrList { + Expression value = 1; + repeated Expression options = 2; + } + + message MultiOrList { + repeated Expression value = 1; + repeated Record options = 2; + + message Record { + repeated Expression fields = 1; + } + + } + + message EmbeddedFunction { + repeated Expression arguments = 1; + Type output_type = 2; + oneof kind { + PythonPickleFunction python_pickle_function = 3; + WebAssemblyFunction web_assembly_function = 4; + } + + message PythonPickleFunction { + bytes function = 1; + repeated string prerequisite = 2; + } + + message WebAssemblyFunction { + bytes script = 1; + repeated string prerequisite = 2; + } + } +} + + + + diff --git 
a/utils/local-engine/Substrait/extensions.proto b/utils/local-engine/Substrait/extensions.proto new file mode 100644 index 000000000000..8c5793c04578 --- /dev/null +++ b/utils/local-engine/Substrait/extensions.proto @@ -0,0 +1,70 @@ +syntax = "proto3"; + +package io.substrait; + +option java_multiple_files = true; + + +message Extensions { + + + message Extension { + + // unique that describes a particular source for (and type of) extensions. + ExtensionId extension_id = 1; + + oneof extension_type { + // git uri for extension types information + TypeExtension type_extension = 2; + FunctionExtension function_extension = 3; + } + + message TypeExtension { + string git_uri = 1; + } + + message FunctionExtension { + string git_uri = 1; + } + + } + + message Mapping { + + oneof mapping_type { + TypeMapping type_mapping = 1; + FunctionMapping function_mapping = 2; + } + + message TypeMapping { + TypeId type_id = 1; + ExtensionId extension_id = 2; + string name = 3; + } + + message FunctionMapping { + FunctionId function_id = 1; + ExtensionId extension_id = 2; + string name = 3; + uint32 index = 4; + repeated Option options = 5; + message Option { + string key = 1; + string value = 2; + } + } + } + + message ExtensionId { + uint32 id = 1; + } + + message FunctionId { + uint64 id = 1; + } + + message TypeId { + uint64 id = 1; + } +} + diff --git a/utils/local-engine/Substrait/function.proto b/utils/local-engine/Substrait/function.proto new file mode 100644 index 000000000000..7e2142687946 --- /dev/null +++ b/utils/local-engine/Substrait/function.proto @@ -0,0 +1,152 @@ +syntax = "proto3"; + +package io.substrait; + +import "type.proto"; +import "parameterized_types.proto"; +import "type_expressions.proto"; +import "extensions.proto"; + +option java_multiple_files = true; + + +// List of function signatures available. +message FunctionSignature { + + message FinalArgVariadic { + // the minimum number of arguments allowed for the list of final arguments (inclusive). + int64 min_args = 1; + + // the maximum number of arguments allowed for the list of final arguments (exclusive) + int64 max_args = 2; + + // the type of parameterized type consistency + ParameterConsistency consistency = 3; + + enum ParameterConsistency { + UNKNOWN = 0; + + // All argument must be the same concrete type. + CONSISTENT = 1; + + // Each argument can be any possible concrete type afforded by the bounds of any parameter defined in + // the arguments specification. 
+ INCONSISTENT = 2; + } + } + + message FinalArgNormal {} + + message Scalar { + Extensions.FunctionId id = 1; + repeated Argument arguments = 2; + repeated string name = 3; + Description description = 4; + + bool deterministic = 7; + bool session_dependent = 8; + + DerivationExpression output_type = 9; + + oneof final_variable_behavior { + FinalArgVariadic variadic = 10; + FinalArgNormal normal = 11; + } + + repeated Implementation implementations = 12; + } + + message Aggregate { + Extensions.FunctionId id = 1; + repeated Argument arguments = 2; + repeated string name = 3; + Description description = 4; + + bool deterministic = 7; + bool session_dependent = 8; + + DerivationExpression output_type = 9; + + oneof final_variable_behavior { + FinalArgVariadic variadic = 10; + FinalArgNormal normal = 11; + } + + bool ordered = 14; + uint64 max_set = 12; + Type intermediate_type = 13; + + repeated Implementation implementations = 15; + } + + message Window { + Extensions.FunctionId id = 1; + repeated Argument arguments = 2; + repeated string name = 3; + Description description = 4; + + bool deterministic = 7; + bool session_dependent = 8; + + DerivationExpression intermediate_type = 9; + DerivationExpression output_type = 10; + oneof final_variable_behavior { + FinalArgVariadic variadic = 16; + FinalArgNormal normal = 17; + } + bool ordered = 11; + uint64 max_set = 12; + WindowType window_type = 14; + repeated Implementation implementations = 15; + + enum WindowType { + UNKNOWN = 0; + STREAMING = 1; + PARTITION = 2; + } + } + + message Description { + string language = 1; + string body = 2; + } + + message Implementation { + + Type type = 1; + string uri = 2; + + enum Type { + UNKNOWN = 0; + WEB_ASSEMBLY = 1; + TRINO_JAR = 2; + } + } + + message Argument { + string name = 1; + + oneof argument_kind { + ValueArgument value = 2; + TypeArgument type = 3; + EnumArgument enum = 4; + } + + message ValueArgument { + ParameterizedType type = 1; + bool constant = 2; + } + + message TypeArgument { + ParameterizedType type = 1; + } + + message EnumArgument { + repeated string options = 1; + bool optional = 2; + } + + } + +} + diff --git a/utils/local-engine/Substrait/parameterized_types.proto b/utils/local-engine/Substrait/parameterized_types.proto new file mode 100644 index 000000000000..ffcd9f933228 --- /dev/null +++ b/utils/local-engine/Substrait/parameterized_types.proto @@ -0,0 +1,116 @@ +syntax = "proto3"; +package io.substrait; + +import "type.proto"; +import "extensions.proto"; + +option java_multiple_files = true; + +message ParameterizedType { + + oneof kind { + Type.Boolean bool = 1; + Type.I8 i8 = 2; + Type.I16 i16 = 3; + Type.I32 i32 = 5; + Type.I64 i64 = 7; + Type.FP32 fp32 = 10; + Type.FP64 fp64 = 11; + Type.String string = 12; + Type.Binary binary = 13; + Type.Timestamp timestamp = 14; + Type.Date date = 16; + Type.Time time = 17; + Type.IntervalYear interval_year = 19; + Type.IntervalDay interval_day = 20; + Type.TimestampTZ timestamp_tz = 29; + Type.UUID uuid = 32; + + ParameterizedFixedChar fixed_char = 21; + ParameterizedVarChar varchar = 22; + ParameterizedFixedBinary fixed_binary = 23; + ParameterizedDecimal decimal = 24; + + ParameterizedStruct struct = 25; + ParameterizedList list = 27; + ParameterizedMap map = 28; + + Extensions.TypeId user_defined = 31; + + TypeParameter type_parameter = 33; + + } + + message TypeParameter { + string name = 1; + repeated ParameterizedType bounds = 2; + } + + message IntegerParameter { + string name = 1; + NullableInteger range_start_inclusive = 
2; + NullableInteger range_end_exclusive = 3; + } + + message NullableInteger { + int64 value = 1; + } + + message ParameterizedFixedChar { + IntegerOption length = 1; + Type.Variation variation = 2; + Type.Nullability nullability = 3; + } + + message ParameterizedVarChar { + IntegerOption length = 1; + Type.Variation variation = 2; + Type.Nullability nullability = 3; + } + + message ParameterizedFixedBinary { + IntegerOption length = 1; + Type.Variation variation = 2; + Type.Nullability nullability = 3; + } + + message ParameterizedDecimal { + IntegerOption scale = 1; + IntegerOption precision = 2; + Type.Variation variation = 3; + Type.Nullability nullability = 4; + } + + message ParameterizedStruct { + repeated ParameterizedType types = 1; + Type.Variation variation = 2; + Type.Nullability nullability = 3; + } + + message ParameterizedNamedStruct { + // list of names in dfs order + repeated string names = 1; + ParameterizedStruct struct = 2; + } + + message ParameterizedList { + ParameterizedType type = 1; + Type.Variation variation = 2; + Type.Nullability nullability = 3; + } + + message ParameterizedMap { + ParameterizedType key = 1; + ParameterizedType value = 2; + Type.Variation variation = 3; + Type.Nullability nullability = 4; + } + + message IntegerOption { + oneof integer_type { + int32 literal = 1; + IntegerParameter parameter = 2; + } + } +} + diff --git a/utils/local-engine/Substrait/plan.proto b/utils/local-engine/Substrait/plan.proto new file mode 100644 index 000000000000..3981583c9730 --- /dev/null +++ b/utils/local-engine/Substrait/plan.proto @@ -0,0 +1,20 @@ +syntax = "proto3"; + +package io.substrait; + +import "relations.proto"; +import "extensions.proto"; + +option java_multiple_files = true; + + +// Describe a set of operations to complete. +// For compactness sake, identifiers are normalized at the plan level. +message Plan { + + repeated Extensions.Extension extensions = 1; + repeated Extensions.Mapping mappings = 2; + repeated Rel relations = 3; + +} + diff --git a/utils/local-engine/Substrait/relations.proto b/utils/local-engine/Substrait/relations.proto new file mode 100644 index 000000000000..4ba08184244b --- /dev/null +++ b/utils/local-engine/Substrait/relations.proto @@ -0,0 +1,175 @@ +syntax = "proto3"; + +package io.substrait; + +import "type.proto"; +import "expression.proto"; +import "selection.proto"; + +option java_multiple_files = true; + +message RelCommon { + + oneof kind { + Direct direct = 1; + Emit emit = 2; + } + + Hint hint = 3; + RuntimeConstraint constraint = 4; + + message Direct {} + message Emit { + repeated int32 output_mapping = 1; + } + + message Hint { + repeated HintKeyValue hint_key_values = 1; + Stats stats = 2; + + message Stats { + double row_count = 1; + double record_size = 2; + } + + message HintKeyValue { + string key = 1; + bytes value = 2; + } + + } + + message RuntimeConstraint { + // TODO: nodes, cpu threads/%, memory, iops, etc. 
+ } + +} + +message ReadRel { + RelCommon common = 1; + Type.NamedStruct base_schema = 2; + Expression filter = 3; + MaskExpression projection = 4; + + oneof read_type { + VirtualTable virtual_table = 5; + LocalFiles local_files = 6; + NamedTable named_table = 7; + } + + message NamedTable { + repeated string names = 1; + } + + + message VirtualTable { + repeated Expression.Literal.Struct values = 1; + } + + message LocalFiles { + + repeated FileOrFiles items = 1; + + message FileOrFiles { + oneof path_type { + string uri_path = 1; + string uri_path_glob = 2; + } + + Format format = 3; + + enum Format { + UNKNOWN = 0; + PARQUET = 1; + } + } + + } + +} + +message ProjectRel { + RelCommon common = 1; + Rel input = 2; + repeated Expression expressions = 3; +} + +message JoinRel { + RelCommon common = 1; + Rel left = 2; + Rel right = 3; + Expression expression = 4; + Expression post_join_filter = 5; + + enum JoinType { + UNKNOWN = 0; + INNER = 1; + OUTER = 2; + LEFT = 3; + RIGHT = 4; + } +} + +message FetchRel { + RelCommon common = 1; + Rel input = 2; + int64 offset = 3; + int64 count = 4; +} + +message AggregateRel { + RelCommon common = 1; + Rel input = 2; + repeated Grouping groupings = 3; + repeated Measure measures = 4; + Expression.AggregationPhase phase = 5; + + message Grouping { + repeated int32 input_fields = 1; + } + + message Measure { + Expression.AggregateFunction measure = 1; + } +} + +message SortRel { + RelCommon common = 1; + Rel input = 2; + repeated Expression.SortField sorts = 3; +} + +message FilterRel { + RelCommon common = 1; + Rel input = 2; + Expression condition = 3; +} + +message SetRel { + RelCommon common = 1; + repeated Rel inputs = 2; + SetOp op = 3; + + enum SetOp { + UNKNOWN = 0; + MINUS_PRIMARY = 1; + MINUS_MULTISET = 2; + INTERSECTION_PRIMARY = 3; + INTERSECTION_MULTISET = 4; + UNION_DISTINCT = 5; + UNION_ALL = 6; + } +} + +message Rel { + oneof RelType { + ReadRel read = 1; + FilterRel filter = 2; + FetchRel fetch = 3; + AggregateRel aggregate = 4; + SortRel sort = 5; + JoinRel join = 6; + ProjectRel project = 7; + SetRel set = 8; + } +} \ No newline at end of file diff --git a/utils/local-engine/Substrait/selection.proto b/utils/local-engine/Substrait/selection.proto new file mode 100644 index 000000000000..83bc8aad3ecd --- /dev/null +++ b/utils/local-engine/Substrait/selection.proto @@ -0,0 +1,114 @@ +syntax = "proto3"; + +package io.substrait; + +option java_multiple_files = true; + +message ReferenceSegment { + + oneof reference_type { + MapKey map_key = 1; + MapKeyExpression expression = 2; + StructField struct_field = 3; + ListElement list_element = 4; + ListRange list_range = 5; + } + + message MapKey { + string map_key = 1; + ReferenceSegment child = 2; + } + + message MapKeyExpression { + string map_key_expression = 1; + ReferenceSegment child = 2; + } + + message StructField { + int32 field = 1; + ReferenceSegment child = 2; + } + + message ListElement { + int32 offset = 1; + ReferenceSegment child = 2; + } + + message ListRange { + int32 start = 1; + int32 end = 2; + ReferenceSegment child = 3; + } +} + +message MaskExpression { + + StructSelect select = 1; + bool maintain_singular_struct = 2; + + message Select { + oneof type { + StructSelect struct = 1; + ListSelect list = 2; + MapSelect map = 3; + } + } + + message StructSelect { + repeated StructItem struct_items = 1; + } + + message StructItem { + int32 field = 1; + Select child = 2; + } + + message ListSelect { + + repeated ListSelectItem selection = 1; + Select child = 2; + + message 
ListSelectItem { + oneof type { + ListElement item = 1; + ListSlice slice = 2; + } + + message ListElement { + int32 field = 1; + } + + message ListSlice { + int32 start = 1; + int32 end = 2; + } + + } + } + + message MapSelect { + oneof select { + MapKey key = 1; + MapKeyExpression expression = 2; + } + + Select child = 3; + + message MapKey { + string map_key = 1; + } + + message MapKeyExpression { + string map_key_expression = 1; + } + } +} + +message FieldReference { + + oneof reference_type { + ReferenceSegment direct_reference = 1; + MaskExpression masked_reference = 2; + } + +} diff --git a/utils/local-engine/Substrait/type.proto b/utils/local-engine/Substrait/type.proto new file mode 100644 index 000000000000..62c8e7f64e5b --- /dev/null +++ b/utils/local-engine/Substrait/type.proto @@ -0,0 +1,181 @@ +syntax = "proto3"; +package io.substrait; + +import "extensions.proto"; + +option java_multiple_files = true; + +message Type { + + oneof kind { + Boolean bool = 1; + I8 i8 = 2; + I16 i16 = 3; + I32 i32 = 5; + I64 i64 = 7; + FP32 fp32 = 10; + FP64 fp64 = 11; + String string = 12; + Binary binary = 13; + Timestamp timestamp = 14; + Date date = 16; + Time time = 17; + IntervalYear interval_year = 19; + IntervalDay interval_day = 20; + TimestampTZ timestamp_tz = 29; + UUID uuid = 32; + + FixedChar fixed_char = 21; + VarChar varchar = 22; + FixedBinary fixed_binary = 23; + Decimal decimal = 24; + + Struct struct = 25; + List list = 27; + Map map = 28; + + Extensions.TypeId user_defined = 31; + } + + enum Nullability { + NULLABLE = 0; + REQUIRED = 1; + } + + message Boolean { + Variation variation = 1; + Nullability nullability = 2; + } + message I8 { + Variation variation = 1; + Nullability nullability = 2; + } + + message I16 { + Variation variation = 1; + Nullability nullability = 2; + } + + message I32 { + Variation variation = 1; + Nullability nullability = 2; + } + + message I64 { + Variation variation = 1; + Nullability nullability = 2; + } + + message FP32 { + Variation variation = 1; + Nullability nullability = 2; + } + + message FP64 { + Variation variation = 1; + Nullability nullability = 2; + } + + message String { + Variation variation = 1; + Nullability nullability = 2; + } + + message Binary { + Variation variation = 1; + Nullability nullability = 2; + } + + message Timestamp { + Variation variation = 1; + Nullability nullability = 2; + } + + message Date { + Variation variation = 1; + Nullability nullability = 2; + } + + message Time { + Variation variation = 1; + Nullability nullability = 2; + } + + message TimestampTZ { + Variation variation = 1; + Nullability nullability = 2; + } + + message IntervalYear { + Variation variation = 1; + Nullability nullability = 2; + } + + message IntervalDay { + Variation variation = 1; + Nullability nullability = 2; + } + + message UUID { + Variation variation = 1; + Nullability nullability = 2; + } + + // Start compound types. 
+ message FixedChar { + int32 length = 1; + Variation variation = 2; + Nullability nullability = 3; + } + + message VarChar { + int32 length = 1; + Variation variation = 2; + Nullability nullability = 3; + } + + message FixedBinary { + int32 length = 1; + Variation variation = 2; + Nullability nullability = 3; + } + + message Decimal { + int32 scale = 1; + int32 precision = 2; + Variation variation = 3; + Nullability nullability = 4; + } + + message Struct { + repeated Type types = 1; + Variation variation = 2; + Nullability nullability = 3; + } + + message NamedStruct { + // list of names in dfs order + repeated string names = 1; + Struct struct = 2; + } + + message List { + Type type = 1; + Variation variation = 2; + Nullability nullability = 3; + } + + message Map { + Type key = 1; + Type value = 2; + Variation variation = 3; + Nullability nullability = 4; + } + + message Variation { + int32 organization = 1; + string name = 2; + } + +} + diff --git a/utils/local-engine/Substrait/type_expressions.proto b/utils/local-engine/Substrait/type_expressions.proto new file mode 100644 index 000000000000..87fe5d762fda --- /dev/null +++ b/utils/local-engine/Substrait/type_expressions.proto @@ -0,0 +1,138 @@ +syntax = "proto3"; +package io.substrait; + +import "type.proto"; +import "extensions.proto"; + +option java_multiple_files = true; + +message DerivationExpression { + + oneof kind { + Type.Boolean bool = 1; + Type.I8 i8 = 2; + Type.I16 i16 = 3; + Type.I32 i32 = 5; + Type.I64 i64 = 7; + Type.FP32 fp32 = 10; + Type.FP64 fp64 = 11; + Type.String string = 12; + Type.Binary binary = 13; + Type.Timestamp timestamp = 14; + Type.Date date = 16; + Type.Time time = 17; + Type.IntervalYear interval_year = 19; + Type.IntervalDay interval_day = 20; + Type.TimestampTZ timestamp_tz = 29; + Type.UUID uuid = 32; + + ExpressionFixedChar fixed_char = 21; + ExpressionVarChar varchar = 22; + ExpressionFixedBinary fixed_binary = 23; + ExpressionDecimal decimal = 24; + + ExpressionStruct struct = 25; + ExpressionList list = 27; + ExpressionMap map = 28; + + Extensions.TypeId user_defined = 31; + + string type_parameter_name = 33; + string integer_parameter_name = 34; + + int32 integer_literal = 35; + UnaryOp unary_op = 36; + BinaryOp binary_op = 37; + IfElse if_else = 38; + } + + message ExpressionFixedChar { + DerivationExpression length = 1; + Type.Variation variation = 2; + Type.Nullability nullability = 3; + } + + message ExpressionVarChar { + DerivationExpression length = 1; + Type.Variation variation = 2; + Type.Nullability nullability = 3; + } + + message ExpressionFixedBinary { + DerivationExpression length = 1; + Type.Variation variation = 2; + Type.Nullability nullability = 3; + } + + message ExpressionDecimal { + DerivationExpression scale = 1; + DerivationExpression precision = 2; + Type.Variation variation = 3; + Type.Nullability nullability = 4; + } + + message ExpressionStruct { + repeated DerivationExpression types = 1; + Type.Variation variation = 2; + Type.Nullability nullability = 3; + } + + message ExpressionNamedStruct { + repeated string names = 1; + ExpressionStruct struct = 2; + } + + message ExpressionList { + DerivationExpression type = 1; + Type.Variation variation = 2; + Type.Nullability nullability = 3; + } + + message ExpressionMap { + DerivationExpression key = 1; + DerivationExpression value = 2; + Type.Variation variation = 3; + Type.Nullability nullability = 4; + } + + + message IfElse { + DerivationExpression if_condition = 1; + DerivationExpression if_return = 2; + 
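// derivation to use when if_condition does not hold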
DerivationExpression else_return = 3; + } + + message UnaryOp { + OpType op_type = 1; + DerivationExpression arg = 2; + + enum OpType { + UNKNOWN = 0; + BOOLEAN_NOT = 1; + } + } + + message BinaryOp { + + OpType op_type = 1; + DerivationExpression arg1 = 2; + DerivationExpression arg2 = 3; + + enum OpType { + UNKNOWN = 0; + PLUS = 1; + MINUS = 2; + MULTIPLY = 3; + DIVIDE = 4; + MIN = 5; + MAX = 6; + GREATER_THAN = 7; + LESS_THAN = 8; + AND = 9; + OR = 10; + EQUALS = 11; + COVERS = 12; + } + } + +} diff --git a/utils/local-engine/java/pom.xml b/utils/local-engine/java/pom.xml index 7680287e1e09..b029f3cb8f48 100644 --- a/utils/local-engine/java/pom.xml +++ b/utils/local-engine/java/pom.xml @@ -26,7 +26,19 @@ 20.0 - + + + commons-io + commons-io + 2.11.0 + + + junit + junit + 4.13.2 + test + + diff --git a/utils/local-engine/java/src/main/java/io/kyligence/jni/engine/LocalEngine.java b/utils/local-engine/java/src/main/java/io/kyligence/jni/engine/LocalEngine.java index 0a11b1f955fe..8d25288f8794 100644 --- a/utils/local-engine/java/src/main/java/io/kyligence/jni/engine/LocalEngine.java +++ b/utils/local-engine/java/src/main/java/io/kyligence/jni/engine/LocalEngine.java @@ -16,8 +16,8 @@ public static void main(String[] args) throws InterruptedException { System.out.println(result); } - private long nativeExecutor; - private byte[] plan; + public long nativeExecutor; + public byte[] plan; public LocalEngine(byte[] plan) { this.plan = plan; diff --git a/utils/local-engine/java/src/test/java/io/kyligence/jni/engine/LocalEngineTest.java b/utils/local-engine/java/src/test/java/io/kyligence/jni/engine/LocalEngineTest.java new file mode 100644 index 000000000000..594a3f6eb34e --- /dev/null +++ b/utils/local-engine/java/src/test/java/io/kyligence/jni/engine/LocalEngineTest.java @@ -0,0 +1,31 @@ +package io.kyligence.jni.engine; + + +import org.apache.commons.io.IOUtils; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.nio.charset.StandardCharsets; + +@RunWith(JUnit4.class) +public class LocalEngineTest { + + @Before + public void setup() { + System.load("/home/kyligence/Documents/code/ClickHouse/cmake-build-debug/utils/local-engine/liblocal_engine_jnid.so"); + + } + + @Test + public void testLocalEngine() throws Exception{ + String plan = IOUtils.resourceToString("/plan.txt", StandardCharsets.UTF_8); + LocalEngine localEngine = new LocalEngine(plan.getBytes(StandardCharsets.UTF_8)); + localEngine.execute(); + Assert.assertTrue(localEngine.hasNext()); + byte[] data = localEngine.next(); + Assert.assertEquals(6490, data.length); + } +} diff --git a/utils/local-engine/local_engine_jni.cpp b/utils/local-engine/local_engine_jni.cpp index 4d5c2b257262..0608b34f6c8b 100644 --- a/utils/local-engine/local_engine_jni.cpp +++ b/utils/local-engine/local_engine_jni.cpp @@ -1,10 +1,8 @@ #include "include/io_kyligence_jni_engine_LocalEngine.h" #include #include - #include #include -#include #include #include #include @@ -276,13 +274,13 @@ void Java_io_kyligence_jni_engine_LocalEngine_execute(JNIEnv *env, jobject obj) dbms::LocalExecutor* executor = new dbms::LocalExecutor(); executor->execute(std::move(query_plan)); - jfieldID executor_field_id = env->GetFieldID(this_class, "nativeExecutor", "L"); + jfieldID executor_field_id = env->GetFieldID(this_class, "nativeExecutor", "J"); env->SetLongField(obj, executor_field_id, reinterpret_cast(executor)); } jboolean 
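// returns JNI_TRUE while the wrapped dbms::LocalExecutor still has result batches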
Java_io_kyligence_jni_engine_LocalEngine_hasNext(JNIEnv *env, jobject obj) { jclass this_class = env->GetObjectClass(obj); - jfieldID executor_field_id = env->GetFieldID(this_class, "nativeExecutor", "L"); + jfieldID executor_field_id = env->GetFieldID(this_class, "nativeExecutor", "J"); jlong executor_address = env->GetLongField(obj, executor_field_id); dbms::LocalExecutor* executor = reinterpret_cast(executor_address); return executor->hasNext(); @@ -290,7 +288,7 @@ jboolean Java_io_kyligence_jni_engine_LocalEngine_hasNext(JNIEnv *env, jobject o jbyteArray Java_io_kyligence_jni_engine_LocalEngine_next(JNIEnv *env, jobject obj) { jclass this_class = env->GetObjectClass(obj); - jfieldID executor_field_id = env->GetFieldID(this_class, "nativeExecutor", "L"); + jfieldID executor_field_id = env->GetFieldID(this_class, "nativeExecutor", "J"); jlong executor_address = env->GetLongField(obj, executor_field_id); dbms::LocalExecutor* executor = reinterpret_cast(executor_address); std::string arrow_batch = executor->next(); @@ -301,7 +299,7 @@ jbyteArray Java_io_kyligence_jni_engine_LocalEngine_next(JNIEnv *env, jobject ob void Java_io_kyligence_jni_engine_LocalEngine_close(JNIEnv *env, jobject obj) { jclass this_class = env->GetObjectClass(obj); - jfieldID executor_field_id = env->GetFieldID(this_class, "nativeExecutor", "L"); + jfieldID executor_field_id = env->GetFieldID(this_class, "nativeExecutor", "J"); jlong executor_address = env->GetLongField(obj, executor_field_id); dbms::LocalExecutor* executor = reinterpret_cast(executor_address); delete executor; diff --git a/utils/local-engine/tests/CMakeLists.txt b/utils/local-engine/tests/CMakeLists.txt new file mode 100644 index 000000000000..6eb5b6291c19 --- /dev/null +++ b/utils/local-engine/tests/CMakeLists.txt @@ -0,0 +1,36 @@ +set(USE_INTERNAL_GTEST_LIBRARY 0) +enable_testing() +include(CTest) +include (${PROJECT_SOURCE_DIR}/cmake/find/gtest.cmake) +message(GTEST_INCLUDE_DIRS:${GTEST_INCLUDE_DIRS}) +include_directories(${GTEST_INCLUDE_DIRS}) +macro (grep_gtest_sources BASE_DIR DST_VAR) + # Cold match files that are not in tests/ directories + file(GLOB_RECURSE "${DST_VAR}" RELATIVE "${BASE_DIR}" "gtest*.cpp") +endmacro() +set(TEST_DATA_DIR "${ClickHouse_SOURCE_DIR}/utils/local-engine/tests") + +configure_file( + ${ClickHouse_SOURCE_DIR}/utils/local-engine/tests/testConfig.h.in + ${ClickHouse_SOURCE_DIR}/utils/local-engine/tests/testConfig.h +) +# attach all dbms gtest sources +grep_gtest_sources("${ClickHouse_SOURCE_DIR}/utils/local_engine/tests" local_engine_gtest_sources) +add_executable(unit_tests_local_engine ${local_engine_gtest_sources}) +target_compile_options(unit_tests_local_engine PRIVATE + -Wno-zero-as-null-pointer-constant + -Wno-covered-switch-default + -Wno-undef + -Wno-sign-compare + -Wno-used-but-marked-unused + -Wno-missing-noreturn + -Wno-gnu-zero-variadic-macro-arguments + ) +target_include_directories(unit_tests_local_engine + PRIVATE + ${GTEST_INCLUDE_DIRS}/include + ${builder_headers} + ${parser_headers} + ) +target_link_libraries(unit_tests_local_engine ${CLICKHOUSE_SERVER_LINK} ${LOCALENGINE_SHARED_LIB} ${GTEST_BOTH_LIBRARIES}) +add_check(unit_tests_local_engine) \ No newline at end of file diff --git a/utils/local-engine/tests/data/iris.parquet b/utils/local-engine/tests/data/iris.parquet new file mode 100644 index 0000000000000000000000000000000000000000..70da577573cd888f9efee3a953b18b402fef3ccc GIT binary patch literal 5012 zcmcgweQZtHbMC$8p7T4ub0da$uZ1PqizItV=3)C-vI(KnG8Cr>WefMa-x7qfnfv5p 
zf*_kXs#O_p;t)ls4Txn-w3_Rr2-M8bY87f_IHX}{)Kcl27&ELyi1 zF&s^CNI@V@aS{=XYAXqc;znYi<_Y%GdO)c;Ee)5%tp=!CDQJ=li3(8EHp47`!jdpF zJG|6fOcge2$KE%8!VoU=9mT~fO|uHi=A-SKyVgCOAl-jG%$`B-y=CdweOx;G&#~8) zpFi`VuzSmmk>e-NI@G^8Cv$v6D$SRvvrPP`ic^?S6GM~hIck*be2g&w&WghT6pfSs zqZ;5;!3RWz@ou1~0RY7itE4d`0C3L$6gB{bT|nlIpwYoh3#YT3Vjje&LFfW{s$h(j z^j5Zlbj!C<^6^Ea7wk)6s+9uLwUk6E`ZA&q8&q7FCd~BDvPzn#5l_nuT0%whDizHk z*-2C5b<^?dPYu1l_1d3?u8vstMh59qZ#x%Zdv48Qj2^b>C$ef`as z?w)%|Nd5G4Z^t(Oj@#S*aOU3e&B1eL{#-aebK!y<8IQCqfv?QQ@+&DxYyKiP9#b1X zV;0e`b>IM2G~a%60Tz398jnkkpbArbo~iGRpxWLS{fDLNqW|jvR-ZV0=0^&%3w$@n!3W z`huqW{Fdmg;l1zVukp0)ddoPy(@SBp4JeLJaW~R7bYGCgZp_T*GmRoe(*i>sxud&AC3A(IJ6AOxB5-X=XuRvrN|Kb?)0i~n6hbC)n) z)fi(PS(gX4fNj{gblg&rMlWTk9%|Y6(zTR1Z%Fd%~FDk!d)3rogZF{BmdojM~ z+VYs!)B}96@7S~i-71``(iO&1nY!pMidSMQ<=$j0oS&EkxDU07smNq_avG;fb~-#c zIZ?zOO~od2sfi4hT06`g<~GQQs+%=BbBEQ|VQG6FX_dU!yU)L;TyM39l|RC*GttZQ z{4jjD?dAEMyd%N)@qB{sPDnC`W0}5)g|MfhcNBZdbNH1`k@Rk!FY>StdVENa8Fw(} z`F+FPF#54OEfzvf-_C$_a4O>R5*R5a-06x;?8-*cUNPqM&A7)W5`K$0 z=kD$wk625Vh~HM0b>>o$bYDI+;@$9s|jKY{z;y``yOVAwj??Z&nXg5Oe%`MS)pfJ4N0a;HPNh_%l=I6f@d ziuirzpx>JmoW0l|SYOJ;!Kc`Ce=V6`P9 z{<7_;)mIGqOL4&}9<2JZc6W4$0=}cQBu3JLIb8NX9rpWjp}wvfU+S)dd)p2M1A}qF zB^E2#q$AF*q|gℑJR_A_0f(TVkCL1-wbX!yLreI$^!D)aB{Mxl7}deM zh|hw&=nVV4RbSTZj+GeT{HYVaa<1V#t%q&cZ(GW(j4vN`iM27!Mly@JSvOu4K23-G zedb7~8|Ng%uapgD`jgSjK`F=KOc?i13DZ)JTSI~VnXtdBAm#VCyD-qbv*^x9+*^uS z+BY4wic^sk=AFCf8FyPe8C5wYVQb z1KxmXh+~w;1r~2!yu|Q; +#include +#include +#include +#include "testConfig.h" +#include + +TEST(TestSelect, ReadRel) +{ + dbms::SerializedSchemaBuilder schema_builder; + auto schema = schema_builder + .column("sepal_length", "FP64") + .column("sepal_width", "FP64") + .column("petal_length", "FP64") + .column("petal_width", "FP64") + .column("type", "I32") + .build(); + dbms::SerializedPlanBuilder plan_builder; + auto plan = plan_builder.files( TEST_DATA(/data/iris.parquet), std::move(schema)).build(); + + std::ofstream output; + output.open(TEST_DATA(/../java/src/test/resources/plan.txt), std::fstream::in | std::fstream::out | std::fstream::trunc); + // output << plan->SerializeAsString(); + plan->SerializeToOstream(&output); + output.flush(); + output.close(); + + ASSERT_TRUE(plan->relations(0).has_read()); + ASSERT_EQ(plan->relations_size(), 1); + auto query_plan = dbms::SerializedPlanParser::parse(std::move(plan)); + std::cout << "start execute" < Date: Fri, 26 Nov 2021 16:03:49 +0000 Subject: [PATCH 198/472] Backport #31804 to 21.9: Fix possible Assertion '!hasPendingData()' failed in TSKV format --- src/Processors/Formats/Impl/TSKVRowInputFormat.cpp | 1 + .../02125_tskv_proper_names_reading.reference | 1 + .../0_stateless/02125_tskv_proper_names_reading.sh | 13 +++++++++++++ 3 files changed, 15 insertions(+) create mode 100644 tests/queries/0_stateless/02125_tskv_proper_names_reading.reference create mode 100755 tests/queries/0_stateless/02125_tskv_proper_names_reading.sh diff --git a/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp b/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp index ee6fce833583..8c8efb9c837d 100644 --- a/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp @@ -52,6 +52,7 @@ static bool readName(ReadBuffer & buf, StringRef & ref, String & tmp) if (next_pos == buf.buffer().end()) { tmp.append(buf.position(), next_pos - buf.position()); + buf.position() = buf.buffer().end(); buf.next(); continue; } diff --git a/tests/queries/0_stateless/02125_tskv_proper_names_reading.reference 
b/tests/queries/0_stateless/02125_tskv_proper_names_reading.reference new file mode 100644 index 000000000000..d00491fd7e5b --- /dev/null +++ b/tests/queries/0_stateless/02125_tskv_proper_names_reading.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/02125_tskv_proper_names_reading.sh b/tests/queries/0_stateless/02125_tskv_proper_names_reading.sh new file mode 100755 index 000000000000..49c895329d6f --- /dev/null +++ b/tests/queries/0_stateless/02125_tskv_proper_names_reading.sh @@ -0,0 +1,13 @@ +#!/usr/bin/env bash +# Tags: no-parallel + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +USER_FILES_PATH=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') + +DATA_FILE=$USER_FILES_PATH/test_02125.data + +echo "number=1" > $DATA_FILE +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02125.data', 'TSKV', 'number UInt64') settings max_read_buffer_size=3, input_format_parallel_parsing=0" From 0f38dbb2ca4a61e6c26c44c4a264d48764fcfb3c Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Sat, 27 Nov 2021 03:54:28 +0000 Subject: [PATCH 199/472] Backport #31883 to 21.9: Fix function `empty` with `UUID` type --- src/Functions/EmptyImpl.h | 2 +- tests/queries/0_stateless/02124_empty_uuid.reference | 4 ++++ tests/queries/0_stateless/02124_empty_uuid.sql | 7 +++++++ 3 files changed, 12 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/02124_empty_uuid.reference create mode 100644 tests/queries/0_stateless/02124_empty_uuid.sql diff --git a/src/Functions/EmptyImpl.h b/src/Functions/EmptyImpl.h index c3117e0e52db..60daa66ea037 100644 --- a/src/Functions/EmptyImpl.h +++ b/src/Functions/EmptyImpl.h @@ -58,7 +58,7 @@ struct EmptyImpl static void uuid(const ColumnUUID::Container & container, size_t n, PaddedPODArray & res) { for (size_t i = 0; i < n; ++i) - res[i] = negative ^ (container.data()->toUnderType() == 0); + res[i] = negative ^ (container[i].toUnderType() == 0); } }; diff --git a/tests/queries/0_stateless/02124_empty_uuid.reference b/tests/queries/0_stateless/02124_empty_uuid.reference new file mode 100644 index 000000000000..31c1ccfc7cae --- /dev/null +++ b/tests/queries/0_stateless/02124_empty_uuid.reference @@ -0,0 +1,4 @@ +00000000-0000-0000-0000-000000000000 1 +992f6910-42b2-43cd-98bc-c812fbf9b683 0 +992f6910-42b2-43cd-98bc-c812fbf9b683 0 +00000000-0000-0000-0000-000000000000 1 diff --git a/tests/queries/0_stateless/02124_empty_uuid.sql b/tests/queries/0_stateless/02124_empty_uuid.sql new file mode 100644 index 000000000000..8dbfa3bae27c --- /dev/null +++ b/tests/queries/0_stateless/02124_empty_uuid.sql @@ -0,0 +1,7 @@ +SELECT + arrayJoin([toUUID('00000000-0000-0000-0000-000000000000'), toUUID('992f6910-42b2-43cd-98bc-c812fbf9b683')]) AS x, + empty(x) AS emp; + +SELECT + arrayJoin([toUUID('992f6910-42b2-43cd-98bc-c812fbf9b683'), toUUID('00000000-0000-0000-0000-000000000000')]) AS x, + empty(x) AS emp; From 9b3fc08917278cd4d227c6a54c64c3fd50dcb3dc Mon Sep 17 00:00:00 2001 From: "neng.liu" Date: Sun, 28 Nov 2021 04:32:41 +0000 Subject: [PATCH 200/472] support i64 --- utils/local-engine/Builder/SerializedPlanBuilder.cpp | 6 ++++++ utils/local-engine/Parser/SerializedPlanParser.cpp | 10 +++++++--- .../java/io/kyligence/jni/engine/LocalEngineTest.java | 2 +- utils/local-engine/tests/gtest_local_engine.cpp | 2 +- 4 files changed, 15 insertions(+), 5 deletions(-) diff --git 
a/utils/local-engine/Builder/SerializedPlanBuilder.cpp b/utils/local-engine/Builder/SerializedPlanBuilder.cpp index f8778496d426..785cb9bcb4b6 100644 --- a/utils/local-engine/Builder/SerializedPlanBuilder.cpp +++ b/utils/local-engine/Builder/SerializedPlanBuilder.cpp @@ -20,6 +20,12 @@ std::unique_ptr SerializedSchemaBuilder::build( t->mutable_i32()->set_nullability( this->nullability_map[name] ? io::substrait::Type_Nullability_NULLABLE : io::substrait::Type_Nullability_REQUIRED); } + else if (type == "I64") + { + auto *t = type_struct->mutable_types()->Add(); + t->mutable_i64()->set_nullability( + this->nullability_map[name] ? io::substrait::Type_Nullability_NULLABLE : io::substrait::Type_Nullability_REQUIRED); + } else if (type == "Boolean") { auto *t = type_struct->mutable_types()->Add(); diff --git a/utils/local-engine/Parser/SerializedPlanParser.cpp b/utils/local-engine/Parser/SerializedPlanParser.cpp index 92599f0ab3bc..9357f904049d 100644 --- a/utils/local-engine/Parser/SerializedPlanParser.cpp +++ b/utils/local-engine/Parser/SerializedPlanParser.cpp @@ -39,15 +39,19 @@ DB::DataTypePtr dbms::SerializedPlanParser::parseType(const io::substrait::Type& auto & factory = DB::DataTypeFactory::instance(); if (type.has_bool_() || type.has_i8()) { - return factory.get("UInt8"); + return factory.get("Int8"); } else if (type.has_i16()) { - return factory.get("UInt16"); + return factory.get("Int16"); } else if (type.has_i32()) { - return factory.get("UInt32"); + return factory.get("Int32"); + } + else if (type.has_i64()) + { + return factory.get("Int64"); } else if (type.has_string()) { diff --git a/utils/local-engine/java/src/test/java/io/kyligence/jni/engine/LocalEngineTest.java b/utils/local-engine/java/src/test/java/io/kyligence/jni/engine/LocalEngineTest.java index 594a3f6eb34e..e9d76d03ccad 100644 --- a/utils/local-engine/java/src/test/java/io/kyligence/jni/engine/LocalEngineTest.java +++ b/utils/local-engine/java/src/test/java/io/kyligence/jni/engine/LocalEngineTest.java @@ -26,6 +26,6 @@ public void testLocalEngine() throws Exception{ localEngine.execute(); Assert.assertTrue(localEngine.hasNext()); byte[] data = localEngine.next(); - Assert.assertEquals(6490, data.length); + Assert.assertEquals(7106, data.length); } } diff --git a/utils/local-engine/tests/gtest_local_engine.cpp b/utils/local-engine/tests/gtest_local_engine.cpp index bb74244b5975..1013ad2c8537 100644 --- a/utils/local-engine/tests/gtest_local_engine.cpp +++ b/utils/local-engine/tests/gtest_local_engine.cpp @@ -13,7 +13,7 @@ TEST(TestSelect, ReadRel) .column("sepal_width", "FP64") .column("petal_length", "FP64") .column("petal_width", "FP64") - .column("type", "I32") + .column("type", "I64") .build(); dbms::SerializedPlanBuilder plan_builder; auto plan = plan_builder.files( TEST_DATA(/data/iris.parquet), std::move(schema)).build(); From f4548cf742b06fb3ca842be27730f136f318bfc9 Mon Sep 17 00:00:00 2001 From: "neng.liu" Date: Mon, 29 Nov 2021 07:49:48 +0000 Subject: [PATCH 201/472] add project parse --- .../Parser/SerializedPlanParser.cpp | 42 +++++++++++++++---- .../Parser/SerializedPlanParser.h | 4 ++ 2 files changed, 37 insertions(+), 9 deletions(-) diff --git a/utils/local-engine/Parser/SerializedPlanParser.cpp b/utils/local-engine/Parser/SerializedPlanParser.cpp index 9357f904049d..b9207bdf2f44 100644 --- a/utils/local-engine/Parser/SerializedPlanParser.cpp +++ b/utils/local-engine/Parser/SerializedPlanParser.cpp @@ -76,15 +76,7 @@ DB::QueryPlanPtr dbms::SerializedPlanParser::parse(std::unique_ptrrelations_size() == 1) 
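// exactly one top-level relation is supported; it is parsed into query plan steps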
{ auto rel = plan->relations().at(0); - if (rel.has_read()) { - std::shared_ptr source = std::dynamic_pointer_cast(SerializedPlanParser::parseReadRealWithLocalFile(rel.read())); - auto source_step = std::make_unique(Pipe(source), "Parquet"); - query_plan->addStep(std::move(source_step)); - } - else - { - throw std::runtime_error("unsupported relation"); - } + parse(*query_plan, rel); } else { @@ -98,6 +90,38 @@ DB::QueryPlanPtr dbms::SerializedPlanParser::parse(std::string& plan) plan_ptr->ParseFromString(plan); return parse(std::move(plan_ptr)); } +void dbms::SerializedPlanParser::parse(DB::QueryPlan & query_plan, const io::substrait::ReadRel & rel) +{ + std::shared_ptr source = std::dynamic_pointer_cast(SerializedPlanParser::parseReadRealWithLocalFile(rel)); + auto source_step = std::make_unique(Pipe(source), "Parquet"); + query_plan.addStep(std::move(source_step)); +} +void dbms::SerializedPlanParser::parse(DB::QueryPlan & query_plan, const io::substrait::Rel& rel) +{ + if (rel.has_read()) { + parse(query_plan, rel.read()); + } + else if (rel.has_project()) + { + parse(query_plan, rel.project()); + } + else + { + throw std::runtime_error("unsupported relation"); + } +} +void dbms::SerializedPlanParser::parse(DB::QueryPlan & query_plan, const io::substrait::ProjectRel & rel) +{ + if (rel.has_input()) + { + parse(query_plan, rel.input()); + } + else + { + throw std::runtime_error("project relation should contains a input relation"); + } + //TODO add project step +} DB::Chunk DB::BatchParquetFileSource::generate() { while (!finished_generate) diff --git a/utils/local-engine/Parser/SerializedPlanParser.h b/utils/local-engine/Parser/SerializedPlanParser.h index 4bac707088d9..9d3525d557d0 100644 --- a/utils/local-engine/Parser/SerializedPlanParser.h +++ b/utils/local-engine/Parser/SerializedPlanParser.h @@ -64,6 +64,10 @@ class SerializedPlanParser static DB::BatchParquetFileSourcePtr parseReadRealWithLocalFile(const io::substrait::ReadRel& rel); static DB::Block parseNameStruct(const io::substrait::Type_NamedStruct& struct_); static DB::DataTypePtr parseType(const io::substrait::Type& type); +private: + static void parse(DB::QueryPlan & query_plan, const io::substrait::Rel& rel); + static void parse(DB::QueryPlan & query_plan, const io::substrait::ReadRel& rel); + static void parse(DB::QueryPlan & query_plan, const io::substrait::ProjectRel& rel); }; From 73ecb1d7360c61b259b1144ff0f0009280ee1215 Mon Sep 17 00:00:00 2001 From: "neng.liu" Date: Mon, 29 Nov 2021 07:53:37 +0000 Subject: [PATCH 202/472] chang group id --- utils/local-engine/java/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/local-engine/java/pom.xml b/utils/local-engine/java/pom.xml index b029f3cb8f48..699092dc6e03 100644 --- a/utils/local-engine/java/pom.xml +++ b/utils/local-engine/java/pom.xml @@ -2,7 +2,7 @@ - io.kylingence.jni + io.kyligence.jni 0.1.0-SNAPSHOT 4.0.0 local-engine From 7874a231b185498d252cd7dbecb1344ea61b7427 Mon Sep 17 00:00:00 2001 From: alesapin Date: Sun, 28 Nov 2021 15:33:41 +0300 Subject: [PATCH 203/472] Merge pull request #31861 from ClickHouse/more_clear_build_paths Get rid of build numbers and simplify builds paths in S3 (cherry picked from commit 91ff4caba4409908fcbd045b349b53159790eac4) (cherry picked from commit 25ce8b6614b729fc9902f51f6107c6d4f7a9f693) (cherry picked from commit 279c055fbc20a65e04171eb8956b07943d6c8268) --- .github/workflows/release_branches.yml | 26 +- tests/ci/ast_fuzzer_check.py | 11 +- tests/ci/build_check.py | 20 +- 
tests/ci/build_download_helper.py | 17 +- tests/ci/ci_config.py | 561 +++---------------------- tests/ci/metrics_lambda/app.py | 2 +- 6 files changed, 99 insertions(+), 538 deletions(-) diff --git a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml index e279ae915888..988db77e62a2 100644 --- a/.github/workflows/release_branches.yml +++ b/.github/workflows/release_branches.yml @@ -41,7 +41,7 @@ jobs: sudo rm -fr $TEMP_PATH mkdir -p $TEMP_PATH cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci && python3 compatibility_check.py 0 + cd $REPO_COPY/tests/ci && python3 compatibility_check.py - name: Cleanup if: always() run: | @@ -72,12 +72,12 @@ jobs: REPO_COPY: ${{runner.temp}}/build_check/ClickHouse CACHES_PATH: ${{runner.temp}}/../ccaches CHECK_NAME: 'ClickHouse build check (actions)' - BUILD_NUMBER: 0 + BUILD_NAME: 'package_release' run: | sudo rm -fr $TEMP_PATH mkdir -p $TEMP_PATH cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NUMBER + cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME - name: Upload build URLs to artifacts uses: actions/upload-artifact@v2 with: @@ -110,12 +110,12 @@ jobs: REPO_COPY: ${{runner.temp}}/build_check/ClickHouse CACHES_PATH: ${{runner.temp}}/../ccaches CHECK_NAME: 'ClickHouse build check (actions)' - BUILD_NUMBER: 3 + BUILD_NAME: 'package_asan' run: | sudo rm -fr $TEMP_PATH mkdir -p $TEMP_PATH cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NUMBER + cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME - name: Upload build URLs to artifacts uses: actions/upload-artifact@v2 with: @@ -148,12 +148,12 @@ jobs: REPO_COPY: ${{runner.temp}}/build_check/ClickHouse CACHES_PATH: ${{runner.temp}}/../ccaches CHECK_NAME: 'ClickHouse build check (actions)' - BUILD_NUMBER: 4 + BUILD_NAME: 'package_ubsan' run: | sudo rm -fr $TEMP_PATH mkdir -p $TEMP_PATH cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NUMBER + cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME - name: Upload build URLs to artifacts uses: actions/upload-artifact@v2 with: @@ -186,12 +186,12 @@ jobs: REPO_COPY: ${{runner.temp}}/build_check/ClickHouse CACHES_PATH: ${{runner.temp}}/../ccaches CHECK_NAME: 'ClickHouse build check (actions)' - BUILD_NUMBER: 5 + BUILD_NAME: 'package_tsan' run: | sudo rm -fr $TEMP_PATH mkdir -p $TEMP_PATH cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NUMBER + cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME - name: Upload build URLs to artifacts uses: actions/upload-artifact@v2 with: @@ -224,12 +224,12 @@ jobs: REPO_COPY: ${{runner.temp}}/build_check/ClickHouse CACHES_PATH: ${{runner.temp}}/../ccaches CHECK_NAME: 'ClickHouse build check (actions)' - BUILD_NUMBER: 6 + BUILD_NAME: 'package_msan' run: | sudo rm -fr $TEMP_PATH mkdir -p $TEMP_PATH cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NUMBER + cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME - name: Upload build URLs to artifacts uses: actions/upload-artifact@v2 with: @@ -262,12 +262,12 @@ jobs: REPO_COPY: ${{runner.temp}}/build_check/ClickHouse CACHES_PATH: ${{runner.temp}}/../ccaches CHECK_NAME: 'ClickHouse build check (actions)' - BUILD_NUMBER: 7 + BUILD_NAME: 'package_debug' run: 
| sudo rm -fr $TEMP_PATH mkdir -p $TEMP_PATH cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NUMBER + cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME - name: Upload build URLs to artifacts uses: actions/upload-artifact@v2 with: diff --git a/tests/ci/ast_fuzzer_check.py b/tests/ci/ast_fuzzer_check.py index d842d4848413..02c81a4db318 100644 --- a/tests/ci/ast_fuzzer_check.py +++ b/tests/ci/ast_fuzzer_check.py @@ -11,8 +11,7 @@ from s3_helper import S3Helper from get_robot_token import get_best_robot_token from pr_info import PRInfo -from ci_config import build_config_to_string -from build_download_helper import get_build_config_for_check, get_build_urls +from build_download_helper import get_build_name_for_check, get_build_urls from docker_pull_helper import get_image_with_version from commit_status_helper import post_commit_status from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse @@ -54,11 +53,9 @@ def get_commit(gh, commit_sha): docker_image = get_image_with_version(temp_path, IMAGE_NAME) - build_config = get_build_config_for_check(check_name) - print(build_config) - build_config_str = build_config_to_string(build_config) - print(build_config_str) - urls = get_build_urls(build_config_str, reports_path) + build_name = get_build_name_for_check(check_name) + print(build_name) + urls = get_build_urls(build_name, reports_path) if not urls: raise Exception("No build URLs found") diff --git a/tests/ci/build_check.py b/tests/ci/build_check.py index 35737e5fa713..67b443596a80 100644 --- a/tests/ci/build_check.py +++ b/tests/ci/build_check.py @@ -12,19 +12,17 @@ from get_robot_token import get_best_robot_token from version_helper import get_version_from_repo, update_version_local from ccache_utils import get_ccache_if_not_exists, upload_ccache -from ci_config import build_config_to_string, CI_CONFIG +from ci_config import CI_CONFIG from docker_pull_helper import get_image_with_version -def get_build_config(build_check_name, build_number): +def get_build_config(build_check_name, build_name): if build_check_name == 'ClickHouse build check (actions)': build_config_name = 'build_config' - elif build_check_name == 'ClickHouse special build check (actions)': - build_config_name = 'special_build_config' else: raise Exception(f"Unknown build check name {build_check_name}") - return CI_CONFIG[build_config_name][build_number] + return CI_CONFIG[build_config_name][build_name] def _can_export_binaries(build_config): @@ -98,9 +96,9 @@ def build_clickhouse(packager_cmd, logs_path): caches_path = os.getenv("CACHES_PATH", temp_path) build_check_name = sys.argv[1] - build_number = int(sys.argv[2]) + build_name = sys.argv[2] - build_config = get_build_config(build_check_name, build_number) + build_config = get_build_config(build_check_name, build_name) if not os.path.exists(temp_path): os.makedirs(temp_path) @@ -129,7 +127,6 @@ def build_clickhouse(packager_cmd, logs_path): logging.info("Updated local files with version") - build_name = build_config_to_string(build_config) logging.info("Build short name %s", build_name) subprocess.check_call(f"echo 'BUILD_NAME=build_urls_{build_name}' >> $GITHUB_ENV", shell=True) @@ -165,7 +162,12 @@ def build_clickhouse(packager_cmd, logs_path): logging.info("Will upload cache") upload_ccache(ccache_path, s3_helper, pr_info.number, temp_path) - s3_path_prefix = str(pr_info.number) + "/" + pr_info.sha + "/" + build_check_name.lower().replace(' ', '_') + "/" + 
build_name + # for release pull requests we use branch names prefixes, not pr numbers + if 'release' in pr_info.labels or 'release-lts' in pr_info.labels: + s3_path_prefix = pr_info.head_ref + "/" + pr_info.sha + "/" + build_name + else: + s3_path_prefix = str(pr_info.number) + "/" + pr_info.sha + "/" + build_name + if os.path.exists(log_path): log_url = s3_helper.upload_build_file_to_s3(log_path, s3_path_prefix + "/" + os.path.basename(log_path)) logging.info("Log url %s", log_url) diff --git a/tests/ci/build_download_helper.py b/tests/ci/build_download_helper.py index 2770b7370415..5ce54423e199 100644 --- a/tests/ci/build_download_helper.py +++ b/tests/ci/build_download_helper.py @@ -8,17 +8,17 @@ import requests -from ci_config import CI_CONFIG, build_config_to_string +from ci_config import CI_CONFIG DOWNLOAD_RETRIES_COUNT = 5 -def get_build_config_for_check(check_name): - return CI_CONFIG["tests_config"][check_name]['required_build_properties'] +def get_build_name_for_check(check_name): + return CI_CONFIG['tests_config'][check_name]['required_build'] -def get_build_urls(build_config_str, reports_path): +def get_build_urls(build_name, reports_path): for root, _, files in os.walk(reports_path): for f in files: - if build_config_str in f : + if build_name in f : logging.info("Found build report json %s", f) with open(os.path.join(root, f), 'r', encoding='utf-8') as file_handler: build_report = json.load(file_handler) @@ -72,11 +72,8 @@ def download_builds(result_path, build_urls, filter_fn): dowload_build_with_progress(url, os.path.join(result_path, fname)) def download_builds_filter(check_name, reports_path, result_path, filter_fn=lambda _: True): - build_config = get_build_config_for_check(check_name) - print(build_config) - build_config_str = build_config_to_string(build_config) - print(build_config_str) - urls = get_build_urls(build_config_str, reports_path) + build_name = get_build_name_for_check(check_name) + urls = get_build_urls(build_name, reports_path) print(urls) if not urls: diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index 64a2b4d5a2ab..65e3c1bfd05c 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -1,8 +1,8 @@ #!/usr/bin/env python3 CI_CONFIG = { - "build_config": [ - { + "build_config": { + "package_release": { "compiler": "clang-11", "build_type": "", "sanitizer": "", @@ -13,7 +13,7 @@ "tidy": "disable", "with_coverage": False }, - { + "performance": { "compiler": "clang-11", "build_type": "", "sanitizer": "", @@ -23,7 +23,7 @@ "tidy": "disable", "with_coverage": False }, - { + "binary_gcc": { "compiler": "gcc-10", "build_type": "", "sanitizer": "", @@ -33,7 +33,7 @@ "tidy": "disable", "with_coverage": False }, - { + "package_asan": { "compiler": "clang-11", "build_type": "", "sanitizer": "address", @@ -43,7 +43,7 @@ "tidy": "disable", "with_coverage": False }, - { + "package_ubsan": { "compiler": "clang-11", "build_type": "", "sanitizer": "undefined", @@ -53,7 +53,7 @@ "tidy": "disable", "with_coverage": False }, - { + "package_tsan": { "compiler": "clang-11", "build_type": "", "sanitizer": "thread", @@ -63,7 +63,7 @@ "tidy": "disable", "with_coverage": False }, - { + "package_msan": { "compiler": "clang-11", "build_type": "", "sanitizer": "memory", @@ -73,7 +73,7 @@ "tidy": "disable", "with_coverage": False }, - { + "package_debug": { "compiler": "clang-11", "build_type": "debug", "sanitizer": "", @@ -83,17 +83,7 @@ "tidy": "disable", "with_coverage": False }, - { - "compiler": "gcc-10", - "build_type": "", - "sanitizer": "", - 
"package_type": "deb", - "bundled": "unbundled", - "splitted": "unsplitted", - "tidy": "disable", - "with_coverage": False - }, - { + "binary_release": { "compiler": "clang-11", "build_type": "", "sanitizer": "", @@ -102,10 +92,8 @@ "splitted": "unsplitted", "tidy": "disable", "with_coverage": False - } - ], - "special_build_config": [ - { + }, + "package_tidy": { "compiler": "clang-11", "build_type": "debug", "sanitizer": "", @@ -115,7 +103,7 @@ "tidy": "enable", "with_coverage": False }, - { + "binary_splitted": { "compiler": "clang-11", "build_type": "", "sanitizer": "", @@ -125,7 +113,7 @@ "tidy": "disable", "with_coverage": False }, - { + "binary_darwin": { "compiler": "clang-11-darwin", "build_type": "", "sanitizer": "", @@ -135,7 +123,7 @@ "tidy": "disable", "with_coverage": False }, - { + "binary_aarch64": { "compiler": "clang-11-aarch64", "build_type": "", "sanitizer": "", @@ -145,7 +133,7 @@ "tidy": "disable", "with_coverage": False }, - { + "binary_freebsd": { "compiler": "clang-11-freebsd", "build_type": "", "sanitizer": "", @@ -155,7 +143,7 @@ "tidy": "disable", "with_coverage": False }, - { + "binary_darwin_aarch64": { "compiler": "clang-11-darwin-aarch64", "build_type": "", "sanitizer": "", @@ -165,7 +153,7 @@ "tidy": "disable", "with_coverage": False }, - { + "binary_ppc64le": { "compiler": "clang-11-ppc64le", "build_type": "", "sanitizer": "", @@ -175,562 +163,139 @@ "tidy": "disable", "with_coverage": False } - ], + }, "tests_config": { "Stateful tests (address, actions)": { - "required_build_properties": { - "compiler": "clang-11", - "package_type": "deb", - "build_type": "relwithdebuginfo", - "sanitizer": "address", - "bundled": "bundled", - "splitted": "unsplitted", - "clang_tidy": "disable", - "with_coverage": False - } + "required_build": "package_asan", }, "Stateful tests (thread, actions)": { - "required_build_properties": { - "compiler": "clang-11", - "package_type": "deb", - "build_type": "relwithdebuginfo", - "sanitizer": "thread", - "bundled": "bundled", - "splitted": "unsplitted", - "clang_tidy": "disable", - "with_coverage": False - } + "required_build": "package_tsan", }, "Stateful tests (memory, actions)": { - "required_build_properties": { - "compiler": "clang-11", - "package_type": "deb", - "build_type": "relwithdebuginfo", - "sanitizer": "memory", - "bundled": "bundled", - "splitted": "unsplitted", - "clang_tidy": "disable", - "with_coverage": False - } + "required_build": "package_msan", }, "Stateful tests (ubsan, actions)": { - "required_build_properties": { - "compiler": "clang-11", - "package_type": "deb", - "build_type": "relwithdebuginfo", - "sanitizer": "undefined", - "bundled": "bundled", - "splitted": "unsplitted", - "clang_tidy": "disable", - "with_coverage": False - } + "required_build": "package_ubsan", }, "Stateful tests (debug, actions)": { - "required_build_properties": { - "compiler": "clang-11", - "package_type": "deb", - "build_type": "debug", - "sanitizer": "none", - "bundled": "bundled", - "splitted": "unsplitted", - "clang_tidy": "disable", - "with_coverage": False - } + "required_build": "package_debug", }, "Stateful tests (release, actions)": { - "required_build_properties": { - "compiler": "clang-11", - "package_type": "deb", - "build_type": "relwithdebuginfo", - "sanitizer": "none", - "bundled": "bundled", - "splitted": "unsplitted", - "clang_tidy": "disable", - "with_coverage": False - } + "required_build": "package_release", }, "Stateful tests (release, DatabaseOrdinary, actions)": { - "required_build_properties": { - 
"compiler": "clang-11", - "package_type": "deb", - "build_type": "relwithdebuginfo", - "sanitizer": "none", - "bundled": "bundled", - "splitted": "unsplitted", - "clang_tidy": "disable", - "with_coverage": False - } + "required_build": "package_release", }, "Stateful tests (release, DatabaseReplicated, actions)": { - "required_build_properties": { - "compiler": "clang-11", - "package_type": "deb", - "build_type": "relwithdebuginfo", - "sanitizer": "none", - "bundled": "bundled", - "splitted": "unsplitted", - "clang_tidy": "disable", - "with_coverage": False - } + "required_build": "package_release", }, "Stateless tests (address, actions)": { - "required_build_properties": { - "compiler": "clang-11", - "package_type": "deb", - "build_type": "relwithdebuginfo", - "sanitizer": "address", - "bundled": "bundled", - "splitted": "unsplitted", - "clang_tidy": "disable", - "with_coverage": False - } + "required_build": "package_asan", }, "Stateless tests (thread, actions)": { - "required_build_properties": { - "compiler": "clang-11", - "package_type": "deb", - "build_type": "relwithdebuginfo", - "sanitizer": "thread", - "bundled": "bundled", - "splitted": "unsplitted", - "clang_tidy": "disable", - "with_coverage": False - } + "required_build": "package_tsan", }, "Stateless tests (memory, actions)": { - "required_build_properties": { - "compiler": "clang-11", - "package_type": "deb", - "build_type": "relwithdebuginfo", - "sanitizer": "memory", - "bundled": "bundled", - "splitted": "unsplitted", - "clang_tidy": "disable", - "with_coverage": False - } + "required_build": "package_msan", }, "Stateless tests (ubsan, actions)": { - "required_build_properties": { - "compiler": "clang-11", - "package_type": "deb", - "build_type": "relwithdebuginfo", - "sanitizer": "undefined", - "bundled": "bundled", - "splitted": "unsplitted", - "clang_tidy": "disable", - "with_coverage": False - } + "required_build": "package_ubsan", }, "Stateless tests (debug, actions)": { - "required_build_properties": { - "compiler": "clang-11", - "package_type": "deb", - "build_type": "debug", - "sanitizer": "none", - "bundled": "bundled", - "splitted": "unsplitted", - "clang_tidy": "disable", - "with_coverage": False - } + "required_build": "package_debug", }, "Stateless tests (release, actions)": { - "required_build_properties": { - "compiler": "clang-11", - "package_type": "deb", - "build_type": "relwithdebuginfo", - "sanitizer": "none", - "bundled": "bundled", - "splitted": "unsplitted", - "clang_tidy": "disable", - "with_coverage": False - } - }, - "Stateless tests (unbundled, actions)": { - "required_build_properties": { - "compiler": "gcc-10", - "package_type": "deb", - "build_type": "relwithdebuginfo", - "sanitizer": "none", - "bundled": "unbundled", - "splitted": "unsplitted", - "clang_tidy": "disable", - "with_coverage": False - } + "required_build": "package_release", }, "Stateless tests (release, wide parts enabled, actions)": { - "required_build_properties": { - "compiler": "clang-11", - "package_type": "deb", - "build_type": "relwithdebuginfo", - "sanitizer": "none", - "bundled": "bundled", - "splitted": "unsplitted", - "clang_tidy": "disable", - "with_coverage": False - } + "required_build": "package_release", }, "Stateless tests (release, DatabaseOrdinary, actions)": { - "required_build_properties": { - "compiler": "clang-11", - "package_type": "deb", - "build_type": "relwithdebuginfo", - "sanitizer": "none", - "bundled": "bundled", - "splitted": "unsplitted", - "clang_tidy": "disable", - "with_coverage": False - } + 
"required_build": "package_release", }, "Stateless tests (release, DatabaseReplicated, actions)": { - "required_build_properties": { - "compiler": "clang-11", - "package_type": "deb", - "build_type": "relwithdebuginfo", - "sanitizer": "none", - "bundled": "bundled", - "splitted": "unsplitted", - "clang_tidy": "disable", - "with_coverage": False - } + "required_build": "package_release", }, "Stress test (address, actions)": { - "required_build_properties": { - "compiler": "clang-11", - "package_type": "deb", - "build_type": "relwithdebuginfo", - "sanitizer": "address", - "bundled": "bundled", - "splitted": "unsplitted", - "clang_tidy": "disable", - "with_coverage": False - } + "required_build": "package_asan", }, "Stress test (thread, actions)": { - "required_build_properties": { - "compiler": "clang-11", - "package_type": "deb", - "build_type": "relwithdebuginfo", - "sanitizer": "thread", - "bundled": "bundled", - "splitted": "unsplitted", - "clang_tidy": "disable", - "with_coverage": False - } + "required_build": "package_tsan", }, "Stress test (undefined, actions)": { - "required_build_properties": { - "compiler": "clang-11", - "package_type": "deb", - "build_type": "relwithdebuginfo", - "sanitizer": "undefined", - "bundled": "bundled", - "splitted": "unsplitted", - "clang_tidy": "disable", - "with_coverage": False - } + "required_build": "package_ubsan", }, "Stress test (memory, actions)": { - "required_build_properties": { - "compiler": "clang-11", - "package_type": "deb", - "build_type": "relwithdebuginfo", - "sanitizer": "memory", - "bundled": "bundled", - "splitted": "unsplitted", - "clang_tidy": "disable", - "with_coverage": False - } + "required_build": "package_msan", }, "Stress test (debug, actions)": { - "required_build_properties": { - "compiler": "clang-11", - "package_type": "deb", - "build_type": "debug", - "sanitizer": "none", - "bundled": "bundled", - "splitted": "unsplitted", - "clang_tidy": "disable", - "with_coverage": False - } + "required_build": "package_debug", }, "Integration tests (asan, actions)": { - "required_build_properties": { - "compiler": "clang-11", - "package_type": "deb", - "build_type": "relwithdebuginfo", - "sanitizer": "address", - "bundled": "bundled", - "splitted": "unsplitted", - "clang_tidy": "disable", - "with_coverage": False - } + "required_build": "package_asan", }, "Integration tests (thread, actions)": { - "required_build_properties": { - "compiler": "clang-11", - "package_type": "deb", - "build_type": "relwithdebuginfo", - "sanitizer": "thread", - "bundled": "bundled", - "splitted": "unsplitted", - "clang_tidy": "disable", - "with_coverage": False - } + "required_build": "package_tsan", }, "Integration tests (release, actions)": { - "required_build_properties": { - "compiler": "clang-11", - "package_type": "deb", - "build_type": "relwithdebuginfo", - "sanitizer": "none", - "bundled": "bundled", - "splitted": "unsplitted", - "clang_tidy": "disable", - "with_coverage": False - } + "required_build": "package_release", }, "Integration tests (memory, actions)": { - "required_build_properties": { - "compiler": "clang-11", - "package_type": "deb", - "build_type": "relwithdebuginfo", - "sanitizer": "memory", - "bundled": "bundled", - "splitted": "unsplitted", - "clang_tidy": "disable", - "with_coverage": False - } + "required_build": "package_msan", }, "Integration tests flaky check (asan, actions)": { - "required_build_properties": { - "compiler": "clang-11", - "package_type": "deb", - "build_type": "relwithdebuginfo", - "sanitizer": "address", 
- "bundled": "bundled", - "splitted": "unsplitted", - "clang_tidy": "disable", - "with_coverage": False - } + "required_build": "package_asan", }, "Compatibility check (actions)": { - "required_build_properties": { - "compiler": "clang-11", - "package_type": "deb", - "build_type": "relwithdebuginfo", - "sanitizer": "none", - "bundled": "bundled", - "splitted": "unsplitted", - "clang_tidy": "disable", - "with_coverage": False - } + "required_build": "package_release", }, "Split build smoke test (actions)": { - "required_build_properties": { - "compiler": "clang-11", - "package_type": "binary", - "build_type": "relwithdebuginfo", - "sanitizer": "none", - "bundled": "bundled", - "splitted": "splitted", - "clang_tidy": "disable", - "with_coverage": False - } + "required_build": "binary_splitted", }, "Testflows check (actions)": { - "required_build_properties": { - "compiler": "clang-11", - "package_type": "deb", - "build_type": "relwithdebuginfo", - "sanitizer": "none", - "bundled": "bundled", - "splitted": "unsplitted", - "clang_tidy": "disable", - "with_coverage": False - } + "required_build": "package_release", }, "Unit tests (release-gcc, actions)": { - "required_build_properties": { - "compiler": "gcc-10", - "package_type": "deb", - "build_type": "relwithdebuginfo", - "sanitizer": "none", - "bundled": "bundled", - "splitted": "unsplitted", - "clang_tidy": "disable", - "with_coverage": False - } + "required_build": "binary_gcc", }, "Unit tests (release-clang, actions)": { - "required_build_properties": { - "compiler": "clang-11", - "package_type": "binary", - "build_type": "relwithdebuginfo", - "sanitizer": "none", - "bundled": "bundled", - "splitted": "unsplitted", - "clang_tidy": "disable", - "with_coverage": False - } + "required_build": "binary_release", }, "Unit tests (asan, actions)": { - "required_build_properties": { - "compiler": "clang-11", - "package_type": "deb", - "build_type": "relwithdebuginfo", - "sanitizer": "address", - "bundled": "bundled", - "splitted": "unsplitted", - "clang_tidy": "disable", - "with_coverage": False - } + "required_build": "package_asan", }, "Unit tests (msan, actions)": { - "required_build_properties": { - "compiler": "clang-11", - "package_type": "deb", - "build_type": "relwithdebuginfo", - "sanitizer": "memory", - "bundled": "bundled", - "splitted": "unsplitted", - "clang_tidy": "disable", - "with_coverage": False - } + "required_build": "package_msan", }, "Unit tests (tsan, actions)": { - "required_build_properties": { - "compiler": "clang-11", - "package_type": "deb", - "build_type": "relwithdebuginfo", - "sanitizer": "thread", - "bundled": "bundled", - "splitted": "unsplitted", - "clang_tidy": "disable", - "with_coverage": False - } + "required_build": "package_tsan", }, "Unit tests (ubsan, actions)": { - "required_build_properties": { - "compiler": "clang-11", - "package_type": "deb", - "build_type": "relwithdebuginfo", - "sanitizer": "thread", - "bundled": "bundled", - "splitted": "unsplitted", - "clang_tidy": "disable", - "with_coverage": False - } + "required_build": "package_ubsan", }, "AST fuzzer (debug, actions)": { - "required_build_properties": { - "compiler": "clang-11", - "package_type": "deb", - "build_type": "debug", - "sanitizer": "none", - "bundled": "bundled", - "splitted": "unsplitted", - "clang_tidy": "disable", - "with_coverage": False - } + "required_build": "package_debug", }, "AST fuzzer (ASan, actions)": { - "required_build_properties": { - "compiler": "clang-11", - "package_type": "deb", - "build_type": 
"relwithdebuginfo", - "sanitizer": "address", - "bundled": "bundled", - "splitted": "unsplitted", - "clang_tidy": "disable", - "with_coverage": False - } + "required_build": "package_asan", }, "AST fuzzer (MSan, actions)": { - "required_build_properties": { - "compiler": "clang-11", - "package_type": "deb", - "build_type": "relwithdebuginfo", - "sanitizer": "memory", - "bundled": "bundled", - "splitted": "unsplitted", - "clang_tidy": "disable", - "with_coverage": False - } + "required_build": "package_msan", }, "AST fuzzer (TSan, actions)": { - "required_build_properties": { - "compiler": "clang-11", - "package_type": "deb", - "build_type": "relwithdebuginfo", - "sanitizer": "thread", - "bundled": "bundled", - "splitted": "unsplitted", - "clang_tidy": "disable", - "with_coverage": False - } + "required_build": "package_tsan", }, "AST fuzzer (UBSan, actions)": { - "required_build_properties": { - "compiler": "clang-11", - "package_type": "deb", - "build_type": "relwithdebuginfo", - "sanitizer": "undefined", - "bundled": "bundled", - "splitted": "unsplitted", - "clang_tidy": "disable", - "with_coverage": False - } + "required_build": "package_ubsan", }, "Release (actions)": { - "required_build_properties": { - "compiler": "clang-11", - "package_type": "deb", - "build_type": "relwithdebuginfo", - "sanitizer": "none", - "bundled": "bundled", - "splitted": "unsplitted", - "clang_tidy": "disable", - "with_coverage": False - } + "required_build": "package_release", }, "Stateless tests flaky check (address, actions)": { - "required_build_properties": { - "compiler": "clang-11", - "package_type": "deb", - "build_type": "relwithdebuginfo", - "sanitizer": "address", - "bundled": "bundled", - "splitted": "unsplitted", - "clang_tidy": "disable", - "with_coverage": False - } + "required_build": "package_asan", }, "ClickHouse Keeper Jepsen (actions)": { - "required_build_properties": { - "compiler": "clang-11", - "package_type": "binary", - "build_type": "relwithdebuginfo", - "sanitizer": "none", - "bundled": "bundled", - "splitted": "unsplitted", - "clang_tidy": "disable", - "with_coverage": False - } + "required_build": "binary_release", } } } - -def build_config_to_string(build_config): - if build_config["package_type"] == "performance": - return "performance" - - return "_".join([ - build_config['compiler'], - build_config['build_type'] if build_config['build_type'] else "relwithdebuginfo", - build_config['sanitizer'] if build_config['sanitizer'] else "none", - build_config['bundled'], - build_config['splitted'], - 'tidy' if 'tidy' in build_config and build_config['tidy'] == 'enable' else 'notidy', - 'with_coverage' if 'with_coverage' in build_config and build_config['with_coverage'] else 'without_coverage', - build_config['package_type'], - ]) diff --git a/tests/ci/metrics_lambda/app.py b/tests/ci/metrics_lambda/app.py index 4bf967a51e17..d776aa2be49f 100644 --- a/tests/ci/metrics_lambda/app.py +++ b/tests/ci/metrics_lambda/app.py @@ -163,7 +163,7 @@ def main(github_secret_key, github_app_id, push_to_cloudwatch, delete_offline_ru if delete_offline_runners: print("Going to delete offline runners") for runner in runners: - if runner.offline: + if runner.offline and not runner.busy: print("Deleting runner", runner) delete_runner(access_token, runner) From b4bacb8f2349386754d4582a5ce6f63d84408d71 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Mon, 29 Nov 2021 16:03:59 +0000 Subject: [PATCH 204/472] Backport #31859 to 21.9: keeper session timeout doesn't work --- src/Server/KeeperTCPHandler.cpp | 
4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Server/KeeperTCPHandler.cpp b/src/Server/KeeperTCPHandler.cpp index c94c3ed1874e..dfa3dd016790 100644 --- a/src/Server/KeeperTCPHandler.cpp +++ b/src/Server/KeeperTCPHandler.cpp @@ -195,8 +195,8 @@ KeeperTCPHandler::KeeperTCPHandler(IServer & server_, const Poco::Net::StreamSoc , log(&Poco::Logger::get("KeeperTCPHandler")) , global_context(Context::createCopy(server.context())) , keeper_dispatcher(global_context->getKeeperStorageDispatcher()) - , operation_timeout(0, global_context->getConfigRef().getUInt("keeper_server.operation_timeout_ms", Coordination::DEFAULT_OPERATION_TIMEOUT_MS) * 1000) - , session_timeout(0, global_context->getConfigRef().getUInt("keeper_server.session_timeout_ms", Coordination::DEFAULT_SESSION_TIMEOUT_MS) * 1000) + , operation_timeout(0, global_context->getConfigRef().getUInt("keeper_server.coordination_settings.operation_timeout_ms", Coordination::DEFAULT_OPERATION_TIMEOUT_MS) * 1000) + , session_timeout(0, global_context->getConfigRef().getUInt("keeper_server.coordination_settings.session_timeout_ms", Coordination::DEFAULT_SESSION_TIMEOUT_MS) * 1000) , poll_wrapper(std::make_unique(socket_)) , responses(std::make_unique()) { From edcc97baad69545cd7ff86b9cdbd2a640ce7729b Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 30 Nov 2021 07:02:12 +0000 Subject: [PATCH 205/472] Backport #31866 to 21.9: Support toUInt8/toInt8 for if constant condition optimization. --- .../OptimizeIfWithConstantConditionVisitor.cpp | 12 ++++++++++++ ..._if_condition_and_not_existing_column.reference | 8 ++++++++ ...nstant_if_condition_and_not_existing_column.sql | 14 ++++++++++++++ 3 files changed, 34 insertions(+) create mode 100644 tests/queries/0_stateless/02125_constant_if_condition_and_not_existing_column.reference create mode 100644 tests/queries/0_stateless/02125_constant_if_condition_and_not_existing_column.sql diff --git a/src/Interpreters/OptimizeIfWithConstantConditionVisitor.cpp b/src/Interpreters/OptimizeIfWithConstantConditionVisitor.cpp index a9814ce50f51..c53ee6dfefaa 100644 --- a/src/Interpreters/OptimizeIfWithConstantConditionVisitor.cpp +++ b/src/Interpreters/OptimizeIfWithConstantConditionVisitor.cpp @@ -28,6 +28,8 @@ static bool tryExtractConstValueFromCondition(const ASTPtr & condition, bool & v } /// cast of numeric constant in condition to UInt8 + /// Note: this solution is ad-hoc and only implemented for yandex.metrica use case. + /// We should allow any constant condition (or maybe remove this optimization completely) later. 
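+    /// Besides explicit CASTs, single-argument toUInt8/toInt8 wrappers are unwrapped by recursing into the argument below.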
if (const auto * function = condition->as()) { if (isFunctionCast(function)) @@ -49,6 +51,16 @@ static bool tryExtractConstValueFromCondition(const ASTPtr & condition, bool & v } } } + else if (function->name == "toUInt8" || function->name == "toInt8") + { + if (const auto * expr_list = function->arguments->as()) + { + if (expr_list->children.size() != 1) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} must have exactly two arguments", function->name); + + return tryExtractConstValueFromCondition(expr_list->children.at(0), value); + } + } } return false; diff --git a/tests/queries/0_stateless/02125_constant_if_condition_and_not_existing_column.reference b/tests/queries/0_stateless/02125_constant_if_condition_and_not_existing_column.reference new file mode 100644 index 000000000000..67f2590a0c6b --- /dev/null +++ b/tests/queries/0_stateless/02125_constant_if_condition_and_not_existing_column.reference @@ -0,0 +1,8 @@ +42 +42 +42 +42 +42 +42 +42 +42 diff --git a/tests/queries/0_stateless/02125_constant_if_condition_and_not_existing_column.sql b/tests/queries/0_stateless/02125_constant_if_condition_and_not_existing_column.sql new file mode 100644 index 000000000000..ad3d417bc26c --- /dev/null +++ b/tests/queries/0_stateless/02125_constant_if_condition_and_not_existing_column.sql @@ -0,0 +1,14 @@ +drop table if exists test; +-- this queries does not have to pass, but they works historically +-- let's support this while can, see #31687 +create table test (x String) Engine=StripeLog; +insert into test values (0); +select if(0, y, 42) from test; +select if(1, 42, y) from test; +select if(toUInt8(0), y, 42) from test; +select if(toInt8(0), y, 42) from test; +select if(toUInt8(1), 42, y) from test; +select if(toInt8(1), 42, y) from test; +select if(toUInt8(toUInt8(0)), y, 42) from test; +select if(cast(cast(0, 'UInt8'), 'UInt8'), y, 42) from test; +drop table if exists t; From 92f0cf9e42464b6f33b1715a3263933d009eb0d1 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 30 Nov 2021 16:00:29 +0300 Subject: [PATCH 206/472] Merge pull request #31992 from ClickHouse/run_less_checks_for_backports Run less tests for backport branches (cherry picked from commit 086186c7657aeae8fdc188f82d4fa089e2c73b24) --- .github/workflows/backport_branches.yml | 379 ++++++++++++++++++++++++ .github/workflows/release_branches.yml | 1 - 2 files changed, 379 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/backport_branches.yml diff --git a/.github/workflows/backport_branches.yml b/.github/workflows/backport_branches.yml new file mode 100644 index 000000000000..98a339276677 --- /dev/null +++ b/.github/workflows/backport_branches.yml @@ -0,0 +1,379 @@ +name: BackportPR +on: # yamllint disable-line rule:truthy + push: + branches: + - 'backport/**' +jobs: + DockerHubPush: + runs-on: [self-hosted, style-checker] + steps: + - name: Check out repository code + uses: actions/checkout@v2 + - name: Images check + run: | + cd $GITHUB_WORKSPACE/tests/ci + python3 docker_images_check.py + - name: Upload images files to artifacts + uses: actions/upload-artifact@v2 + with: + name: changed_images + path: ${{ runner.temp }}/docker_images_check/changed_images.json + CompatibilityCheck: + needs: [BuilderDebRelease] + runs-on: [self-hosted, style-checker] + steps: + - name: Check out repository code + uses: actions/checkout@v2 + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: CompatibilityCheck + env: + TEMP_PATH: 
${{runner.temp}}/compatibility_check + REPO_COPY: ${{runner.temp}}/compatibility_check/ClickHouse + REPORTS_PATH: ${{runner.temp}}/reports_dir + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci && python3 compatibility_check.py + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH +######################################################################################### +#################################### ORDINARY BUILDS #################################### +######################################################################################### + BuilderDebRelease: + needs: [DockerHubPush] + runs-on: [self-hosted, builder] + steps: + - name: Download changed images + uses: actions/download-artifact@v2 + with: + name: changed_images + path: ${{ runner.temp }}/images_path + - name: Check out repository code + uses: actions/checkout@v2 + with: + submodules: 'recursive' + fetch-depth: 0 # otherwise we will have no info about contributors + - name: Build + env: + TEMP_PATH: ${{runner.temp}}/build_check + IMAGES_PATH: ${{runner.temp}}/images_path + REPO_COPY: ${{runner.temp}}/build_check/ClickHouse + CACHES_PATH: ${{runner.temp}}/../ccaches + CHECK_NAME: 'ClickHouse build check (actions)' + BUILD_NAME: 'package_release' + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME + - name: Upload build URLs to artifacts + uses: actions/upload-artifact@v2 + with: + name: ${{ env.BUILD_NAME }} + path: ${{ runner.temp }}/build_check/${{ env.BUILD_NAME }}.json + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + BuilderDebAsan: + needs: [DockerHubPush] + runs-on: [self-hosted, builder] + steps: + - name: Download changed images + uses: actions/download-artifact@v2 + with: + name: changed_images + path: ${{ runner.temp }}/images_path + - name: Check out repository code + uses: actions/checkout@v2 + with: + submodules: 'recursive' + fetch-depth: 0 # otherwise we will have no info about contributors + - name: Build + env: + TEMP_PATH: ${{runner.temp}}/build_check + IMAGES_PATH: ${{runner.temp}}/images_path + REPO_COPY: ${{runner.temp}}/build_check/ClickHouse + CACHES_PATH: ${{runner.temp}}/../ccaches + CHECK_NAME: 'ClickHouse build check (actions)' + BUILD_NAME: 'package_asan' + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME + - name: Upload build URLs to artifacts + uses: actions/upload-artifact@v2 + with: + name: ${{ env.BUILD_NAME }} + path: ${{ runner.temp }}/build_check/${{ env.BUILD_NAME }}.json + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + BuilderDebTsan: + needs: [DockerHubPush] + runs-on: [self-hosted, builder] + steps: + - name: Download changed images + uses: actions/download-artifact@v2 + with: + name: changed_images + path: ${{ runner.temp }}/images_path + - name: Check out repository code + uses: actions/checkout@v2 + with: + submodules: 'recursive' + fetch-depth: 0 # otherwise we will have no info about contributors + - name: Build + env: + TEMP_PATH: ${{runner.temp}}/build_check + IMAGES_PATH: ${{runner.temp}}/images_path + 
REPO_COPY: ${{runner.temp}}/build_check/ClickHouse + CACHES_PATH: ${{runner.temp}}/../ccaches + CHECK_NAME: 'ClickHouse build check (actions)' + BUILD_NAME: 'package_tsan' + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME + - name: Upload build URLs to artifacts + uses: actions/upload-artifact@v2 + with: + name: ${{ env.BUILD_NAME }} + path: ${{ runner.temp }}/build_check/${{ env.BUILD_NAME }}.json + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + BuilderDebDebug: + needs: [DockerHubPush] + runs-on: [self-hosted, builder] + steps: + - name: Download changed images + uses: actions/download-artifact@v2 + with: + name: changed_images + path: ${{ runner.temp }}/images_path + - name: Check out repository code + uses: actions/checkout@v2 + with: + submodules: 'recursive' + fetch-depth: 0 # otherwise we will have no info about contributors + - name: Build + env: + TEMP_PATH: ${{runner.temp}}/build_check + IMAGES_PATH: ${{runner.temp}}/images_path + REPO_COPY: ${{runner.temp}}/build_check/ClickHouse + CACHES_PATH: ${{runner.temp}}/../ccaches + CHECK_NAME: 'ClickHouse build check (actions)' + BUILD_NAME: 'package_debug' + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME + - name: Upload build URLs to artifacts + uses: actions/upload-artifact@v2 + with: + name: ${{ env.BUILD_NAME }} + path: ${{ runner.temp }}/build_check/${{ env.BUILD_NAME }}.json + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH +############################################################################################ +##################################### BUILD REPORTER ####################################### +############################################################################################ + BuilderReport: + needs: + - BuilderDebRelease + - BuilderDebAsan + - BuilderDebTsan + - BuilderDebUBsan + - BuilderDebMsan + - BuilderDebDebug + runs-on: [self-hosted, style-checker] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Report Builder + env: + TEMP_PATH: ${{runner.temp}}/report_check + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'ClickHouse build check (actions)' + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cd $GITHUB_WORKSPACE/tests/ci + python3 build_report_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH +############################################################################################## +########################### FUNCTIONAl STATELESS TESTS ####################################### +############################################################################################## + FunctionalStatelessTestAsan: + needs: [BuilderDebAsan] + runs-on: [self-hosted, func-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + env: + TEMP_PATH: 
${{runner.temp}}/stateless_debug + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Stateless tests (address, actions)' + REPO_COPY: ${{runner.temp}}/stateless_debug/ClickHouse + KILL_TIMEOUT: 10800 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH +############################################################################################## +############################ FUNCTIONAl STATEFUL TESTS ####################################### +############################################################################################## + FunctionalStatefulTestDebug: + needs: [BuilderDebDebug] + runs-on: [self-hosted, func-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + env: + TEMP_PATH: ${{runner.temp}}/stateful_debug + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Stateful tests (debug, actions)' + REPO_COPY: ${{runner.temp}}/stateful_debug/ClickHouse + KILL_TIMEOUT: 3600 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH +############################################################################################## +######################################### STRESS TESTS ####################################### +############################################################################################## + StressTestTsan: + needs: [BuilderDebTsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Stress test + env: + TEMP_PATH: ${{runner.temp}}/stress_thread + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Stress test (thread, actions)' + REPO_COPY: ${{runner.temp}}/stress_thread/ClickHouse + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 stress_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH +############################################################################################# +############################# INTEGRATION TESTS ############################################# +############################################################################################# + IntegrationTestsRelease: + needs: [BuilderDebRelease, FunctionalStatelessTestRelease] + runs-on: [self-hosted, stress-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Integration test + env: + TEMP_PATH: ${{runner.temp}}/integration_tests_release + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Integration tests (release, actions)' + REPO_COPY: 
${{runner.temp}}/integration_tests_release/ClickHouse + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + FinishCheck: + needs: + - DockerHubPush + - BuilderReport + - FunctionalStatelessTestAsan + - FunctionalStatefulTestDebug + - StressTestTsan + - IntegrationTestsRelease + - CompatibilityCheck + runs-on: [self-hosted, style-checker] + steps: + - name: Check out repository code + uses: actions/checkout@v2 + - name: Finish label + run: | + cd $GITHUB_WORKSPACE/tests/ci + python3 finish_check.py diff --git a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml index 988db77e62a2..4489585541bf 100644 --- a/.github/workflows/release_branches.yml +++ b/.github/workflows/release_branches.yml @@ -6,7 +6,6 @@ on: # yamllint disable-line rule:truthy - '22.**' - '23.**' - '24.**' - - 'backport/**' jobs: DockerHubPush: runs-on: [self-hosted, style-checker] From 0f83138227bf85b5b085b0867a6b78c7cf9006d6 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 30 Nov 2021 19:14:33 +0300 Subject: [PATCH 207/472] Fix yml (cherry picked from commit 88d8eab38ba9bc9afe2bbc3c2f905171ca531ea6) --- .github/workflows/backport_branches.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/backport_branches.yml b/.github/workflows/backport_branches.yml index 98a339276677..b61b74f86d3f 100644 --- a/.github/workflows/backport_branches.yml +++ b/.github/workflows/backport_branches.yml @@ -207,8 +207,6 @@ jobs: - BuilderDebRelease - BuilderDebAsan - BuilderDebTsan - - BuilderDebUBsan - - BuilderDebMsan - BuilderDebDebug runs-on: [self-hosted, style-checker] steps: From 41a13dbad85a123a4f3e5ae01951c9e6427d80a1 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 30 Nov 2021 19:16:04 +0300 Subject: [PATCH 208/472] Fix one more time (cherry picked from commit 76d084cc03f88c85ff92ce75524b3af3e68a0413) --- .github/workflows/backport_branches.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/backport_branches.yml b/.github/workflows/backport_branches.yml index b61b74f86d3f..859756f07af1 100644 --- a/.github/workflows/backport_branches.yml +++ b/.github/workflows/backport_branches.yml @@ -331,7 +331,7 @@ jobs: ############################# INTEGRATION TESTS ############################################# ############################################################################################# IntegrationTestsRelease: - needs: [BuilderDebRelease, FunctionalStatelessTestRelease] + needs: [BuilderDebRelease] runs-on: [self-hosted, stress-tester] steps: - name: Download json reports From 7186da42ab87719e0fb3578952b3de49b705ad57 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 1 Dec 2021 04:02:41 +0000 Subject: [PATCH 209/472] Backport #31823 to 21.9: Fix invalid cast of nullable type when nullable primary key is used --- src/Storages/MergeTree/KeyCondition.cpp | 7 ++++--- .../0_stateless/01410_nullable_key_and_index.reference | 1 + tests/queries/0_stateless/01410_nullable_key_and_index.sql | 6 ++++++ 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/src/Storages/MergeTree/KeyCondition.cpp b/src/Storages/MergeTree/KeyCondition.cpp index 2e1e5f3361db..a21cec08f68f 100644 --- a/src/Storages/MergeTree/KeyCondition.cpp +++ b/src/Storages/MergeTree/KeyCondition.cpp @@ -1358,8 
+1358,9 @@ bool KeyCondition::tryParseAtomFromAST(const ASTPtr & node, ContextPtr context, } if (!key_expr_type_not_null->equals(*common_type)) { - auto common_type_maybe_nullable - = key_expr_type_is_nullable ? DataTypePtr(std::make_shared(common_type)) : common_type; + auto common_type_maybe_nullable = (key_expr_type_is_nullable && !common_type->isNullable()) + ? DataTypePtr(std::make_shared(common_type)) + : common_type; ColumnsWithTypeAndName arguments{ {nullptr, key_expr_type, ""}, {DataTypeString().createColumnConst(1, common_type_maybe_nullable->getName()), common_type_maybe_nullable, ""}}; @@ -1367,7 +1368,7 @@ bool KeyCondition::tryParseAtomFromAST(const ASTPtr & node, ContextPtr context, auto func_cast = func_builder_cast->build(arguments); /// If we know the given range only contains one value, then we treat all functions as positive monotonic. - if (!func_cast || (!single_point && !func_cast->hasInformationAboutMonotonicity())) + if (!single_point && !func_cast->hasInformationAboutMonotonicity()) return false; chain.push_back(func_cast); } diff --git a/tests/queries/0_stateless/01410_nullable_key_and_index.reference b/tests/queries/0_stateless/01410_nullable_key_and_index.reference index 1fc2cf91e627..c5b2ef292ea6 100644 --- a/tests/queries/0_stateless/01410_nullable_key_and_index.reference +++ b/tests/queries/0_stateless/01410_nullable_key_and_index.reference @@ -79,3 +79,4 @@ 2 2 2 1 3 2 +2021-11-11 00:00:00 diff --git a/tests/queries/0_stateless/01410_nullable_key_and_index.sql b/tests/queries/0_stateless/01410_nullable_key_and_index.sql index ba473b5c29ac..fd1712b5d245 100644 --- a/tests/queries/0_stateless/01410_nullable_key_and_index.sql +++ b/tests/queries/0_stateless/01410_nullable_key_and_index.sql @@ -59,3 +59,9 @@ SELECT * FROM nullable_minmax_index WHERE v <= 2; DROP TABLE nullable_key; DROP TABLE nullable_key_without_final_mark; DROP TABLE nullable_minmax_index; + +DROP TABLE IF EXISTS xxxx_null; +CREATE TABLE xxxx_null (`ts` Nullable(DateTime)) ENGINE = MergeTree ORDER BY toStartOfHour(ts) SETTINGS allow_nullable_key = 1; +INSERT INTO xxxx_null SELECT '2021-11-11 00:00:00'; +SELECT * FROM xxxx_null WHERE ts > '2021-10-11 00:00:00'; +DROP TABLE xxxx_null; From 0250c1f35df98f76b44f8268d08b0c05454fd3a0 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 1 Dec 2021 16:14:05 +0000 Subject: [PATCH 210/472] Backport #31839 to 21.9: Fix a bug about function transform with decimal args --- src/Functions/transform.cpp | 370 +++++++++++++++++- .../02125_transform_decimal_bug.reference | 50 +++ .../02125_transform_decimal_bug.sql | 11 + 3 files changed, 429 insertions(+), 2 deletions(-) create mode 100644 tests/queries/0_stateless/02125_transform_decimal_bug.reference create mode 100644 tests/queries/0_stateless/02125_transform_decimal_bug.sql diff --git a/src/Functions/transform.cpp b/src/Functions/transform.cpp index cb9f0dfea745..945f3550f74a 100644 --- a/src/Functions/transform.cpp +++ b/src/Functions/transform.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -116,7 +117,7 @@ class FunctionTransform : public IFunction + " has signature: transform(T, Array(T), Array(U), U) -> U; or transform(T, Array(T), Array(T)) -> T; where T and U are types.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; - return type_x; + return getLeastSupertype({type_x, type_arr_to_nested}); } else { @@ -179,6 +180,8 @@ class FunctionTransform : public IFunction && !executeNum(in, out, default_column) && !executeNum(in, out, default_column) && !executeNum(in, 
out, default_column) + && !executeDecimal(in, out, default_column) + && !executeDecimal(in, out, default_column) && !executeString(in, out, default_column)) { throw Exception{"Illegal column " + in->getName() + " of first argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN}; @@ -228,6 +231,8 @@ class FunctionTransform : public IFunction && !executeNumToNumWithConstDefault(in, out_untyped) && !executeNumToNumWithConstDefault(in, out_untyped) && !executeNumToNumWithConstDefault(in, out_untyped) + && !executeNumToDecimalWithConstDefault(in, out_untyped) + && !executeNumToDecimalWithConstDefault(in, out_untyped) && !executeNumToStringWithConstDefault(in, out_untyped)) { throw Exception{"Illegal column " + in->getName() + " of elements of array of second argument of function " + getName(), @@ -246,6 +251,8 @@ class FunctionTransform : public IFunction && !executeNumToNumWithNonConstDefault(in, out_untyped, default_untyped) && !executeNumToNumWithNonConstDefault(in, out_untyped, default_untyped) && !executeNumToNumWithNonConstDefault(in, out_untyped, default_untyped) + && !executeNumToDecimalWithNonConstDefault(in, out_untyped, default_untyped) + && !executeNumToDecimalWithNonConstDefault(in, out_untyped, default_untyped) && !executeNumToStringWithNonConstDefault(in, out_untyped, default_untyped)) { throw Exception{"Illegal column " + in->getName() + " of elements of array of second argument of function " + getName(), @@ -259,6 +266,69 @@ class FunctionTransform : public IFunction return false; } + template + bool executeDecimal(const IColumn * in_untyped, IColumn * out_untyped, const IColumn * default_untyped) const + { + if (const auto in = checkAndGetColumn>(in_untyped)) + { + if (!default_untyped) + { + auto out = typeid_cast *>(out_untyped); + if (!out) + { + throw Exception{"Illegal column " + out_untyped->getName() + " of elements of array of third argument of function " + getName() + + ", must be " + in->getName(), ErrorCodes::ILLEGAL_COLUMN}; + } + + executeImplNumToNum(in->getData(), out->getData()); + } + else if (isColumnConst(*default_untyped)) + { + if (!executeDecimalToNumWithConstDefault(in, out_untyped) + && !executeDecimalToNumWithConstDefault(in, out_untyped) + && !executeDecimalToNumWithConstDefault(in, out_untyped) + && !executeDecimalToNumWithConstDefault(in, out_untyped) + && !executeDecimalToNumWithConstDefault(in, out_untyped) + && !executeDecimalToNumWithConstDefault(in, out_untyped) + && !executeDecimalToNumWithConstDefault(in, out_untyped) + && !executeDecimalToNumWithConstDefault(in, out_untyped) + && !executeDecimalToNumWithConstDefault(in, out_untyped) + && !executeDecimalToNumWithConstDefault(in, out_untyped) + && !executeDecimalToDecimalWithConstDefault(in, out_untyped) + && !executeDecimalToDecimalWithConstDefault(in, out_untyped) + && !executeDecimalToStringWithConstDefault(in, out_untyped)) + { + throw Exception{"Illegal column " + in->getName() + " of elements of array of second argument of function " + getName(), + ErrorCodes::ILLEGAL_COLUMN}; + } + } + else + { + if (!executeDecimalToNumWithNonConstDefault(in, out_untyped, default_untyped) + && !executeDecimalToNumWithNonConstDefault(in, out_untyped, default_untyped) + && !executeDecimalToNumWithNonConstDefault(in, out_untyped, default_untyped) + && !executeDecimalToNumWithNonConstDefault(in, out_untyped, default_untyped) + && !executeDecimalToNumWithNonConstDefault(in, out_untyped, default_untyped) + && !executeDecimalToNumWithNonConstDefault(in, out_untyped, default_untyped) + && 
!executeDecimalToNumWithNonConstDefault(in, out_untyped, default_untyped) + && !executeDecimalToNumWithNonConstDefault(in, out_untyped, default_untyped) + && !executeDecimalToNumWithNonConstDefault(in, out_untyped, default_untyped) + && !executeDecimalToNumWithNonConstDefault(in, out_untyped, default_untyped) + && !executeDecimalToDecimalWithNonConstDefault(in, out_untyped, default_untyped) + && !executeDecimalToDecimalWithNonConstDefault(in, out_untyped, default_untyped) + && !executeDecimalToStringWithNonConstDefault(in, out_untyped, default_untyped)) + { + throw Exception{"Illegal column " + in->getName() + " of elements of array of second argument of function " + getName(), + ErrorCodes::ILLEGAL_COLUMN}; + } + } + + return true; + } + + return false; + } + bool executeString(const IColumn * in_untyped, IColumn * out_untyped, const IColumn * default_untyped) const { if (const auto * in = checkAndGetColumn(in_untyped)) @@ -281,6 +351,8 @@ class FunctionTransform : public IFunction && !executeStringToNumWithConstDefault(in, out_untyped) && !executeStringToNumWithConstDefault(in, out_untyped) && !executeStringToNumWithConstDefault(in, out_untyped) + && !executeStringToDecimalWithConstDefault(in, out_untyped) + && !executeStringToDecimalWithConstDefault(in, out_untyped) && !executeStringToStringWithConstDefault(in, out_untyped)) { throw Exception{"Illegal column " + in->getName() + " of elements of array of second argument of function " + getName(), @@ -299,6 +371,9 @@ class FunctionTransform : public IFunction && !executeStringToNumWithNonConstDefault(in, out_untyped, default_untyped) && !executeStringToNumWithNonConstDefault(in, out_untyped, default_untyped) && !executeStringToNumWithNonConstDefault(in, out_untyped, default_untyped) + && !executeStringToDecimalWithNonConstDefault(in, out_untyped, default_untyped) + && !executeStringToDecimalWithNonConstDefault(in, out_untyped, default_untyped) + && !executeStringToStringWithNonConstDefault(in, out_untyped, default_untyped)) { throw Exception{"Illegal column " + in->getName() + " of elements of array of second argument of function " + getName(), @@ -323,6 +398,40 @@ class FunctionTransform : public IFunction return true; } + template + bool executeNumToDecimalWithConstDefault(const ColumnVector * in, IColumn * out_untyped) const + { + auto out = typeid_cast *>(out_untyped); + if (!out) + return false; + + executeImplNumToNumWithConstDefault(in->getData(), out->getData(), cache.const_default_value.get()); + return true; + } + + + template + bool executeDecimalToNumWithConstDefault(const ColumnDecimal * in, IColumn * out_untyped) const + { + auto out = typeid_cast *>(out_untyped); + if (!out) + return false; + + executeImplNumToNumWithConstDefault(in->getData(), out->getData(), cache.const_default_value.get()); + return true; + } + + template + bool executeDecimalToDecimalWithConstDefault(const ColumnDecimal * in, IColumn * out_untyped) const + { + auto out = typeid_cast *>(out_untyped); + if (!out) + return false; + + executeImplNumToNumWithConstDefault(in->getData(), out->getData(), cache.const_default_value.get()); + return true; + } + template bool executeNumToNumWithNonConstDefault(const ColumnVector * in, IColumn * out_untyped, const IColumn * default_untyped) const { @@ -349,6 +458,90 @@ class FunctionTransform : public IFunction return true; } + template + bool executeNumToDecimalWithNonConstDefault(const ColumnVector * in, IColumn * out_untyped, const IColumn * default_untyped) const + { + auto out = typeid_cast *>(out_untyped); + 
if (!out) + return false; + + if (!executeNumToDecimalWithNonConstDefault2(in, out, default_untyped) + && !executeNumToDecimalWithNonConstDefault2(in, out, default_untyped) + && !executeNumToDecimalWithNonConstDefault2(in, out, default_untyped) + && !executeNumToDecimalWithNonConstDefault2(in, out, default_untyped) + && !executeNumToDecimalWithNonConstDefault2(in, out, default_untyped) + && !executeNumToDecimalWithNonConstDefault2(in, out, default_untyped) + && !executeNumToDecimalWithNonConstDefault2(in, out, default_untyped) + && !executeNumToDecimalWithNonConstDefault2(in, out, default_untyped) + && !executeNumToDecimalWithNonConstDefault2(in, out, default_untyped) + && !executeNumToDecimalWithNonConstDefault2(in, out, default_untyped) + && !executeNumToDecimalWithNonConstDefaultDecimal2(in, out, default_untyped) + && !executeNumToDecimalWithNonConstDefaultDecimal2(in, out, default_untyped)) + { + throw Exception( + "Illegal column " + default_untyped->getName() + " of fourth argument of function " + getName(), + ErrorCodes::ILLEGAL_COLUMN); + } + + return true; + } + + template + bool executeDecimalToNumWithNonConstDefault(const ColumnDecimal * in, IColumn * out_untyped, const IColumn * default_untyped) const + { + auto out = typeid_cast *>(out_untyped); + if (!out) + return false; + + if (!executeDecimalToNumWithNonConstDefault2(in, out, default_untyped) + && !executeDecimalToNumWithNonConstDefault2(in, out, default_untyped) + && !executeDecimalToNumWithNonConstDefault2(in, out, default_untyped) + && !executeDecimalToNumWithNonConstDefault2(in, out, default_untyped) + && !executeDecimalToNumWithNonConstDefault2(in, out, default_untyped) + && !executeDecimalToNumWithNonConstDefault2(in, out, default_untyped) + && !executeDecimalToNumWithNonConstDefault2(in, out, default_untyped) + && !executeDecimalToNumWithNonConstDefault2(in, out, default_untyped) + && !executeDecimalToNumWithNonConstDefault2(in, out, default_untyped) + && !executeDecimalToNumWithNonConstDefault2(in, out, default_untyped) + && !executeDecimalToNumWithNonConstDefaultDecimal2(in, out, default_untyped) + && !executeDecimalToNumWithNonConstDefaultDecimal2(in, out, default_untyped)) + { + throw Exception( + "Illegal column " + default_untyped->getName() + " of fourth argument of function " + getName(), + ErrorCodes::ILLEGAL_COLUMN); + } + + return true; + } + + template + bool executeDecimalToDecimalWithNonConstDefault(const ColumnDecimal * in, IColumn * out_untyped, const IColumn * default_untyped) const + { + auto out = typeid_cast *>(out_untyped); + if (!out) + return false; + + if (!executeDecimalToDecimalWithNonConstDefault2(in, out, default_untyped) + && !executeDecimalToDecimalWithNonConstDefault2(in, out, default_untyped) + && !executeDecimalToDecimalWithNonConstDefault2(in, out, default_untyped) + && !executeDecimalToDecimalWithNonConstDefault2(in, out, default_untyped) + && !executeDecimalToDecimalWithNonConstDefault2(in, out, default_untyped) + && !executeDecimalToDecimalWithNonConstDefault2(in, out, default_untyped) + && !executeDecimalToDecimalWithNonConstDefault2(in, out, default_untyped) + && !executeDecimalToDecimalWithNonConstDefault2(in, out, default_untyped) + && !executeDecimalToDecimalWithNonConstDefault2(in, out, default_untyped) + && !executeDecimalToDecimalWithNonConstDefault2(in, out, default_untyped) + && !executeDecimalToDecimalWithNonConstDefaultDecimal2(in, out, default_untyped) + && !executeDecimalToDecimalWithNonConstDefaultDecimal2(in, out, default_untyped)) + { + throw Exception( + 
"Illegal column " + default_untyped->getName() + " of fourth argument of function " + getName(), + ErrorCodes::ILLEGAL_COLUMN); + } + + return true; + } + template bool executeNumToNumWithNonConstDefault2(const ColumnVector * in, ColumnVector * out, const IColumn * default_untyped) const { @@ -360,6 +553,72 @@ class FunctionTransform : public IFunction return true; } + template + bool executeNumToDecimalWithNonConstDefault2(const ColumnVector * in, ColumnDecimal * out, const IColumn * default_untyped) const + { + auto col_default = checkAndGetColumn>(default_untyped); + if (!col_default) + return false; + + executeImplNumToNumWithNonConstDefault(in->getData(), out->getData(), col_default->getData()); + return true; + } + + template + bool executeNumToDecimalWithNonConstDefaultDecimal2(const ColumnVector * in, ColumnDecimal * out, const IColumn * default_untyped) const + { + auto col_default = checkAndGetColumn>(default_untyped); + if (!col_default) + return false; + + executeImplNumToNumWithNonConstDefault(in->getData(), out->getData(), col_default->getData()); + return true; + } + + template + bool executeDecimalToNumWithNonConstDefault2(const ColumnDecimal * in, ColumnVector * out, const IColumn * default_untyped) const + { + auto col_default = checkAndGetColumn>(default_untyped); + if (!col_default) + return false; + + executeImplNumToNumWithNonConstDefault(in->getData(), out->getData(), col_default->getData()); + return true; + } + + template + bool executeDecimalToDecimalWithNonConstDefault2(const ColumnDecimal * in, ColumnDecimal * out, const IColumn * default_untyped) const + { + auto col_default = checkAndGetColumn>(default_untyped); + if (!col_default) + return false; + + executeImplNumToNumWithNonConstDefault(in->getData(), out->getData(), col_default->getData()); + return true; + } + + template + bool executeDecimalToNumWithNonConstDefaultDecimal2(const ColumnDecimal * in, ColumnVector * out, const IColumn * default_untyped) const + { + auto col_default = checkAndGetColumn>(default_untyped); + if (!col_default) + return false; + + executeImplNumToNumWithNonConstDefault(in->getData(), out->getData(), col_default->getData()); + return true; + } + + template + bool executeDecimalToDecimalWithNonConstDefaultDecimal2(const ColumnDecimal * in, ColumnDecimal * out, const IColumn * default_untyped) const + { + auto col_default = checkAndGetColumn>(default_untyped); + if (!col_default) + return false; + + executeImplNumToNumWithNonConstDefault(in->getData(), out->getData(), col_default->getData()); + return true; + } + template bool executeNumToStringWithConstDefault(const ColumnVector * in, IColumn * out_untyped) const { @@ -373,6 +632,19 @@ class FunctionTransform : public IFunction return true; } + template + bool executeDecimalToStringWithConstDefault(const ColumnDecimal * in, IColumn * out_untyped) const + { + auto * out = typeid_cast(out_untyped); + if (!out) + return false; + + const String & default_str = cache.const_default_value.get(); + StringRef default_string_ref{default_str.data(), default_str.size() + 1}; + executeImplNumToStringWithConstDefault(in->getData(), out->getChars(), out->getOffsets(), default_string_ref); + return true; + } + template bool executeNumToStringWithNonConstDefault(const ColumnVector * in, IColumn * out_untyped, const IColumn * default_untyped) const { @@ -395,6 +667,28 @@ class FunctionTransform : public IFunction return true; } + template + bool executeDecimalToStringWithNonConstDefault(const ColumnDecimal * in, IColumn * out_untyped, const IColumn 
* default_untyped) const + { + auto * out = typeid_cast(out_untyped); + if (!out) + return false; + + const auto * default_col = checkAndGetColumn(default_untyped); + if (!default_col) + { + throw Exception{"Illegal column " + default_untyped->getName() + " of fourth argument of function " + getName(), + ErrorCodes::ILLEGAL_COLUMN}; + } + + executeImplNumToStringWithNonConstDefault( + in->getData(), + out->getChars(), out->getOffsets(), + default_col->getChars(), default_col->getOffsets()); + + return true; + } + template bool executeStringToNumWithConstDefault(const ColumnString * in, IColumn * out_untyped) const { @@ -406,6 +700,17 @@ class FunctionTransform : public IFunction return true; } + template + bool executeStringToDecimalWithConstDefault(const ColumnString * in, IColumn * out_untyped) const + { + auto out = typeid_cast *>(out_untyped); + if (!out) + return false; + + executeImplStringToNumWithConstDefault(in->getChars(), in->getOffsets(), out->getData(), cache.const_default_value.get()); + return true; + } + template bool executeStringToNumWithNonConstDefault(const ColumnString * in, IColumn * out_untyped, const IColumn * default_untyped) const { @@ -431,6 +736,34 @@ class FunctionTransform : public IFunction return true; } + template + bool executeStringToDecimalWithNonConstDefault(const ColumnString * in, IColumn * out_untyped, const IColumn * default_untyped) const + { + auto out = typeid_cast *>(out_untyped); + if (!out) + return false; + + if (!executeStringToDecimalWithNonConstDefault2(in, out, default_untyped) + && !executeStringToDecimalWithNonConstDefault2(in, out, default_untyped) + && !executeStringToDecimalWithNonConstDefault2(in, out, default_untyped) + && !executeStringToDecimalWithNonConstDefault2(in, out, default_untyped) + && !executeStringToDecimalWithNonConstDefault2(in, out, default_untyped) + && !executeStringToDecimalWithNonConstDefault2(in, out, default_untyped) + && !executeStringToDecimalWithNonConstDefault2(in, out, default_untyped) + && !executeStringToDecimalWithNonConstDefault2(in, out, default_untyped) + && !executeStringToDecimalWithNonConstDefault2(in, out, default_untyped) + && !executeStringToDecimalWithNonConstDefault2(in, out, default_untyped) + && !executeStringToDecimalWithNonConstDefaultDecimal2(in, out, default_untyped) + && !executeStringToDecimalWithNonConstDefaultDecimal2(in, out, default_untyped)) + { + throw Exception{"Illegal column " + default_untyped->getName() + " of fourth argument of function " + getName(), + ErrorCodes::ILLEGAL_COLUMN}; + } + + return true; + } + + template bool executeStringToNumWithNonConstDefault2(const ColumnString * in, ColumnVector * out, const IColumn * default_untyped) const { @@ -442,6 +775,28 @@ class FunctionTransform : public IFunction return true; } + template + bool executeStringToDecimalWithNonConstDefault2(const ColumnString * in, ColumnDecimal * out, const IColumn * default_untyped) const + { + auto col_default = checkAndGetColumn>(default_untyped); + if (!col_default) + return false; + + executeImplStringToNumWithNonConstDefault(in->getChars(), in->getOffsets(), out->getData(), col_default->getData()); + return true; + } + + template + bool executeStringToDecimalWithNonConstDefaultDecimal2(const ColumnString * in, ColumnDecimal * out, const IColumn * default_untyped) const + { + auto col_default = checkAndGetColumn>(default_untyped); + if (!col_default) + return false; + + executeImplStringToNumWithNonConstDefault(in->getChars(), in->getOffsets(), out->getData(), col_default->getData()); + 
return true; + } + bool executeStringToString(const ColumnString * in, IColumn * out_untyped) const { auto * out = typeid_cast(out_untyped); @@ -798,7 +1153,18 @@ class FunctionTransform : public IFunction // Field may be of Float type, but for the purpose of bitwise // equality we can treat them as UInt64, hence the reinterpret(). - table[key.reinterpret()] = (*used_to)[i].reinterpret(); + if (to[0].getType() ==Field::Types::Decimal32) + { + table[key.reinterpret()] = (*used_to)[i].reinterpret(); + } + else if (to[0].getType() ==Field::Types::Decimal64) + { + table[key.reinterpret()] = (*used_to)[i].reinterpret(); + } + else + { + table[key.reinterpret()] = (*used_to)[i].reinterpret(); + } } } else diff --git a/tests/queries/0_stateless/02125_transform_decimal_bug.reference b/tests/queries/0_stateless/02125_transform_decimal_bug.reference new file mode 100644 index 000000000000..7f59d0ee7bf6 --- /dev/null +++ b/tests/queries/0_stateless/02125_transform_decimal_bug.reference @@ -0,0 +1,50 @@ +0 +1 +2 +30 +4 +5 +6 +7 +8 +9 +1000 +1000 +1000 +30 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +30 +1000 +50 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +30 +1000 +50 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +30 +1000 +50 +1000 +70 +1000 +1000 diff --git a/tests/queries/0_stateless/02125_transform_decimal_bug.sql b/tests/queries/0_stateless/02125_transform_decimal_bug.sql new file mode 100644 index 000000000000..4ef471ea875e --- /dev/null +++ b/tests/queries/0_stateless/02125_transform_decimal_bug.sql @@ -0,0 +1,11 @@ +SELECT transform(1, [1], [toDecimal32(1, 2)]); -- { serverError 44 } +SELECT transform(toDecimal32(number, 2), [toDecimal32(3, 2)], [toDecimal32(30, 2)]) FROM system.numbers LIMIT 10; +SELECT transform(toDecimal32(number, 2), [toDecimal32(3, 2)], [toDecimal32(30, 2)], toDecimal32(1000, 2)) FROM system.numbers LIMIT 10; +SELECT transform(number, [3, 5, 11], [toDecimal32(30, 2), toDecimal32(50, 2), toDecimal32(70,2)], toDecimal32(1000, 2)) FROM system.numbers LIMIT 10; +SELECT transform(number, [3, 5, 11], [toDecimal32(30, 2), toDecimal32(50, 2), toDecimal32(70,2)], toDecimal32(1000, 2)) FROM system.numbers LIMIT 10; +SELECT transform(toString(number), ['3', '5', '7'], [toDecimal32(30, 2), toDecimal32(50, 2), toDecimal32(70,2)], toDecimal32(1000, 2)) FROM system.numbers LIMIT 10; + + + + + From e2bd7f29f22ba29b2a58d1bdc2521a5e14638d86 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 1 Dec 2021 19:10:06 +0000 Subject: [PATCH 211/472] Backport #31887 to 21.9: Parse partition key value from `partition_id` when need to create part in empty partition --- src/Parsers/ASTPartition.h | 1 - src/Parsers/ParserPartition.cpp | 5 - .../Impl/ConstantExpressionTemplate.cpp | 5 + src/Storages/MergeTree/MergeTreeData.cpp | 87 +++++++++--------- src/Storages/MergeTree/MergeTreePartition.cpp | 91 +++++++++++++++++++ src/Storages/MergeTree/MergeTreePartition.h | 2 + src/Storages/StorageReplicatedMergeTree.cpp | 16 +++- tests/integration/test_lost_part/test.py | 12 ++- .../01165_lost_part_empty_partition.reference | 0 .../01165_lost_part_empty_partition.sql | 37 ++++++++ .../02009_array_join_partition.sql | 4 +- 11 files changed, 201 insertions(+), 59 deletions(-) create mode 100644 tests/queries/0_stateless/01165_lost_part_empty_partition.reference create mode 100644 tests/queries/0_stateless/01165_lost_part_empty_partition.sql diff --git a/src/Parsers/ASTPartition.h b/src/Parsers/ASTPartition.h index 8a837a10451f..0722da016271 100644 --- a/src/Parsers/ASTPartition.h +++ 
b/src/Parsers/ASTPartition.h @@ -12,7 +12,6 @@ class ASTPartition : public IAST { public: ASTPtr value; - String fields_str; /// The extent of comma-separated partition expression fields without parentheses. size_t fields_count = 0; String id; diff --git a/src/Parsers/ParserPartition.cpp b/src/Parsers/ParserPartition.cpp index a3ec4943e1c9..c10999361deb 100644 --- a/src/Parsers/ParserPartition.cpp +++ b/src/Parsers/ParserPartition.cpp @@ -35,7 +35,6 @@ bool ParserPartition::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) return false; size_t fields_count; - String fields_str; const auto * tuple_ast = value->as(); bool surrounded_by_parens = false; @@ -58,7 +57,6 @@ bool ParserPartition::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) else { fields_count = 1; - fields_str = String(begin->begin, pos->begin - begin->begin); } } else @@ -78,13 +76,10 @@ bool ParserPartition::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) --right_paren; if (right_paren->type != TokenType::ClosingRoundBracket) return false; - - fields_str = String(left_paren->end, right_paren->begin - left_paren->end); } partition->value = value; partition->children.push_back(value); - partition->fields_str = std::move(fields_str); partition->fields_count = fields_count; } diff --git a/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp b/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp index 1f780a206dd4..72c2447b9c35 100644 --- a/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp +++ b/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp @@ -37,6 +37,7 @@ namespace ErrorCodes { extern const int LOGICAL_ERROR; extern const int SYNTAX_ERROR; + extern const int BAD_ARGUMENTS; } @@ -341,6 +342,10 @@ ConstantExpressionTemplate::TemplateStructure::TemplateStructure(LiteralsInfo & auto syntax_result = TreeRewriter(context).analyze(expression, literals.getNamesAndTypesList()); result_column_name = expression->getColumnName(); actions_on_literals = ExpressionAnalyzer(expression, syntax_result, context).getActions(false); + if (actions_on_literals->hasArrayJoin()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Array joins are not allowed in constant expressions for IN, VALUES, LIMIT and similar sections."); + } size_t ConstantExpressionTemplate::TemplateStructure::getTemplateHash(const ASTPtr & expression, diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 9bcd1555812f..ce596e201984 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -22,6 +22,8 @@ #include #include #include +#include +#include #include #include #include @@ -3242,54 +3244,54 @@ String MergeTreeData::getPartitionIDFromQuery(const ASTPtr & ast, ContextPtr loc /// Re-parse partition key fields using the information about expected field types. 
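// A minimal standalone sketch (not ClickHouse's Field/IDataType API; ToyField and
// ToyType are stand-ins): the rewritten getPartitionIDFromQuery in the hunk below
// evaluates the PARTITION expression as a constant, untuples it for multi-column keys,
// and casts each element to the corresponding key column type via
// convertFieldToTypeOrThrow, instead of re-parsing the textual fields through the
// Values input format. The toy models only the field-count check and per-field cast.
#include <iostream>
#include <stdexcept>
#include <string>
#include <variant>
#include <vector>

using ToyField = std::variant<long long, unsigned long long, std::string>;
enum class ToyType { UInt64, String };

// Cast one evaluated value to the declared type of a key column.
ToyField castToKeyType(const ToyField & value, ToyType type)
{
    if (type == ToyType::UInt64)
    {
        if (const auto * i = std::get_if<long long>(&value))
        {
            if (*i < 0)
                throw std::invalid_argument("negative value for an unsigned key column");
            return static_cast<unsigned long long>(*i);
        }
    }
    return value;   // other combinations pass through in this toy
}

// Model of the new flow: the evaluated constant (or tuple of constants) must match
// the number of partition key columns, element by element.
std::vector<ToyField> parsePartitionValue(const std::vector<ToyField> & evaluated,
                                          const std::vector<ToyType> & key_types)
{
    if (evaluated.size() != key_types.size())
        throw std::invalid_argument("wrong number of fields in the partition expression");

    std::vector<ToyField> row;
    for (size_t i = 0; i < evaluated.size(); ++i)
        row.push_back(castToKeyType(evaluated[i], key_types[i]));
    return row;
}

int main()
{
    // PARTITION (3, 'x') against a key of (UInt64, String).
    auto row = parsePartitionValue({ToyField{3LL}, ToyField{std::string("x")}},
                                   {ToyType::UInt64, ToyType::String});
    std::cout << row.size() << " fields parsed\n";
    return 0;
}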
auto metadata_snapshot = getInMemoryMetadataPtr(); - size_t fields_count = metadata_snapshot->getPartitionKey().sample_block.columns(); + const Block & key_sample_block = metadata_snapshot->getPartitionKey().sample_block; + size_t fields_count = key_sample_block.columns(); if (partition_ast.fields_count != fields_count) - throw Exception( - "Wrong number of fields in the partition expression: " + toString(partition_ast.fields_count) + - ", must be: " + toString(fields_count), - ErrorCodes::INVALID_PARTITION_VALUE); + throw Exception(ErrorCodes::INVALID_PARTITION_VALUE, + "Wrong number of fields in the partition expression: {}, must be: {}", + partition_ast.fields_count, fields_count); - if (auto * f = partition_ast.value->as()) + Row partition_row(fields_count); + if (fields_count == 0) { - assert(f->name == "tuple"); - if (f->arguments && !f->arguments->as()->children.empty()) + /// Function tuple(...) requires at least one argument, so empty key is a special case + assert(!partition_ast.fields_count); + assert(typeid_cast(partition_ast.value.get())); + assert(partition_ast.value->as()->name == "tuple"); + assert(partition_ast.value->as()->arguments); + bool empty_tuple = partition_ast.value->as()->arguments->children.empty(); + if (!empty_tuple) + throw Exception(ErrorCodes::INVALID_PARTITION_VALUE, "Partition key is empty, expected 'tuple()' as partition key"); + } + else if (fields_count == 1) + { + ASTPtr partition_value_ast = partition_ast.value; + if (auto * tuple = partition_value_ast->as()) { - ASTPtr query = partition_ast.value->clone(); - auto syntax_analyzer_result - = TreeRewriter(local_context) - .analyze(query, metadata_snapshot->getPartitionKey().sample_block.getNamesAndTypesList(), {}, {}, false, false); - auto actions = ExpressionAnalyzer(query, syntax_analyzer_result, local_context).getActions(true); - if (actions->hasArrayJoin()) - throw Exception("The partition expression cannot contain array joins", ErrorCodes::INVALID_PARTITION_VALUE); + assert(tuple->name == "tuple"); + assert(tuple->arguments); + assert(tuple->arguments->children.size() == 1); + partition_value_ast = tuple->arguments->children[0]; } + /// Simple partition key, need to evaluate and cast + Field partition_key_value = evaluateConstantExpression(partition_value_ast, local_context).first; + partition_row[0] = convertFieldToTypeOrThrow(partition_key_value, *key_sample_block.getByPosition(0).type); } - - const FormatSettings format_settings; - Row partition_row(fields_count); - - if (fields_count) + else { - ReadBufferFromMemory left_paren_buf("(", 1); - ReadBufferFromMemory fields_buf(partition_ast.fields_str.data(), partition_ast.fields_str.size()); - ReadBufferFromMemory right_paren_buf(")", 1); - ConcatReadBuffer buf({&left_paren_buf, &fields_buf, &right_paren_buf}); + /// Complex key, need to evaluate, untuple and cast + Field partition_key_value = evaluateConstantExpression(partition_ast.value, local_context).first; + if (partition_key_value.getType() != Field::Types::Tuple) + throw Exception(ErrorCodes::INVALID_PARTITION_VALUE, + "Expected tuple for complex partition key, got {}", partition_key_value.getTypeName()); - auto input_format = FormatFactory::instance().getInput( - "Values", - buf, - metadata_snapshot->getPartitionKey().sample_block, - local_context, - local_context->getSettingsRef().max_block_size); - auto input_stream = std::make_shared(input_format); - - auto block = input_stream->read(); - if (!block || !block.rows()) - throw Exception( - "Could not parse partition value: `" + 
partition_ast.fields_str + "`", - ErrorCodes::INVALID_PARTITION_VALUE); + const Tuple & tuple = partition_key_value.get(); + if (tuple.size() != fields_count) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Wrong number of fields in the partition expression: {}, must be: {}", tuple.size(), fields_count); for (size_t i = 0; i < fields_count; ++i) - block.getByPosition(i).column->get(0, partition_row[i]); + partition_row[i] = convertFieldToTypeOrThrow(tuple[i], *key_sample_block.getByPosition(i).type); } MergeTreePartition partition(std::move(partition_row)); @@ -3301,11 +3303,10 @@ String MergeTreeData::getPartitionIDFromQuery(const ASTPtr & ast, ContextPtr loc if (existing_part_in_partition && existing_part_in_partition->partition.value != partition.value) { WriteBufferFromOwnString buf; - writeCString("Parsed partition value: ", buf); - partition.serializeText(*this, buf, format_settings); - writeCString(" doesn't match partition value for an existing part with the same partition ID: ", buf); - writeString(existing_part_in_partition->name, buf); - throw Exception(buf.str(), ErrorCodes::INVALID_PARTITION_VALUE); + partition.serializeText(*this, buf, FormatSettings{}); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Parsed partition value: {} " + "doesn't match partition value for an existing part with the same partition ID: {}", + buf.str(), existing_part_in_partition->name); } } diff --git a/src/Storages/MergeTree/MergeTreePartition.cpp b/src/Storages/MergeTree/MergeTreePartition.cpp index 0d457971dc6d..c9680c836784 100644 --- a/src/Storages/MergeTree/MergeTreePartition.cpp +++ b/src/Storages/MergeTree/MergeTreePartition.cpp @@ -20,6 +20,7 @@ namespace DB namespace ErrorCodes { extern const int LOGICAL_ERROR; + extern const int INVALID_PARTITION_VALUE; } namespace @@ -191,6 +192,8 @@ String MergeTreePartition::getID(const Block & partition_key_sample) const /// In case all partition fields are represented by integral types, try to produce a human-readable ID. /// Otherwise use a hex-encoded hash. + /// NOTE It will work in unexpected way if some partition key column is Nullable: + /// are_all_integral will be false if some value is NULL. Maybe we should fix it. bool are_all_integral = true; for (const Field & field : value) { @@ -237,6 +240,94 @@ String MergeTreePartition::getID(const Block & partition_key_sample) const return result; } +std::optional MergeTreePartition::tryParseValueFromID(const String & partition_id, const Block & partition_key_sample) +{ + size_t num_keys = partition_key_sample.columns(); + Row res; + res.reserve(num_keys); + + ReadBufferFromString buf(partition_id); + if (num_keys == 0) + { + checkString("all", buf); + assertEOF(buf); + return res; + } + + enum KeyType { DATE, UNSIGNED, SIGNED }; + + std::vector key_types; + key_types.reserve(num_keys); + for (size_t i = 0; i < num_keys; ++i) + { + auto type = partition_key_sample.getByPosition(i).type; + + /// NOTE Sometimes it's possible to parse Nullable key, but easier to ignore it. 
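// A minimal standalone sketch (not the real implementation): for an all-integral
// partition key, MergeTreePartition::getID joins the decimal values with '-'
// (Date columns as YYYYMMDD), so tryParseValueFromID can recover the row and verify
// it by re-serialising, as this hunk does. The toy below shows only the round-trip
// idea; it ignores Nullable keys, negative values and the YYYYMMDD range/DayNum
// conversion handled by the real code.
#include <cassert>
#include <cstdint>
#include <optional>
#include <sstream>
#include <string>
#include <vector>

std::string serializePartitionId(const std::vector<int64_t> & values)
{
    if (values.empty())
        return "all";                          // an empty key is spelled "all"
    std::ostringstream out;
    for (size_t i = 0; i < values.size(); ++i)
    {
        if (i != 0)
            out << '-';
        out << values[i];
    }
    return out.str();
}

std::optional<std::vector<int64_t>> tryParsePartitionId(const std::string & id, size_t num_keys)
{
    if (num_keys == 0)
        return id == "all" ? std::optional<std::vector<int64_t>>(std::in_place) : std::nullopt;

    std::vector<int64_t> values;
    std::istringstream in(id);
    std::string token;
    while (std::getline(in, token, '-'))
    {
        if (token.empty() || token.find_first_not_of("0123456789") != std::string::npos)
            return std::nullopt;               // non-numeric piece: give up
        values.push_back(std::stoll(token));
    }

    // Reject anything that does not round-trip to exactly the same id.
    if (values.size() != num_keys || serializePartitionId(values) != id)
        return std::nullopt;
    return values;
}

int main()
{
    // A key of (Date, Int64): 2021-12-01 is stored as 20211201 inside the id.
    auto parsed = tryParsePartitionId("20211201-42", 2);
    assert(parsed && (*parsed)[0] == 20211201 && (*parsed)[1] == 42);
    assert(!tryParsePartitionId("20211201", 2));   // wrong number of fields
    return 0;
}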
+ if (type->isNullable()) + return {}; + + /// We use Field::Types when serializing partition_id, let's get some Field to check type + Field sample_field = type->getDefault(); + + if (typeid_cast(type.get())) + key_types.emplace_back(DATE); + else if (sample_field.getType() == Field::Types::UInt64) + key_types.emplace_back(UNSIGNED); + else if (sample_field.getType() == Field::Types::Int64) + key_types.emplace_back(SIGNED); + else + return {}; + } + + /// All columns are numeric, will parse partition value + for (size_t i = 0; i < num_keys; ++i) + { + switch (key_types[i]) + { + case DATE: + { + UInt32 date_yyyymmdd; + readText(date_yyyymmdd, buf); + constexpr UInt32 min_yyyymmdd = 10000000; + constexpr UInt32 max_yyyymmdd = 99999999; + if (date_yyyymmdd < min_yyyymmdd || max_yyyymmdd < date_yyyymmdd) + throw Exception( + ErrorCodes::INVALID_PARTITION_VALUE, "Cannot parse partition_id: got unexpected Date: {}", date_yyyymmdd); + + UInt32 date = DateLUT::instance().YYYYMMDDToDayNum(date_yyyymmdd); + res.emplace_back(date); + break; + } + case UNSIGNED: + { + UInt64 value; + readText(value, buf); + res.emplace_back(value); + break; + } + case SIGNED: + { + Int64 value; + readText(value, buf); + res.emplace_back(value); + break; + } + } + + if (i + 1 != num_keys) + assertChar('-', buf); + } + + assertEOF(buf); + + String expected_partition_id = MergeTreePartition{res}.getID(partition_key_sample); + if (expected_partition_id != partition_id) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Partition ID was parsed incorrectly: expected {}, got {}", + expected_partition_id, partition_id); + + return res; +} + void MergeTreePartition::serializeText(const MergeTreeData & storage, WriteBuffer & out, const FormatSettings & format_settings) const { auto metadata_snapshot = storage.getInMemoryMetadataPtr(); diff --git a/src/Storages/MergeTree/MergeTreePartition.h b/src/Storages/MergeTree/MergeTreePartition.h index 67e7a246c302..c8fa54628c3c 100644 --- a/src/Storages/MergeTree/MergeTreePartition.h +++ b/src/Storages/MergeTree/MergeTreePartition.h @@ -33,6 +33,8 @@ struct MergeTreePartition String getID(const MergeTreeData & storage) const; String getID(const Block & partition_key_sample) const; + static std::optional tryParseValueFromID(const String & partition_id, const Block & partition_key_sample); + void serializeText(const MergeTreeData & storage, WriteBuffer & out, const FormatSettings & format_settings) const; void load(const MergeTreeData & storage, const DiskPtr & disk, const String & part_path); diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index c06ceaff1a40..96ea15484a58 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -7498,13 +7498,23 @@ bool StorageReplicatedMergeTree::createEmptyPartInsteadOfLost(zkutil::ZooKeeperP { auto lock = lockParts(); auto parts_in_partition = getDataPartsPartitionRange(new_part_info.partition_id); - if (parts_in_partition.empty()) + if (!parts_in_partition.empty()) { - LOG_WARNING(log, "Empty part {} is not created instead of lost part because there are no parts in partition {} (it's empty), resolve this manually using DROP PARTITION.", lost_part_name, new_part_info.partition_id); + new_data_part->partition = (*parts_in_partition.begin())->partition; + } + else if (auto parsed_partition = MergeTreePartition::tryParseValueFromID( + new_part_info.partition_id, + metadata_snapshot->getPartitionKey().sample_block)) + { + new_data_part->partition = 
MergeTreePartition(*parsed_partition); + } + else + { + LOG_WARNING(log, "Empty part {} is not created instead of lost part because there are no parts in partition {} (it's empty), " + "resolve this manually using DROP/DETACH PARTITION.", lost_part_name, new_part_info.partition_id); return false; } - new_data_part->partition = (*parts_in_partition.begin())->partition; } new_data_part->minmax_idx = std::move(minmax_idx); diff --git a/tests/integration/test_lost_part/test.py b/tests/integration/test_lost_part/test.py index 614df52063f2..7b2d54a5ea46 100644 --- a/tests/integration/test_lost_part/test.py +++ b/tests/integration/test_lost_part/test.py @@ -178,18 +178,20 @@ def test_lost_part_mutation(start_cluster): def test_lost_last_part(start_cluster): for node in [node1, node2]: node.query( - "CREATE TABLE mt3 (id UInt64) ENGINE ReplicatedMergeTree('/clickhouse/tables/t3', '{}') ORDER BY tuple()".format(node.name)) + "CREATE TABLE mt3 (id UInt64, p String) ENGINE ReplicatedMergeTree('/clickhouse/tables/t3', '{}') " + "ORDER BY tuple() PARTITION BY p".format(node.name)) node1.query("SYSTEM STOP MERGES mt3") node2.query("SYSTEM STOP REPLICATION QUEUES") for i in range(1): - node1.query("INSERT INTO mt3 VALUES ({})".format(i)) + node1.query("INSERT INTO mt3 VALUES ({}, 'x')".format(i)) # actually not important node1.query("ALTER TABLE mt3 UPDATE id = 777 WHERE 1", settings={"mutations_sync": "0"}) - remove_part_from_disk(node1, 'mt3', 'all_0_0_0') + partition_id = node1.query("select partitionId('x')").strip() + remove_part_from_disk(node1, 'mt3', '{}_0_0_0'.format(partition_id)) # other way to detect broken parts node1.query("CHECK TABLE mt3") @@ -199,13 +201,13 @@ def test_lost_last_part(start_cluster): for i in range(10): result = node1.query("SELECT count() FROM system.replication_queue") assert int(result) <= 1, "Have a lot of entries in queue {}".format(node1.query("SELECT * FROM system.replication_queue FORMAT Vertical")) - if node1.contains_in_log("Cannot create empty part") and node1.contains_in_log("DROP PARTITION"): + if node1.contains_in_log("Cannot create empty part") and node1.contains_in_log("DROP/DETACH PARTITION"): break time.sleep(1) else: assert False, "Don't have required messages in node1 log" - node1.query("ALTER TABLE mt3 DROP PARTITION ID 'all'") + node1.query("ALTER TABLE mt3 DROP PARTITION ID '{}'".format(partition_id)) assert_eq_with_retry(node1, "SELECT COUNT() FROM mt3", "0") assert_eq_with_retry(node1, "SELECT COUNT() FROM system.replication_queue", "0") diff --git a/tests/queries/0_stateless/01165_lost_part_empty_partition.reference b/tests/queries/0_stateless/01165_lost_part_empty_partition.reference new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/queries/0_stateless/01165_lost_part_empty_partition.sql b/tests/queries/0_stateless/01165_lost_part_empty_partition.sql new file mode 100644 index 000000000000..9279de1a5b39 --- /dev/null +++ b/tests/queries/0_stateless/01165_lost_part_empty_partition.sql @@ -0,0 +1,37 @@ +-- Tags: zookeeper + +create table rmt1 (d DateTime, n int) engine=ReplicatedMergeTree('/test/01165/{database}/rmt', '1') order by n partition by toYYYYMMDD(d); +create table rmt2 (d DateTime, n int) engine=ReplicatedMergeTree('/test/01165/{database}/rmt', '2') order by n partition by toYYYYMMDD(d); + +system stop replicated sends rmt1; +insert into rmt1 values (now(), arrayJoin([1, 2])); -- { clientError 36 } +insert into rmt1(n) select * from system.numbers limit arrayJoin([1, 2]); -- { serverError 36 } +insert into rmt1 
values (now(), rand()); +drop table rmt1; + +system sync replica rmt2; +drop table rmt2; + + +create table rmt1 (d DateTime, n int) engine=ReplicatedMergeTree('/test/01165/{database}/rmt', '1') order by n partition by tuple(); +create table rmt2 (d DateTime, n int) engine=ReplicatedMergeTree('/test/01165/{database}/rmt', '2') order by n partition by tuple(); + +system stop replicated sends rmt1; +insert into rmt1 values (now(), rand()); +drop table rmt1; + +system sync replica rmt2; +drop table rmt2; + + +create table rmt1 (n UInt8, m Int32, d Date, t DateTime) engine=ReplicatedMergeTree('/test/01165/{database}/rmt', '1') order by n partition by (n, m, d, t); +create table rmt2 (n UInt8, m Int32, d Date, t DateTime) engine=ReplicatedMergeTree('/test/01165/{database}/rmt', '2') order by n partition by (n, m, d, t); + +system stop replicated sends rmt1; +insert into rmt1 values (rand(), rand(), now(), now()); +insert into rmt1 values (rand(), rand(), now(), now()); +insert into rmt1 values (rand(), rand(), now(), now()); +drop table rmt1; + +system sync replica rmt2; +drop table rmt2; diff --git a/tests/queries/0_stateless/02009_array_join_partition.sql b/tests/queries/0_stateless/02009_array_join_partition.sql index a78efe96f66a..b8eedb5592f4 100644 --- a/tests/queries/0_stateless/02009_array_join_partition.sql +++ b/tests/queries/0_stateless/02009_array_join_partition.sql @@ -1,4 +1,4 @@ CREATE TABLE table_2009_part (`i` Int64, `d` Date, `s` String) ENGINE = MergeTree PARTITION BY toYYYYMM(d) ORDER BY i; -ALTER TABLE table_2009_part ATTACH PARTITION tuple(arrayJoin([0, 1])); -- {serverError 248} -ALTER TABLE table_2009_part ATTACH PARTITION tuple(toYYYYMM(toDate([arrayJoin([arrayJoin([arrayJoin([arrayJoin([3, materialize(NULL), arrayJoin([1025, materialize(NULL), materialize(NULL)]), NULL])])]), materialize(NULL)])], NULL))); -- {serverError 248} +ALTER TABLE table_2009_part ATTACH PARTITION tuple(arrayJoin([0, 1])); -- {serverError 36} +ALTER TABLE table_2009_part ATTACH PARTITION tuple(toYYYYMM(toDate([arrayJoin([arrayJoin([arrayJoin([arrayJoin([3, materialize(NULL), arrayJoin([1025, materialize(NULL), materialize(NULL)]), NULL])])]), materialize(NULL)])], NULL))); -- {serverError 36} From 1e8d053cbb3c2e7fc2f3483f86fc1668583a5385 Mon Sep 17 00:00:00 2001 From: "neng.liu" Date: Thu, 2 Dec 2021 07:41:15 +0000 Subject: [PATCH 212/472] add ch column to spark row --- .../Parser/CHColumnToSparkRow.cpp | 207 ++++++++++++++++++ .../local-engine/Parser/CHColumnToSparkRow.h | 39 ++++ utils/local-engine/java/pom.xml | 6 + .../io/kyligence/jni/engine/LocalEngine.java | 5 +- .../kyligence/jni/engine/LocalEngineTest.java | 3 + .../local-engine/tests/gtest_local_engine.cpp | 14 ++ 6 files changed, 272 insertions(+), 2 deletions(-) create mode 100644 utils/local-engine/Parser/CHColumnToSparkRow.cpp create mode 100644 utils/local-engine/Parser/CHColumnToSparkRow.h diff --git a/utils/local-engine/Parser/CHColumnToSparkRow.cpp b/utils/local-engine/Parser/CHColumnToSparkRow.cpp new file mode 100644 index 000000000000..efb6442efe58 --- /dev/null +++ b/utils/local-engine/Parser/CHColumnToSparkRow.cpp @@ -0,0 +1,207 @@ +#include "CHColumnToSparkRow.h" +#include +#include +#include + +#define WRITE_VECTOR_COLUMN(TYPE) \ + const auto *uint8_col = checkAndGetColumn>(*col.column);\ + for (auto i = 0; i < num_rows; i++) {\ + Field value;\ + uint8_col->get(i, value);\ + memcpy(buffer_address + offsets[i] + field_offset, &value.get(), sizeof(uint8_t));\ + } + +namespace local_engine +{ +using namespace DB; +int64_t 
calculateBitSetWidthInBytes(int32_t num_fields) { + return ((num_fields + 64) / 64) * 8; +} + +int64_t calculatedFixeSizePerRow(DB::Block& header, + int64_t num_cols) { + auto fields = header.getNamesAndTypesList(); + // Calculate the decimal col num when the precision >18 + int32_t count = 0; + for (auto i = 0; i < num_cols; i++) { + auto type = fields.getTypes()[i]; + DB::WhichDataType which(type); + if (which.isDecimal128()) { + const auto & dtype = typeid_cast *>(type.get()); + int32_t precision = dtype->getPrecision(); + if (precision > 18) count++; + } + } + + int64_t fixed_size = calculateBitSetWidthInBytes(num_cols) + num_cols * 8; + int64_t decimal_cols_size = count * 16; + return fixed_size + decimal_cols_size; +} + +int64_t roundNumberOfBytesToNearestWord(int64_t numBytes) { + int64_t remainder = numBytes & 0x07; // This is equivalent to `numBytes % 8` + if (remainder == 0) { + return numBytes; + } else { + return numBytes + (8 - remainder); + } +} + +int64_t getFieldOffset(int64_t nullBitsetWidthInBytes, int32_t index) { + return nullBitsetWidthInBytes + 8L * index; +} + +void bitSet(uint8_t* buffer_address, int32_t index) { + int64_t mask = 1L << (index & 0x3f); // mod 64 and shift + int64_t wordOffset = (index >> 6) * 8; + int64_t word; + memcpy(&word, buffer_address + wordOffset, sizeof(int64_t)); + int64_t value = word | mask; + memcpy(buffer_address + wordOffset, &value, sizeof(int64_t)); +} + +void setNullAt(uint8_t* buffer_address, int64_t row_offset, int64_t field_offset, + int32_t col_index) { + bitSet(buffer_address + row_offset, col_index); + // set the value to 0 + memset(buffer_address + row_offset + field_offset, 0, sizeof(int64_t)); +} + +void writeValue(uint8_t* buffer_address, int64_t field_offset, + ColumnWithTypeAndName& col, int32_t col_index, + int64_t num_rows, std::vector& offsets, + std::vector& buffer_cursor) { + + ColumnPtr nested_col = col.column; + if (const auto * nullable_column = checkAndGetColumn(*col.column)) + { + for (auto i = 0; i < num_rows; i++) { + bool is_null = nullable_column->isNullAt(i); + if (is_null) { + setNullAt(buffer_address, offsets[i], field_offset, col_index); + } + } + nested_col = nullable_column->getNestedColumnPtr(); + } + WhichDataType which(nested_col->getDataType()); + if (which.isUInt8()) + { + WRITE_VECTOR_COLUMN(UInt8) + } + else if (which.isInt8()) + { + WRITE_VECTOR_COLUMN(Int8) + } + else if (which.isInt16()) + { + WRITE_VECTOR_COLUMN(Int16) + } + else if (which.isInt32()) + { + WRITE_VECTOR_COLUMN(Int32) + } + else if (which.isInt64()) + { + WRITE_VECTOR_COLUMN(Int64) + } + else if (which.isFloat32()) + { + WRITE_VECTOR_COLUMN(Float32) + } + else if (which.isFloat64()) + { + WRITE_VECTOR_COLUMN(Float64) + } + else if (which.isDate()) + { + WRITE_VECTOR_COLUMN(UInt16) + } + else + { + throw std::runtime_error("doesn't support type "+ nested_col->getDataType().) 
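+        // Note: the branches above cover only fixed-width types (integers, floats, Date); each value
+        // is written into the row's fixed-length region at buffer_address + offsets[row] + field_offset,
+        // i.e. after the per-row null bitset, with one 8-byte slot reserved per column
+        // (Spark UnsafeRow-style layout). Strings and other variable-length columns are not handled here yet.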
+ } +} + +SparkRowInfo::SparkRowInfo(DB::Block& block) +{ + num_rows_ = block.rows(); + num_cols_ = block.columns(); + nullBitsetWidthInBytes_ = calculateBitSetWidthInBytes(num_cols_); + int64_t fixed_size_per_row = calculatedFixeSizePerRow(block, num_cols_); + // Initialize the offsets_ , lengths_, buffer_cursor_ + for (auto i = 0; i < num_rows_; i++) { + lengths_.push_back(fixed_size_per_row); + offsets_.push_back(0); + buffer_cursor_.push_back(nullBitsetWidthInBytes_ + 8 * num_cols_); + } + // Calculated the lengths_ + for (auto i = 0; i < num_cols_; i++) { + auto col = block.getByPosition(i); + if (isStringOrFixedString(col.type)) { + size_t length; + for (auto j = 0; j < num_rows_; j++) { + length = col.column->getDataAt(j).size; + lengths_[j] += roundNumberOfBytesToNearestWord(length); + } + } + } +} +int64_t local_engine::SparkRowInfo::getNullBitsetWidthInBytes() const +{ + return nullBitsetWidthInBytes_; +} +void local_engine::SparkRowInfo::setNullBitsetWidthInBytes(int64_t nullBitsetWidthInBytes) +{ + nullBitsetWidthInBytes_ = nullBitsetWidthInBytes; +} +int64_t local_engine::SparkRowInfo::getNumCols() const +{ + return num_cols_; +} +void local_engine::SparkRowInfo::setNumCols(int64_t numCols) +{ + num_cols_ = numCols; +} +int64_t local_engine::SparkRowInfo::getNumRows() const +{ + return num_rows_; +} +void local_engine::SparkRowInfo::setNumRows(int64_t numRows) +{ + num_rows_ = numRows; +} +unsigned char * local_engine::SparkRowInfo::getBufferAddress() const +{ + return buffer_address_; +} +void local_engine::SparkRowInfo::setBufferAddress(unsigned char * bufferAddress) +{ + buffer_address_ = bufferAddress; +} +const std::vector & local_engine::SparkRowInfo::getOffsets() const +{ + return offsets_; +} +const std::vector & local_engine::SparkRowInfo::getLengths() const +{ + return lengths_; +} +void local_engine::CHColumnToSparkRow::convertCHColumnToSparkRow(Block & block) +{ + std::unique_ptr spark_row_info = std::make_unique(block); + // Calculated the offsets_ and total memory size based on lengths_ + int64_t total_memory_size = spark_row_info->lengths_[0]; + for (auto i = 1; i < spark_row_info->num_rows_; i++) { + spark_row_info->offsets_[i] = spark_row_info->offsets_[i - 1] + spark_row_info->lengths_[i - 1]; + total_memory_size += spark_row_info->lengths_[i]; + } + + spark_row_info->buffer_address_ = new uint8_t[total_memory_size]; + for (auto i = 0; i < spark_row_info->num_cols_; i++) { + auto array = block.getByPosition(i); + int64_t field_offset = getFieldOffset(spark_row_info->nullBitsetWidthInBytes_, i); + writeValue(spark_row_info->buffer_address_, field_offset, array, i, spark_row_info->num_rows_, spark_row_info->offsets_, + spark_row_info->buffer_cursor_); + } +} +} diff --git a/utils/local-engine/Parser/CHColumnToSparkRow.h b/utils/local-engine/Parser/CHColumnToSparkRow.h new file mode 100644 index 000000000000..811f8021bb04 --- /dev/null +++ b/utils/local-engine/Parser/CHColumnToSparkRow.h @@ -0,0 +1,39 @@ +#pragma once +#include +#include + +namespace local_engine +{ +class CHColumnToSparkRow +{ +public: + void convertCHColumnToSparkRow(DB::Block & block); +}; + +class SparkRowInfo +{ + friend CHColumnToSparkRow; +public: + SparkRowInfo(DB::Block& block); + int64_t getNullBitsetWidthInBytes() const; + void setNullBitsetWidthInBytes(int64_t nullBitsetWidthInBytes); + int64_t getNumCols() const; + void setNumCols(int64_t numCols); + int64_t getNumRows() const; + void setNumRows(int64_t numRows); + unsigned char * getBufferAddress() const; + void 
setBufferAddress(unsigned char * bufferAddress); + const std::vector & getOffsets() const; + const std::vector & getLengths() const; + +private: + int64_t nullBitsetWidthInBytes_; + int64_t num_cols_; + int64_t num_rows_; + std::vector buffer_cursor_; + uint8_t * buffer_address_; + std::vector offsets_; + std::vector lengths_; +}; +} + diff --git a/utils/local-engine/java/pom.xml b/utils/local-engine/java/pom.xml index 699092dc6e03..cd957ee6a276 100644 --- a/utils/local-engine/java/pom.xml +++ b/utils/local-engine/java/pom.xml @@ -32,6 +32,12 @@ commons-io 2.11.0 + + org.apache.arrow + arrow-vector + 4.0.1 + provided + junit junit diff --git a/utils/local-engine/java/src/main/java/io/kyligence/jni/engine/LocalEngine.java b/utils/local-engine/java/src/main/java/io/kyligence/jni/engine/LocalEngine.java index 8d25288f8794..551593afccba 100644 --- a/utils/local-engine/java/src/main/java/io/kyligence/jni/engine/LocalEngine.java +++ b/utils/local-engine/java/src/main/java/io/kyligence/jni/engine/LocalEngine.java @@ -16,8 +16,8 @@ public static void main(String[] args) throws InterruptedException { System.out.println(result); } - public long nativeExecutor; - public byte[] plan; + private long nativeExecutor; + private byte[] plan; public LocalEngine(byte[] plan) { this.plan = plan; @@ -29,6 +29,7 @@ public LocalEngine(byte[] plan) { public native byte[] next(); + @Override public native void close() throws IOException; } diff --git a/utils/local-engine/java/src/test/java/io/kyligence/jni/engine/LocalEngineTest.java b/utils/local-engine/java/src/test/java/io/kyligence/jni/engine/LocalEngineTest.java index e9d76d03ccad..c65c96ee4a27 100644 --- a/utils/local-engine/java/src/test/java/io/kyligence/jni/engine/LocalEngineTest.java +++ b/utils/local-engine/java/src/test/java/io/kyligence/jni/engine/LocalEngineTest.java @@ -1,6 +1,9 @@ package io.kyligence.jni.engine; +import org.apache.arrow.vector.ipc.ArrowFileReader; +import org.apache.arrow.vector.ipc.SeekableReadChannel; +import org.apache.arrow.vector.ipc.message.ArrowRecordBatch; import org.apache.commons.io.IOUtils; import org.junit.Assert; import org.junit.Before; diff --git a/utils/local-engine/tests/gtest_local_engine.cpp b/utils/local-engine/tests/gtest_local_engine.cpp index 1013ad2c8537..f4c344c59035 100644 --- a/utils/local-engine/tests/gtest_local_engine.cpp +++ b/utils/local-engine/tests/gtest_local_engine.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include #include "testConfig.h" #include @@ -40,6 +41,19 @@ TEST(TestSelect, ReadRel) } } +TEST(TestSelect, MergeTreeWriteTest) +{ + DB::StorageID id("default", "test"); + std::string relative_path = TEST_DATA(/data/mergetree); + DB::StorageInMemoryMetadata storage_in_memory_metadata; + auto shared = DB::Context::createShared(); + auto global = DB::Context::createGlobal(shared.get()); + auto merging_params = DB::MergeTreeData::MergingParams(); + auto storage_setting = std::make_unique(); + +// DB::MergeTreeData(id, relative_path, storage_in_memory_metadata, global, "", merging_params, std::move(storage_setting), false, false, nullptr); +} + int main(int argc, char **argv) { ::testing::InitGoogleTest(&argc,argv); From 13b6deccfa09f6cea36c26503996ade0b3a836d8 Mon Sep 17 00:00:00 2001 From: tavplubix Date: Thu, 2 Dec 2021 12:44:13 +0300 Subject: [PATCH 213/472] Update 01165_lost_part_empty_partition.sql --- .../01165_lost_part_empty_partition.sql | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git 
a/tests/queries/0_stateless/01165_lost_part_empty_partition.sql b/tests/queries/0_stateless/01165_lost_part_empty_partition.sql index 9279de1a5b39..d578eb66d618 100644 --- a/tests/queries/0_stateless/01165_lost_part_empty_partition.sql +++ b/tests/queries/0_stateless/01165_lost_part_empty_partition.sql @@ -24,14 +24,14 @@ system sync replica rmt2; drop table rmt2; -create table rmt1 (n UInt8, m Int32, d Date, t DateTime) engine=ReplicatedMergeTree('/test/01165/{database}/rmt', '1') order by n partition by (n, m, d, t); -create table rmt2 (n UInt8, m Int32, d Date, t DateTime) engine=ReplicatedMergeTree('/test/01165/{database}/rmt', '2') order by n partition by (n, m, d, t); +--create table rmt1 (n UInt8, m Int32, d Date, t DateTime) engine=ReplicatedMergeTree('/test/01165/{database}/rmt', '1') order by n partition by (n, m, d, t); +--create table rmt2 (n UInt8, m Int32, d Date, t DateTime) engine=ReplicatedMergeTree('/test/01165/{database}/rmt', '2') order by n partition by (n, m, d, t); -system stop replicated sends rmt1; -insert into rmt1 values (rand(), rand(), now(), now()); -insert into rmt1 values (rand(), rand(), now(), now()); -insert into rmt1 values (rand(), rand(), now(), now()); -drop table rmt1; +--system stop replicated sends rmt1; +--insert into rmt1 values (rand(), rand(), now(), now()); +--insert into rmt1 values (rand(), rand(), now(), now()); +--insert into rmt1 values (rand(), rand(), now(), now()); +--drop table rmt1; -system sync replica rmt2; -drop table rmt2; +--system sync replica rmt2; +--drop table rmt2; From e1c72bd4a886e4ba6a03b0a70700a31d107ff690 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 2 Dec 2021 17:44:20 +0300 Subject: [PATCH 214/472] Auto version update to [21.9.6.24] [54454] --- cmake/autogenerated_versions.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cmake/autogenerated_versions.txt b/cmake/autogenerated_versions.txt index 0768f3e98d25..32b4fbd8d199 100644 --- a/cmake/autogenerated_versions.txt +++ b/cmake/autogenerated_versions.txt @@ -6,7 +6,7 @@ SET(VERSION_REVISION 54454) SET(VERSION_MAJOR 21) SET(VERSION_MINOR 9) SET(VERSION_PATCH 6) -SET(VERSION_GITHASH f78bedb6c1c54627bc68eab774f1a2413bfc4486) -SET(VERSION_DESCRIBE v21.9.6.1-stable) -SET(VERSION_STRING 21.9.6.1) +SET(VERSION_GITHASH 60406317ed11dd6915a77e0b4b539e7aec950626) +SET(VERSION_DESCRIBE v21.9.6.24-stable) +SET(VERSION_STRING 21.9.6.24) # end of autochange From fff889d5a883baf995d568ad7d3901c057b4ae45 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 2 Dec 2021 17:49:30 +0300 Subject: [PATCH 215/472] Auto version update to [21.9.7.1] [54454] --- cmake/autogenerated_versions.txt | 6 +++--- debian/changelog | 4 ++-- docker/client/Dockerfile | 2 +- docker/server/Dockerfile | 2 +- docker/test/Dockerfile | 2 +- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/cmake/autogenerated_versions.txt b/cmake/autogenerated_versions.txt index 32b4fbd8d199..73f986b14cf6 100644 --- a/cmake/autogenerated_versions.txt +++ b/cmake/autogenerated_versions.txt @@ -5,8 +5,8 @@ SET(VERSION_REVISION 54454) SET(VERSION_MAJOR 21) SET(VERSION_MINOR 9) -SET(VERSION_PATCH 6) +SET(VERSION_PATCH 7) SET(VERSION_GITHASH 60406317ed11dd6915a77e0b4b539e7aec950626) -SET(VERSION_DESCRIBE v21.9.6.24-stable) -SET(VERSION_STRING 21.9.6.24) +SET(VERSION_DESCRIBE v21.9.7.1-stable) +SET(VERSION_STRING 21.9.7.1) # end of autochange diff --git a/debian/changelog b/debian/changelog index ff435d6b66c9..47514f9cb65e 100644 --- a/debian/changelog +++ b/debian/changelog @@ 
-1,5 +1,5 @@ -clickhouse (21.9.6.1) unstable; urgency=low +clickhouse (21.9.7.1) unstable; urgency=low * Modified source code - -- clickhouse-release Tue, 19 Oct 2021 01:51:55 +0300 + -- clickhouse-release Thu, 02 Dec 2021 17:49:27 +0300 diff --git a/docker/client/Dockerfile b/docker/client/Dockerfile index 0d82c882d734..35645f26e631 100644 --- a/docker/client/Dockerfile +++ b/docker/client/Dockerfile @@ -1,7 +1,7 @@ FROM ubuntu:18.04 ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/" -ARG version=21.9.6.* +ARG version=21.9.7.* RUN sed -i 's|http://archive|http://ru.archive|g' /etc/apt/sources.list diff --git a/docker/server/Dockerfile b/docker/server/Dockerfile index aa2b9bba8cbd..14f1bb972828 100644 --- a/docker/server/Dockerfile +++ b/docker/server/Dockerfile @@ -1,7 +1,7 @@ FROM ubuntu:20.04 ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/" -ARG version=21.9.6.* +ARG version=21.9.7.* ARG gosu_ver=1.10 # set non-empty deb_location_url url to create a docker image diff --git a/docker/test/Dockerfile b/docker/test/Dockerfile index 1f51efee0591..4afe9511bd5e 100644 --- a/docker/test/Dockerfile +++ b/docker/test/Dockerfile @@ -1,7 +1,7 @@ FROM ubuntu:18.04 ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/" -ARG version=21.9.6.* +ARG version=21.9.7.* RUN apt-get update && \ apt-get install -y apt-transport-https dirmngr && \ From 746024bb136161388310910cfe0cdfec79be6899 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 2 Dec 2021 16:14:16 +0000 Subject: [PATCH 216/472] Backport #32063 to 21.9: Fix 'directory exists' error when detaching part --- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 15 ++++++++++++--- src/Storages/MergeTree/IMergeTreeDataPart.h | 2 +- tests/integration/test_partition/test.py | 16 ++++++++++++++++ 3 files changed, 29 insertions(+), 4 deletions(-) diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index db271cc280b0..9d9bd795a503 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -1296,7 +1296,7 @@ void IMergeTreeDataPart::projectionRemove(const String & parent_to, bool keep_sh } } -String IMergeTreeDataPart::getRelativePathForPrefix(const String & prefix) const +String IMergeTreeDataPart::getRelativePathForPrefix(const String & prefix, bool detached) const { String res; @@ -1305,11 +1305,20 @@ String IMergeTreeDataPart::getRelativePathForPrefix(const String & prefix) const * This is done only in the case of `to_detached`, because it is assumed that in this case the exact name does not matter. * No more than 10 attempts are made so that there are not too many junk directories left. */ + + auto full_relative_path = fs::path(storage.relative_data_path); + if (detached) + full_relative_path /= "detached"; + if (detached && parent_part) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot detach projection"); + else if (parent_part) + full_relative_path /= parent_part->relative_path; + for (int try_no = 0; try_no < 10; try_no++) { res = (prefix.empty() ? "" : prefix + "_") + name + (try_no ? "_try" + DB::toString(try_no) : ""); - if (!volume->getDisk()->exists(fs::path(getFullRelativePath()) / res)) + if (!volume->getDisk()->exists(full_relative_path / res)) return res; LOG_WARNING(storage.log, "Directory {} (to detach to) already exists. 
Will detach to directory with '_tryN' suffix.", res); @@ -1325,7 +1334,7 @@ String IMergeTreeDataPart::getRelativePathForDetachedPart(const String & prefix) assert(prefix.empty() || std::find(DetachedPartInfo::DETACH_REASONS.begin(), DetachedPartInfo::DETACH_REASONS.end(), prefix) != DetachedPartInfo::DETACH_REASONS.end()); - return "detached/" + getRelativePathForPrefix(prefix); + return "detached/" + getRelativePathForPrefix(prefix, /* detached */ true); } void IMergeTreeDataPart::renameToDetached(const String & prefix) const diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index 8b7a15e5da09..fa14c749c035 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -353,7 +353,7 @@ class IMergeTreeDataPart : public std::enable_shared_from_this Date: Thu, 2 Dec 2021 16:15:46 +0000 Subject: [PATCH 217/472] Backport #31800 to 21.9: Function dictGet with type Nullable fix --- src/Functions/FunctionsExternalDictionaries.h | 7 +----- ...02125_dict_get_type_nullable_fix.reference | 1 + .../02125_dict_get_type_nullable_fix.sql | 22 +++++++++++++++++++ 3 files changed, 24 insertions(+), 6 deletions(-) create mode 100644 tests/queries/0_stateless/02125_dict_get_type_nullable_fix.reference create mode 100644 tests/queries/0_stateless/02125_dict_get_type_nullable_fix.sql diff --git a/src/Functions/FunctionsExternalDictionaries.h b/src/Functions/FunctionsExternalDictionaries.h index 4f79b06b44a5..d18e146f651a 100644 --- a/src/Functions/FunctionsExternalDictionaries.h +++ b/src/Functions/FunctionsExternalDictionaries.h @@ -660,18 +660,13 @@ class FunctionDictGetImpl final : public IFunction { auto return_type = impl.getReturnTypeImpl(arguments); - if (!areTypesEqual(return_type, result_type)) + if (!return_type->equals(*result_type)) throw Exception{"Dictionary attribute has different type " + return_type->getName() + " expected " + result_type->getName(), ErrorCodes::TYPE_MISMATCH}; return impl.executeImpl(arguments, return_type, input_rows_count); } - static bool areTypesEqual(const DataTypePtr & lhs, const DataTypePtr & rhs) - { - return removeNullable(recursiveRemoveLowCardinality(lhs))->equals(*removeNullable(recursiveRemoveLowCardinality(rhs))); - } - const FunctionDictGetNoType impl; }; diff --git a/tests/queries/0_stateless/02125_dict_get_type_nullable_fix.reference b/tests/queries/0_stateless/02125_dict_get_type_nullable_fix.reference new file mode 100644 index 000000000000..af53c9c3c217 --- /dev/null +++ b/tests/queries/0_stateless/02125_dict_get_type_nullable_fix.reference @@ -0,0 +1 @@ +Value diff --git a/tests/queries/0_stateless/02125_dict_get_type_nullable_fix.sql b/tests/queries/0_stateless/02125_dict_get_type_nullable_fix.sql new file mode 100644 index 000000000000..01fea381bf3b --- /dev/null +++ b/tests/queries/0_stateless/02125_dict_get_type_nullable_fix.sql @@ -0,0 +1,22 @@ +DROP TABLE IF EXISTS 02125_test_table; +CREATE TABLE 02125_test_table +( + id UInt64, + value Nullable(String) +) +ENGINE=TinyLog; + +INSERT INTO 02125_test_table VALUES (0, 'Value'); + +DROP DICTIONARY IF EXISTS 02125_test_dictionary; +CREATE DICTIONARY 02125_test_dictionary +( + id UInt64, + value Nullable(String) +) +PRIMARY KEY id +SOURCE(CLICKHOUSE(TABLE '02125_test_table')) +LAYOUT(DIRECT()); + +SELECT dictGet('02125_test_dictionary', 'value', toUInt64(0)); +SELECT dictGetString('02125_test_dictionary', 'value', toUInt64(0)); --{serverError 53} From 22192daded88c5b38ec0a7011b90c2dbfff53e5c Mon Sep 17 
00:00:00 2001 From: "neng.liu" Date: Fri, 3 Dec 2021 14:11:16 +0000 Subject: [PATCH 218/472] add some jni util --- utils/local-engine/local_engine_jni.cpp | 283 +++++------------------- 1 file changed, 54 insertions(+), 229 deletions(-) diff --git a/utils/local-engine/local_engine_jni.cpp b/utils/local-engine/local_engine_jni.cpp index 0608b34f6c8b..d387b61eb95e 100644 --- a/utils/local-engine/local_engine_jni.cpp +++ b/utils/local-engine/local_engine_jni.cpp @@ -1,11 +1,11 @@ -#include "include/io_kyligence_jni_engine_LocalEngine.h" -#include #include +#include #include #include #include #include #include +#include "include/io_kyligence_jni_engine_LocalEngine.h" #include #include @@ -18,289 +18,114 @@ #include #include #include +#include #include #include #include #include #include -#include #include +#include #include #include #include +#include "jni_common.h" using namespace DB; using namespace rapidjson; -/** - * SQL example:++ - * - * SELECT min(x1),max(x2),sum(x3),count(x4),avg(x5) FROM table1 WHERE x6=* GROUP BY x7 - * - * table defination - * SQL columns: - * project - * filter - * aggregate - */ -Block getTableHeader(std::map & cols) -{ - auto internalCols = std::make_shared>(); - internalCols->reserve(cols.size()); - for (const auto & [key, value] : cols) - { - ColumnWithTypeAndName col; - auto & data_type_factory = DataTypeFactory::instance(); - auto type = data_type_factory.get(value); - internalCols->push_back(ColumnWithTypeAndName(type->createColumn(), type, key)); - } - return Block(*internalCols); -} - -std::shared_ptr getSource(ReadBuffer & buf, Block & header) -{ - FormatSettings settings; - return std::make_shared(header, buf, RowInputFormatParams{.max_block_size = 100}, false, settings); -} - - -std::shared_ptr> getColumns(Document & config) -{ - auto columns = std::make_shared>(); - auto cols = config["columns"].GetArray(); - for (auto * it = cols.Begin(); it != cols.End(); it++) - { - auto col = it->GetObject(); - if (columns->contains(col["name"].GetString())) - { - throw std::logic_error("duplicate column"); - } - columns->emplace(col["name"].GetString(), col["type"].GetString()); - } - return columns; -} - void registerAllFunctions() { registerFunctions(); registerAggregateFunctions(); } -FunctionOverloadResolverPtr getFunction(const std::string & name, ContextPtr context) -{ - auto & factory = FunctionFactory::instance(); - return factory.get(name, context); -} +bool inside_main = false; +#ifdef __cplusplus +extern "C" { +#endif -AggregateFunctionPtr getAggregateFunction(const std::string & name, DataTypes arg_types) -{ - auto & factory = AggregateFunctionFactory::instance(); - AggregateFunctionProperties properties; - return factory.get(name, arg_types, Array{}, properties); -} +static jfieldID local_engine_plan_field_id; +static jclass local_engine_class; +static jfieldID local_engine_executor_field_id; -ActionsDAG::NodeRawConstPtrs getArguments(ActionsDAG::NodeRawConstPtrs nodes, std::vector & args) +jint JNI_OnLoad(JavaVM * vm, void * reserved) { - ActionsDAG::NodeRawConstPtrs result; - result.reserve(args.size()); - for (const auto & item : nodes) + JNIEnv * env; + if (vm->GetEnv(reinterpret_cast(&env), JNI_VERSION_1_8) != JNI_OK) { - if (std::find(args.begin(), args.end(), item->result_name) != args.end()) - { - result.emplace_back(item); - } + return JNI_ERR; } - return result; -} + io_exception_class = CreateGlobalClassReference(env, "Ljava/io/IOException;"); + runtime_exception_class = CreateGlobalClassReference(env, "Ljava/lang/RuntimeException;"); + 
unsupportedoperation_exception_class = CreateGlobalClassReference(env, "Ljava/lang/UnsupportedOperationException;"); + illegal_access_exception_class = CreateGlobalClassReference(env, "Ljava/lang/IllegalAccessException;"); + illegal_argument_exception_class = CreateGlobalClassReference(env, "Ljava/lang/IllegalArgumentException;"); -NamesAndTypesList blockToNameAndTypeList(Block & header) -{ - NamesAndTypesList types; - for (const auto & name : header.getNames()) - { - auto column = header.findByName(name); - types.push_back(NameAndTypePair(column->name, column->type)); - } - return types; + local_engine_class = CreateGlobalClassReference(env, "Lio/kyligence/jni/engine/LocalEngine"); + local_engine_plan_field_id = env->GetFieldID(local_engine_class, "plan", "[B"); + local_engine_executor_field_id = env->GetFieldID(local_engine_class, "nativeExecutor", "J"); } -QueryPlanStepPtr buildFilter(Block & header, ContextPtr context) +void JNI_OnUnload(JavaVM * vm, void * reserved) { - auto actions_dag = std::make_shared(std::move(blockToNameAndTypeList(header))); - // auto int_type = std::make_shared(); - // auto const_node = actions_dag->addInput(ColumnWithTypeAndName(int_type->createColumnConst(1, 4), int_type, "_1")); - // actions_dag->addOrReplaceInIndex(const_node); - std::string empty_string; - std::vector args = {"x1", "x2"}; - const auto & filter_node = actions_dag->addFunction( - std::move(getFunction("less", context)), getArguments(actions_dag->getIndex(), args), std::move(empty_string)); - actions_dag->getIndex().push_back(&filter_node); - DataStream input_stream = DataStream{.header = header}; - auto filter = std::make_unique(input_stream, actions_dag, std::move(filter_node.result_name), true); - return std::move(filter); -} - -void buildAgg(Block & header, QueryPlan & query_plan, ContextPtr context) -{ - auto aggregates = AggregateDescriptions(); - auto count = AggregateDescription(); - count.column_name = "count(x2)"; - count.arguments = ColumnNumbers{1}; - count.argument_names = Names{"x2"}; - auto int_type = std::make_shared(); - count.function = getAggregateFunction("count", {int_type}); - aggregates.push_back(count); - Settings settings; - Aggregator::Params params( - header, - ColumnNumbers{0}, - aggregates, - false, - settings.max_rows_to_group_by, - settings.group_by_overflow_mode, - settings.group_by_two_level_threshold, - settings.group_by_two_level_threshold_bytes, - settings.max_bytes_before_external_group_by, - settings.empty_result_for_aggregation_by_empty_set, - context->getTemporaryVolume(), - settings.max_threads, - settings.min_free_disk_space_for_temporary_data, - settings.compile_aggregate_expressions, - settings.min_count_to_compile_aggregate_expression); - - SortDescription group_by_sort_description; + std::cerr << "JNI_OnUnload" << std::endl; + JNIEnv * env; + vm->GetEnv(reinterpret_cast(&env), JNI_VERSION_1_8); - auto merge_threads = 1; - auto temporary_data_merge_threads = settings.aggregation_memory_efficient_merge_threads - ? 
static_cast(settings.aggregation_memory_efficient_merge_threads) - : static_cast(settings.max_threads); - - - auto aggregating_step = std::make_unique( - query_plan.getCurrentDataStream(), - params, - true, - settings.max_block_size, - merge_threads, - temporary_data_merge_threads, - false, - nullptr, - std::move(group_by_sort_description)); - - query_plan.addStep(std::move(aggregating_step)); + env->DeleteGlobalRef(io_exception_class); + env->DeleteGlobalRef(runtime_exception_class); + env->DeleteGlobalRef(unsupportedoperation_exception_class); + env->DeleteGlobalRef(illegal_access_exception_class); + env->DeleteGlobalRef(illegal_argument_exception_class); + env->DeleteGlobalRef(local_engine_class); } -bool inside_main = false; - -void runSamplePipeline() -{ - auto shared_context = Context::createShared(); - auto global_context = Context::createGlobal(shared_context.get()); - registerAllFunctions(); - auto & factory = FunctionFactory::instance(); - std::ifstream ifs("/home/kyligence/Documents/code/ClickHouse/utils/local-engine/table.json"); - IStreamWrapper isw(ifs); - - Document d; - d.ParseStream(isw); - auto cols = getColumns(d); - auto header = getTableHeader(*cols); - - QueryPlan query_plan; - auto file = "/home/kyligence/Documents/code/ClickHouse/utils/local-engine/table.csv"; - auto buf = std::make_unique(file); - - auto source = getSource(*buf, header); - - std::unique_ptr query_pipelines = std::make_unique(); - auto source_step = std::make_unique(Pipe(source), "CSV"); - query_plan.addStep(std::move(source_step)); - - auto filter = buildFilter(header, global_context); - query_plan.addStep(std::move(filter)); - buildAgg(header, query_plan, global_context); - QueryPlanOptimizationSettings optimization_settings{.optimize_plan = false}; - auto query_pipline = query_plan.buildQueryPipeline(optimization_settings, BuildQueryPipelineSettings()); - - auto buffer = WriteBufferFromFile("/home/kyligence/Documents/code/ClickHouse/output.txt"); - auto output = std::make_shared(buffer, query_pipline->getHeader(), true, RowOutputFormatParams(), FormatSettings()); - query_pipline->setOutputFormat(output); - auto executor = query_pipline->execute(); - executor->execute(1); -} - -int main(int, char **) -{ - inside_main = true; - runSamplePipeline(); - return 0; -} - -JNIEXPORT jlong JNICALL Java_io_kyligence_jni_engine_LocalEngine_test - (JNIEnv *env, jclass, jint a, jint b) +JNIEXPORT jlong JNICALL Java_io_kyligence_jni_engine_LocalEngine_test(JNIEnv * env, jclass, jint a, jint b) { inside_main = true; - std::cout << "start run pipeline." << std::endl; - try - { - runSamplePipeline(); - } - catch (Poco::Exception e) - { - std::cout << e.message() << std::endl; - std::cout << e.displayText() << std::endl; - e.rethrow(); - } - std::cout << "run pipeline success." 
<< std::endl; - std::cout <GetObjectClass(obj); - jfieldID plan_field_id = env->GetFieldID(this_class, "plan", "[B"); - jobject plan_data = env->GetObjectField(obj, plan_field_id); - jbyteArray *plan = reinterpret_cast(&plan_data); + jobject plan_data = env->GetObjectField(obj, local_engine_plan_field_id); + jbyteArray * plan = reinterpret_cast(&plan_data); jsize plan_size = env->GetArrayLength(*plan); - jbyte *plan_address = env->GetByteArrayElements(*plan, nullptr); + jbyte * plan_address = env->GetByteArrayElements(*plan, nullptr); std::string plan_string; plan_string.assign(reinterpret_cast(plan_address), plan_size); auto query_plan = dbms::SerializedPlanParser::parse(plan_string); - dbms::LocalExecutor* executor = new dbms::LocalExecutor(); + dbms::LocalExecutor * executor = new dbms::LocalExecutor(); executor->execute(std::move(query_plan)); - - jfieldID executor_field_id = env->GetFieldID(this_class, "nativeExecutor", "J"); - env->SetLongField(obj, executor_field_id, reinterpret_cast(executor)); + env->SetLongField(obj, local_engine_executor_field_id, reinterpret_cast(executor)); } -jboolean Java_io_kyligence_jni_engine_LocalEngine_hasNext(JNIEnv *env, jobject obj) +jboolean Java_io_kyligence_jni_engine_LocalEngine_hasNext(JNIEnv * env, jobject obj) { - jclass this_class = env->GetObjectClass(obj); - jfieldID executor_field_id = env->GetFieldID(this_class, "nativeExecutor", "J"); - jlong executor_address = env->GetLongField(obj, executor_field_id); - dbms::LocalExecutor* executor = reinterpret_cast(executor_address); + jlong executor_address = env->GetLongField(obj, local_engine_executor_field_id); + dbms::LocalExecutor * executor = reinterpret_cast(executor_address); return executor->hasNext(); } -jbyteArray Java_io_kyligence_jni_engine_LocalEngine_next(JNIEnv *env, jobject obj) +jbyteArray Java_io_kyligence_jni_engine_LocalEngine_next(JNIEnv * env, jobject obj) { - jclass this_class = env->GetObjectClass(obj); - jfieldID executor_field_id = env->GetFieldID(this_class, "nativeExecutor", "J"); - jlong executor_address = env->GetLongField(obj, executor_field_id); - dbms::LocalExecutor* executor = reinterpret_cast(executor_address); + jlong executor_address = env->GetLongField(obj, local_engine_executor_field_id); + dbms::LocalExecutor * executor = reinterpret_cast(executor_address); std::string arrow_batch = executor->next(); jbyteArray result = env->NewByteArray(arrow_batch.size()); env->SetByteArrayRegion(result, 0, arrow_batch.size(), reinterpret_cast(arrow_batch.data())); return result; } -void Java_io_kyligence_jni_engine_LocalEngine_close(JNIEnv *env, jobject obj) +void Java_io_kyligence_jni_engine_LocalEngine_close(JNIEnv * env, jobject obj) { - jclass this_class = env->GetObjectClass(obj); - jfieldID executor_field_id = env->GetFieldID(this_class, "nativeExecutor", "J"); - jlong executor_address = env->GetLongField(obj, executor_field_id); - dbms::LocalExecutor* executor = reinterpret_cast(executor_address); + jlong executor_address = env->GetLongField(obj, local_engine_executor_field_id); + dbms::LocalExecutor * executor = reinterpret_cast(executor_address); delete executor; } +#ifdef __cplusplus +} +#endif From 55099c49a1bc8c1c75c79cb1ea0349fb59863a52 Mon Sep 17 00:00:00 2001 From: "neng.liu" Date: Fri, 3 Dec 2021 14:15:59 +0000 Subject: [PATCH 219/472] add jni common --- utils/local-engine/jni_common.h | 41 +++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 utils/local-engine/jni_common.h diff --git a/utils/local-engine/jni_common.h 
b/utils/local-engine/jni_common.h new file mode 100644 index 000000000000..47eaa1bbbc34 --- /dev/null +++ b/utils/local-engine/jni_common.h @@ -0,0 +1,41 @@ +#pragma once + +static jclass io_exception_class; +static jclass runtime_exception_class; +static jclass unsupportedoperation_exception_class; +static jclass illegal_access_exception_class; +static jclass illegal_argument_exception_class; + +jclass CreateGlobalClassReference(JNIEnv* env, const char* class_name) { + jclass local_class = env->FindClass(class_name); + jclass global_class = (jclass)env->NewGlobalRef(local_class); + env->DeleteLocalRef(local_class); + if (global_class == nullptr) { + std::string error_message = + "Unable to createGlobalClassReference for" + std::string(class_name); + env->ThrowNew(illegal_access_exception_class, error_message.c_str()); + } + return global_class; +} + +jmethodID GetMethodID(JNIEnv* env, jclass this_class, const char* name, const char* sig) { + jmethodID ret = env->GetMethodID(this_class, name, sig); + if (ret == nullptr) { + std::string error_message = "Unable to find method " + std::string(name) + + " within signature" + std::string(sig); + env->ThrowNew(illegal_access_exception_class, error_message.c_str()); + } + + return ret; +} + +jmethodID GetStaticMethodID(JNIEnv* env, jclass this_class, const char* name, + const char* sig) { + jmethodID ret = env->GetStaticMethodID(this_class, name, sig); + if (ret == nullptr) { + std::string error_message = "Unable to find static method " + std::string(name) + + " within signature" + std::string(sig); + env->ThrowNew(illegal_access_exception_class, error_message.c_str()); + } + return ret; +} \ No newline at end of file From 29c1f450f2e2d22570c9efcd9bc0d9e7af3dc212 Mon Sep 17 00:00:00 2001 From: Neng Liu Date: Fri, 3 Dec 2021 22:12:51 +0800 Subject: [PATCH 220/472] add spark to row --- .../Parser/CHColumnToSparkRow.cpp | 15 +++++-- .../local-engine/Parser/CHColumnToSparkRow.h | 19 +++++--- .../Parser/SerializedPlanParser.cpp | 43 +++++++------------ .../Parser/SerializedPlanParser.h | 23 ++++++++-- .../io/kyligence/jni/engine/SparkRowInfo.java | 13 ++++++ 5 files changed, 74 insertions(+), 39 deletions(-) create mode 100644 utils/local-engine/java/src/main/java/io/kyligence/jni/engine/SparkRowInfo.java diff --git a/utils/local-engine/Parser/CHColumnToSparkRow.cpp b/utils/local-engine/Parser/CHColumnToSparkRow.cpp index efb6442efe58..e28ccba6f300 100644 --- a/utils/local-engine/Parser/CHColumnToSparkRow.cpp +++ b/utils/local-engine/Parser/CHColumnToSparkRow.cpp @@ -186,7 +186,11 @@ const std::vector & local_engine::SparkRowInfo::getLengths() const { return lengths_; } -void local_engine::CHColumnToSparkRow::convertCHColumnToSparkRow(Block & block) +int64_t SparkRowInfo::getTotalBytes() const +{ + return total_bytes_; +} +std::unique_ptr local_engine::CHColumnToSparkRow::convertCHColumnToSparkRow(Block & block) { std::unique_ptr spark_row_info = std::make_unique(block); // Calculated the offsets_ and total memory size based on lengths_ @@ -195,13 +199,18 @@ void local_engine::CHColumnToSparkRow::convertCHColumnToSparkRow(Block & block) spark_row_info->offsets_[i] = spark_row_info->offsets_[i - 1] + spark_row_info->lengths_[i - 1]; total_memory_size += spark_row_info->lengths_[i]; } - - spark_row_info->buffer_address_ = new uint8_t[total_memory_size]; + spark_row_info->total_bytes_ = total_memory_size; + spark_row_info->buffer_address_ = reinterpret_cast(alloc(total_memory_size)); for (auto i = 0; i < spark_row_info->num_cols_; i++) { auto array = 
block.getByPosition(i); int64_t field_offset = getFieldOffset(spark_row_info->nullBitsetWidthInBytes_, i); writeValue(spark_row_info->buffer_address_, field_offset, array, i, spark_row_info->num_rows_, spark_row_info->offsets_, spark_row_info->buffer_cursor_); } + return spark_row_info; +} +void CHColumnToSparkRow::freeMem(uint8_t * address, size_t size) +{ + free(address, size); } } diff --git a/utils/local-engine/Parser/CHColumnToSparkRow.h b/utils/local-engine/Parser/CHColumnToSparkRow.h index 811f8021bb04..5eb4ed1c8727 100644 --- a/utils/local-engine/Parser/CHColumnToSparkRow.h +++ b/utils/local-engine/Parser/CHColumnToSparkRow.h @@ -1,14 +1,12 @@ #pragma once #include #include +#include namespace local_engine { -class CHColumnToSparkRow -{ -public: - void convertCHColumnToSparkRow(DB::Block & block); -}; + +class CHColumnToSparkRow; class SparkRowInfo { @@ -25,8 +23,10 @@ class SparkRowInfo void setBufferAddress(unsigned char * bufferAddress); const std::vector & getOffsets() const; const std::vector & getLengths() const; + int64_t getTotalBytes() const; private: + int64_t total_bytes_; int64_t nullBitsetWidthInBytes_; int64_t num_cols_; int64_t num_rows_; @@ -35,5 +35,14 @@ class SparkRowInfo std::vector offsets_; std::vector lengths_; }; + +using SparkRowInfoPtr = std::unique_ptr; + +class CHColumnToSparkRow : private Allocator +{ +public: + std::unique_ptr convertCHColumnToSparkRow(DB::Block & block); + void freeMem(uint8_t * address, size_t size); +}; } diff --git a/utils/local-engine/Parser/SerializedPlanParser.cpp b/utils/local-engine/Parser/SerializedPlanParser.cpp index b9207bdf2f44..040e6586bc1c 100644 --- a/utils/local-engine/Parser/SerializedPlanParser.cpp +++ b/utils/local-engine/Parser/SerializedPlanParser.cpp @@ -184,46 +184,35 @@ void dbms::LocalExecutor::execute(DB::QueryPlanPtr query_plan) this->query_pipeline = query_plan->buildQueryPipeline(optimization_settings, BuildQueryPipelineSettings()); this->executor = std::make_unique(*query_pipeline); this->header = query_plan->getCurrentDataStream().header; - this->ch_column_to_arrow_column = std::make_unique(header, "Arrow", false); + this->ch_column_to_spark_row = std::make_unique(); } -void dbms::LocalExecutor::writeChunkToArrowString(DB::Chunk &chunk, std::string & arrow_chunk) +std::unique_ptr dbms::LocalExecutor::writeBlockToSparkRow(DB::Block &block) { - std::shared_ptr arrow_table; - ch_column_to_arrow_column->chChunkToArrowTable(arrow_table, chunk, chunk.getNumColumns()); - DB::WriteBufferFromString buf(arrow_chunk); - auto out_stream = std::make_shared(buf); - arrow::Result> writer_status; - writer_status = arrow::ipc::MakeFileWriter(out_stream.get(), arrow_table->schema()); - if (!writer_status.ok()) - throw std::runtime_error("Error while opening a table writer"); - auto writer = *writer_status; - auto write_status = writer->WriteTable(*arrow_table, 1000000); - if (!write_status.ok()) - { - throw std::runtime_error("Error while writing a table"); - } - auto close_status = writer->Close(); - if (!close_status.ok()) - { - throw std::runtime_error("Error while close a table"); - } + return this->ch_column_to_spark_row->convertCHColumnToSparkRow(block); } bool dbms::LocalExecutor::hasNext() { bool has_next; - if (!this->current_chunk || this->current_chunk->empty()) + if (!this->current_chunk || this->current_chunk->rows() == 0) { - this->current_chunk = std::make_unique(); + this->current_chunk = std::make_unique(this->header); has_next = this->executor->pull(*this->current_chunk); } else { has_next = true; } 
return has_next; } -std::string dbms::LocalExecutor::next() +local_engine::SparkRowInfoPtr dbms::LocalExecutor::next() { - std::string arrow_chunk; - writeChunkToArrowString(*this->current_chunk, arrow_chunk); + local_engine::SparkRowInfoPtr row_info = writeBlockToSparkRow(*this->current_chunk); this->current_chunk.reset(); - return arrow_chunk; + if (this->spark_buffer) + { + this->ch_column_to_spark_row->freeMem(spark_buffer->address, spark_buffer->size); + this->spark_buffer.reset(); + } + this->spark_buffer = std::make_unique(); + this->spark_buffer->address = row_info->getBufferAddress(); + this->spark_buffer->size = row_info->getTotalBytes(); + return row_info; } diff --git a/utils/local-engine/Parser/SerializedPlanParser.h b/utils/local-engine/Parser/SerializedPlanParser.h index 9d3525d557d0..2a2645c5d61b 100644 --- a/utils/local-engine/Parser/SerializedPlanParser.h +++ b/utils/local-engine/Parser/SerializedPlanParser.h @@ -12,6 +12,7 @@ #include #include #include +#include "CHColumnToSparkRow.h" namespace DB { @@ -70,21 +71,35 @@ class SerializedPlanParser static void parse(DB::QueryPlan & query_plan, const io::substrait::ProjectRel& rel); }; +struct SparkBuffer +{ + uint8_t * address; + size_t size; +}; class LocalExecutor { public: void execute(QueryPlanPtr query_plan); - std::string next(); + local_engine::SparkRowInfoPtr next(); bool hasNext(); + ~LocalExecutor() + { + if (this->spark_buffer) + { + this->ch_column_to_spark_row->freeMem(spark_buffer->address, spark_buffer->size); + this->spark_buffer.reset(); + } + } private: - void writeChunkToArrowString(Chunk& chunk, std::string & arrow_chunk); + std::unique_ptr writeBlockToSparkRow(DB::Block & block); QueryPipelinePtr query_pipeline; std::unique_ptr executor; Block header; - std::unique_ptr ch_column_to_arrow_column; - std::unique_ptr current_chunk; + std::unique_ptr ch_column_to_spark_row; + std::unique_ptr current_chunk; + std::unique_ptr spark_buffer; }; } diff --git a/utils/local-engine/java/src/main/java/io/kyligence/jni/engine/SparkRowInfo.java b/utils/local-engine/java/src/main/java/io/kyligence/jni/engine/SparkRowInfo.java new file mode 100644 index 000000000000..387cc34bc6a0 --- /dev/null +++ b/utils/local-engine/java/src/main/java/io/kyligence/jni/engine/SparkRowInfo.java @@ -0,0 +1,13 @@ +package io.kyligence.jni.engine; + +public class SparkRowInfo { + public long[] offsets; + public long[] lengths; + public long memoryAddress; + + public SparkRowInfo(long[] offsets, long[] lengths, long memoryAddress) { + this.offsets = offsets; + this.lengths = lengths; + this.memoryAddress = memoryAddress; + } +} From bd16ad3e6eff5faa618359f021e6bdb68d1e979f Mon Sep 17 00:00:00 2001 From: Neng Liu Date: Fri, 3 Dec 2021 22:39:15 +0800 Subject: [PATCH 221/472] finish unsafe row code --- .../io/kyligence/jni/engine/LocalEngine.java | 2 +- .../kyligence/jni/engine/LocalEngineTest.java | 4 +-- utils/local-engine/local_engine_jni.cpp | 27 +++++++++++++++---- 3 files changed, 25 insertions(+), 8 deletions(-) diff --git a/utils/local-engine/java/src/main/java/io/kyligence/jni/engine/LocalEngine.java b/utils/local-engine/java/src/main/java/io/kyligence/jni/engine/LocalEngine.java index 551593afccba..57c3dca016d3 100644 --- a/utils/local-engine/java/src/main/java/io/kyligence/jni/engine/LocalEngine.java +++ b/utils/local-engine/java/src/main/java/io/kyligence/jni/engine/LocalEngine.java @@ -27,7 +27,7 @@ public LocalEngine(byte[] plan) { public native boolean hasNext(); - public native byte[] next(); + public native SparkRowInfo next(); 
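+    /**
+     * Releases the native LocalExecutor (and any row buffer it still holds) allocated by execute().
+     */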
@Override diff --git a/utils/local-engine/java/src/test/java/io/kyligence/jni/engine/LocalEngineTest.java b/utils/local-engine/java/src/test/java/io/kyligence/jni/engine/LocalEngineTest.java index c65c96ee4a27..21bb5680df52 100644 --- a/utils/local-engine/java/src/test/java/io/kyligence/jni/engine/LocalEngineTest.java +++ b/utils/local-engine/java/src/test/java/io/kyligence/jni/engine/LocalEngineTest.java @@ -28,7 +28,7 @@ public void testLocalEngine() throws Exception{ LocalEngine localEngine = new LocalEngine(plan.getBytes(StandardCharsets.UTF_8)); localEngine.execute(); Assert.assertTrue(localEngine.hasNext()); - byte[] data = localEngine.next(); - Assert.assertEquals(7106, data.length); + SparkRowInfo data = localEngine.next(); + Assert.assertEquals(150, data.offsets.length); } } diff --git a/utils/local-engine/local_engine_jni.cpp b/utils/local-engine/local_engine_jni.cpp index d387b61eb95e..6c94697c82cd 100644 --- a/utils/local-engine/local_engine_jni.cpp +++ b/utils/local-engine/local_engine_jni.cpp @@ -50,6 +50,9 @@ static jfieldID local_engine_plan_field_id; static jclass local_engine_class; static jfieldID local_engine_executor_field_id; +static jclass spark_row_info_class; +static jmethodID spark_row_info_constructor; + jint JNI_OnLoad(JavaVM * vm, void * reserved) { JNIEnv * env; @@ -66,6 +69,9 @@ jint JNI_OnLoad(JavaVM * vm, void * reserved) local_engine_class = CreateGlobalClassReference(env, "Lio/kyligence/jni/engine/LocalEngine"); local_engine_plan_field_id = env->GetFieldID(local_engine_class, "plan", "[B"); local_engine_executor_field_id = env->GetFieldID(local_engine_class, "nativeExecutor", "J"); + + spark_row_info_class = CreateGlobalClassReference(env, "Lio/kyligence/jni/engine/SparkRowInfo"); + spark_row_info_constructor = GetMethodID(env, spark_row_info_class, "", "([J[JJ)V"); } void JNI_OnUnload(JavaVM * vm, void * reserved) @@ -111,14 +117,25 @@ jboolean Java_io_kyligence_jni_engine_LocalEngine_hasNext(JNIEnv * env, jobject dbms::LocalExecutor * executor = reinterpret_cast(executor_address); return executor->hasNext(); } -jbyteArray Java_io_kyligence_jni_engine_LocalEngine_next(JNIEnv * env, jobject obj) +jobject Java_io_kyligence_jni_engine_LocalEngine_next(JNIEnv * env, jobject obj) { jlong executor_address = env->GetLongField(obj, local_engine_executor_field_id); dbms::LocalExecutor * executor = reinterpret_cast(executor_address); - std::string arrow_batch = executor->next(); - jbyteArray result = env->NewByteArray(arrow_batch.size()); - env->SetByteArrayRegion(result, 0, arrow_batch.size(), reinterpret_cast(arrow_batch.data())); - return result; + local_engine::SparkRowInfoPtr spark_row_info = executor->next(); + + auto *offsets_arr = env->NewLongArray(spark_row_info->getNumRows()); + const auto *offsets_src = reinterpret_cast(spark_row_info->getOffsets().data()); + env->SetLongArrayRegion(offsets_arr, 0, spark_row_info->getNumRows(), offsets_src); + auto *lengths_arr = env->NewLongArray(spark_row_info->getNumRows()); + const auto *lengths_src = reinterpret_cast(spark_row_info->getLengths().data()); + env->SetLongArrayRegion(lengths_arr, 0, spark_row_info->getNumRows(), lengths_src); + int64_t address = reinterpret_cast(spark_row_info->getBufferAddress()); + + jobject spark_row_info_object = env->NewObject( + spark_row_info_class, spark_row_info_constructor, + offsets_arr, lengths_arr, address); + + return spark_row_info_object; } void Java_io_kyligence_jni_engine_LocalEngine_close(JNIEnv * env, jobject obj) { From cefcc8eaff17f3df885e5833d9bfe658a7048fb3 Mon 
Sep 17 00:00:00 2001 From: "neng.liu" Date: Fri, 3 Dec 2021 16:06:27 +0000 Subject: [PATCH 222/472] test complete --- utils/local-engine/CMakeLists.txt | 1 + utils/local-engine/Parser/CHColumnToSparkRow.cpp | 2 +- .../test/java/io/kyligence/jni/engine/LocalEngineTest.java | 6 +++++- utils/local-engine/local_engine_jni.cpp | 7 ++++--- utils/local-engine/tests/gtest_local_engine.cpp | 5 +++-- 5 files changed, 14 insertions(+), 7 deletions(-) diff --git a/utils/local-engine/CMakeLists.txt b/utils/local-engine/CMakeLists.txt index ae4a5e4da186..fccaca4a87db 100644 --- a/utils/local-engine/CMakeLists.txt +++ b/utils/local-engine/CMakeLists.txt @@ -23,6 +23,7 @@ set(JNI_NATIVE_SOURCES set(JAVA_MAIN_CLASSES ${JAVA_MAIN_CLASS_PATH}/io/kyligence/jni/engine/LocalEngine.java + ${JAVA_MAIN_CLASS_PATH}/io/kyligence/jni/engine/SparkRowInfo.java ) # Create the jni header file (from the java class). set(JNI_HEADERS_DIR ${PROJECT_SOURCE_DIR}/utils/local-engine/include) diff --git a/utils/local-engine/Parser/CHColumnToSparkRow.cpp b/utils/local-engine/Parser/CHColumnToSparkRow.cpp index e28ccba6f300..6a3ec9209a5f 100644 --- a/utils/local-engine/Parser/CHColumnToSparkRow.cpp +++ b/utils/local-engine/Parser/CHColumnToSparkRow.cpp @@ -118,7 +118,7 @@ void writeValue(uint8_t* buffer_address, int64_t field_offset, } else { - throw std::runtime_error("doesn't support type "+ nested_col->getDataType().) + throw std::runtime_error("doesn't support type "+ std::string(getTypeName(nested_col->getDataType()))); } } diff --git a/utils/local-engine/java/src/test/java/io/kyligence/jni/engine/LocalEngineTest.java b/utils/local-engine/java/src/test/java/io/kyligence/jni/engine/LocalEngineTest.java index 21bb5680df52..0fe2a457d21d 100644 --- a/utils/local-engine/java/src/test/java/io/kyligence/jni/engine/LocalEngineTest.java +++ b/utils/local-engine/java/src/test/java/io/kyligence/jni/engine/LocalEngineTest.java @@ -7,6 +7,7 @@ import org.apache.commons.io.IOUtils; import org.junit.Assert; import org.junit.Before; +import org.junit.Ignore; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.JUnit4; @@ -14,12 +15,14 @@ import java.nio.charset.StandardCharsets; @RunWith(JUnit4.class) +//@Ignore public class LocalEngineTest { @Before public void setup() { + System.out.println("start load"); System.load("/home/kyligence/Documents/code/ClickHouse/cmake-build-debug/utils/local-engine/liblocal_engine_jnid.so"); - + System.out.println("load success"); } @Test @@ -29,6 +32,7 @@ public void testLocalEngine() throws Exception{ localEngine.execute(); Assert.assertTrue(localEngine.hasNext()); SparkRowInfo data = localEngine.next(); + Assert.assertTrue(data.memoryAddress > 0); Assert.assertEquals(150, data.offsets.length); } } diff --git a/utils/local-engine/local_engine_jni.cpp b/utils/local-engine/local_engine_jni.cpp index 6c94697c82cd..94979f1bd719 100644 --- a/utils/local-engine/local_engine_jni.cpp +++ b/utils/local-engine/local_engine_jni.cpp @@ -66,12 +66,13 @@ jint JNI_OnLoad(JavaVM * vm, void * reserved) illegal_access_exception_class = CreateGlobalClassReference(env, "Ljava/lang/IllegalAccessException;"); illegal_argument_exception_class = CreateGlobalClassReference(env, "Ljava/lang/IllegalArgumentException;"); - local_engine_class = CreateGlobalClassReference(env, "Lio/kyligence/jni/engine/LocalEngine"); + local_engine_class = CreateGlobalClassReference(env, "Lio/kyligence/jni/engine/LocalEngine;"); local_engine_plan_field_id = env->GetFieldID(local_engine_class, "plan", "[B"); 
local_engine_executor_field_id = env->GetFieldID(local_engine_class, "nativeExecutor", "J"); - spark_row_info_class = CreateGlobalClassReference(env, "Lio/kyligence/jni/engine/SparkRowInfo"); - spark_row_info_constructor = GetMethodID(env, spark_row_info_class, "", "([J[JJ)V"); + spark_row_info_class = CreateGlobalClassReference(env, "Lio/kyligence/jni/engine/SparkRowInfo;"); + spark_row_info_constructor = env->GetMethodID(spark_row_info_class, "", "([J[JJ)V"); + return JNI_VERSION_1_8; } void JNI_OnUnload(JavaVM * vm, void * reserved) diff --git a/utils/local-engine/tests/gtest_local_engine.cpp b/utils/local-engine/tests/gtest_local_engine.cpp index f4c344c59035..07e5172c6324 100644 --- a/utils/local-engine/tests/gtest_local_engine.cpp +++ b/utils/local-engine/tests/gtest_local_engine.cpp @@ -5,6 +5,7 @@ #include #include "testConfig.h" #include +#include TEST(TestSelect, ReadRel) { @@ -36,8 +37,8 @@ TEST(TestSelect, ReadRel) while(local_executor.hasNext()) { std::cout << "fetch batch" <getNumRows(), 0); } } From d8771b4c91dbcd8ec4c41983dd3cdcedad0e6c77 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 3 Dec 2021 16:13:41 +0000 Subject: [PATCH 223/472] Backport #32157 to 21.9: Fix active replicas count in quorum inserts --- .../MergeTree/ReplicatedMergeTreeSink.cpp | 22 ++++++++++++------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index c81f587cbbca..e2bc4f6f49bf 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -76,18 +76,24 @@ void ReplicatedMergeTreeSink::checkQuorumPrecondition(zkutil::ZooKeeperPtr & zoo { quorum_info.status_path = storage.zookeeper_path + "/quorum/status"; + Strings replicas = zookeeper->getChildren(fs::path(storage.zookeeper_path) / "replicas"); + std::vector> replicas_status_futures; + replicas_status_futures.reserve(replicas.size()); + for (const auto & replica : replicas) + if (replica != storage.replica_name) + replicas_status_futures.emplace_back(zookeeper->asyncExists(fs::path(storage.zookeeper_path) / "replicas" / replica / "is_active")); + std::future is_active_future = zookeeper->asyncTryGet(storage.replica_path + "/is_active"); std::future host_future = zookeeper->asyncTryGet(storage.replica_path + "/host"); - /// List of live replicas. All of them register an ephemeral node for leader_election. - - Coordination::Stat leader_election_stat; - zookeeper->get(storage.zookeeper_path + "/leader_election", &leader_election_stat); + size_t active_replicas = 1; /// Assume current replica is active (will check below) + for (auto & status : replicas_status_futures) + if (status.get().error == Coordination::Error::ZOK) + ++active_replicas; - if (leader_election_stat.numChildren < static_cast(quorum)) - throw Exception("Number of alive replicas (" - + toString(leader_election_stat.numChildren) + ") is less than requested quorum (" + toString(quorum) + ").", - ErrorCodes::TOO_FEW_LIVE_REPLICAS); + if (active_replicas < quorum) + throw Exception(ErrorCodes::TOO_FEW_LIVE_REPLICAS, "Number of alive replicas ({}) is less than requested quorum ({}).", + active_replicas, quorum); /** Is there a quorum for the last part for which a quorum is needed? * Write of all the parts with the included quorum is linearly ordered. 
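The hunk above changes the quorum precondition to count replicas by the presence of their per-replica is_active flag instead of by the number of leader_election children. Below is a minimal standalone sketch of that check, assuming ZooKeeper lookups are replaced by an in-memory map; the names countActiveReplicas and checkQuorum are illustrative only and not part of the patch.

    #include <cstddef>
    #include <map>
    #include <stdexcept>
    #include <string>

    // Count the current replica plus every other replica whose is_active flag is present.
    size_t countActiveReplicas(const std::string & current_replica,
                               const std::map<std::string, bool> & is_active)
    {
        size_t active = 1; /// the current replica is assumed alive (the real code verifies this below)
        for (const auto & [replica, alive] : is_active)
            if (replica != current_replica && alive)
                ++active;
        return active;
    }

    // Reject the insert when fewer replicas are alive than the requested quorum.
    void checkQuorum(size_t quorum, size_t active_replicas)
    {
        if (active_replicas < quorum)
            throw std::runtime_error("Number of alive replicas (" + std::to_string(active_replicas)
                                     + ") is less than requested quorum (" + std::to_string(quorum) + ").");
    }

    int main()
    {
        std::map<std::string, bool> replicas{{"r1", true}, {"r2", false}, {"r3", true}};
        checkQuorum(2, countActiveReplicas("r1", replicas)); /// passes: r1 and r3 are active
        // checkQuorum(3, countActiveReplicas("r1", replicas)); /// would throw: r2 has no is_active flag
    }

The real change gathers these flags with asynchronous exists() checks on each replica's is_active node, as the diff shows, rather than counting children of leader_election.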
From 97e854368b59ea9e2a7a838251750bcb08b24172 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Sun, 5 Dec 2021 19:01:32 +0000 Subject: [PATCH 224/472] Backport #31988 to 21.9: Fix skipping columns while writing protobuf --- src/Formats/ProtobufSerializer.cpp | 618 ++++++++++++------ src/Formats/ProtobufSerializer.h | 4 +- ..._format_skipped_column_in_nested.reference | 27 + ...rotobuf_format_skipped_column_in_nested.sh | 55 ++ ...obuf_format_skipped_column_in_nested.proto | 29 + 5 files changed, 541 insertions(+), 192 deletions(-) create mode 100644 tests/queries/0_stateless/00825_protobuf_format_skipped_column_in_nested.reference create mode 100755 tests/queries/0_stateless/00825_protobuf_format_skipped_column_in_nested.sh create mode 100644 tests/queries/0_stateless/format_schemas/00825_protobuf_format_skipped_column_in_nested.proto diff --git a/src/Formats/ProtobufSerializer.cpp b/src/Formats/ProtobufSerializer.cpp index baeefa8f98e4..64c7b68f8333 100644 --- a/src/Formats/ProtobufSerializer.cpp +++ b/src/Formats/ProtobufSerializer.cpp @@ -28,6 +28,7 @@ # include # include # include +# include # include # include # include @@ -139,6 +140,15 @@ namespace } + WriteBuffer & writeIndent(WriteBuffer & out, size_t size) { return out << String(size * 4, ' '); } + + + [[noreturn]] void wrongNumberOfColumns(size_t number_of_columns, const String & expected) + { + throw Exception(ErrorCodes::LOGICAL_ERROR, "Wrong number of columns: expected {}, specified {}", expected, number_of_columns); + } + + struct ProtobufReaderOrWriter { ProtobufReaderOrWriter(ProtobufReader & reader_) : reader(&reader_) {} // NOLINT(google-explicit-constructor) @@ -152,8 +162,12 @@ namespace class ProtobufSerializerSingleValue : public ProtobufSerializer { protected: - ProtobufSerializerSingleValue(const FieldDescriptor & field_descriptor_, const ProtobufReaderOrWriter & reader_or_writer_) - : field_descriptor(field_descriptor_) + ProtobufSerializerSingleValue( + const std::string_view & column_name_, + const FieldDescriptor & field_descriptor_, + const ProtobufReaderOrWriter & reader_or_writer_) + : column_name(column_name_) + , field_descriptor(field_descriptor_) , field_typeid(field_descriptor_.type()) , field_tag(field_descriptor.number()) , reader(reader_or_writer_.reader) @@ -164,13 +178,15 @@ namespace void setColumns(const ColumnPtr * columns, [[maybe_unused]] size_t num_columns) override { - assert(num_columns == 1); + if (num_columns != 1) + wrongNumberOfColumns(num_columns, "1"); column = columns[0]; } void setColumns(const MutableColumnPtr * columns, [[maybe_unused]] size_t num_columns) override { - assert(num_columns == 1); + if (num_columns != 1) + wrongNumberOfColumns(num_columns, "1"); column = columns[0]->getPtr(); } @@ -259,14 +275,28 @@ namespace return result; } + [[noreturn]] void incompatibleColumnType(const std::string_view & column_type) const + { + throw Exception( + ErrorCodes::DATA_TYPE_INCOMPATIBLE_WITH_PROTOBUF_FIELD, + "The column {} ({}) cannot be serialized to the field {} ({}) due to their types are not compatible", + quoteString(column_name), + column_type, + quoteString(field_descriptor.full_name()), + field_descriptor.type_name()); + } + [[noreturn]] void cannotConvertValue(const std::string_view & src_value, const std::string_view & src_type_name, const std::string_view & dest_type_name) const { throw Exception( - "Could not convert value '" + String{src_value} + "' from type " + String{src_type_name} + " to type " + String{dest_type_name} + - " while " + (reader ? 
"reading" : "writing") + " field " + field_descriptor.name(), + "Could not convert value '" + String{src_value} + "' from type " + String{src_type_name} + " to type " + + String{dest_type_name} + " while " + (reader ? "reading" : "writing") + " field " + + quoteString(field_descriptor.name()) + " " + (reader ? "for inserting into" : "extracted from") + " column " + + quoteString(column_name), ErrorCodes::PROTOBUF_BAD_CAST); } + const String column_name; const FieldDescriptor & field_descriptor; const FieldTypeId field_typeid; const int field_tag; @@ -289,8 +319,8 @@ namespace public: using ColumnType = ColumnVector; - ProtobufSerializerNumber(const FieldDescriptor & field_descriptor_, const ProtobufReaderOrWriter & reader_or_writer_) - : ProtobufSerializerSingleValue(field_descriptor_, reader_or_writer_) + ProtobufSerializerNumber(const std::string_view & column_name_, const FieldDescriptor & field_descriptor_, const ProtobufReaderOrWriter & reader_or_writer_) + : ProtobufSerializerSingleValue(column_name_, field_descriptor_, reader_or_writer_) { setFunctions(); } @@ -319,6 +349,13 @@ namespace column_vector.insertValue(getDefaultNumber()); } + void describeTree(WriteBuffer & out, size_t indent) const override + { + writeIndent(out, indent) << "ProtobufSerializerNumber<" << TypeName << ">: column " << quoteString(column_name) + << " -> field " << quoteString(field_descriptor.full_name()) << " (" << field_descriptor.type_name() + << ")\n"; + } + private: void setFunctions() { @@ -469,7 +506,7 @@ namespace case FieldTypeId::TYPE_ENUM: { if (std::is_floating_point_v) - failedToSetFunctions(); + incompatibleColumnType(TypeName); write_function = [this](NumberType value) { @@ -484,18 +521,10 @@ namespace } default: - failedToSetFunctions(); + incompatibleColumnType(TypeName); } } - [[noreturn]] void failedToSetFunctions() const - { - throw Exception( - "The field " + quoteString(field_descriptor.full_name()) + " has an incompatible type " + field_descriptor.type_name() - + " for serialization of the data type " + quoteString(TypeName), - ErrorCodes::DATA_TYPE_INCOMPATIBLE_WITH_PROTOBUF_FIELD); - } - NumberType getDefaultNumber() { if (!default_number) @@ -529,10 +558,11 @@ namespace using ColumnType = std::conditional_t; ProtobufSerializerString( + const std::string_view & column_name_, const std::shared_ptr & fixed_string_data_type_, const google::protobuf::FieldDescriptor & field_descriptor_, const ProtobufReaderOrWriter & reader_or_writer_) - : ProtobufSerializerSingleValue(field_descriptor_, reader_or_writer_) + : ProtobufSerializerSingleValue(column_name_, field_descriptor_, reader_or_writer_) , fixed_string_data_type(fixed_string_data_type_) , n(fixed_string_data_type->getN()) { @@ -542,8 +572,10 @@ namespace } ProtobufSerializerString( - const google::protobuf::FieldDescriptor & field_descriptor_, const ProtobufReaderOrWriter & reader_or_writer_) - : ProtobufSerializerSingleValue(field_descriptor_, reader_or_writer_) + const std::string_view & column_name_, + const google::protobuf::FieldDescriptor & field_descriptor_, + const ProtobufReaderOrWriter & reader_or_writer_) + : ProtobufSerializerSingleValue(column_name_, field_descriptor_, reader_or_writer_) { static_assert(!is_fixed_string, "This constructor for String only"); setFunctions(); @@ -649,6 +681,13 @@ namespace } } + void describeTree(WriteBuffer & out, size_t indent) const override + { + writeIndent(out, indent) << "ProtobufSerializerString<" << (is_fixed_string ? 
"fixed" : "") << ">: column " + << quoteString(column_name) << " -> field " << quoteString(field_descriptor.full_name()) << " (" + << field_descriptor.type_name() << ")\n"; + } + private: void setFunctions() { @@ -799,18 +838,10 @@ namespace } default: - failedToSetFunctions(); + this->incompatibleColumnType(is_fixed_string ? "FixedString" : "String"); } } - [[noreturn]] void failedToSetFunctions() - { - throw Exception( - "The field " + quoteString(field_descriptor.full_name()) + " has an incompatible type " + field_descriptor.type_name() - + " for serialization of the data type " + quoteString(is_fixed_string ? "FixedString" : "String"), - ErrorCodes::DATA_TYPE_INCOMPATIBLE_WITH_PROTOBUF_FIELD); - } - const PaddedPODArray & getDefaultString() { if (!default_string) @@ -890,16 +921,24 @@ namespace using BaseClass = ProtobufSerializerNumber; ProtobufSerializerEnum( + const std::string_view & column_name_, const std::shared_ptr & enum_data_type_, const FieldDescriptor & field_descriptor_, const ProtobufReaderOrWriter & reader_or_writer_) - : BaseClass(field_descriptor_, reader_or_writer_), enum_data_type(enum_data_type_) + : BaseClass(column_name_, field_descriptor_, reader_or_writer_), enum_data_type(enum_data_type_) { assert(enum_data_type); setFunctions(); prepareEnumMapping(); } + void describeTree(WriteBuffer & out, size_t indent) const override + { + writeIndent(out, indent) << "ProtobufSerializerEnum<" << TypeName << ">: column " << quoteString(this->column_name) + << " -> field " << quoteString(this->field_descriptor.full_name()) << " (" + << this->field_descriptor.type_name() << ")\n"; + } + private: void setFunctions() { @@ -964,18 +1003,10 @@ namespace } default: - failedToSetFunctions(); + this->incompatibleColumnType(enum_data_type->getName()); } } - [[noreturn]] void failedToSetFunctions() - { - throw Exception( - "The field " + quoteString(this->field_descriptor.full_name()) + " has an incompatible type " + this->field_descriptor.type_name() - + " for serialization of the data type " + quoteString(enum_data_type->getName()), - ErrorCodes::DATA_TYPE_INCOMPATIBLE_WITH_PROTOBUF_FIELD); - } - void checkEnumDataTypeValue(NumberType value) { enum_data_type->findByValue(value); /// Throws an exception if the value isn't defined in the DataTypeEnum. 
@@ -1089,10 +1120,11 @@ namespace using ColumnType = ColumnDecimal; ProtobufSerializerDecimal( + const std::string_view & column_name_, const DataTypeDecimalBase & decimal_data_type_, const FieldDescriptor & field_descriptor_, const ProtobufReaderOrWriter & reader_or_writer_) - : ProtobufSerializerSingleValue(field_descriptor_, reader_or_writer_) + : ProtobufSerializerSingleValue(column_name_, field_descriptor_, reader_or_writer_) , precision(decimal_data_type_.getPrecision()) , scale(decimal_data_type_.getScale()) { @@ -1123,6 +1155,13 @@ namespace column_decimal.insertValue(getDefaultDecimal()); } + void describeTree(WriteBuffer & out, size_t indent) const override + { + writeIndent(out, indent) << "ProtobufSerializerDecimal<" << TypeName << ">: column " << quoteString(column_name) + << " -> field " << quoteString(field_descriptor.full_name()) << " (" << field_descriptor.type_name() + << ")\n"; + } + private: void setFunctions() { @@ -1227,7 +1266,7 @@ namespace case FieldTypeId::TYPE_BOOL: { if (std::is_same_v) - failedToSetFunctions(); + incompatibleColumnType(TypeName); else { write_function = [this](const DecimalType & decimal) @@ -1281,18 +1320,10 @@ namespace } default: - failedToSetFunctions(); + incompatibleColumnType(TypeName); } } - [[noreturn]] void failedToSetFunctions() - { - throw Exception( - "The field " + quoteString(field_descriptor.full_name()) + " has an incompatible type " + field_descriptor.type_name() - + " for serialization of the data type " + quoteString(TypeName), - ErrorCodes::DATA_TYPE_INCOMPATIBLE_WITH_PROTOBUF_FIELD); - } - DecimalType getDefaultDecimal() { if (!default_decimal) @@ -1349,13 +1380,20 @@ namespace { public: ProtobufSerializerDate( + const std::string_view & column_name_, const FieldDescriptor & field_descriptor_, const ProtobufReaderOrWriter & reader_or_writer_) - : ProtobufSerializerNumber(field_descriptor_, reader_or_writer_) + : ProtobufSerializerNumber(column_name_, field_descriptor_, reader_or_writer_) { setFunctions(); } + void describeTree(WriteBuffer & out, size_t indent) const override + { + writeIndent(out, indent) << "ProtobufSerializerDate: column " << quoteString(column_name) << " -> field " + << quoteString(field_descriptor.full_name()) << " (" << field_descriptor.type_name() << ")\n"; + } + private: void setFunctions() { @@ -1395,7 +1433,7 @@ namespace } default: - failedToSetFunctions(); + incompatibleColumnType("Date"); } } @@ -1412,14 +1450,6 @@ namespace readDateText(date, buf); return date; } - - [[noreturn]] void failedToSetFunctions() - { - throw Exception( - "The field " + quoteString(field_descriptor.full_name()) + " has an incompatible type " + field_descriptor.type_name() - + " for serialization of the data type 'Date'", - ErrorCodes::DATA_TYPE_INCOMPATIBLE_WITH_PROTOBUF_FIELD); - } }; @@ -1428,15 +1458,22 @@ namespace { public: ProtobufSerializerDateTime( + const std::string_view & column_name_, const DataTypeDateTime & type, const FieldDescriptor & field_descriptor_, const ProtobufReaderOrWriter & reader_or_writer_) - : ProtobufSerializerNumber(field_descriptor_, reader_or_writer_), + : ProtobufSerializerNumber(column_name_, field_descriptor_, reader_or_writer_), date_lut(type.getTimeZone()) { setFunctions(); } + void describeTree(WriteBuffer & out, size_t indent) const override + { + writeIndent(out, indent) << "ProtobufSerializerDateTime: column " << quoteString(column_name) << " -> field " + << quoteString(field_descriptor.full_name()) << " (" << field_descriptor.type_name() << ")\n"; + } + protected: const 
DateLUTImpl & date_lut; @@ -1478,7 +1515,7 @@ namespace } default: - failedToSetFunctions(); + incompatibleColumnType("DateTime"); } } @@ -1497,14 +1534,6 @@ namespace tm = 0; return tm; } - - [[noreturn]] void failedToSetFunctions() - { - throw Exception( - "The field " + quoteString(field_descriptor.full_name()) + " has an incompatible type " + field_descriptor.type_name() - + " for serialization of the data type 'DateTime'", - ErrorCodes::DATA_TYPE_INCOMPATIBLE_WITH_PROTOBUF_FIELD); - } }; @@ -1513,9 +1542,10 @@ namespace { public: ProtobufSerializerUUID( + const std::string_view & column_name_, const google::protobuf::FieldDescriptor & field_descriptor_, const ProtobufReaderOrWriter & reader_or_writer_) - : ProtobufSerializerSingleValue(field_descriptor_, reader_or_writer_) + : ProtobufSerializerSingleValue(column_name_, field_descriptor_, reader_or_writer_) { setFunctions(); } @@ -1544,16 +1574,17 @@ namespace column_vector.insertDefault(); } + void describeTree(WriteBuffer & out, size_t indent) const override + { + writeIndent(out, indent) << "ProtobufSerializerUUID: column " << quoteString(column_name) << " -> field " + << quoteString(field_descriptor.full_name()) << " (" << field_descriptor.type_name() << ")\n"; + } + private: void setFunctions() { if ((field_typeid != FieldTypeId::TYPE_STRING) && (field_typeid != FieldTypeId::TYPE_BYTES)) - { - throw Exception( - "The field " + quoteString(field_descriptor.full_name()) + " has an incompatible type " + field_descriptor.type_name() - + " for serialization of the data type UUID", - ErrorCodes::DATA_TYPE_INCOMPATIBLE_WITH_PROTOBUF_FIELD); - } + incompatibleColumnType("UUID"); write_function = [this](UUID value) { @@ -1591,20 +1622,16 @@ namespace { public: ProtobufSerializerAggregateFunction( + const std::string_view & column_name_, const std::shared_ptr & aggregate_function_data_type_, const google::protobuf::FieldDescriptor & field_descriptor_, const ProtobufReaderOrWriter & reader_or_writer_) - : ProtobufSerializerSingleValue(field_descriptor_, reader_or_writer_) + : ProtobufSerializerSingleValue(column_name_, field_descriptor_, reader_or_writer_) , aggregate_function_data_type(aggregate_function_data_type_) , aggregate_function(aggregate_function_data_type->getFunction()) { if ((field_typeid != FieldTypeId::TYPE_STRING) && (field_typeid != FieldTypeId::TYPE_BYTES)) - { - throw Exception( - "The field " + quoteString(field_descriptor.full_name()) + " has an incompatible type " + field_descriptor.type_name() - + " for serialization of the data type " + quoteString(aggregate_function_data_type->getName()), - ErrorCodes::DATA_TYPE_INCOMPATIBLE_WITH_PROTOBUF_FIELD); - } + incompatibleColumnType(aggregate_function_data_type->getName()); } void writeRow(size_t row_num) override @@ -1642,6 +1669,12 @@ namespace column_af.getData().push_back(data); } + void describeTree(WriteBuffer & out, size_t indent) const override + { + writeIndent(out, indent) << "ProtobufSerializerAggregateFunction: column " << quoteString(column_name) << " -> field " + << quoteString(field_descriptor.full_name()) << " (" << field_descriptor.type_name() << ")\n"; + } + private: void dataToString(ConstAggregateDataPtr data, String & str) const { @@ -1684,7 +1717,8 @@ namespace void setColumns(const ColumnPtr * columns, [[maybe_unused]] size_t num_columns) override { - assert(num_columns == 1); + if (num_columns != 1) + wrongNumberOfColumns(num_columns, "1"); column = columns[0]; const auto & column_nullable = assert_cast(*column); ColumnPtr nested_column = 
column_nullable.getNestedColumnPtr(); @@ -1693,7 +1727,8 @@ namespace void setColumns(const MutableColumnPtr * columns, [[maybe_unused]] size_t num_columns) override { - assert(num_columns == 1); + if (num_columns != 1) + wrongNumberOfColumns(num_columns, "1"); ColumnPtr column0 = columns[0]->getPtr(); setColumns(&column0, 1); } @@ -1744,6 +1779,12 @@ namespace column_nullable.insertDefault(); } + void describeTree(WriteBuffer & out, size_t indent) const override + { + writeIndent(out, indent) << "ProtobufSerializerNullable ->\n"; + nested_serializer->describeTree(out, indent + 1); + } + private: const std::unique_ptr nested_serializer; ColumnPtr column; @@ -1761,7 +1802,8 @@ namespace void setColumns(const ColumnPtr * columns, [[maybe_unused]] size_t num_columns) override { - assert(num_columns == 1); + if (num_columns != 1) + wrongNumberOfColumns(num_columns, "1"); const auto & column_map = assert_cast(*columns[0]); ColumnPtr nested_column = column_map.getNestedColumnPtr(); nested_serializer->setColumns(&nested_column, 1); @@ -1769,7 +1811,8 @@ namespace void setColumns(const MutableColumnPtr * columns, [[maybe_unused]] size_t num_columns) override { - assert(num_columns == 1); + if (num_columns != 1) + wrongNumberOfColumns(num_columns, "1"); ColumnPtr column0 = columns[0]->getPtr(); setColumns(&column0, 1); } @@ -1778,6 +1821,12 @@ namespace void readRow(size_t row_num) override { nested_serializer->readRow(row_num); } void insertDefaults(size_t row_num) override { nested_serializer->insertDefaults(row_num); } + void describeTree(WriteBuffer & out, size_t indent) const override + { + writeIndent(out, indent) << "ProtobufSerializerMap ->\n"; + nested_serializer->describeTree(out, indent + 1); + } + private: const std::unique_ptr nested_serializer; }; @@ -1794,7 +1843,8 @@ namespace void setColumns(const ColumnPtr * columns, [[maybe_unused]] size_t num_columns) override { - assert(num_columns == 1); + if (num_columns != 1) + wrongNumberOfColumns(num_columns, "1"); column = columns[0]; const auto & column_lc = assert_cast(*column); ColumnPtr nested_column = column_lc.getDictionary().getNestedColumn(); @@ -1804,7 +1854,8 @@ namespace void setColumns(const MutableColumnPtr * columns, [[maybe_unused]] size_t num_columns) override { - assert(num_columns == 1); + if (num_columns != 1) + wrongNumberOfColumns(num_columns, "1"); ColumnPtr column0 = columns[0]->getPtr(); setColumns(&column0, 1); } @@ -1862,6 +1913,12 @@ namespace column_lc.insertFromFullColumn(*default_value_column, 0); } + void describeTree(WriteBuffer & out, size_t indent) const override + { + writeIndent(out, indent) << "ProtobufSerializerLowCardinality ->\n"; + nested_serializer->describeTree(out, indent + 1); + } + private: const std::unique_ptr nested_serializer; ColumnPtr column; @@ -1882,7 +1939,8 @@ namespace void setColumns(const ColumnPtr * columns, [[maybe_unused]] size_t num_columns) override { - assert(num_columns == 1); + if (num_columns != 1) + wrongNumberOfColumns(num_columns, "1"); column = columns[0]; const auto & column_array = assert_cast(*column); ColumnPtr data_column = column_array.getDataPtr(); @@ -1891,7 +1949,8 @@ namespace void setColumns(const MutableColumnPtr * columns, [[maybe_unused]] size_t num_columns) override { - assert(num_columns == 1); + if (num_columns != 1) + wrongNumberOfColumns(num_columns, "1"); ColumnPtr column0 = columns[0]->getPtr(); setColumns(&column0, 1); } @@ -1944,6 +2003,12 @@ namespace column_array.insertDefault(); } + void describeTree(WriteBuffer & out, size_t indent) const 
override + { + writeIndent(out, indent) << "ProtobufSerializerArray ->\n"; + element_serializer->describeTree(out, indent + 1); + } + private: const std::unique_ptr element_serializer; ColumnPtr column; @@ -1955,10 +2020,12 @@ namespace { public: ProtobufSerializerTupleAsArray( + const std::string_view & column_name_, const std::shared_ptr & tuple_data_type_, const FieldDescriptor & field_descriptor_, std::vector> element_serializers_) - : tuple_data_type(tuple_data_type_) + : column_name(column_name_) + , tuple_data_type(tuple_data_type_) , tuple_size(tuple_data_type->getElements().size()) , field_descriptor(field_descriptor_) , element_serializers(std::move(element_serializers_)) @@ -1969,7 +2036,8 @@ namespace void setColumns(const ColumnPtr * columns, [[maybe_unused]] size_t num_columns) override { - assert(num_columns == 1); + if (num_columns != 1) + wrongNumberOfColumns(num_columns, "1"); column = columns[0]; const auto & column_tuple = assert_cast(*column); for (size_t i : collections::range(tuple_size)) @@ -1982,7 +2050,8 @@ namespace void setColumns(const MutableColumnPtr * columns, [[maybe_unused]] size_t num_columns) override { - assert(num_columns == 1); + if (num_columns != 1) + wrongNumberOfColumns(num_columns, "1"); ColumnPtr column0 = columns[0]->getPtr(); setColumns(&column0, 1); } @@ -2006,9 +2075,12 @@ namespace if (current_element_index >= tuple_size) { throw Exception( - "Too many (" + std::to_string(current_element_index) + ") elements was read from the field " - + field_descriptor.full_name() + " to fit in the data type " + tuple_data_type->getName(), - ErrorCodes::PROTOBUF_BAD_CAST); + ErrorCodes::PROTOBUF_BAD_CAST, + "Column {}: More than {} elements was read from the field {} to fit in the data type {}", + quoteString(column_name), + tuple_size, + quoteString(field_descriptor.full_name()), + tuple_data_type->getName()); } element_serializers[current_element_index]->readRow(row_num); @@ -2040,7 +2112,17 @@ namespace } } + void describeTree(WriteBuffer & out, size_t indent) const override + { + writeIndent(out, indent) << "ProtobufSerializerTupleAsArray: column " << quoteString(column_name) << " (" + << tuple_data_type->getName() << ") -> field " << quoteString(field_descriptor.full_name()) << " (" + << field_descriptor.type_name() << ") ->\n"; + for (const auto & element_serializer : element_serializers) + element_serializer->describeTree(out, indent + 1); + } + private: + const String column_name; const std::shared_ptr tuple_data_type; const size_t tuple_size; const FieldDescriptor & field_descriptor; @@ -2062,7 +2144,7 @@ namespace }; ProtobufSerializerMessage( - std::vector field_descs_, + std::vector && field_descs_, const FieldDescriptor * parent_field_descriptor_, bool with_length_delimiter_, const ProtobufReaderOrWriter & reader_or_writer_) @@ -2085,14 +2167,20 @@ namespace void setColumns(const ColumnPtr * columns_, size_t num_columns_) override { + if (!num_columns_) + wrongNumberOfColumns(num_columns_, ">0"); + columns.assign(columns_, columns_ + num_columns_); std::vector field_columns; for (const FieldInfo & info : field_infos) { field_columns.clear(); + field_columns.reserve(info.column_indices.size()); for (size_t column_index : info.column_indices) { + if (column_index >= num_columns_) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Wrong column index {}, expected column indices <{}", column_index, num_columns_); field_columns.emplace_back(columns_[column_index]); } info.field_serializer->setColumns(field_columns.data(), field_columns.size()); @@ 
-2103,11 +2191,9 @@ namespace missing_column_indices.resize(num_columns_); for (size_t column_index : collections::range(num_columns_)) missing_column_indices[column_index] = column_index; - for (const FieldInfo & info : field_infos) - { - for (size_t column_index : info.column_indices) + for (const auto & field_info : field_infos) + for (size_t column_index : field_info.column_indices) missing_column_indices[column_index] = static_cast(-1); - } boost::range::remove_erase(missing_column_indices, static_cast(-1)); } } @@ -2195,6 +2281,7 @@ namespace reader->endNestedMessage(); else reader->endMessage(false); + addDefaultsToMissingColumns(row_num); } @@ -2205,6 +2292,32 @@ namespace addDefaultsToMissingColumns(row_num); } + void describeTree(WriteBuffer & out, size_t indent) const override + { + size_t num_columns = 0; + for (const auto & field_info : field_infos) + num_columns += field_info.column_indices.size(); + + writeIndent(out, indent) << "ProtobufSerializerMessage: " << num_columns << " columns ->"; + if (parent_field_descriptor) + out << " field " << quoteString(parent_field_descriptor->full_name()) << " (" << parent_field_descriptor->type_name() << ")"; + + for (size_t i = 0; i != field_infos.size(); ++i) + { + out << "\n"; + const auto & field_info = field_infos[i]; + writeIndent(out, indent + 1) << "Columns #"; + for (size_t j = 0; j != field_info.column_indices.size(); ++j) + { + if (j) + out << ", "; + out << field_info.column_indices[j]; + } + out << " ->\n"; + field_info.field_serializer->describeTree(out, indent + 2); + } + } + private: size_t findFieldIndexByFieldTag(int field_tag) { @@ -2229,9 +2342,9 @@ namespace void addDefaultsToMissingColumns(size_t row_num) { - for (size_t column_idx : missing_column_indices) + for (size_t column_index : missing_column_indices) { - auto & column = columns[column_idx]; + auto & column = columns[column_index]; size_t old_size = column->size(); if (row_num >= old_size) column->assumeMutableRef().insertDefault(); @@ -2241,7 +2354,7 @@ namespace struct FieldInfo { FieldInfo( - std::vector column_indices_, + std::vector && column_indices_, const FieldDescriptor & field_descriptor_, std::unique_ptr field_serializer_) : column_indices(std::move(column_indices_)) @@ -2277,14 +2390,15 @@ namespace class ProtobufSerializerTupleAsNestedMessage : public ProtobufSerializer { public: - explicit ProtobufSerializerTupleAsNestedMessage(std::unique_ptr nested_message_serializer_) - : nested_message_serializer(std::move(nested_message_serializer_)) + explicit ProtobufSerializerTupleAsNestedMessage(std::unique_ptr message_serializer_) + : message_serializer(std::move(message_serializer_)) { } void setColumns(const ColumnPtr * columns, [[maybe_unused]] size_t num_columns) override { - assert(num_columns == 1); + if (num_columns != 1) + wrongNumberOfColumns(num_columns, "1"); const auto & column_tuple = assert_cast(*columns[0]); size_t tuple_size = column_tuple.tupleSize(); assert(tuple_size); @@ -2292,22 +2406,29 @@ namespace element_columns.reserve(tuple_size); for (size_t i : collections::range(tuple_size)) element_columns.emplace_back(column_tuple.getColumnPtr(i)); - nested_message_serializer->setColumns(element_columns.data(), element_columns.size()); + message_serializer->setColumns(element_columns.data(), element_columns.size()); } void setColumns(const MutableColumnPtr * columns, [[maybe_unused]] size_t num_columns) override { - assert(num_columns == 1); + if (num_columns != 1) + wrongNumberOfColumns(num_columns, "1"); ColumnPtr column0 = 
columns[0]->getPtr(); setColumns(&column0, 1); } - void writeRow(size_t row_num) override { nested_message_serializer->writeRow(row_num); } - void readRow(size_t row_num) override { nested_message_serializer->readRow(row_num); } - void insertDefaults(size_t row_num) override { nested_message_serializer->insertDefaults(row_num); } + void writeRow(size_t row_num) override { message_serializer->writeRow(row_num); } + void readRow(size_t row_num) override { message_serializer->readRow(row_num); } + void insertDefaults(size_t row_num) override { message_serializer->insertDefaults(row_num); } + + void describeTree(WriteBuffer & out, size_t indent) const override + { + writeIndent(out, indent) << "ProtobufSerializerTupleAsNestedMessage ->\n"; + message_serializer->describeTree(out, indent + 1); + } private: - const std::unique_ptr nested_message_serializer; + const std::unique_ptr message_serializer; }; @@ -2317,14 +2438,23 @@ namespace { public: explicit ProtobufSerializerFlattenedNestedAsArrayOfNestedMessages( - std::unique_ptr nested_message_serializer_) - : nested_message_serializer(std::move(nested_message_serializer_)) + const std::vector & column_names_, + const FieldDescriptor * parent_field_descriptor_, + std::unique_ptr message_serializer_, + const std::function & get_root_desc_function_) + : parent_field_descriptor(parent_field_descriptor_) + , message_serializer(std::move(message_serializer_)) + , get_root_desc_function(get_root_desc_function_) { + column_names.reserve(column_names_.size()); + for (const auto & column_name : column_names_) + column_names.emplace_back(column_name); } void setColumns(const ColumnPtr * columns, size_t num_columns) override { - assert(num_columns); + if (!num_columns) + wrongNumberOfColumns(num_columns, ">0"); data_columns.clear(); data_columns.reserve(num_columns); offset_columns.clear(); @@ -2334,13 +2464,28 @@ namespace { const auto & column_array = assert_cast(*columns[i]); data_columns.emplace_back(column_array.getDataPtr()); - offset_columns.emplace_back(column_array.getOffsetsPtr()); - } - std::sort(offset_columns.begin(), offset_columns.end()); - offset_columns.erase(std::unique(offset_columns.begin(), offset_columns.end()), offset_columns.end()); + auto offset_column = column_array.getOffsetsPtr(); + if (std::binary_search(offset_columns.begin(), offset_columns.end(), offset_column)) + continue; + + /// Keep `offset_columns` sorted. + offset_columns.insert(std::upper_bound(offset_columns.begin(), offset_columns.end(), offset_column), offset_column); - nested_message_serializer->setColumns(data_columns.data(), data_columns.size()); + /// All the columns listed in `offset_columns` should have equal offsets. 
+ if (i >= 1) + { + const auto & column_array0 = assert_cast(*columns[0]); + if (!column_array0.hasEqualOffsets(column_array)) + { + throw Exception(ErrorCodes::PROTOBUF_BAD_CAST, + "Column #{} {} and column #{} {} are supposed to have equal offsets according to the following serialization tree:\n{}", + 0, quoteString(column_names[0]), i, quoteString(column_names[i]), get_root_desc_function(0)); + } + } + } + + message_serializer->setColumns(data_columns.data(), data_columns.size()); } void setColumns(const MutableColumnPtr * columns, size_t num_columns) override @@ -2357,14 +2502,8 @@ namespace const auto & offset_column0 = assert_cast(*offset_columns[0]); size_t start_offset = offset_column0.getElement(row_num - 1); size_t end_offset = offset_column0.getElement(row_num); - for (size_t i : collections::range(1, offset_columns.size())) - { - const auto & offset_column = assert_cast(*offset_columns[i]); - if (offset_column.getElement(row_num) != end_offset) - throw Exception("Components of FlattenedNested have different sizes", ErrorCodes::PROTOBUF_BAD_CAST); - } for (size_t i : collections::range(start_offset, end_offset)) - nested_message_serializer->writeRow(i); + message_serializer->writeRow(i); } void readRow(size_t row_num) override @@ -2377,7 +2516,7 @@ namespace try { - nested_message_serializer->readRow(old_data_size); + message_serializer->readRow(old_data_size); size_t data_size = data_columns[0]->size(); if (data_size != old_data_size + 1) throw Exception("Unexpected number of elements of ColumnArray has been read", ErrorCodes::LOGICAL_ERROR); @@ -2432,8 +2571,26 @@ namespace } } + void describeTree(WriteBuffer & out, size_t indent) const override + { + writeIndent(out, indent) << "ProtobufSerializerFlattenedNestedAsArrayOfNestedMessages: columns "; + for (size_t i = 0; i != column_names.size(); ++i) + { + if (i) + out << ", "; + out << "#" << i << " " << quoteString(column_names[i]); + } + out << " ->"; + if (parent_field_descriptor) + out << " field " << quoteString(parent_field_descriptor->full_name()) << " (" << parent_field_descriptor->type_name() << ") ->\n"; + message_serializer->describeTree(out, indent + 1); + } + private: - const std::unique_ptr nested_message_serializer; + Strings column_names; + const FieldDescriptor * parent_field_descriptor; + const std::unique_ptr message_serializer; + const std::function get_root_desc_function; Columns data_columns; Columns offset_columns; }; @@ -2445,24 +2602,33 @@ namespace public: explicit ProtobufSerializerBuilder(const ProtobufReaderOrWriter & reader_or_writer_) : reader_or_writer(reader_or_writer_) {} - std::unique_ptr buildMessageSerializer( + std::unique_ptr buildMessageSerializer( const Strings & column_names, const DataTypes & data_types, std::vector & missing_column_indices, const MessageDescriptor & message_descriptor, bool with_length_delimiter) { + root_serializer_ptr = std::make_shared(); + get_root_desc_function = [root_serializer_ptr = root_serializer_ptr](size_t indent) -> String + { + WriteBufferFromOwnString buf; + (*root_serializer_ptr)->describeTree(buf, indent); + return buf.str(); + }; + std::vector used_column_indices; - auto serializer = buildMessageSerializerImpl( + auto message_serializer = buildMessageSerializerImpl( /* num_columns = */ column_names.size(), column_names.data(), data_types.data(), - used_column_indices, message_descriptor, with_length_delimiter, - /* parent_field_descriptor = */ nullptr); + /* parent_field_descriptor = */ nullptr, + used_column_indices, + /* 
columns_are_reordered_outside = */ false); - if (!serializer) + if (!message_serializer) { throw Exception( "Not found matches between the names of the columns {" + boost::algorithm::join(column_names, ", ") @@ -2473,10 +2639,18 @@ namespace missing_column_indices.clear(); missing_column_indices.reserve(column_names.size() - used_column_indices.size()); - boost::range::set_difference(collections::range(column_names.size()), used_column_indices, + auto used_column_indices_sorted = std::move(used_column_indices); + std::sort(used_column_indices_sorted.begin(), used_column_indices_sorted.end()); + boost::range::set_difference(collections::range(column_names.size()), used_column_indices_sorted, std::back_inserter(missing_column_indices)); - return serializer; + *root_serializer_ptr = message_serializer.get(); + +#if 0 + LOG_INFO(&Poco::Logger::get("ProtobufSerializer"), "Serialization tree:\n{}", get_root_desc_function(0)); +#endif + + return message_serializer; } private: @@ -2621,24 +2795,63 @@ namespace } /// Builds a serializer for a protobuf message (root or nested). - template + /// + /// Some of the passed columns might be skipped, the function sets `used_column_indices` to + /// the list of those columns which match any fields in the protobuf message. + /// + /// Normally `columns_are_reordered_outside` should be false - if it's false it means that + /// the used column indices will be passed to ProtobufSerializerMessage, which will write/read + /// only those columns and set the rest of columns by default. + /// Set `columns_are_reordered_outside` to true if you're going to reorder columns + /// according to `used_column_indices` returned and pass to + /// ProtobufSerializerMessage::setColumns() only the columns which are actually used. std::unique_ptr buildMessageSerializerImpl( size_t num_columns, - const StringOrStringViewT * column_names, + const String * column_names, const DataTypePtr * data_types, + const MessageDescriptor & message_descriptor, + bool with_length_delimiter, + const FieldDescriptor * parent_field_descriptor, std::vector & used_column_indices, + bool columns_are_reordered_outside) + { + std::vector column_names_sv; + column_names_sv.reserve(num_columns); + for (size_t i = 0; i != num_columns; ++i) + column_names_sv.emplace_back(column_names[i]); + + return buildMessageSerializerImpl( + num_columns, + column_names_sv.data(), + data_types, + message_descriptor, + with_length_delimiter, + parent_field_descriptor, + used_column_indices, + columns_are_reordered_outside); + } + + std::unique_ptr buildMessageSerializerImpl( + size_t num_columns, + const std::string_view * column_names, + const DataTypePtr * data_types, const MessageDescriptor & message_descriptor, bool with_length_delimiter, - const FieldDescriptor * parent_field_descriptor) + const FieldDescriptor * parent_field_descriptor, + std::vector & used_column_indices, + bool columns_are_reordered_outside) { std::vector field_descs; boost::container::flat_map field_descriptors_in_use; used_column_indices.clear(); used_column_indices.reserve(num_columns); + boost::container::flat_set used_column_indices_sorted; + used_column_indices_sorted.reserve(num_columns); + size_t sequential_column_index = 0; auto add_field_serializer = [&](const std::string_view & column_name_, - std::vector column_indices_, + std::vector && column_indices_, const FieldDescriptor & field_descriptor_, std::unique_ptr field_serializer_) { @@ -2652,12 +2865,17 @@ namespace ErrorCodes::MULTIPLE_COLUMNS_SERIALIZED_TO_SAME_PROTOBUF_FIELD); } - 
for (size_t column_index : column_indices_) + used_column_indices.insert(used_column_indices.end(), column_indices_.begin(), column_indices_.end()); + used_column_indices_sorted.insert(column_indices_.begin(), column_indices_.end()); + + auto column_indices_to_pass_to_message_serializer = std::move(column_indices_); + if (columns_are_reordered_outside) { - /// Keep `used_column_indices` sorted. - used_column_indices.insert(boost::range::upper_bound(used_column_indices, column_index), column_index); + for (auto & index : column_indices_to_pass_to_message_serializer) + index = sequential_column_index++; } - field_descs.push_back({std::move(column_indices_), &field_descriptor_, std::move(field_serializer_)}); + + field_descs.push_back({std::move(column_indices_to_pass_to_message_serializer), &field_descriptor_, std::move(field_serializer_)}); field_descriptors_in_use.emplace(&field_descriptor_, column_name_); }; @@ -2666,7 +2884,7 @@ namespace /// We're going through all the passed columns. for (size_t column_idx : collections::range(num_columns)) { - if (boost::range::binary_search(used_column_indices, column_idx)) + if (used_column_indices_sorted.count(column_idx)) continue; const auto & column_name = column_names[column_idx]; @@ -2702,7 +2920,7 @@ namespace for (size_t j : collections::range(column_idx + 1, num_columns)) { - if (boost::range::binary_search(used_column_indices, j)) + if (used_column_indices_sorted.count(j)) continue; std::string_view other_suffix; if (!columnNameStartsWithFieldName(column_names[j], *field_descriptor, other_suffix)) @@ -2740,10 +2958,15 @@ namespace nested_column_names.size(), nested_column_names.data(), nested_data_types.data(), - used_column_indices_in_nested, *field_descriptor->message_type(), - false, - field_descriptor); + /* with_length_delimiter = */ false, + field_descriptor, + used_column_indices_in_nested, + /* columns_are_reordered_outside = */ true); + + /// `columns_are_reordered_outside` is true because column indices are + /// going to be transformed and then written to the outer message, + /// see add_field_serializer() below. if (nested_message_serializer) { @@ -2774,14 +2997,23 @@ namespace nested_column_names.size(), nested_column_names.data(), nested_data_types.data(), - used_column_indices_in_nested, *field_descriptor->message_type(), - false, - field_descriptor); + /* with_length_delimiter = */ false, + field_descriptor, + used_column_indices_in_nested, + /* columns_are_reordered_outside = */ true); + + /// `columns_are_reordered_outside` is true because column indices are + /// going to be transformed and then written to the outer message, + /// see add_field_serializer() below. 
if (nested_message_serializer) { - auto field_serializer = std::make_unique(std::move(nested_message_serializer)); + std::vector column_names_used; + for (size_t i : used_column_indices_in_nested) + column_names_used.emplace_back(nested_column_names[i]); + auto field_serializer = std::make_unique( + std::move(column_names_used), field_descriptor, std::move(nested_message_serializer), get_root_desc_function); transformColumnIndices(used_column_indices_in_nested, nested_column_indices); add_field_serializer(column_name, std::move(used_column_indices_in_nested), *field_descriptor, std::move(field_serializer)); break; @@ -2823,34 +3055,34 @@ namespace auto data_type_id = data_type->getTypeId(); switch (data_type_id) { - case TypeIndex::UInt8: return std::make_unique>(field_descriptor, reader_or_writer); - case TypeIndex::UInt16: return std::make_unique>(field_descriptor, reader_or_writer); - case TypeIndex::UInt32: return std::make_unique>(field_descriptor, reader_or_writer); - case TypeIndex::UInt64: return std::make_unique>(field_descriptor, reader_or_writer); - case TypeIndex::UInt128: return std::make_unique>(field_descriptor, reader_or_writer); - case TypeIndex::UInt256: return std::make_unique>(field_descriptor, reader_or_writer); - case TypeIndex::Int8: return std::make_unique>(field_descriptor, reader_or_writer); - case TypeIndex::Int16: return std::make_unique>(field_descriptor, reader_or_writer); - case TypeIndex::Int32: return std::make_unique>(field_descriptor, reader_or_writer); - case TypeIndex::Int64: return std::make_unique>(field_descriptor, reader_or_writer); - case TypeIndex::Int128: return std::make_unique>(field_descriptor, reader_or_writer); - case TypeIndex::Int256: return std::make_unique>(field_descriptor, reader_or_writer); - case TypeIndex::Float32: return std::make_unique>(field_descriptor, reader_or_writer); - case TypeIndex::Float64: return std::make_unique>(field_descriptor, reader_or_writer); - case TypeIndex::Date: return std::make_unique(field_descriptor, reader_or_writer); - case TypeIndex::DateTime: return std::make_unique(assert_cast(*data_type), field_descriptor, reader_or_writer); - case TypeIndex::DateTime64: return std::make_unique(assert_cast(*data_type), field_descriptor, reader_or_writer); - case TypeIndex::String: return std::make_unique>(field_descriptor, reader_or_writer); - case TypeIndex::FixedString: return std::make_unique>(typeid_cast>(data_type), field_descriptor, reader_or_writer); - case TypeIndex::Enum8: return std::make_unique>(typeid_cast>(data_type), field_descriptor, reader_or_writer); - case TypeIndex::Enum16: return std::make_unique>(typeid_cast>(data_type), field_descriptor, reader_or_writer); - case TypeIndex::Decimal32: return std::make_unique>(assert_cast &>(*data_type), field_descriptor, reader_or_writer); - case TypeIndex::Decimal64: return std::make_unique>(assert_cast &>(*data_type), field_descriptor, reader_or_writer); - case TypeIndex::Decimal128: return std::make_unique>(assert_cast &>(*data_type), field_descriptor, reader_or_writer); - case TypeIndex::Decimal256: return std::make_unique>(assert_cast &>(*data_type), field_descriptor, reader_or_writer); - case TypeIndex::UUID: return std::make_unique(field_descriptor, reader_or_writer); - case TypeIndex::Interval: return std::make_unique(field_descriptor, reader_or_writer); - case TypeIndex::AggregateFunction: return std::make_unique(typeid_cast>(data_type), field_descriptor, reader_or_writer); + case TypeIndex::UInt8: return std::make_unique>(column_name, 
field_descriptor, reader_or_writer); + case TypeIndex::UInt16: return std::make_unique>(column_name, field_descriptor, reader_or_writer); + case TypeIndex::UInt32: return std::make_unique>(column_name, field_descriptor, reader_or_writer); + case TypeIndex::UInt64: return std::make_unique>(column_name, field_descriptor, reader_or_writer); + case TypeIndex::UInt128: return std::make_unique>(column_name, field_descriptor, reader_or_writer); + case TypeIndex::UInt256: return std::make_unique>(column_name, field_descriptor, reader_or_writer); + case TypeIndex::Int8: return std::make_unique>(column_name, field_descriptor, reader_or_writer); + case TypeIndex::Int16: return std::make_unique>(column_name, field_descriptor, reader_or_writer); + case TypeIndex::Int32: return std::make_unique>(column_name, field_descriptor, reader_or_writer); + case TypeIndex::Int64: return std::make_unique>(column_name, field_descriptor, reader_or_writer); + case TypeIndex::Int128: return std::make_unique>(column_name, field_descriptor, reader_or_writer); + case TypeIndex::Int256: return std::make_unique>(column_name, field_descriptor, reader_or_writer); + case TypeIndex::Float32: return std::make_unique>(column_name, field_descriptor, reader_or_writer); + case TypeIndex::Float64: return std::make_unique>(column_name, field_descriptor, reader_or_writer); + case TypeIndex::Date: return std::make_unique(column_name, field_descriptor, reader_or_writer); + case TypeIndex::DateTime: return std::make_unique(column_name, assert_cast(*data_type), field_descriptor, reader_or_writer); + case TypeIndex::DateTime64: return std::make_unique(column_name, assert_cast(*data_type), field_descriptor, reader_or_writer); + case TypeIndex::String: return std::make_unique>(column_name, field_descriptor, reader_or_writer); + case TypeIndex::FixedString: return std::make_unique>(column_name, typeid_cast>(data_type), field_descriptor, reader_or_writer); + case TypeIndex::Enum8: return std::make_unique>(column_name, typeid_cast>(data_type), field_descriptor, reader_or_writer); + case TypeIndex::Enum16: return std::make_unique>(column_name, typeid_cast>(data_type), field_descriptor, reader_or_writer); + case TypeIndex::Decimal32: return std::make_unique>(column_name, assert_cast &>(*data_type), field_descriptor, reader_or_writer); + case TypeIndex::Decimal64: return std::make_unique>(column_name, assert_cast &>(*data_type), field_descriptor, reader_or_writer); + case TypeIndex::Decimal128: return std::make_unique>(column_name, assert_cast &>(*data_type), field_descriptor, reader_or_writer); + case TypeIndex::Decimal256: return std::make_unique>(column_name, assert_cast &>(*data_type), field_descriptor, reader_or_writer); + case TypeIndex::UUID: return std::make_unique(column_name, field_descriptor, reader_or_writer); + case TypeIndex::Interval: return std::make_unique(column_name, field_descriptor, reader_or_writer); + case TypeIndex::AggregateFunction: return std::make_unique(column_name, typeid_cast>(data_type), field_descriptor, reader_or_writer); case TypeIndex::Nullable: { @@ -2907,16 +3139,17 @@ namespace { /// Try to serialize as a nested message. 
std::vector used_column_indices; - auto nested_message_serializer = buildMessageSerializerImpl( + auto message_serializer = buildMessageSerializerImpl( size_of_tuple, tuple_data_type.getElementNames().data(), tuple_data_type.getElements().data(), - used_column_indices, *field_descriptor.message_type(), - false, - &field_descriptor); + /* with_length_delimiter = */ false, + &field_descriptor, + used_column_indices, + /* columns_are_reordered_outside = */ false); - if (!nested_message_serializer) + if (!message_serializer) { throw Exception( "Not found matches between the names of the tuple's elements {" @@ -2926,7 +3159,7 @@ namespace ErrorCodes::NO_COLUMNS_SERIALIZED_TO_PROTOBUF_FIELDS); } - return std::make_unique(std::move(nested_message_serializer)); + return std::make_unique(std::move(message_serializer)); } /// Serialize as a repeated field. @@ -2947,6 +3180,7 @@ namespace return nullptr; return std::make_unique( + column_name, typeid_cast>(data_type), field_descriptor, std::move(nested_serializers)); @@ -2973,6 +3207,8 @@ namespace } const ProtobufReaderOrWriter reader_or_writer; + std::function get_root_desc_function; + std::shared_ptr root_serializer_ptr; }; } diff --git a/src/Formats/ProtobufSerializer.h b/src/Formats/ProtobufSerializer.h index 86a2f2f36ddf..3e413a418a11 100644 --- a/src/Formats/ProtobufSerializer.h +++ b/src/Formats/ProtobufSerializer.h @@ -17,7 +17,7 @@ class ProtobufWriter; class IDataType; using DataTypePtr = std::shared_ptr; using DataTypes = std::vector; - +class WriteBuffer; /// Utility class, does all the work for serialization in the Protobuf format. class ProtobufSerializer @@ -32,6 +32,8 @@ class ProtobufSerializer virtual void readRow(size_t row_num) = 0; virtual void insertDefaults(size_t row_num) = 0; + virtual void describeTree(WriteBuffer & out, size_t indent) const = 0; + static std::unique_ptr create( const Strings & column_names, const DataTypes & data_types, diff --git a/tests/queries/0_stateless/00825_protobuf_format_skipped_column_in_nested.reference b/tests/queries/0_stateless/00825_protobuf_format_skipped_column_in_nested.reference new file mode 100644 index 000000000000..1a80e6401dbc --- /dev/null +++ b/tests/queries/0_stateless/00825_protobuf_format_skipped_column_in_nested.reference @@ -0,0 +1,27 @@ +e4048ead-30a2-45e5-90be-2af1c7137523 dummy [1] [50639] [58114] [[5393]] [[1]] [[3411]] [[17811]] [[(10,20)]] + +Binary representation: +00000000 44 0a 24 65 34 30 34 38 65 61 64 2d 33 30 61 32 |D.$e4048ead-30a2| +00000010 2d 34 35 65 35 2d 39 30 62 65 2d 32 61 66 31 63 |-45e5-90be-2af1c| +00000020 37 31 33 37 35 32 33 62 1c 10 01 18 cf 8b 03 20 |7137523b....... 
| +00000030 82 c6 03 5a 10 28 01 30 91 2a 40 93 8b 01 52 05 |...Z.(.0.*@...R.| +00000040 4d 00 00 a0 41 |M...A| +00000045 + +MESSAGE #1 AT 0x00000001 +identifier: "e4048ead-30a2-45e5-90be-2af1c7137523" +modules { + module_id: 1 + supply: 50639 + temp: 58114 + nodes { + node_id: 1 + opening_time: 5393 + current: 17811 + coords { + y: 20 + } + } +} + +Binary representation is as expected diff --git a/tests/queries/0_stateless/00825_protobuf_format_skipped_column_in_nested.sh b/tests/queries/0_stateless/00825_protobuf_format_skipped_column_in_nested.sh new file mode 100755 index 000000000000..b413385fb77b --- /dev/null +++ b/tests/queries/0_stateless/00825_protobuf_format_skipped_column_in_nested.sh @@ -0,0 +1,55 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +# https://github.com/ClickHouse/ClickHouse/issues/31160 + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +SCHEMADIR=$CURDIR/format_schemas +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +set -eo pipefail + +# Run the client. +$CLICKHOUSE_CLIENT --multiquery < "$BINARY_FILE_PATH" + +# Check the output in the protobuf format +echo +$CURDIR/helpers/protobuf_length_delimited_encoder.py --decode_and_check --format_schema "$SCHEMADIR/00825_protobuf_format_skipped_column_in_nested:UpdateMessage" --input "$BINARY_FILE_PATH" + +# Check the input in the protobuf format (now the table contains the same data twice). +#echo +#$CLICKHOUSE_CLIENT --query "INSERT INTO table_skipped_column_in_nested_00825 FORMAT Protobuf SETTINGS format_schema='$SCHEMADIR/00825_protobuf_format_skipped_column_in_nested:UpdateMessage'" < "$BINARY_FILE_PATH" +#$CLICKHOUSE_CLIENT --query "SELECT * FROM table_skipped_column_in_nested_00825" + +rm "$BINARY_FILE_PATH" +$CLICKHOUSE_CLIENT --query "DROP TABLE table_skipped_column_in_nested_00825" diff --git a/tests/queries/0_stateless/format_schemas/00825_protobuf_format_skipped_column_in_nested.proto b/tests/queries/0_stateless/format_schemas/00825_protobuf_format_skipped_column_in_nested.proto new file mode 100644 index 000000000000..054de349e245 --- /dev/null +++ b/tests/queries/0_stateless/format_schemas/00825_protobuf_format_skipped_column_in_nested.proto @@ -0,0 +1,29 @@ +syntax = "proto3"; + +message UpdateMessage { + string identifier = 1; + //string unused1 = 100; + + message Module { + uint32 module_id = 2; + uint32 supply = 3; + uint32 temp = 4; + + message ModuleNode { + uint32 node_id = 5; + uint32 opening_time = 6; + uint32 closing_time = 7; // The column in the table is named `closing_time_time` + uint32 current = 8; + + message Coords { + //float x = 8; + float y = 9; + } + Coords coords = 10; + } + + repeated ModuleNode nodes = 11; + } + + repeated Module modules = 12; +} From b16987c00c8af6ec098c79e2b41fe4588cf93000 Mon Sep 17 00:00:00 2001 From: Neng Liu Date: Mon, 6 Dec 2021 13:34:50 +0800 Subject: [PATCH 225/472] add unsafe row test --- .../Parser/CHColumnToSparkRow.cpp | 190 ++++++++++++------ utils/local-engine/java/pom.xml | 14 ++ .../kyligence/jni/engine/LocalEngineTest.java | 2 + 3 files changed, 139 insertions(+), 67 deletions(-) diff --git a/utils/local-engine/Parser/CHColumnToSparkRow.cpp b/utils/local-engine/Parser/CHColumnToSparkRow.cpp index 6a3ec9209a5f..0ddf39b64380 100644 --- a/utils/local-engine/Parser/CHColumnToSparkRow.cpp +++ b/utils/local-engine/Parser/CHColumnToSparkRow.cpp @@ -1,35 +1,50 @@ #include "CHColumnToSparkRow.h" +#include +#include +#include #include #include -#include -#define WRITE_VECTOR_COLUMN(TYPE) \ - const auto *uint8_col = 
checkAndGetColumn>(*col.column);\ - for (auto i = 0; i < num_rows; i++) {\ - Field value;\ - uint8_col->get(i, value);\ - memcpy(buffer_address + offsets[i] + field_offset, &value.get(), sizeof(uint8_t));\ - } +#define WRITE_VECTOR_COLUMN(TYPE, PRIME_TYPE) \ + const auto * type_col = checkAndGetColumn>(*col.column); \ + for (auto i = 0; i < num_rows; i++) \ + { \ + bool is_null = nullable_column && nullable_column->isNullAt(i); \ + if (is_null) \ + { \ + setNullAt(buffer_address, offsets[i], field_offset, col_index); \ + } \ + else \ + { \ + Field value; \ + type_col->get(i, value); \ + memcpy(buffer_address + offsets[i] + field_offset, &value.get(), sizeof(PRIME_TYPE)); \ + } \ + } namespace local_engine { using namespace DB; -int64_t calculateBitSetWidthInBytes(int32_t num_fields) { - return ((num_fields + 64) / 64) * 8; +int64_t calculateBitSetWidthInBytes(int32_t num_fields) +{ + return ((num_fields + 63) / 64) * 8; } -int64_t calculatedFixeSizePerRow(DB::Block& header, - int64_t num_cols) { +int64_t calculatedFixeSizePerRow(DB::Block & header, int64_t num_cols) +{ auto fields = header.getNamesAndTypesList(); // Calculate the decimal col num when the precision >18 int32_t count = 0; - for (auto i = 0; i < num_cols; i++) { + for (auto i = 0; i < num_cols; i++) + { auto type = fields.getTypes()[i]; DB::WhichDataType which(type); - if (which.isDecimal128()) { + if (which.isDecimal128()) + { const auto & dtype = typeid_cast *>(type.get()); int32_t precision = dtype->getPrecision(); - if (precision > 18) count++; + if (precision > 18) + count++; } } @@ -38,118 +53,151 @@ int64_t calculatedFixeSizePerRow(DB::Block& header, return fixed_size + decimal_cols_size; } -int64_t roundNumberOfBytesToNearestWord(int64_t numBytes) { - int64_t remainder = numBytes & 0x07; // This is equivalent to `numBytes % 8` - if (remainder == 0) { +int64_t roundNumberOfBytesToNearestWord(int64_t numBytes) +{ + int64_t remainder = numBytes & 0x07; // This is equivalent to `numBytes % 8` + if (remainder == 0) + { return numBytes; - } else { + } + else + { return numBytes + (8 - remainder); } } -int64_t getFieldOffset(int64_t nullBitsetWidthInBytes, int32_t index) { +int64_t getFieldOffset(int64_t nullBitsetWidthInBytes, int32_t index) +{ return nullBitsetWidthInBytes + 8L * index; } -void bitSet(uint8_t* buffer_address, int32_t index) { - int64_t mask = 1L << (index & 0x3f); // mod 64 and shift - int64_t wordOffset = (index >> 6) * 8; +void bitSet(uint8_t * buffer_address, int32_t index) +{ + int64_t mask = 1L << (index & 0x3f); // mod 64 and shift + int64_t word_offset = (index >> 6) * 8; int64_t word; - memcpy(&word, buffer_address + wordOffset, sizeof(int64_t)); + memcpy(&word, buffer_address + word_offset, sizeof(int64_t)); int64_t value = word | mask; - memcpy(buffer_address + wordOffset, &value, sizeof(int64_t)); + memcpy(buffer_address + word_offset, &value, sizeof(int64_t)); } -void setNullAt(uint8_t* buffer_address, int64_t row_offset, int64_t field_offset, - int32_t col_index) { +void setNullAt(uint8_t * buffer_address, int64_t row_offset, int64_t field_offset, int32_t col_index) +{ bitSet(buffer_address + row_offset, col_index); // set the value to 0 memset(buffer_address + row_offset + field_offset, 0, sizeof(int64_t)); } -void writeValue(uint8_t* buffer_address, int64_t field_offset, - ColumnWithTypeAndName& col, int32_t col_index, - int64_t num_rows, std::vector& offsets, - std::vector& buffer_cursor) { - +void writeValue( + uint8_t * buffer_address, + int64_t field_offset, + ColumnWithTypeAndName & 
col, + int32_t col_index, + int64_t num_rows, + std::vector & offsets, + std::vector & buffer_cursor) +{ ColumnPtr nested_col = col.column; - if (const auto * nullable_column = checkAndGetColumn(*col.column)) + const auto * nullable_column = checkAndGetColumn(*col.column); + if (nullable_column) { - for (auto i = 0; i < num_rows; i++) { - bool is_null = nullable_column->isNullAt(i); - if (is_null) { - setNullAt(buffer_address, offsets[i], field_offset, col_index); - } - } nested_col = nullable_column->getNestedColumnPtr(); } WhichDataType which(nested_col->getDataType()); if (which.isUInt8()) { - WRITE_VECTOR_COLUMN(UInt8) + WRITE_VECTOR_COLUMN(UInt8, uint8_t) } - else if (which.isInt8()) + else if (which.isInt8()) { - WRITE_VECTOR_COLUMN(Int8) + WRITE_VECTOR_COLUMN(Int8, int8_t) } - else if (which.isInt16()) + else if (which.isInt16()) { - WRITE_VECTOR_COLUMN(Int16) + WRITE_VECTOR_COLUMN(Int16, int16_t) } - else if (which.isInt32()) + else if (which.isInt32()) { - WRITE_VECTOR_COLUMN(Int32) - } - else if (which.isInt64()) + WRITE_VECTOR_COLUMN(Int32, int32_t) + } + else if (which.isInt64()) { - WRITE_VECTOR_COLUMN(Int64) - } - else if (which.isFloat32()) + WRITE_VECTOR_COLUMN(Int64, int64_t) + } + else if (which.isFloat32()) { - WRITE_VECTOR_COLUMN(Float32) - } - else if (which.isFloat64()) + WRITE_VECTOR_COLUMN(Float32, float_t) + } + else if (which.isFloat64()) { - WRITE_VECTOR_COLUMN(Float64) - } + WRITE_VECTOR_COLUMN(Float64, double_t) + } else if (which.isDate()) { - WRITE_VECTOR_COLUMN(UInt16) + WRITE_VECTOR_COLUMN(UInt16, uint16_t) + } + else if (which.isString()) + { + const auto * string_col = checkAndGetColumn(*col.column); + for (auto i = 0; i < num_rows; i++) + { + bool is_null = nullable_column && nullable_column->isNullAt(i); + if (is_null) + { + setNullAt(buffer_address, offsets[i], field_offset, col_index); + } + else + { + StringRef string_value = string_col->getDataAt(i); + // write the variable value + memcpy(buffer_address + offsets[i] + buffer_cursor[i], string_value.data, string_value.size); + // write the offset and size + int64_t offset_and_size = (buffer_cursor[i] << 32) | string_value.size; + memcpy(buffer_address + offsets[i] + field_offset, &offset_and_size, sizeof(int64_t)); + buffer_cursor[i] += string_value.size; + } + } } else { - throw std::runtime_error("doesn't support type "+ std::string(getTypeName(nested_col->getDataType()))); + throw std::runtime_error("doesn't support type " + std::string(getTypeName(nested_col->getDataType()))); } } -SparkRowInfo::SparkRowInfo(DB::Block& block) +SparkRowInfo::SparkRowInfo(DB::Block & block) { num_rows_ = block.rows(); num_cols_ = block.columns(); nullBitsetWidthInBytes_ = calculateBitSetWidthInBytes(num_cols_); int64_t fixed_size_per_row = calculatedFixeSizePerRow(block, num_cols_); // Initialize the offsets_ , lengths_, buffer_cursor_ - for (auto i = 0; i < num_rows_; i++) { + for (auto i = 0; i < num_rows_; i++) + { lengths_.push_back(fixed_size_per_row); offsets_.push_back(0); buffer_cursor_.push_back(nullBitsetWidthInBytes_ + 8 * num_cols_); } // Calculated the lengths_ - for (auto i = 0; i < num_cols_; i++) { + for (auto i = 0; i < num_cols_; i++) + { auto col = block.getByPosition(i); - if (isStringOrFixedString(col.type)) { + if (isStringOrFixedString(col.type)) + { size_t length; - for (auto j = 0; j < num_rows_; j++) { + for (auto j = 0; j < num_rows_; j++) + { length = col.column->getDataAt(j).size; lengths_[j] += roundNumberOfBytesToNearestWord(length); } } } } + int64_t 
local_engine::SparkRowInfo::getNullBitsetWidthInBytes() const { return nullBitsetWidthInBytes_; } + void local_engine::SparkRowInfo::setNullBitsetWidthInBytes(int64_t nullBitsetWidthInBytes) { nullBitsetWidthInBytes_ = nullBitsetWidthInBytes; @@ -195,17 +243,25 @@ std::unique_ptr local_engine::CHColumnToSparkRow::convertCHColumnT std::unique_ptr spark_row_info = std::make_unique(block); // Calculated the offsets_ and total memory size based on lengths_ int64_t total_memory_size = spark_row_info->lengths_[0]; - for (auto i = 1; i < spark_row_info->num_rows_; i++) { + for (auto i = 1; i < spark_row_info->num_rows_; i++) + { spark_row_info->offsets_[i] = spark_row_info->offsets_[i - 1] + spark_row_info->lengths_[i - 1]; total_memory_size += spark_row_info->lengths_[i]; } spark_row_info->total_bytes_ = total_memory_size; spark_row_info->buffer_address_ = reinterpret_cast(alloc(total_memory_size)); - for (auto i = 0; i < spark_row_info->num_cols_; i++) { + for (auto i = 0; i < spark_row_info->num_cols_; i++) + { auto array = block.getByPosition(i); int64_t field_offset = getFieldOffset(spark_row_info->nullBitsetWidthInBytes_, i); - writeValue(spark_row_info->buffer_address_, field_offset, array, i, spark_row_info->num_rows_, spark_row_info->offsets_, - spark_row_info->buffer_cursor_); + writeValue( + spark_row_info->buffer_address_, + field_offset, + array, + i, + spark_row_info->num_rows_, + spark_row_info->offsets_, + spark_row_info->buffer_cursor_); } return spark_row_info; } diff --git a/utils/local-engine/java/pom.xml b/utils/local-engine/java/pom.xml index cd957ee6a276..fc6e6ca08fca 100644 --- a/utils/local-engine/java/pom.xml +++ b/utils/local-engine/java/pom.xml @@ -24,6 +24,8 @@ 2.6.5 0.9.2-kylin-r3 20.0 + 2.12 + 3.1.1 @@ -44,6 +46,18 @@ 4.13.2 test + + org.apache.spark + spark-catalyst_${scala.binary.version} + ${spark.version} + + + org.apache.arrow + * + + + test + diff --git a/utils/local-engine/java/src/test/java/io/kyligence/jni/engine/LocalEngineTest.java b/utils/local-engine/java/src/test/java/io/kyligence/jni/engine/LocalEngineTest.java index 0fe2a457d21d..6d6b1ace20f9 100644 --- a/utils/local-engine/java/src/test/java/io/kyligence/jni/engine/LocalEngineTest.java +++ b/utils/local-engine/java/src/test/java/io/kyligence/jni/engine/LocalEngineTest.java @@ -5,6 +5,7 @@ import org.apache.arrow.vector.ipc.SeekableReadChannel; import org.apache.arrow.vector.ipc.message.ArrowRecordBatch; import org.apache.commons.io.IOUtils; +import org.apache.spark.sql.catalyst.expressions.UnsafeRow; import org.junit.Assert; import org.junit.Before; import org.junit.Ignore; @@ -34,5 +35,6 @@ public void testLocalEngine() throws Exception{ SparkRowInfo data = localEngine.next(); Assert.assertTrue(data.memoryAddress > 0); Assert.assertEquals(150, data.offsets.length); + UnsafeRow row = new UnsafeRow(5); } } From 7a110ab04076e5ba28172000d7152137c933df94 Mon Sep 17 00:00:00 2001 From: "neng.liu" Date: Mon, 6 Dec 2021 07:52:02 +0000 Subject: [PATCH 226/472] add number test --- .../src/test/java/io/kyligence/jni/engine/LocalEngineTest.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/utils/local-engine/java/src/test/java/io/kyligence/jni/engine/LocalEngineTest.java b/utils/local-engine/java/src/test/java/io/kyligence/jni/engine/LocalEngineTest.java index 6d6b1ace20f9..52cd875bae79 100644 --- a/utils/local-engine/java/src/test/java/io/kyligence/jni/engine/LocalEngineTest.java +++ b/utils/local-engine/java/src/test/java/io/kyligence/jni/engine/LocalEngineTest.java @@ -36,5 +36,8 @@ public void 
testLocalEngine() throws Exception{ Assert.assertTrue(data.memoryAddress > 0); Assert.assertEquals(150, data.offsets.length); UnsafeRow row = new UnsafeRow(5); + row.pointTo(null, data.memoryAddress + data.offsets[5], (int) data.lengths[5]); + Assert.assertEquals(5.4, row.getDouble(2), 0.00001); + Assert.assertEquals(0, row.getInt(4)); } } From 0f8d6525d985609d11d52b860600e393a07b4b4c Mon Sep 17 00:00:00 2001 From: Neng Liu Date: Mon, 6 Dec 2021 17:32:15 +0800 Subject: [PATCH 227/472] update test data --- .../kyligence/jni/engine/LocalEngineTest.java | 4 ---- utils/local-engine/tests/data/iris.parquet | Bin 5012 -> 5646 bytes 2 files changed, 4 deletions(-) diff --git a/utils/local-engine/java/src/test/java/io/kyligence/jni/engine/LocalEngineTest.java b/utils/local-engine/java/src/test/java/io/kyligence/jni/engine/LocalEngineTest.java index 52cd875bae79..6938574f1fe6 100644 --- a/utils/local-engine/java/src/test/java/io/kyligence/jni/engine/LocalEngineTest.java +++ b/utils/local-engine/java/src/test/java/io/kyligence/jni/engine/LocalEngineTest.java @@ -1,14 +1,10 @@ package io.kyligence.jni.engine; -import org.apache.arrow.vector.ipc.ArrowFileReader; -import org.apache.arrow.vector.ipc.SeekableReadChannel; -import org.apache.arrow.vector.ipc.message.ArrowRecordBatch; import org.apache.commons.io.IOUtils; import org.apache.spark.sql.catalyst.expressions.UnsafeRow; import org.junit.Assert; import org.junit.Before; -import org.junit.Ignore; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.JUnit4; diff --git a/utils/local-engine/tests/data/iris.parquet b/utils/local-engine/tests/data/iris.parquet index 70da577573cd888f9efee3a953b18b402fef3ccc..20979952d618cb03acb0c32d20ed645f312f7f3d 100644 GIT binary patch delta 1200 zcmbu8&rcIk5XYB_tx9VMu+26$q0~?~G`QPRq$FzI7IwQ$DX>U?Y+|tcqh-tPQd~+| zqVfkA^{^g|XAhW2IC?ViWa81ACLAG9DlZ;@iQSuP+|ak-j8zdQ6c786{&$a^FYx!gPOhiWm=gsVVB% zq20TIp_|`hQ8F0G^b-CE)wkuewN-oTo^CZsUwGdae%}***OT};oOn5WHsWm{9J-$& zD8H?&X=a@ax?ck2VKO-C`-?V86r*rhh>S!d#v!=i^W94Pck97u_=tkc#HaJn86t>C zlJd9fmRh%|&;G;JOHY073MONyJhxt}ifck^ zqbXZuTh22og4fao1Kw{dIt1r!UFHp4F*!%`8g;b_=O|`2jX9QNC|0R0rzZ6U9q9AU zWVhG?dSe;(u-G-(6xS3co06)9z1?~x+d<8Q)?5cBI)X)`MR{Ztp6f_FxBL5C0CNB= zph#{5SrwVtCbBR9C0c~yL8-vf$`U#Y$N=nOFNb1zF{AM1hLRSYdPT}uC;@7V0i_dw za3%o10S2siKmg_eXacr-+zZ{}5No>!1EYI9o0lX>Uyym?%Ey6~R2amj#6+SRn$mhuo% zJO#k5;ka;tCV;g_9#0L-0duohl^h0Q*Skk~?5N@O9O&? 
VdvGjjz(4dqKjLACAQpkh`~nj*Xp#T` delta 722 zcmYjOU2oE06z(wF)GctcO-Q&Pn9Li3rHHyEnw*xew{9SqP-vDcr5&YEy2Siw&c zc6i_ae7CiG^evJ|CN6wutG^oG`?8fh`uYH4$A7jF9B^DB6W_Vf97A*Cf45hQy^j(9 zeKhw{$nse}y~w1}A$d8udh1%(PWm*J=CeW}n@caa_&zk7b>%cH7=O3_m@IFW{D09G z5z+JWv4_zj4<>*JX!VA)GGg|fW&4e1vQcfODrLT#XI9-3pRv*OrsK$c-5KIHSbEh) zEKiIXbD1OyYFnOb%E_1wu1z*z<8$eW#~hwGucH)(op!Bfj9N2YuqR!MJ!KN}&IT-Z zHYi!W29<{jIlWR|j@3$ej<`&L=j2{_=tQ1im>YPW<{<+5#Lyt?Q3wE($Qf1JQ^QLP zzyk@Ox=tlp45-Q+m8epwSb`;msbDBT0Kmmp^wf5<-z}fyHKoaGAZqIdB#<`+Fcqty z1q}oU3vb+aRi)q;rO?8)O?!r54=HhSn}iF%o>3pyr9+X_ z@$cPMYMciH$rNAjicpYOt=YI Date: Mon, 6 Dec 2021 10:04:55 +0000 Subject: [PATCH 228/472] add string test --- .../test/java/io/kyligence/jni/engine/LocalEngineTest.java | 3 ++- utils/local-engine/tests/gtest_local_engine.cpp | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/utils/local-engine/java/src/test/java/io/kyligence/jni/engine/LocalEngineTest.java b/utils/local-engine/java/src/test/java/io/kyligence/jni/engine/LocalEngineTest.java index 6938574f1fe6..b1434a4c0d3f 100644 --- a/utils/local-engine/java/src/test/java/io/kyligence/jni/engine/LocalEngineTest.java +++ b/utils/local-engine/java/src/test/java/io/kyligence/jni/engine/LocalEngineTest.java @@ -31,9 +31,10 @@ public void testLocalEngine() throws Exception{ SparkRowInfo data = localEngine.next(); Assert.assertTrue(data.memoryAddress > 0); Assert.assertEquals(150, data.offsets.length); - UnsafeRow row = new UnsafeRow(5); + UnsafeRow row = new UnsafeRow(6); row.pointTo(null, data.memoryAddress + data.offsets[5], (int) data.lengths[5]); Assert.assertEquals(5.4, row.getDouble(2), 0.00001); Assert.assertEquals(0, row.getInt(4)); + Assert.assertEquals("类型0", row.getUTF8String(5).toString()); } } diff --git a/utils/local-engine/tests/gtest_local_engine.cpp b/utils/local-engine/tests/gtest_local_engine.cpp index 07e5172c6324..09e5bc4fc55c 100644 --- a/utils/local-engine/tests/gtest_local_engine.cpp +++ b/utils/local-engine/tests/gtest_local_engine.cpp @@ -15,7 +15,7 @@ TEST(TestSelect, ReadRel) .column("sepal_width", "FP64") .column("petal_length", "FP64") .column("petal_width", "FP64") - .column("type", "I64") + .column("type", "I64").column("type_string", "String") .build(); dbms::SerializedPlanBuilder plan_builder; auto plan = plan_builder.files( TEST_DATA(/data/iris.parquet), std::move(schema)).build(); @@ -59,4 +59,4 @@ int main(int argc, char **argv) { ::testing::InitGoogleTest(&argc,argv); return RUN_ALL_TESTS(); -} \ No newline at end of file +} From b0c7ae2bd3c0fee1aa1f9a031d716e5fc0b2d217 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Mon, 6 Dec 2021 13:07:35 +0000 Subject: [PATCH 229/472] Backport #32117 to 21.9: Dictionaries custom query condition fix --- src/Dictionaries/ExternalQueryBuilder.cpp | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/Dictionaries/ExternalQueryBuilder.cpp b/src/Dictionaries/ExternalQueryBuilder.cpp index 9ddaaeb573a1..f513c7b2f615 100644 --- a/src/Dictionaries/ExternalQueryBuilder.cpp +++ b/src/Dictionaries/ExternalQueryBuilder.cpp @@ -15,6 +15,7 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } +static constexpr std::string_view CONDITION_PLACEHOLDER_TO_REPLACE_VALUE = "{condition}"; ExternalQueryBuilder::ExternalQueryBuilder( const DictionaryStructure & dict_struct_, @@ -215,7 +216,7 @@ std::string ExternalQueryBuilder::composeUpdateQuery(const std::string & update_ { writeString(query, out); - auto condition_position = query.find("{condition}"); + auto condition_position = 
query.find(CONDITION_PLACEHOLDER_TO_REPLACE_VALUE); if (condition_position == std::string::npos) { writeString(" WHERE ", out); @@ -230,7 +231,7 @@ std::string ExternalQueryBuilder::composeUpdateQuery(const std::string & update_ const auto & condition_value = condition_value_buffer.str(); auto query_copy = query; - query_copy.replace(condition_position, condition_value.size(), condition_value); + query_copy.replace(condition_position, CONDITION_PLACEHOLDER_TO_REPLACE_VALUE.size(), condition_value); return query_copy; } @@ -300,7 +301,7 @@ std::string ExternalQueryBuilder::composeLoadIdsQuery(const std::vector { writeString(query, out); - auto condition_position = query.find("{condition}"); + auto condition_position = query.find(CONDITION_PLACEHOLDER_TO_REPLACE_VALUE); if (condition_position == std::string::npos) { writeString(" WHERE ", out); @@ -315,7 +316,7 @@ std::string ExternalQueryBuilder::composeLoadIdsQuery(const std::vector const auto & condition_value = condition_value_buffer.str(); auto query_copy = query; - query_copy.replace(condition_position, condition_value.size(), condition_value); + query_copy.replace(condition_position, CONDITION_PLACEHOLDER_TO_REPLACE_VALUE.size(), condition_value); return query_copy; } @@ -391,7 +392,7 @@ std::string ExternalQueryBuilder::composeLoadKeysQuery( { writeString(query, out); - auto condition_position = query.find("{condition}"); + auto condition_position = query.find(CONDITION_PLACEHOLDER_TO_REPLACE_VALUE); if (condition_position == std::string::npos) { writeString(" WHERE ", out); @@ -406,7 +407,7 @@ std::string ExternalQueryBuilder::composeLoadKeysQuery( const auto & condition_value = condition_value_buffer.str(); auto query_copy = query; - query_copy.replace(condition_position, condition_value.size(), condition_value); + query_copy.replace(condition_position, CONDITION_PLACEHOLDER_TO_REPLACE_VALUE.size(), condition_value); return query_copy; } From f6d81afc6bee1cd5c35508826ef39108dbe1bdf2 Mon Sep 17 00:00:00 2001 From: "neng.liu" Date: Tue, 7 Dec 2021 06:26:26 +0000 Subject: [PATCH 230/472] add performance test --- .../local-engine/tests/gtest_local_engine.cpp | 44 +++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/utils/local-engine/tests/gtest_local_engine.cpp b/utils/local-engine/tests/gtest_local_engine.cpp index 09e5bc4fc55c..cd0f16a75536 100644 --- a/utils/local-engine/tests/gtest_local_engine.cpp +++ b/utils/local-engine/tests/gtest_local_engine.cpp @@ -42,6 +42,50 @@ TEST(TestSelect, ReadRel) } } +TEST(TestSelect, PerformanceTest) +{ + dbms::SerializedSchemaBuilder schema_builder; + auto schema = schema_builder + .column("l_orderkey", "I64") + .column("l_partkey", "I64") + .column("l_suppkey", "I64") + .column("l_linenumber", "I32") + .column("l_quantity", "FP64") + .column("l_extendedprice", "FP64") + .column("l_discount", "FP64") + .column("l_tax", "FP64") + .column("l_returnflag", "String") + .column("l_linestatus", "String") + .column("l_shipdate_new", "FP64") + .column("l_commitdate_new", "FP64") + .column("l_receiptdate_new", "FP64") + .column("l_shipinstruct", "String") + .column("l_shipmode", "String") + .column("l_comment", "String") + .build(); + dbms::SerializedPlanBuilder plan_builder; + auto plan = plan_builder.files("/home/kyligence/Documents/intel-gazelle-test.snappy.parquet", std::move(schema)).build(); + + ASSERT_TRUE(plan->relations(0).has_read()); + ASSERT_EQ(plan->relations_size(), 1); + auto query_plan = dbms::SerializedPlanParser::parse(std::move(plan)); + std::cout << "start execute" 
<getNumRows(), 0); + std::cout << "fetch batch" << spark_row_info->getNumRows() << " rows" << "" << "" < Date: Tue, 7 Dec 2021 09:15:42 +0000 Subject: [PATCH 231/472] optimize int transform --- .../Parser/CHColumnToSparkRow.cpp | 23 +++++++++---------- .../io/kyligence/jni/engine/LocalEngine.java | 2 +- .../kyligence/jni/engine/LocalEngineTest.java | 2 +- .../local-engine/tests/gtest_local_engine.cpp | 12 +++++----- 4 files changed, 19 insertions(+), 20 deletions(-) diff --git a/utils/local-engine/Parser/CHColumnToSparkRow.cpp b/utils/local-engine/Parser/CHColumnToSparkRow.cpp index 0ddf39b64380..ba59a5e48024 100644 --- a/utils/local-engine/Parser/CHColumnToSparkRow.cpp +++ b/utils/local-engine/Parser/CHColumnToSparkRow.cpp @@ -5,7 +5,7 @@ #include #include -#define WRITE_VECTOR_COLUMN(TYPE, PRIME_TYPE) \ +#define WRITE_VECTOR_COLUMN(TYPE, PRIME_TYPE, GETTER) \ const auto * type_col = checkAndGetColumn>(*col.column); \ for (auto i = 0; i < num_rows; i++) \ { \ @@ -16,9 +16,8 @@ } \ else \ { \ - Field value; \ - type_col->get(i, value); \ - memcpy(buffer_address + offsets[i] + field_offset, &value.get(), sizeof(PRIME_TYPE)); \ + auto * pointer = reinterpret_cast(buffer_address + offsets[i] + field_offset); \ + pointer[0] = type_col->GETTER(i);\ } \ } @@ -106,35 +105,35 @@ void writeValue( WhichDataType which(nested_col->getDataType()); if (which.isUInt8()) { - WRITE_VECTOR_COLUMN(UInt8, uint8_t) + WRITE_VECTOR_COLUMN(UInt8, uint8_t, get64) } else if (which.isInt8()) { - WRITE_VECTOR_COLUMN(Int8, int8_t) + WRITE_VECTOR_COLUMN(Int8, int8_t, get64) } else if (which.isInt16()) { - WRITE_VECTOR_COLUMN(Int16, int16_t) + WRITE_VECTOR_COLUMN(Int16, int16_t, get64) } else if (which.isInt32()) { - WRITE_VECTOR_COLUMN(Int32, int32_t) + WRITE_VECTOR_COLUMN(Int32, int32_t, get64) } else if (which.isInt64()) { - WRITE_VECTOR_COLUMN(Int64, int64_t) + WRITE_VECTOR_COLUMN(Int64, int64_t, get64) } else if (which.isFloat32()) { - WRITE_VECTOR_COLUMN(Float32, float_t) + WRITE_VECTOR_COLUMN(Float32, float_t, getFloat32) } else if (which.isFloat64()) { - WRITE_VECTOR_COLUMN(Float64, double_t) + WRITE_VECTOR_COLUMN(Float64, double_t, getFloat64) } else if (which.isDate()) { - WRITE_VECTOR_COLUMN(UInt16, uint16_t) + WRITE_VECTOR_COLUMN(UInt16, uint16_t, get64) } else if (which.isString()) { diff --git a/utils/local-engine/java/src/main/java/io/kyligence/jni/engine/LocalEngine.java b/utils/local-engine/java/src/main/java/io/kyligence/jni/engine/LocalEngine.java index 57c3dca016d3..6e4f74915da2 100644 --- a/utils/local-engine/java/src/main/java/io/kyligence/jni/engine/LocalEngine.java +++ b/utils/local-engine/java/src/main/java/io/kyligence/jni/engine/LocalEngine.java @@ -10,7 +10,7 @@ public class LocalEngine implements Closeable { public static void main(String[] args) throws InterruptedException { System.out.println("start load library"); - System.load("/home/kyligence/Documents/code/ClickHouse/cmake-build-debug/utils/local-engine/liblocal_engine_jnid.so"); + System.load("/home/kyligence/Documents/code/ClickHouse/cmake-build-debug/utils/local-engine/liblocal_engine_jni.so"); System.out.println("start in java"); long result = test(1, 2); System.out.println(result); diff --git a/utils/local-engine/java/src/test/java/io/kyligence/jni/engine/LocalEngineTest.java b/utils/local-engine/java/src/test/java/io/kyligence/jni/engine/LocalEngineTest.java index b1434a4c0d3f..a926f99b8626 100644 --- a/utils/local-engine/java/src/test/java/io/kyligence/jni/engine/LocalEngineTest.java +++ 
b/utils/local-engine/java/src/test/java/io/kyligence/jni/engine/LocalEngineTest.java @@ -18,7 +18,7 @@ public class LocalEngineTest { @Before public void setup() { System.out.println("start load"); - System.load("/home/kyligence/Documents/code/ClickHouse/cmake-build-debug/utils/local-engine/liblocal_engine_jnid.so"); + System.load("/home/kyligence/Documents/code/ClickHouse/cmake-build-release/utils/local-engine/liblocal_engine_jni.so"); System.out.println("load success"); } diff --git a/utils/local-engine/tests/gtest_local_engine.cpp b/utils/local-engine/tests/gtest_local_engine.cpp index cd0f16a75536..09418afebd14 100644 --- a/utils/local-engine/tests/gtest_local_engine.cpp +++ b/utils/local-engine/tests/gtest_local_engine.cpp @@ -54,17 +54,17 @@ TEST(TestSelect, PerformanceTest) .column("l_extendedprice", "FP64") .column("l_discount", "FP64") .column("l_tax", "FP64") - .column("l_returnflag", "String") - .column("l_linestatus", "String") +// .column("l_returnflag", "String") +// .column("l_linestatus", "String") .column("l_shipdate_new", "FP64") .column("l_commitdate_new", "FP64") .column("l_receiptdate_new", "FP64") - .column("l_shipinstruct", "String") - .column("l_shipmode", "String") - .column("l_comment", "String") +// .column("l_shipinstruct", "String") +// .column("l_shipmode", "String") +// .column("l_comment", "String") .build(); dbms::SerializedPlanBuilder plan_builder; - auto plan = plan_builder.files("/home/kyligence/Documents/intel-gazelle-test.snappy.parquet", std::move(schema)).build(); + auto plan = plan_builder.files("/home/kyligence/Documents/intel_without_string.parquet", std::move(schema)).build(); ASSERT_TRUE(plan->relations(0).has_read()); ASSERT_EQ(plan->relations_size(), 1); From 914b852cc9557b89359d81603c44dffc59b7c37e Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 7 Dec 2021 13:10:41 +0000 Subject: [PATCH 232/472] Backport #32289 to 21.9: fix bug when remove unneeded columns in subquery --- src/Interpreters/TreeRewriter.cpp | 6 +++++- .../0_stateless/02131_remove_columns_in_subquery.reference | 1 + .../0_stateless/02131_remove_columns_in_subquery.sql | 1 + 3 files changed, 7 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/02131_remove_columns_in_subquery.reference create mode 100644 tests/queries/0_stateless/02131_remove_columns_in_subquery.sql diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index fe8440eb3a47..8386859cdcda 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -401,9 +401,13 @@ void removeUnneededColumnsFromSelectClause(const ASTSelectQuery * select_query, ASTFunction * func = elem->as(); /// Never remove untuple. It's result column may be in required columns. - /// It is not easy to analyze untuple here, because types were not calculated yes. + /// It is not easy to analyze untuple here, because types were not calculated yet. 
if (func && func->name == "untuple") new_elements.push_back(elem); + + /// removing aggregation can change number of rows, so `count()` result in outer sub-query would be wrong + if (func && AggregateFunctionFactory::instance().isAggregateFunctionName(func->name) && !select_query->groupBy()) + new_elements.push_back(elem); } } diff --git a/tests/queries/0_stateless/02131_remove_columns_in_subquery.reference b/tests/queries/0_stateless/02131_remove_columns_in_subquery.reference new file mode 100644 index 000000000000..d00491fd7e5b --- /dev/null +++ b/tests/queries/0_stateless/02131_remove_columns_in_subquery.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/02131_remove_columns_in_subquery.sql b/tests/queries/0_stateless/02131_remove_columns_in_subquery.sql new file mode 100644 index 000000000000..f9ca2269aad9 --- /dev/null +++ b/tests/queries/0_stateless/02131_remove_columns_in_subquery.sql @@ -0,0 +1 @@ +select count(1) from (SELECT 1 AS a, count(1) FROM numbers(5)) From 5cc47e808199a131fdbc4ee6ccaca7978cbcc39e Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 7 Dec 2021 16:08:05 +0000 Subject: [PATCH 233/472] Backport #32303 to 21.9: Fix division by zero in avgWeighted with Decimal argument --- src/AggregateFunctions/AggregateFunctionAvg.h | 39 +++++-------------- .../AggregateFunctionAvgWeighted.cpp | 6 +-- .../01668_avg_weighted_ubsan.reference | 13 +++++++ .../0_stateless/01668_avg_weighted_ubsan.sql | 4 ++ 4 files changed, 30 insertions(+), 32 deletions(-) diff --git a/src/AggregateFunctions/AggregateFunctionAvg.h b/src/AggregateFunctions/AggregateFunctionAvg.h index ad5c67d88d45..900fe429b495 100644 --- a/src/AggregateFunctions/AggregateFunctionAvg.h +++ b/src/AggregateFunctions/AggregateFunctionAvg.h @@ -20,6 +20,7 @@ namespace DB { + struct Settings; template using DecimalOrVectorCol = std::conditional_t, ColumnDecimal, ColumnVector>; @@ -44,39 +45,19 @@ struct AvgFraction /// Invoked only is either Numerator or Denominator are Decimal. Float64 NO_SANITIZE_UNDEFINED divideIfAnyDecimal(UInt32 num_scale, UInt32 denom_scale [[maybe_unused]]) const { - if constexpr (IsDecimalNumber && IsDecimalNumber) - { - // According to the docs, num(S1) / denom(S2) would have scale S1 - - if constexpr (std::is_same_v && std::is_same_v) - ///Special case as Decimal256 / Decimal128 = compile error (as Decimal128 is not parametrized by a wide - ///int), but an __int128 instead - return DecimalUtils::convertTo( - numerator / (denominator.template convertTo()), num_scale); - else - return DecimalUtils::convertTo(numerator / denominator, num_scale); - } - - /// Numerator is always casted to Float64 to divide correctly if the denominator is not Float64. - Float64 num_converted; - - if constexpr (IsDecimalNumber) - num_converted = DecimalUtils::convertTo(numerator, num_scale); + Float64 numerator_float; + if constexpr (is_decimal) + numerator_float = DecimalUtils::convertTo(numerator, num_scale); else - num_converted = static_cast(numerator); /// all other types, including extended integral. - - std::conditional_t, - Float64, Denominator> denom_converted; + numerator_float = numerator; - if constexpr (IsDecimalNumber) - denom_converted = DecimalUtils::convertTo(denominator, denom_scale); - else if constexpr (DecimalOrExtendedInt) - /// no way to divide Float64 and extended integral type without an explicit cast. 
- denom_converted = static_cast(denominator); + Float64 denominator_float; + if constexpr (is_decimal) + denominator_float = DecimalUtils::convertTo(denominator, denom_scale); else - denom_converted = denominator; /// can divide on float, no cast required. + denominator_float = denominator; - return num_converted / denom_converted; + return numerator_float / denominator_float; } Float64 NO_SANITIZE_UNDEFINED divide() const diff --git a/src/AggregateFunctions/AggregateFunctionAvgWeighted.cpp b/src/AggregateFunctions/AggregateFunctionAvgWeighted.cpp index b7fdb3460e34..ab6fdc8fd7e8 100644 --- a/src/AggregateFunctions/AggregateFunctionAvgWeighted.cpp +++ b/src/AggregateFunctions/AggregateFunctionAvgWeighted.cpp @@ -82,17 +82,17 @@ createAggregateFunctionAvgWeighted(const std::string & name, const DataTypes & a const bool left_decimal = isDecimal(data_type); const bool right_decimal = isDecimal(data_type_weight); + /// We multiply value by weight, so actual scale of numerator is + if (left_decimal && right_decimal) ptr.reset(create(*data_type, *data_type_weight, argument_types, - getDecimalScale(*data_type), getDecimalScale(*data_type_weight))); + getDecimalScale(*data_type) + getDecimalScale(*data_type_weight), getDecimalScale(*data_type_weight))); else if (left_decimal) ptr.reset(create(*data_type, *data_type_weight, argument_types, getDecimalScale(*data_type))); else if (right_decimal) ptr.reset(create(*data_type, *data_type_weight, argument_types, - // numerator is not decimal, so its scale is 0 - 0, getDecimalScale(*data_type_weight))); + getDecimalScale(*data_type_weight), getDecimalScale(*data_type_weight))); else ptr.reset(create(*data_type, *data_type_weight, argument_types)); diff --git a/tests/queries/0_stateless/01668_avg_weighted_ubsan.reference b/tests/queries/0_stateless/01668_avg_weighted_ubsan.reference index ec064f61ba77..a8921b27cffc 100644 --- a/tests/queries/0_stateless/01668_avg_weighted_ubsan.reference +++ b/tests/queries/0_stateless/01668_avg_weighted_ubsan.reference @@ -1 +1,14 @@ -0 +nan +nan +1 +2 +3 +4 +5 +6 +7 +8 +9 +nan +nan diff --git a/tests/queries/0_stateless/01668_avg_weighted_ubsan.sql b/tests/queries/0_stateless/01668_avg_weighted_ubsan.sql index 24e7dc0cb90d..1c31c23eaee3 100644 --- a/tests/queries/0_stateless/01668_avg_weighted_ubsan.sql +++ b/tests/queries/0_stateless/01668_avg_weighted_ubsan.sql @@ -1 +1,5 @@ SELECT round(avgWeighted(x, y)) FROM (SELECT 1023 AS x, 1000000000 AS y UNION ALL SELECT 10 AS x, -9223372036854775808 AS y); +select avgWeighted(number, toDecimal128(number, 9)) from numbers(0); +SELECT avgWeighted(a, toDecimal64(c, 9)) OVER (PARTITION BY c) FROM (SELECT number AS a, number AS c FROM numbers(10)); +select avg(toDecimal128(number, 9)) from numbers(0); +select avgWeighted(number, toDecimal128(0, 9)) from numbers(10); From 97326d29a4bf9a4d779e74805aa3d4ad6e283e28 Mon Sep 17 00:00:00 2001 From: "neng.liu" Date: Wed, 8 Dec 2021 02:14:43 +0000 Subject: [PATCH 234/472] add performance test --- .../local-engine/tests/gtest_local_engine.cpp | 76 ++++++++++--------- 1 file changed, 41 insertions(+), 35 deletions(-) diff --git a/utils/local-engine/tests/gtest_local_engine.cpp b/utils/local-engine/tests/gtest_local_engine.cpp index 09418afebd14..07134e27dffb 100644 --- a/utils/local-engine/tests/gtest_local_engine.cpp +++ b/utils/local-engine/tests/gtest_local_engine.cpp @@ -29,14 +29,14 @@ TEST(TestSelect, ReadRel) ASSERT_TRUE(plan->relations(0).has_read()); ASSERT_EQ(plan->relations_size(), 1); - auto query_plan = 
dbms::SerializedPlanParser::parse(std::move(plan)); std::cout << "start execute" <getNumRows(), 0); } @@ -44,43 +44,49 @@ TEST(TestSelect, ReadRel) TEST(TestSelect, PerformanceTest) { - dbms::SerializedSchemaBuilder schema_builder; - auto schema = schema_builder - .column("l_orderkey", "I64") - .column("l_partkey", "I64") - .column("l_suppkey", "I64") - .column("l_linenumber", "I32") - .column("l_quantity", "FP64") - .column("l_extendedprice", "FP64") - .column("l_discount", "FP64") - .column("l_tax", "FP64") -// .column("l_returnflag", "String") -// .column("l_linestatus", "String") - .column("l_shipdate_new", "FP64") - .column("l_commitdate_new", "FP64") - .column("l_receiptdate_new", "FP64") -// .column("l_shipinstruct", "String") -// .column("l_shipmode", "String") -// .column("l_comment", "String") - .build(); - dbms::SerializedPlanBuilder plan_builder; - auto plan = plan_builder.files("/home/kyligence/Documents/intel_without_string.parquet", std::move(schema)).build(); - ASSERT_TRUE(plan->relations(0).has_read()); - ASSERT_EQ(plan->relations_size(), 1); - auto query_plan = dbms::SerializedPlanParser::parse(std::move(plan)); - std::cout << "start execute" <getNumRows(), 0); - std::cout << "fetch batch" << spark_row_info->getNumRows() << " rows" << "" << "" <relations(0).has_read()); + ASSERT_EQ(plan->relations_size(), 1); + auto query_plan = dbms::SerializedPlanParser::parse(std::move(plan)); + std::cout << "start execute" << std::endl; + dbms::LocalExecutor local_executor; + + local_executor.execute(std::move(query_plan)); + ASSERT_TRUE(local_executor.hasNext()); + while (local_executor.hasNext()) + { + local_engine::SparkRowInfoPtr spark_row_info = local_executor.next(); + ASSERT_GT(spark_row_info->getNumRows(), 0); + std::cout << "fetch batch" << spark_row_info->getNumRows() << " rows" + << "" + << "" << std::endl; + } } auto duration = stopwatch.elapsedMilliseconds(); std::cout <<"duration:" << duration << std::endl; From bd8743df5484bae22951c5229b0cf9681c18f53d Mon Sep 17 00:00:00 2001 From: tavplubix Date: Wed, 8 Dec 2021 13:45:06 +0300 Subject: [PATCH 235/472] Update AggregateFunctionAvg.h --- src/AggregateFunctions/AggregateFunctionAvg.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/AggregateFunctions/AggregateFunctionAvg.h b/src/AggregateFunctions/AggregateFunctionAvg.h index 900fe429b495..b44df5f38a60 100644 --- a/src/AggregateFunctions/AggregateFunctionAvg.h +++ b/src/AggregateFunctions/AggregateFunctionAvg.h @@ -46,13 +46,13 @@ struct AvgFraction Float64 NO_SANITIZE_UNDEFINED divideIfAnyDecimal(UInt32 num_scale, UInt32 denom_scale [[maybe_unused]]) const { Float64 numerator_float; - if constexpr (is_decimal) + if constexpr (IsDecimalNumber) numerator_float = DecimalUtils::convertTo(numerator, num_scale); else numerator_float = numerator; Float64 denominator_float; - if constexpr (is_decimal) + if constexpr (IsDecimalNumber) denominator_float = DecimalUtils::convertTo(denominator, denom_scale); else denominator_float = denominator; From 1afc6d5af55043b569c6c4edf1b262286dd97679 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 8 Dec 2021 13:08:55 +0000 Subject: [PATCH 236/472] Backport #32270 to 21.9: Fix possible Pipeline stuck in case of StrictResize processor. 
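
The stall could happen when every output of StrictResizeProcessor had already finished while its inputs stayed open: prepare() never reported completion, so the query hung. The rule enforced by the change below can be modelled with a small self-contained sketch; ToyStrictResize, Port and Status are illustrative names only, not the real IProcessor interface, and the sketch assumes that only the finish-propagation logic matters: count each finished output once, and once the count reaches outputs.size(), close every input and return Finished.

    #include <cstddef>
    #include <vector>

    enum class Status { NeedData, Finished };

    struct Port { bool finished = false; };

    struct ToyStrictResize
    {
        std::vector<Port> inputs;
        std::vector<Port> outputs;
        std::size_t num_finished_outputs = 0;

        /// Count each output once, even if it is closed twice
        /// (mirrors the `if (output.status != OutputStatus::Finished)` check).
        void finishOutput(std::size_t i)
        {
            if (!outputs[i].finished)
            {
                outputs[i].finished = true;
                ++num_finished_outputs;
            }
        }

        Status prepare()
        {
            /// Once no output can consume data, keeping the inputs open would
            /// leave the processor (and the whole pipeline) waiting forever.
            if (num_finished_outputs == outputs.size())
            {
                for (auto & input : inputs)
                    input.finished = true;
                return Status::Finished;
            }
            return Status::NeedData;
        }
    };

    int main()
    {
        ToyStrictResize p{std::vector<Port>(2), std::vector<Port>(3)};
        p.finishOutput(0);
        p.finishOutput(1);
        p.finishOutput(1); /// a second close of the same output is not double-counted
        p.finishOutput(2);
        return p.prepare() == Status::Finished ? 0 : 1;
    }
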
--- src/Processors/ResizeProcessor.cpp | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/src/Processors/ResizeProcessor.cpp b/src/Processors/ResizeProcessor.cpp index d652a342150f..f5ee1cb487cb 100644 --- a/src/Processors/ResizeProcessor.cpp +++ b/src/Processors/ResizeProcessor.cpp @@ -403,12 +403,22 @@ IProcessor::Status StrictResizeProcessor::prepare(const PortNumbers & updated_in /// Close all other waiting for data outputs (there is no corresponding input for them). while (!waiting_outputs.empty()) { - auto & output = output_ports[waiting_outputs.front()]; - waiting_outputs.pop(); + auto & output = output_ports[waiting_outputs.front()]; + waiting_outputs.pop(); + + if (output.status != OutputStatus::Finished) + ++num_finished_outputs; - output.status = OutputStatus::Finished; - output.port->finish(); - ++num_finished_outputs; + output.status = OutputStatus::Finished; + output.port->finish(); + } + + if (num_finished_outputs == outputs.size()) + { + for (auto & input : inputs) + input.close(); + + return Status::Finished; } if (disabled_input_ports.empty()) @@ -418,4 +428,3 @@ IProcessor::Status StrictResizeProcessor::prepare(const PortNumbers & updated_in } } - From 882033c1a6e50d596a67f76b2553ac56bf554fb3 Mon Sep 17 00:00:00 2001 From: "neng.liu" Date: Thu, 9 Dec 2021 03:06:54 +0000 Subject: [PATCH 237/472] fix memset problem --- utils/local-engine/Parser/CHColumnToSparkRow.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/utils/local-engine/Parser/CHColumnToSparkRow.cpp b/utils/local-engine/Parser/CHColumnToSparkRow.cpp index ba59a5e48024..970ed45807c0 100644 --- a/utils/local-engine/Parser/CHColumnToSparkRow.cpp +++ b/utils/local-engine/Parser/CHColumnToSparkRow.cpp @@ -249,6 +249,7 @@ std::unique_ptr local_engine::CHColumnToSparkRow::convertCHColumnT } spark_row_info->total_bytes_ = total_memory_size; spark_row_info->buffer_address_ = reinterpret_cast(alloc(total_memory_size)); + memset(spark_row_info->buffer_address_, 0, sizeof(int8_t) * spark_row_info->total_bytes_); for (auto i = 0; i < spark_row_info->num_cols_; i++) { auto array = block.getByPosition(i); From d91ba318d230d731a5b7c8522b9d5f052c126c52 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 10 Dec 2021 22:07:49 +0000 Subject: [PATCH 238/472] Backport #32201 to 21.9: Try fix 'Directory tmp_merge_' already exists --- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 80 +++++++++++-------- src/Storages/MergeTree/IMergeTreeDataPart.h | 1 + src/Storages/StorageReplicatedMergeTree.cpp | 4 +- 3 files changed, 49 insertions(+), 36 deletions(-) diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 9d9bd795a503..0eb4f7248513 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -417,49 +417,60 @@ void IMergeTreeDataPart::setColumns(const NamesAndTypesList & new_columns) void IMergeTreeDataPart::removeIfNeeded() { - if (state == State::DeleteOnDestroy || is_temp) - { - try - { - auto path = getFullRelativePath(); + if (!is_temp && state != State::DeleteOnDestroy) + return; - if (!volume->getDisk()->exists(path)) - return; + try + { + auto path = getFullRelativePath(); - if (is_temp) - { - String file_name = fileName(relative_path); + if (!volume->getDisk()->exists(path)) + return; - if (file_name.empty()) - throw Exception("relative_path " + relative_path + " of part " + name + " is invalid or not set", ErrorCodes::LOGICAL_ERROR); + if (is_temp) + { + String 
file_name = fileName(relative_path); - if (!startsWith(file_name, "tmp")) - { - LOG_ERROR(storage.log, "~DataPart() should remove part {} but its name doesn't start with tmp. Too suspicious, keeping the part.", path); - return; - } - } + if (file_name.empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "relative_path {} of part {} is invalid or not set", relative_path, name); - if (parent_part) + if (!startsWith(file_name, "tmp") && !endsWith(file_name, ".tmp_proj")) { - std::optional keep_shared_data = keepSharedDataInDecoupledStorage(); - if (!keep_shared_data.has_value()) - return; - projectionRemove(parent_part->getFullRelativePath(), *keep_shared_data); + LOG_ERROR( + storage.log, + "~DataPart() should remove part {} but its name doesn't start with \"tmp\" or end with \".tmp_proj\". Too " + "suspicious, keeping the part.", + path); + return; } - else - remove(); + } - if (state == State::DeleteOnDestroy) - { - LOG_TRACE(storage.log, "Removed part from old location {}", path); - } + if (parent_part) + { + std::optional keep_shared_data = keepSharedDataInDecoupledStorage(); + if (!keep_shared_data.has_value()) + return; + projectionRemove(parent_part->getFullRelativePath(), *keep_shared_data); } - catch (...) + else + remove(); + + if (state == State::DeleteOnDestroy) { - tryLogCurrentException(__PRETTY_FUNCTION__); + LOG_TRACE(storage.log, "Removed part from old location {}", path); } } + catch (...) + { + /// FIXME If part it temporary, then directory will not be removed for 1 day (temporary_directories_lifetime). + /// If it's tmp_merge_ or tmp_fetch_, + /// then all future attempts to execute part producing operation will fail with "directory already exists". + /// Seems like it's especially important for remote disks, because removal may fail due to network issues. + tryLogCurrentException(__PRETTY_FUNCTION__); + assert(!is_temp); + assert(state != State::DeleteOnDestroy); + assert(state != State::Temporary); + } } @@ -1162,14 +1173,17 @@ void IMergeTreeDataPart::remove() const * And a race condition can happen that will lead to "File not found" error here. */ + /// NOTE We rename part to delete_tmp_ instead of delete_tmp_ to avoid race condition + /// when we try to remove two parts with the same name, but different relative paths, + /// for example all_1_2_1 (in Deleting state) and tmp_merge_all_1_2_1 (in Temporary state). fs::path from = fs::path(storage.relative_data_path) / relative_path; - fs::path to = fs::path(storage.relative_data_path) / ("delete_tmp_" + name); + fs::path to = fs::path(storage.relative_data_path) / ("delete_tmp_" + relative_path); // TODO directory delete_tmp_ is never removed if server crashes before returning from this function auto disk = volume->getDisk(); if (disk->exists(to)) { - LOG_WARNING(storage.log, "Directory {} (to which part must be renamed before removing) already exists. Most likely this is due to unclean restart. Removing it.", fullPath(disk, to)); + LOG_WARNING(storage.log, "Directory {} (to which part must be renamed before removing) already exists. Most likely this is due to unclean restart or race condition. 
Removing it.", fullPath(disk, to)); try { disk->removeSharedRecursive(fs::path(to) / "", *keep_shared_data); diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index fa14c749c035..5817fd8b31dc 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -188,6 +188,7 @@ class IMergeTreeDataPart : public std::enable_shared_from_this remove_time { std::numeric_limits::max() }; /// If true, the destructor will delete the directory with the part. + /// FIXME Why do we need this flag? What's difference from Temporary and DeleteOnDestroy state? Can we get rid of this? bool is_temp = false; /// If true it means that there are no ZooKeeper node for this part, so it should be deleted only from filesystem diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 96ea15484a58..0791b522b42a 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -1366,9 +1366,6 @@ void StorageReplicatedMergeTree::checkPartChecksumsAndAddCommitOps(const zkutil: const auto storage_settings_ptr = getSettings(); String part_path = fs::path(replica_path) / "parts" / part_name; - //ops.emplace_back(zkutil::makeCheckRequest( - // zookeeper_path + "/columns", expected_columns_version)); - if (storage_settings_ptr->use_minimalistic_part_header_in_zookeeper) { ops.emplace_back(zkutil::makeCreateRequest( @@ -1414,6 +1411,7 @@ MergeTreeData::DataPartsVector StorageReplicatedMergeTree::checkPartChecksumsAnd Coordination::Requests new_ops; for (const String & part_path : absent_part_paths_on_replicas) { + /// NOTE Create request may fail with ZNONODE if replica is being dropped, we will throw an exception new_ops.emplace_back(zkutil::makeCreateRequest(part_path, "", zkutil::CreateMode::Persistent)); new_ops.emplace_back(zkutil::makeRemoveRequest(part_path, -1)); } From 0737fbbecc73298af7912c07b8560d84447f79c8 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Sat, 11 Dec 2021 07:04:10 +0000 Subject: [PATCH 239/472] Backport #32506 to 21.9: Fix queries with hasColumnInTable constant condition and non existing column --- src/Interpreters/OptimizeIfWithConstantConditionVisitor.cpp | 2 +- ...25_constant_if_condition_and_not_existing_column.reference | 4 ++++ .../02125_constant_if_condition_and_not_existing_column.sql | 3 +++ 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/src/Interpreters/OptimizeIfWithConstantConditionVisitor.cpp b/src/Interpreters/OptimizeIfWithConstantConditionVisitor.cpp index c53ee6dfefaa..7619118f28b0 100644 --- a/src/Interpreters/OptimizeIfWithConstantConditionVisitor.cpp +++ b/src/Interpreters/OptimizeIfWithConstantConditionVisitor.cpp @@ -51,7 +51,7 @@ static bool tryExtractConstValueFromCondition(const ASTPtr & condition, bool & v } } } - else if (function->name == "toUInt8" || function->name == "toInt8") + else if (function->name == "toUInt8" || function->name == "toInt8" || function->name == "identity") { if (const auto * expr_list = function->arguments->as()) { diff --git a/tests/queries/0_stateless/02125_constant_if_condition_and_not_existing_column.reference b/tests/queries/0_stateless/02125_constant_if_condition_and_not_existing_column.reference index 67f2590a0c6b..a7903610a420 100644 --- a/tests/queries/0_stateless/02125_constant_if_condition_and_not_existing_column.reference +++ b/tests/queries/0_stateless/02125_constant_if_condition_and_not_existing_column.reference @@ -6,3 +6,7 @@ 
42 42 42 +SELECT + x, + concat(x, \'_\') +FROM test diff --git a/tests/queries/0_stateless/02125_constant_if_condition_and_not_existing_column.sql b/tests/queries/0_stateless/02125_constant_if_condition_and_not_existing_column.sql index ad3d417bc26c..d2041a612a68 100644 --- a/tests/queries/0_stateless/02125_constant_if_condition_and_not_existing_column.sql +++ b/tests/queries/0_stateless/02125_constant_if_condition_and_not_existing_column.sql @@ -11,4 +11,7 @@ select if(toUInt8(1), 42, y) from test; select if(toInt8(1), 42, y) from test; select if(toUInt8(toUInt8(0)), y, 42) from test; select if(cast(cast(0, 'UInt8'), 'UInt8'), y, 42) from test; + +explain syntax select x, if((select hasColumnInTable(currentDatabase(), 'test', 'y')), y, x || '_') from test; + drop table if exists t; From 713e56285cdfd23ced7adc60b4290959356d5afc Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Sat, 11 Dec 2021 07:05:43 +0000 Subject: [PATCH 240/472] Backport #32508 to 21.9: Handle const column in JoinCommon::removeColumnNullability --- src/Interpreters/join_common.cpp | 6 ++++++ .../queries/0_stateless/02133_issue_32458.reference | 0 tests/queries/0_stateless/02133_issue_32458.sql | 13 +++++++++++++ 3 files changed, 19 insertions(+) create mode 100644 tests/queries/0_stateless/02133_issue_32458.reference create mode 100644 tests/queries/0_stateless/02133_issue_32458.sql diff --git a/src/Interpreters/join_common.cpp b/src/Interpreters/join_common.cpp index b653e4b8f2ee..a5b06155e44e 100644 --- a/src/Interpreters/join_common.cpp +++ b/src/Interpreters/join_common.cpp @@ -218,7 +218,13 @@ void removeColumnNullability(ColumnWithTypeAndName & column) if (column.column && column.column->isNullable()) { + column.column = column.column->convertToFullColumnIfConst(); const auto * nullable_col = checkAndGetColumn(*column.column); + if (!nullable_col) + { + throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "Column '{}' is expected to be nullable", column.dumpStructure()); + } + MutableColumnPtr mutable_column = nullable_col->getNestedColumn().cloneEmpty(); insertFromNullableOrDefault(mutable_column, nullable_col); column.column = std::move(mutable_column); diff --git a/tests/queries/0_stateless/02133_issue_32458.reference b/tests/queries/0_stateless/02133_issue_32458.reference new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/queries/0_stateless/02133_issue_32458.sql b/tests/queries/0_stateless/02133_issue_32458.sql new file mode 100644 index 000000000000..16af361db7a6 --- /dev/null +++ b/tests/queries/0_stateless/02133_issue_32458.sql @@ -0,0 +1,13 @@ +DROP TABLE IF EXISTS t1; +DROP TABLE IF EXISTS t2; + +CREATE TABLE t1 (`id` Int32, `key` String) ENGINE = Memory; +CREATE TABLE t2 (`id` Int32, `key` String) ENGINE = Memory; + +INSERT INTO t1 VALUES (0, ''); +INSERT INTO t2 VALUES (0, ''); + +SELECT * FROM t1 ANY INNER JOIN t2 ON ((NULL = t1.key) = t2.id) AND (('' = t1.key) = t2.id); + +DROP TABLE IF EXISTS t1; +DROP TABLE IF EXISTS t2; From 9eeac41e4855423d71553d58abfdcbb05a7b5c06 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Sun, 12 Dec 2021 10:02:18 +0000 Subject: [PATCH 241/472] Backport #32456 to 21.9: Fix arraySlice with null args. 
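
With a NULL offset and length, arraySlice must still return a column of the same shape as its first argument; the old code returned a locally preprocessed copy of that column (array_column), which for a constant argument could come back as a single-row column whose size no longer matched the block, hence the new test over numbers(2). A toy sketch of the difference follows; ToyColumn and sliceWithNullLength are made-up names that only stand in for the real ColumnConst handling.

    #include <cassert>
    #include <cstddef>

    /// Toy column: either a "const" column that logically repeats one value
    /// `rows` times, or a materialized full column.
    struct ToyColumn
    {
        bool is_const = false;
        std::size_t rows = 0;
    };

    /// Slice with a NULL length. For processing, a const argument may be
    /// unwrapped to its single stored row; returning that unwrapped copy
    /// (the old `return array_column;`) yields a result whose size no longer
    /// matches the block, while returning the untouched argument
    /// (the new `return arguments[0].column;`) keeps the expected shape.
    ToyColumn sliceWithNullLength(const ToyColumn & argument, bool old_behaviour)
    {
        ToyColumn unwrapped = argument;
        if (argument.is_const)
        {
            unwrapped.is_const = false;
            unwrapped.rows = 1;
        }
        return old_behaviour ? unwrapped : argument;
    }

    int main()
    {
        /// e.g. the constant [] repeated for every row of numbers(2)
        ToyColumn arg{true, 2};
        assert(sliceWithNullLength(arg, false).rows == 2); /// fixed: two rows, as expected
        assert(sliceWithNullLength(arg, true).rows == 1);  /// old: shape mismatch
        return 0;
    }
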
--- src/Functions/array/arraySlice.cpp | 2 +- .../00498_array_functions_concat_slice_push_pop.reference | 2 ++ .../0_stateless/00498_array_functions_concat_slice_push_pop.sql | 1 + 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/Functions/array/arraySlice.cpp b/src/Functions/array/arraySlice.cpp index d6b50f55563a..7a2e97de78a6 100644 --- a/src/Functions/array/arraySlice.cpp +++ b/src/Functions/array/arraySlice.cpp @@ -102,7 +102,7 @@ class FunctionArraySlice : public IFunction { if (!length_column || length_column->onlyNull()) { - return array_column; + return arguments[0].column; } else if (isColumnConst(*length_column)) sink = GatherUtils::sliceFromLeftConstantOffsetBounded(*source, 0, length_column->getInt(0)); diff --git a/tests/queries/0_stateless/00498_array_functions_concat_slice_push_pop.reference b/tests/queries/0_stateless/00498_array_functions_concat_slice_push_pop.reference index 1cc425443114..f757a86aeee0 100644 --- a/tests/queries/0_stateless/00498_array_functions_concat_slice_push_pop.reference +++ b/tests/queries/0_stateless/00498_array_functions_concat_slice_push_pop.reference @@ -35,6 +35,8 @@ slice [2,NULL,4,5] ['b','c','d'] ['b',NULL,'d'] +[] 1 +[] 1 push back \N [1,1] diff --git a/tests/queries/0_stateless/00498_array_functions_concat_slice_push_pop.sql b/tests/queries/0_stateless/00498_array_functions_concat_slice_push_pop.sql index 8f2f08111937..c87d52d24782 100644 --- a/tests/queries/0_stateless/00498_array_functions_concat_slice_push_pop.sql +++ b/tests/queries/0_stateless/00498_array_functions_concat_slice_push_pop.sql @@ -36,6 +36,7 @@ select arraySlice([1, 2, 3, 4, 5, 6], 10, 1); select arraySlice([1, 2, Null, 4, 5, 6], 2, 4); select arraySlice(['a', 'b', 'c', 'd', 'e'], 2, 3); select arraySlice([Null, 'b', Null, 'd', 'e'], 2, 3); +select arraySlice([], materialize(NULL), NULL), 1 from numbers(2); select 'push back'; select arrayPushBack(Null, 1); From 5faa32643c5192c17c2705417a472e959c35ed55 Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 13 Dec 2021 15:10:08 +0300 Subject: [PATCH 242/472] Sync CI with master --- tests/ci/ast_fuzzer_check.py | 16 +- tests/ci/build_check.py | 97 ++- tests/ci/build_report_check.py | 40 +- tests/ci/cancel_workflow_lambda/Dockerfile | 13 + tests/ci/cancel_workflow_lambda/app.py | 127 ++++ .../cancel_workflow_lambda/requirements.txt | 3 + tests/ci/ci_config.json | 634 +----------------- tests/ci/ci_config.py | 24 +- tests/ci/clickhouse_helper.py | 18 +- tests/ci/compatibility_check.py | 15 +- tests/ci/docker_images_check.py | 7 +- tests/ci/docker_pull_helper.py | 5 + tests/ci/docs_check.py | 37 +- tests/ci/docs_release.py | 8 +- tests/ci/fast_test_check.py | 51 +- tests/ci/finish_check.py | 7 +- tests/ci/functional_test_check.py | 91 ++- tests/ci/integration_test_check.py | 88 ++- tests/ci/metrics_lambda/app.py | 43 +- tests/ci/pr_info.py | 55 +- tests/ci/push_to_artifactory.py | 258 +++++++ tests/ci/pvs_check.py | 26 +- tests/ci/rerun_helper.py | 35 + tests/ci/run_check.py | 9 +- tests/ci/s3_helper.py | 4 +- tests/ci/split_build_smoke_check.py | 15 +- tests/ci/stress_check.py | 30 +- tests/ci/style_check.py | 29 +- tests/ci/tee_popen.py | 38 ++ tests/ci/termination_lambda/app.py | 4 +- tests/ci/unit_tests_check.py | 28 +- tests/ci/worker/init_builder.sh | 30 +- tests/ci/worker/init_func_tester.sh | 30 +- tests/ci/worker/init_fuzzer_unit_tester.sh | 34 + tests/ci/worker/init_stress_tester.sh | 30 +- .../workflow_approve_rerun_lambda/Dockerfile | 13 + tests/ci/workflow_approve_rerun_lambda/app.py | 373 
+++++++++++ .../requirements.txt | 3 + 38 files changed, 1477 insertions(+), 891 deletions(-) create mode 100644 tests/ci/cancel_workflow_lambda/Dockerfile create mode 100644 tests/ci/cancel_workflow_lambda/app.py create mode 100644 tests/ci/cancel_workflow_lambda/requirements.txt create mode 100755 tests/ci/push_to_artifactory.py create mode 100644 tests/ci/rerun_helper.py create mode 100644 tests/ci/tee_popen.py create mode 100644 tests/ci/worker/init_fuzzer_unit_tester.sh create mode 100644 tests/ci/workflow_approve_rerun_lambda/Dockerfile create mode 100644 tests/ci/workflow_approve_rerun_lambda/app.py create mode 100644 tests/ci/workflow_approve_rerun_lambda/requirements.txt diff --git a/tests/ci/ast_fuzzer_check.py b/tests/ci/ast_fuzzer_check.py index 02c81a4db318..bbf822c38796 100644 --- a/tests/ci/ast_fuzzer_check.py +++ b/tests/ci/ast_fuzzer_check.py @@ -3,25 +3,25 @@ import logging import subprocess import os -import json import sys from github import Github from s3_helper import S3Helper from get_robot_token import get_best_robot_token -from pr_info import PRInfo +from pr_info import PRInfo, get_event from build_download_helper import get_build_name_for_check, get_build_urls from docker_pull_helper import get_image_with_version from commit_status_helper import post_commit_status from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse from stopwatch import Stopwatch +from rerun_helper import RerunHelper IMAGE_NAME = 'clickhouse/fuzzer' def get_run_command(pr_number, sha, download_url, workspace_path, image): return f'docker run --network=host --volume={workspace_path}:/workspace ' \ - '--cap-add syslog --cap-add sys_admin ' \ + '--cap-add syslog --cap-add sys_admin --cap-add=SYS_PTRACE ' \ f'-e PR_TO_TEST={pr_number} -e SHA_TO_TEST={sha} -e BINARY_URL_TO_DOWNLOAD="{download_url}" '\ f'{image}' @@ -44,13 +44,15 @@ def get_commit(gh, commit_sha): if not os.path.exists(temp_path): os.makedirs(temp_path) - with open(os.getenv('GITHUB_EVENT_PATH'), 'r', encoding='utf-8') as event_file: - event = json.load(event_file) - - pr_info = PRInfo(event) + pr_info = PRInfo(get_event()) gh = Github(get_best_robot_token()) + rerun_helper = RerunHelper(gh, pr_info, check_name) + if rerun_helper.is_already_finished_by_status(): + logging.info("Check is already finished according to github status, exiting") + sys.exit(0) + docker_image = get_image_with_version(temp_path, IMAGE_NAME) build_name = get_build_name_for_check(check_name) diff --git a/tests/ci/build_check.py b/tests/ci/build_check.py index 67b443596a80..36db7d596c96 100644 --- a/tests/ci/build_check.py +++ b/tests/ci/build_check.py @@ -8,12 +8,13 @@ import time from github import Github from s3_helper import S3Helper -from pr_info import PRInfo +from pr_info import PRInfo, get_event from get_robot_token import get_best_robot_token from version_helper import get_version_from_repo, update_version_local from ccache_utils import get_ccache_if_not_exists, upload_ccache from ci_config import CI_CONFIG from docker_pull_helper import get_image_with_version +from tee_popen import TeePopen def get_build_config(build_check_name, build_name): @@ -48,8 +49,6 @@ def get_packager_cmd(build_config, packager_path, output_path, build_version, im cmd += ' --build-type={}'.format(build_config['build_type']) if build_config['sanitizer']: cmd += ' --sanitizer={}'.format(build_config['sanitizer']) - if build_config['bundled'] == 'unbundled': - cmd += ' --unbundled' if build_config['splitted'] == 'splitted': cmd += ' --split-binary' 
if build_config['tidy'] == 'enable': @@ -59,7 +58,7 @@ def get_packager_cmd(build_config, packager_path, output_path, build_version, im cmd += ' --ccache_dir={}'.format(ccache_path) if 'alien_pkgs' in build_config and build_config['alien_pkgs']: - if pr_info == 0 or 'release' in pr_info.labels: + if pr_info.number == 0 or 'release' in pr_info.labels: cmd += ' --alien-pkgs rpm tgz' cmd += ' --docker-image-version={}'.format(image_version) @@ -71,9 +70,7 @@ def get_packager_cmd(build_config, packager_path, output_path, build_version, im return cmd def get_image_name(build_config): - if build_config['bundled'] != 'bundled': - return 'clickhouse/unbundled-builder' - elif build_config['package_type'] != 'deb': + if build_config['package_type'] != 'deb': return 'clickhouse/binary-builder' else: return 'clickhouse/deb-builder' @@ -81,14 +78,38 @@ def get_image_name(build_config): def build_clickhouse(packager_cmd, logs_path): build_log_path = os.path.join(logs_path, 'build_log.log') - with open(build_log_path, 'w') as log_file: - retcode = subprocess.Popen(packager_cmd, shell=True, stderr=log_file, stdout=log_file).wait() + with TeePopen(packager_cmd, build_log_path) as process: + retcode = process.wait() if retcode == 0: logging.info("Built successfully") else: logging.info("Build failed") return build_log_path, retcode == 0 + +def get_build_results_if_exists(s3_helper, s3_prefix): + try: + content = s3_helper.list_prefix(s3_prefix) + return content + except Exception as ex: + logging.info("Got exception %s listing %s", ex, s3_prefix) + return None + +def create_json_artifact(temp_path, build_name, log_url, build_urls, build_config, elapsed, success): + subprocess.check_call(f"echo 'BUILD_NAME=build_urls_{build_name}' >> $GITHUB_ENV", shell=True) + + result = { + "log_url": log_url, + "build_urls": build_urls, + "build_config": build_config, + "elapsed_seconds": elapsed, + "status": success, + } + + with open(os.path.join(temp_path, "build_urls_" + build_name + '.json'), 'w') as build_links: + json.dump(result, build_links) + + if __name__ == "__main__": logging.basicConfig(level=logging.INFO) repo_path = os.getenv("REPO_COPY", os.path.abspath("../../")) @@ -103,20 +124,46 @@ def build_clickhouse(packager_cmd, logs_path): if not os.path.exists(temp_path): os.makedirs(temp_path) - with open(os.getenv('GITHUB_EVENT_PATH'), 'r') as event_file: - event = json.load(event_file) - - pr_info = PRInfo(event) + pr_info = PRInfo(get_event()) logging.info("Repo copy path %s", repo_path) gh = Github(get_best_robot_token()) + s3_helper = S3Helper('https://s3.amazonaws.com') + + version = get_version_from_repo(repo_path) + release_or_pr = None + if 'release' in pr_info.labels or 'release-lts' in pr_info.labels: + # for release pull requests we use branch names prefixes, not pr numbers + release_or_pr = pr_info.head_ref + elif pr_info.number == 0: + # for pushes to master - major version + release_or_pr = ".".join(version.as_tuple()[:2]) + else: + # PR number for anything else + release_or_pr = str(pr_info.number) + + s3_path_prefix = "/".join((release_or_pr, pr_info.sha, build_name)) + + # If this is rerun, then we try to find already created artifacts and just + # put them as github actions artifcat (result) + build_results = get_build_results_if_exists(s3_helper, s3_path_prefix) + if build_results is not None and len(build_results) > 0: + logging.info("Some build results found %s", build_results) + build_urls = [] + log_url = '' + for url in build_results: + if 'build_log.log' in url: + log_url = 
'https://s3.amazonaws.com/clickhouse-builds/' + url.replace('+', '%2B').replace(' ', '%20') + else: + build_urls.append('https://s3.amazonaws.com/clickhouse-builds/' + url.replace('+', '%2B').replace(' ', '%20')) + create_json_artifact(temp_path, build_name, log_url, build_urls, build_config, 0, True) + sys.exit(0) image_name = get_image_name(build_config) docker_image = get_image_with_version(os.getenv("IMAGES_PATH"), image_name) image_version = docker_image.version - version = get_version_from_repo(repo_path) logging.info("Got version from repo %s", version.get_version_string()) version_type = 'testing' @@ -128,14 +175,12 @@ def build_clickhouse(packager_cmd, logs_path): logging.info("Updated local files with version") logging.info("Build short name %s", build_name) - subprocess.check_call(f"echo 'BUILD_NAME=build_urls_{build_name}' >> $GITHUB_ENV", shell=True) build_output_path = os.path.join(temp_path, build_name) if not os.path.exists(build_output_path): os.makedirs(build_output_path) ccache_path = os.path.join(caches_path, build_name + '_ccache') - s3_helper = S3Helper('https://s3.amazonaws.com') logging.info("Will try to fetch cache for our build") get_ccache_if_not_exists(ccache_path, s3_helper, pr_info.number, temp_path) @@ -162,12 +207,6 @@ def build_clickhouse(packager_cmd, logs_path): logging.info("Will upload cache") upload_ccache(ccache_path, s3_helper, pr_info.number, temp_path) - # for release pull requests we use branch names prefixes, not pr numbers - if 'release' in pr_info.labels or 'release-lts' in pr_info.labels: - s3_path_prefix = pr_info.head_ref + "/" + pr_info.sha + "/" + build_name - else: - s3_path_prefix = str(pr_info.number) + "/" + pr_info.sha + "/" + build_name - if os.path.exists(log_path): log_url = s3_helper.upload_build_file_to_s3(log_path, s3_path_prefix + "/" + os.path.basename(log_path)) logging.info("Log url %s", log_url) @@ -179,15 +218,9 @@ def build_clickhouse(packager_cmd, logs_path): print("::notice ::Build URLs: {}".format('\n'.join(build_urls))) - result = { - "log_url": log_url, - "build_urls": build_urls, - "build_config": build_config, - "elapsed_seconds": elapsed, - "status": success, - } - print("::notice ::Log URL: {}".format(log_url)) - with open(os.path.join(temp_path, "build_urls_" + build_name + '.json'), 'w') as build_links: - json.dump(result, build_links) + create_json_artifact(temp_path, build_name, log_url, build_urls, build_config, elapsed, success) + # Fail build job if not successeded + if not success: + sys.exit(1) diff --git a/tests/ci/build_report_check.py b/tests/ci/build_report_check.py index 402db7c27404..3d97a9730178 100644 --- a/tests/ci/build_report_check.py +++ b/tests/ci/build_report_check.py @@ -8,8 +8,10 @@ from report import create_build_html_report from s3_helper import S3Helper from get_robot_token import get_best_robot_token -from pr_info import PRInfo +from pr_info import PRInfo, get_event from commit_status_helper import get_commit +from ci_config import CI_CONFIG +from rerun_helper import RerunHelper class BuildResult(): def __init__(self, compiler, build_type, sanitizer, bundled, splitted, status, elapsed_seconds, with_coverage): @@ -68,6 +70,8 @@ def process_report(build_report): return build_results, build_urls, build_logs_urls +def get_build_name_from_file_name(file_name): + return file_name.replace('build_urls_', '').replace('.json', '') if __name__ == "__main__": logging.basicConfig(level=logging.INFO) @@ -80,15 +84,30 @@ def process_report(build_report): build_check_name = sys.argv[1] - 
build_reports = [] + gh = Github(get_best_robot_token()) + pr_info = PRInfo(get_event()) + rerun_helper = RerunHelper(gh, pr_info, build_check_name) + if rerun_helper.is_already_finished_by_status(): + logging.info("Check is already finished according to github status, exiting") + sys.exit(0) + + reports_order = CI_CONFIG["builds_report_config"][build_check_name] + logging.info("My reports list %s", reports_order) + + build_reports_map = {} for root, dirs, files in os.walk(reports_path): for f in files: if f.startswith("build_urls_") and f.endswith('.json'): logging.info("Found build report json %s", f) - with open(os.path.join(root, f), 'r') as file_handler: - build_report = json.load(file_handler) - build_reports.append(build_report) + build_name = get_build_name_from_file_name(f) + if build_name in reports_order: + with open(os.path.join(root, f), 'r') as file_handler: + build_report = json.load(file_handler) + build_reports_map[build_name] = build_report + else: + logging.info("Skipping report %s for build %s, it's not in our reports list", f, build_name) + build_reports = [build_reports_map[build_name] for build_name in reports_order if build_name in build_reports_map] build_results = [] build_artifacts = [] @@ -102,13 +121,13 @@ def process_report(build_report): build_logs += build_logs_url logging.info("Totally got %s results", len(build_results)) + if len(build_results) == 0: + logging.info("No builds, failing check") + sys.exit(1) - gh = Github(get_best_robot_token()) s3_helper = S3Helper('https://s3.amazonaws.com') - with open(os.getenv('GITHUB_EVENT_PATH'), 'r') as event_file: - event = json.load(event_file) - pr_info = PRInfo(event) + pr_info = PRInfo(get_event()) branch_url = f"{os.getenv('GITHUB_SERVER_URL')}/{os.getenv('GITHUB_REPOSITORY')}/commits/master" branch_name = "master" @@ -151,6 +170,9 @@ def process_report(build_report): if build_result.status == "success": ok_builds += 1 + if ok_builds == 0: + summary_status = "error" + description = "{}/{} builds are OK".format(ok_builds, total_builds) print("::notice ::Report url: {}".format(url)) diff --git a/tests/ci/cancel_workflow_lambda/Dockerfile b/tests/ci/cancel_workflow_lambda/Dockerfile new file mode 100644 index 000000000000..f53be71a8931 --- /dev/null +++ b/tests/ci/cancel_workflow_lambda/Dockerfile @@ -0,0 +1,13 @@ +FROM public.ecr.aws/lambda/python:3.9 + +# Copy function code +COPY app.py ${LAMBDA_TASK_ROOT} + +# Install the function's dependencies using file requirements.txt +# from your project folder. + +COPY requirements.txt . 
+RUN pip3 install -r requirements.txt --target "${LAMBDA_TASK_ROOT}" + +# Set the CMD to your handler (could also be done as a parameter override outside of the Dockerfile) +CMD [ "app.handler" ] diff --git a/tests/ci/cancel_workflow_lambda/app.py b/tests/ci/cancel_workflow_lambda/app.py new file mode 100644 index 000000000000..e475fcb931a2 --- /dev/null +++ b/tests/ci/cancel_workflow_lambda/app.py @@ -0,0 +1,127 @@ +#!/usr/bin/env python3 + +import json +import time +import jwt + +import requests +import boto3 + +# https://docs.github.com/en/rest/reference/actions#cancel-a-workflow-run +# +API_URL = 'https://api.github.com/repos/ClickHouse/ClickHouse' + +MAX_RETRY = 5 + +def get_installation_id(jwt_token): + headers = { + "Authorization": f"Bearer {jwt_token}", + "Accept": "application/vnd.github.v3+json", + } + response = requests.get("https://api.github.com/app/installations", headers=headers) + response.raise_for_status() + data = response.json() + return data[0]['id'] + +def get_access_token(jwt_token, installation_id): + headers = { + "Authorization": f"Bearer {jwt_token}", + "Accept": "application/vnd.github.v3+json", + } + response = requests.post(f"https://api.github.com/app/installations/{installation_id}/access_tokens", headers=headers) + response.raise_for_status() + data = response.json() + return data['token'] + +def get_key_and_app_from_aws(): + secret_name = "clickhouse_github_secret_key" + session = boto3.session.Session() + client = session.client( + service_name='secretsmanager', + ) + get_secret_value_response = client.get_secret_value( + SecretId=secret_name + ) + data = json.loads(get_secret_value_response['SecretString']) + return data['clickhouse-app-key'], int(data['clickhouse-app-id']) + +def get_token_from_aws(): + private_key, app_id = get_key_and_app_from_aws() + payload = { + "iat": int(time.time()) - 60, + "exp": int(time.time()) + (10 * 60), + "iss": app_id, + } + + encoded_jwt = jwt.encode(payload, private_key, algorithm="RS256") + installation_id = get_installation_id(encoded_jwt) + return get_access_token(encoded_jwt, installation_id) + +def _exec_get_with_retry(url): + for i in range(MAX_RETRY): + try: + response = requests.get(url) + response.raise_for_status() + return response.json() + except Exception as ex: + print("Got exception executing request", ex) + time.sleep(i + 1) + + raise Exception("Cannot execute GET request with retries") + + +def get_workflows_cancel_urls_for_pull_request(pull_request_event): + head_branch = pull_request_event['head']['ref'] + print("PR", pull_request_event['number'], "has head ref", head_branch) + workflows = _exec_get_with_retry(API_URL + f"/actions/runs?branch={head_branch}") + workflows_urls_to_cancel = set([]) + for workflow in workflows['workflow_runs']: + if workflow['status'] != 'completed': + print("Workflow", workflow['url'], "not finished, going to be cancelled") + workflows_urls_to_cancel.add(workflow['cancel_url']) + else: + print("Workflow", workflow['url'], "already finished, will not try to cancel") + + return workflows_urls_to_cancel + +def _exec_post_with_retry(url, token): + headers = { + "Authorization": f"token {token}" + } + for i in range(MAX_RETRY): + try: + response = requests.post(url, headers=headers) + response.raise_for_status() + return response.json() + except Exception as ex: + print("Got exception executing request", ex) + time.sleep(i + 1) + + raise Exception("Cannot execute POST request with retry") + +def cancel_workflows(urls_to_cancel, token): + for url in urls_to_cancel: + 
print("Cancelling workflow using url", url) + _exec_post_with_retry(url, token) + print("Workflow cancelled") + +def main(event): + token = get_token_from_aws() + event_data = json.loads(event['body']) + + print("Got event for PR", event_data['number']) + action = event_data['action'] + print("Got action", event_data['action']) + pull_request = event_data['pull_request'] + labels = { l['name'] for l in pull_request['labels'] } + print("PR has labels", labels) + if action == 'closed' or 'do not test' in labels: + print("PR merged/closed or manually labeled 'do not test' will kill workflows") + workflows_to_cancel = get_workflows_cancel_urls_for_pull_request(pull_request) + print(f"Found {len(workflows_to_cancel)} workflows to cancel") + cancel_workflows(workflows_to_cancel, token) + else: + print("Nothing to do") + +def handler(event, _): + main(event) diff --git a/tests/ci/cancel_workflow_lambda/requirements.txt b/tests/ci/cancel_workflow_lambda/requirements.txt new file mode 100644 index 000000000000..c0dcf4a4dde7 --- /dev/null +++ b/tests/ci/cancel_workflow_lambda/requirements.txt @@ -0,0 +1,3 @@ +requests +PyJWT +cryptography diff --git a/tests/ci/ci_config.json b/tests/ci/ci_config.json index 52a101728eaa..68a478ae776c 100644 --- a/tests/ci/ci_config.json +++ b/tests/ci/ci_config.json @@ -34,128 +34,6 @@ { "compiler": "clang-11", "build-type": "", - "sanitizer": "address", - "package-type": "deb", - "bundled": "bundled", - "splitted": "unsplitted", - "tidy": "disable", - "with_coverage": false - }, - { - "compiler": "clang-11", - "build-type": "", - "sanitizer": "undefined", - "package-type": "deb", - "bundled": "bundled", - "splitted": "unsplitted", - "tidy": "disable", - "with_coverage": false - }, - { - "compiler": "clang-11", - "build-type": "", - "sanitizer": "thread", - "package-type": "deb", - "bundled": "bundled", - "splitted": "unsplitted", - "tidy": "disable", - "with_coverage": false - }, - { - "compiler": "clang-11", - "build-type": "", - "sanitizer": "memory", - "package-type": "deb", - "bundled": "bundled", - "splitted": "unsplitted", - "tidy": "disable", - "with_coverage": false - }, - { - "compiler": "clang-11", - "build-type": "debug", - "sanitizer": "", - "package-type": "deb", - "bundled": "bundled", - "splitted": "unsplitted", - "tidy": "disable", - "with_coverage": false - }, - { - "compiler": "gcc-10", - "build-type": "", - "sanitizer": "", - "package-type": "deb", - "bundled": "unbundled", - "splitted": "unsplitted", - "tidy": "disable", - "with_coverage": false - }, - { - "compiler": "clang-11", - "build-type": "", - "sanitizer": "", - "package-type": "binary", - "bundled": "bundled", - "splitted": "unsplitted", - "tidy": "disable", - "with_coverage": false - } - ], - "special_build_config": [ - { - "compiler": "clang-11", - "build-type": "debug", - "sanitizer": "", - "package-type": "deb", - "bundled": "bundled", - "splitted": "unsplitted", - "tidy": "enable", - "with_coverage": false - }, - { - "compiler": "clang-11", - "build-type": "", - "sanitizer": "", - "package-type": "binary", - "bundled": "bundled", - "splitted": "splitted", - "tidy": "disable", - "with_coverage": false - }, - { - "compiler": "clang-11-darwin", - "build-type": "", - "sanitizer": "", - "package-type": "binary", - "bundled": "bundled", - "splitted": "unsplitted", - "tidy": "disable", - "with_coverage": false - }, - { - "compiler": "clang-11-aarch64", - "build-type": "", - "sanitizer": "", - "package-type": "binary", - "bundled": "bundled", - "splitted": "unsplitted", - "tidy": "disable", 
- "with_coverage": false - }, - { - "compiler": "clang-11-freebsd", - "build-type": "", - "sanitizer": "", - "package-type": "binary", - "bundled": "bundled", - "splitted": "unsplitted", - "tidy": "disable", - "with_coverage": false - }, - { - "compiler": "clang-11-darwin-aarch64", - "build-type": "", "sanitizer": "", "package-type": "binary", "bundled": "bundled", @@ -165,163 +43,7 @@ } ], "tests_config": { - "Functional stateful tests (address)": { - "required_build_properties": { - "compiler": "clang-11", - "package_type": "deb", - "build_type": "relwithdebuginfo", - "sanitizer": "address", - "bundled": "bundled", - "splitted": "unsplitted", - "clang-tidy": "disable", - "with_coverage": false - } - }, - "Functional stateful tests (thread)": { - "required_build_properties": { - "compiler": "clang-11", - "package_type": "deb", - "build_type": "relwithdebuginfo", - "sanitizer": "thread", - "bundled": "bundled", - "splitted": "unsplitted", - "clang-tidy": "disable", - "with_coverage": false - } - }, - "Functional stateful tests (memory)": { - "required_build_properties": { - "compiler": "clang-11", - "package_type": "deb", - "build_type": "relwithdebuginfo", - "sanitizer": "memory", - "bundled": "bundled", - "splitted": "unsplitted", - "clang-tidy": "disable", - "with_coverage": false - } - }, - "Functional stateful tests (ubsan)": { - "required_build_properties": { - "compiler": "clang-11", - "package_type": "deb", - "build_type": "relwithdebuginfo", - "sanitizer": "undefined", - "bundled": "bundled", - "splitted": "unsplitted", - "clang-tidy": "disable", - "with_coverage": false - } - }, - "Functional stateful tests (debug)": { - "required_build_properties": { - "compiler": "clang-11", - "package_type": "deb", - "build_type": "debug", - "sanitizer": "none", - "bundled": "bundled", - "splitted": "unsplitted", - "clang-tidy": "disable", - "with_coverage": false - } - }, - "Functional stateful tests (release)": { - "required_build_properties": { - "compiler": "clang-11", - "package_type": "deb", - "build_type": "relwithdebuginfo", - "sanitizer": "none", - "bundled": "bundled", - "splitted": "unsplitted", - "clang-tidy": "disable", - "with_coverage": false - } - }, - "Functional stateful tests (release, DatabaseOrdinary)": { - "required_build_properties": { - "compiler": "clang-11", - "package_type": "deb", - "build_type": "relwithdebuginfo", - "sanitizer": "none", - "bundled": "bundled", - "splitted": "unsplitted", - "clang-tidy": "disable", - "with_coverage": false - } - }, - "Functional stateful tests (release, DatabaseReplicated)": { - "required_build_properties": { - "compiler": "clang-11", - "package_type": "deb", - "build_type": "relwithdebuginfo", - "sanitizer": "none", - "bundled": "bundled", - "splitted": "unsplitted", - "clang-tidy": "disable", - "with_coverage": false - } - }, - "Functional stateless tests (address)": { - "required_build_properties": { - "compiler": "clang-11", - "package_type": "deb", - "build_type": "relwithdebuginfo", - "sanitizer": "address", - "bundled": "bundled", - "splitted": "unsplitted", - "clang-tidy": "disable", - "with_coverage": false - } - }, - "Functional stateless tests (thread)": { - "required_build_properties": { - "compiler": "clang-11", - "package_type": "deb", - "build_type": "relwithdebuginfo", - "sanitizer": "thread", - "bundled": "bundled", - "splitted": "unsplitted", - "clang-tidy": "disable", - "with_coverage": false - } - }, - "Functional stateless tests (memory)": { - "required_build_properties": { - "compiler": "clang-11", - 
"package_type": "deb", - "build_type": "relwithdebuginfo", - "sanitizer": "memory", - "bundled": "bundled", - "splitted": "unsplitted", - "clang-tidy": "disable", - "with_coverage": false - } - }, - "Functional stateless tests (ubsan)": { - "required_build_properties": { - "compiler": "clang-11", - "package_type": "deb", - "build_type": "relwithdebuginfo", - "sanitizer": "undefined", - "bundled": "bundled", - "splitted": "unsplitted", - "clang-tidy": "disable", - "with_coverage": false - } - }, - "Functional stateless tests (debug)": { - "required_build_properties": { - "compiler": "clang-11", - "package_type": "deb", - "build_type": "debug", - "sanitizer": "none", - "bundled": "bundled", - "splitted": "unsplitted", - "clang-tidy": "disable", - "with_coverage": false - } - }, - "Functional stateless tests (release)": { + "Testflows check": { "required_build_properties": { "compiler": "clang-11", "package_type": "deb", @@ -333,7 +55,7 @@ "with_coverage": false } }, - "Functional stateless tests (pytest)": { + "Release": { "required_build_properties": { "compiler": "clang-11", "package_type": "deb", @@ -345,22 +67,10 @@ "with_coverage": false } }, - "Functional stateless tests (unbundled)": { - "required_build_properties": { - "compiler": "gcc-10", - "package_type": "deb", - "build_type": "relwithdebuginfo", - "sanitizer": "none", - "bundled": "unbundled", - "splitted": "unsplitted", - "clang-tidy": "disable", - "with_coverage": false - } - }, - "Functional stateless tests (release, wide parts enabled)": { + "ClickHouse Keeper Jepsen": { "required_build_properties": { "compiler": "clang-11", - "package_type": "deb", + "package_type": "binary", "build_type": "relwithdebuginfo", "sanitizer": "none", "bundled": "bundled", @@ -368,342 +78,6 @@ "clang-tidy": "disable", "with_coverage": false } - }, - "Functional stateless tests (release, DatabaseOrdinary)": { - "required_build_properties": { - "compiler": "clang-11", - "package_type": "deb", - "build_type": "relwithdebuginfo", - "sanitizer": "none", - "bundled": "bundled", - "splitted": "unsplitted", - "clang-tidy": "disable", - "with_coverage": false - } - }, - "Functional stateless tests (release, DatabaseReplicated)": { - "required_build_properties": { - "compiler": "clang-11", - "package_type": "deb", - "build_type": "relwithdebuginfo", - "sanitizer": "none", - "bundled": "bundled", - "splitted": "unsplitted", - "clang-tidy": "disable", - "with_coverage": false - } - }, - "Stress test (address)": { - "required_build_properties": { - "compiler": "clang-11", - "package_type": "deb", - "build_type": "relwithdebuginfo", - "sanitizer": "address", - "bundled": "bundled", - "splitted": "unsplitted", - "clang-tidy": "disable", - "with_coverage": false - } - }, - "Stress test (thread)": { - "required_build_properties": { - "compiler": "clang-11", - "package_type": "deb", - "build_type": "relwithdebuginfo", - "sanitizer": "thread", - "bundled": "bundled", - "splitted": "unsplitted", - "clang-tidy": "disable", - "with_coverage": false - } - }, - "Stress test (undefined)": { - "required_build_properties": { - "compiler": "clang-11", - "package_type": "deb", - "build_type": "relwithdebuginfo", - "sanitizer": "undefined", - "bundled": "bundled", - "splitted": "unsplitted", - "clang-tidy": "disable", - "with_coverage": false - } - }, - "Stress test (memory)": { - "required_build_properties": { - "compiler": "clang-11", - "package_type": "deb", - "build_type": "relwithdebuginfo", - "sanitizer": "memory", - "bundled": "bundled", - "splitted": "unsplitted", - 
"clang-tidy": "disable", - "with_coverage": false - } - }, - "Stress test (debug)": { - "required_build_properties": { - "compiler": "clang-11", - "package_type": "deb", - "build_type": "debug", - "sanitizer": "none", - "bundled": "bundled", - "splitted": "unsplitted", - "clang-tidy": "disable", - "with_coverage": false - } - }, - "Integration tests (asan)": { - "required_build_properties": { - "compiler": "clang-11", - "package_type": "deb", - "build_type": "relwithdebuginfo", - "sanitizer": "address", - "bundled": "bundled", - "splitted": "unsplitted", - "clang-tidy": "disable", - "with_coverage": false - } - }, - "Integration tests (thread)": { - "required_build_properties": { - "compiler": "clang-11", - "package_type": "deb", - "build_type": "relwithdebuginfo", - "sanitizer": "thread", - "bundled": "bundled", - "splitted": "unsplitted", - "clang-tidy": "disable", - "with_coverage": false - } - }, - "Integration tests (release)": { - "required_build_properties": { - "compiler": "clang-11", - "package_type": "deb", - "build_type": "relwithdebuginfo", - "sanitizer": "none", - "bundled": "bundled", - "splitted": "unsplitted", - "clang-tidy": "disable", - "with_coverage": false - } - }, - "Integration tests (memory)": { - "required_build_properties": { - "compiler": "clang-11", - "package_type": "deb", - "build_type": "relwithdebuginfo", - "sanitizer": "memory", - "bundled": "bundled", - "splitted": "unsplitted", - "clang-tidy": "disable", - "with_coverage": false - } - }, - "Integration tests flaky check (asan)": { - "required_build_properties": { - "compiler": "clang-11", - "package_type": "deb", - "build_type": "relwithdebuginfo", - "sanitizer": "address", - "bundled": "bundled", - "splitted": "unsplitted", - "clang-tidy": "disable", - "with_coverage": false - } - }, - "Compatibility check": { - "required_build_properties": { - "compiler": "clang-11", - "package_type": "deb", - "build_type": "relwithdebuginfo", - "sanitizer": "none", - "bundled": "bundled", - "splitted": "unsplitted", - "clang-tidy": "disable", - "with_coverage": false - } - }, - "Split build smoke test": { - "required_build_properties": { - "compiler": "clang-11", - "package_type": "binary", - "build_type": "relwithdebuginfo", - "sanitizer": "none", - "bundled": "bundled", - "splitted": "splitted", - "clang-tidy": "disable", - "with_coverage": false - } - }, - "Testflows check": { - "required_build_properties": { - "compiler": "clang-11", - "package_type": "deb", - "build_type": "relwithdebuginfo", - "sanitizer": "none", - "bundled": "bundled", - "splitted": "unsplitted", - "clang-tidy": "disable", - "with_coverage": false - } - }, - "Unit tests release gcc": { - "required_build_properties": { - "compiler": "gcc-10", - "package_type": "binary", - "build_type": "relwithdebuginfo", - "sanitizer": "none", - "bundled": "bundled", - "splitted": "unsplitted", - "clang-tidy": "disable", - "with_coverage": false - } - }, - "Unit tests release clang": { - "required_build_properties": { - "compiler": "clang-11", - "package_type": "binary", - "build_type": "relwithdebuginfo", - "sanitizer": "none", - "bundled": "bundled", - "splitted": "unsplitted", - "clang-tidy": "disable", - "with_coverage": false - } - }, - "Unit tests ASAN": { - "required_build_properties": { - "compiler": "clang-11", - "package_type": "binary", - "build_type": "relwithdebuginfo", - "sanitizer": "address", - "bundled": "bundled", - "splitted": "unsplitted", - "clang-tidy": "disable", - "with_coverage": false - } - }, - "Unit tests MSAN": { - 
"required_build_properties": { - "compiler": "clang-11", - "package_type": "binary", - "build_type": "relwithdebuginfo", - "sanitizer": "memory", - "bundled": "bundled", - "splitted": "unsplitted", - "clang-tidy": "disable", - "with_coverage": false - } - }, - "Unit tests TSAN": { - "required_build_properties": { - "compiler": "clang-11", - "package_type": "binary", - "build_type": "relwithdebuginfo", - "sanitizer": "thread", - "bundled": "bundled", - "splitted": "unsplitted", - "clang-tidy": "disable", - "with_coverage": false - } - }, - "Unit tests UBSAN": { - "required_build_properties": { - "compiler": "clang-11", - "package_type": "binary", - "build_type": "relwithdebuginfo", - "sanitizer": "thread", - "bundled": "bundled", - "splitted": "unsplitted", - "clang-tidy": "disable", - "with_coverage": false - } - }, - "AST fuzzer (debug)": { - "required_build_properties": { - "compiler": "clang-11", - "package_type": "binary", - "build_type": "debug", - "sanitizer": "none", - "bundled": "bundled", - "splitted": "unsplitted", - "clang-tidy": "disable", - "with_coverage": false - } - }, - "AST fuzzer (ASan)": { - "required_build_properties": { - "compiler": "clang-11", - "package_type": "binary", - "build_type": "relwithdebuginfo", - "sanitizer": "address", - "bundled": "bundled", - "splitted": "unsplitted", - "clang-tidy": "disable", - "with_coverage": false - } - }, - "AST fuzzer (MSan)": { - "required_build_properties": { - "compiler": "clang-11", - "package_type": "binary", - "build_type": "relwithdebuginfo", - "sanitizer": "memory", - "bundled": "bundled", - "splitted": "unsplitted", - "clang-tidy": "disable", - "with_coverage": false - } - }, - "AST fuzzer (TSan)": { - "required_build_properties": { - "compiler": "clang-11", - "package_type": "binary", - "build_type": "relwithdebuginfo", - "sanitizer": "thread", - "bundled": "bundled", - "splitted": "unsplitted", - "clang-tidy": "disable", - "with_coverage": false - } - }, - "AST fuzzer (UBSan)": { - "required_build_properties": { - "compiler": "clang-11", - "package_type": "binary", - "build_type": "relwithdebuginfo", - "sanitizer": "undefined", - "bundled": "bundled", - "splitted": "unsplitted", - "clang-tidy": "disable", - "with_coverage": false - } - }, - "Release": { - "required_build_properties": { - "compiler": "clang-11", - "package_type": "deb", - "build_type": "relwithdebuginfo", - "sanitizer": "none", - "bundled": "bundled", - "splitted": "unsplitted", - "clang-tidy": "disable", - "with_coverage": false - } - }, - "Functional stateless tests flaky check (address)": { - "required_build_properties": { - "compiler": "clang-11", - "package_type": "deb", - "build_type": "relwithdebuginfo", - "sanitizer": "address", - "bundled": "bundled", - "splitted": "unsplitted", - "clang-tidy": "disable", - "with_coverage": false - } } } } diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index 65e3c1bfd05c..b40be95b3d7f 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -93,11 +93,11 @@ "tidy": "disable", "with_coverage": False }, - "package_tidy": { + "binary_tidy": { "compiler": "clang-11", "build_type": "debug", "sanitizer": "", - "package_type": "deb", + "package_type": "binary", "bundled": "bundled", "splitted": "unsplitted", "tidy": "enable", @@ -164,6 +164,26 @@ "with_coverage": False } }, + "builds_report_config": { + "ClickHouse build check (actions)": [ + "package_release", + "performance", + "package_asan", + "package_ubsan", + "package_tsan", + "package_msan", + "package_debug", + "binary_release" + ], + 
"ClickHouse special build check (actions)": [ + "binary_tidy", + "binary_splitted", + "binary_darwin", + "binary_arrach64", + "binary_freebsd", + "binary_darwin_aarch64" + ], + }, "tests_config": { "Stateful tests (address, actions)": { "required_build": "package_asan", diff --git a/tests/ci/clickhouse_helper.py b/tests/ci/clickhouse_helper.py index 0b9df6cb8683..58fd8c4aecef 100644 --- a/tests/ci/clickhouse_helper.py +++ b/tests/ci/clickhouse_helper.py @@ -8,8 +8,16 @@ class ClickHouseHelper: def __init__(self, url=None, user=None, password=None): + self.url2 = None + self.auth2 = None + if url is None: url = get_parameter_from_ssm("clickhouse-test-stat-url") + self.url2 = get_parameter_from_ssm("clickhouse-test-stat-url2") + self.auth2 = { + 'X-ClickHouse-User': get_parameter_from_ssm("clickhouse-test-stat-login2"), + 'X-ClickHouse-Key': '' + } self.url = url self.auth = { @@ -17,7 +25,8 @@ def __init__(self, url=None, user=None, password=None): 'X-ClickHouse-Key': password if password is not None else get_parameter_from_ssm("clickhouse-test-stat-password") } - def _insert_json_str_info(self, db, table, json_str): + @staticmethod + def _insert_json_str_info_impl(url, auth, db, table, json_str): params = { 'database': db, 'query': 'INSERT INTO {table} FORMAT JSONEachRow'.format(table=table), @@ -26,7 +35,7 @@ def _insert_json_str_info(self, db, table, json_str): } for i in range(5): - response = requests.post(self.url, params=params, data=json_str, headers=self.auth, verify=False) + response = requests.post(url, params=params, data=json_str, headers=auth, verify=False) logging.info("Response content '%s'", response.content) @@ -49,6 +58,11 @@ def _insert_json_str_info(self, db, table, json_str): else: raise Exception(error) + def _insert_json_str_info(self, db, table, json_str): + self._insert_json_str_info_impl(self.url, self.auth, db, table, json_str) + if self.url2: + self._insert_json_str_info_impl(self.url2, self.auth2, db, table, json_str) + def insert_event_into(self, db, table, event): event_str = json.dumps(event) self._insert_json_str_info(db, table, event_str) diff --git a/tests/ci/compatibility_check.py b/tests/ci/compatibility_check.py index b6a8f67aa5f4..665f399b0400 100644 --- a/tests/ci/compatibility_check.py +++ b/tests/ci/compatibility_check.py @@ -3,20 +3,21 @@ from distutils.version import StrictVersion import logging import os -import json import subprocess +import sys from github import Github from s3_helper import S3Helper from get_robot_token import get_best_robot_token -from pr_info import PRInfo +from pr_info import PRInfo, get_event from build_download_helper import download_builds_filter from upload_result_helper import upload_results from docker_pull_helper import get_images_with_versions from commit_status_helper import post_commit_status from clickhouse_helper import ClickHouseHelper, mark_flaky_tests, prepare_tests_results_for_clickhouse from stopwatch import Stopwatch +from rerun_helper import RerunHelper IMAGE_UBUNTU = "clickhouse/test-old-ubuntu" IMAGE_CENTOS = "clickhouse/test-old-centos" @@ -106,13 +107,15 @@ def get_run_commands(build_path, result_folder, server_log_folder, image_centos, repo_path = os.getenv("REPO_COPY", os.path.abspath("../../")) reports_path = os.getenv("REPORTS_PATH", "./reports") - with open(os.getenv('GITHUB_EVENT_PATH'), 'r', encoding='utf-8') as event_file: - event = json.load(event_file) - - pr_info = PRInfo(event) + pr_info = PRInfo(get_event()) gh = Github(get_best_robot_token()) + rerun_helper = RerunHelper(gh, pr_info, 
CHECK_NAME) + if rerun_helper.is_already_finished_by_status(): + logging.info("Check is already finished according to github status, exiting") + sys.exit(0) + docker_images = get_images_with_versions(reports_path, [IMAGE_CENTOS, IMAGE_UBUNTU]) packages_path = os.path.join(temp_path, "packages") diff --git a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py index 0482f05f284d..5e05cbaecd7c 100644 --- a/tests/ci/docker_images_check.py +++ b/tests/ci/docker_images_check.py @@ -7,7 +7,7 @@ import shutil from github import Github from s3_helper import S3Helper -from pr_info import PRInfo +from pr_info import PRInfo, get_event from get_robot_token import get_best_robot_token, get_parameter_from_ssm from upload_result_helper import upload_results from commit_status_helper import get_commit @@ -167,10 +167,7 @@ def process_test_results(s3_client, test_results, s3_path_prefix): if not os.path.exists(temp_path): os.makedirs(temp_path) - with open(os.getenv('GITHUB_EVENT_PATH'), 'r') as event_file: - event = json.load(event_file) - - pr_info = PRInfo(event, False, True) + pr_info = PRInfo(get_event(), need_changed_files=True) changed_images, dockerhub_repo_name = get_changed_docker_images(pr_info, repo_path, "docker/images.json") logging.info("Has changed images %s", ', '.join([str(image[0]) for image in changed_images])) pr_commit_version = str(pr_info.number) + '-' + pr_info.sha diff --git a/tests/ci/docker_pull_helper.py b/tests/ci/docker_pull_helper.py index f98047448208..50354da68011 100644 --- a/tests/ci/docker_pull_helper.py +++ b/tests/ci/docker_pull_helper.py @@ -25,6 +25,11 @@ def get_images_with_versions(reports_path, required_image, pull=True): images_path = os.path.join(root, 'changed_images.json') break + if not images_path: + logging.info("Images file not found") + else: + logging.info("Images file path %s", images_path) + if images_path is not None and os.path.exists(images_path): logging.info("Images file exists") with open(images_path, 'r', encoding='utf-8') as images_fd: diff --git a/tests/ci/docs_check.py b/tests/ci/docs_check.py index 11ff68e0286c..87c327f27763 100644 --- a/tests/ci/docs_check.py +++ b/tests/ci/docs_check.py @@ -2,17 +2,18 @@ import logging import subprocess import os -import json import sys from github import Github from s3_helper import S3Helper -from pr_info import PRInfo +from pr_info import PRInfo, get_event from get_robot_token import get_best_robot_token from upload_result_helper import upload_results from docker_pull_helper import get_image_with_version from commit_status_helper import post_commit_status, get_commit from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse from stopwatch import Stopwatch +from rerun_helper import RerunHelper +from tee_popen import TeePopen NAME = "Docs Check (actions)" @@ -25,12 +26,15 @@ temp_path = os.path.join(os.getenv("TEMP_PATH")) repo_path = os.path.join(os.getenv("REPO_COPY")) - with open(os.getenv('GITHUB_EVENT_PATH'), 'r', encoding='utf-8') as event_file: - event = json.load(event_file) - - pr_info = PRInfo(event, need_changed_files=True) + pr_info = PRInfo(get_event(), need_changed_files=True) gh = Github(get_best_robot_token()) + + rerun_helper = RerunHelper(gh, pr_info, NAME) + if rerun_helper.is_already_finished_by_status(): + logging.info("Check is already finished according to github status, exiting") + sys.exit(0) + if not pr_info.has_changes_in_documentation(): logging.info ("No changes in documentation") commit = get_commit(gh, pr_info.sha) @@ -52,17 +56,16 @@ 
run_log_path = os.path.join(test_output, 'runlog.log') - with open(run_log_path, 'w', encoding='utf-8') as log: - with subprocess.Popen(cmd, shell=True, stderr=log, stdout=log) as process: - retcode = process.wait() - if retcode == 0: - logging.info("Run successfully") - status = "success" - description = "Docs check passed" - else: - description = "Docs check failed (non zero exit code)" - status = "failure" - logging.info("Run failed") + with TeePopen(cmd, run_log_path) as process: + retcode = process.wait() + if retcode == 0: + logging.info("Run successfully") + status = "success" + description = "Docs check passed" + else: + description = "Docs check failed (non zero exit code)" + status = "failure" + logging.info("Run failed") subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True) files = os.listdir(test_output) diff --git a/tests/ci/docs_release.py b/tests/ci/docs_release.py index 6ca45d638582..04922e8c5abc 100644 --- a/tests/ci/docs_release.py +++ b/tests/ci/docs_release.py @@ -2,13 +2,12 @@ import logging import subprocess import os -import json import sys from github import Github from s3_helper import S3Helper -from pr_info import PRInfo +from pr_info import PRInfo, get_event from get_robot_token import get_best_robot_token from ssh import SSHKey from upload_result_helper import upload_results @@ -23,10 +22,7 @@ temp_path = os.path.join(os.getenv("TEMP_PATH")) repo_path = os.path.join(os.getenv("REPO_COPY")) - with open(os.getenv('GITHUB_EVENT_PATH'), 'r', encoding='utf-8') as event_file: - event = json.load(event_file) - - pr_info = PRInfo(event, need_changed_files=True) + pr_info = PRInfo(get_event(), need_changed_files=True) gh = Github(get_best_robot_token()) if not pr_info.has_changes_in_documentation(): diff --git a/tests/ci/fast_test_check.py b/tests/ci/fast_test_check.py index 2734102be3f5..26247dfd0b94 100644 --- a/tests/ci/fast_test_check.py +++ b/tests/ci/fast_test_check.py @@ -3,10 +3,11 @@ import logging import subprocess import os -import json import csv +import sys + from github import Github -from pr_info import PRInfo +from pr_info import PRInfo, get_event from s3_helper import S3Helper from get_robot_token import get_best_robot_token from upload_result_helper import upload_results @@ -14,6 +15,9 @@ from commit_status_helper import post_commit_status from clickhouse_helper import ClickHouseHelper, mark_flaky_tests, prepare_tests_results_for_clickhouse from stopwatch import Stopwatch +from rerun_helper import RerunHelper +from tee_popen import TeePopen +from ccache_utils import get_ccache_if_not_exists, upload_ccache NAME = 'Fast test (actions)' @@ -35,17 +39,23 @@ def process_results(result_folder): test_files = [f for f in os.listdir(result_folder) if os.path.isfile(os.path.join(result_folder, f))] additional_files = [os.path.join(result_folder, f) for f in test_files] + status = [] status_path = os.path.join(result_folder, "check_status.tsv") - logging.info("Found test_results.tsv") - status = list(csv.reader(open(status_path, 'r'), delimiter='\t')) + if os.path.exists(status_path): + logging.info("Found test_results.tsv") + with open(status_path, 'r', encoding='utf-8') as status_file: + status = list(csv.reader(status_file, delimiter='\t')) if len(status) != 1 or len(status[0]) != 2: + logging.info("Files in result folder %s", os.listdir(result_folder)) return "error", "Invalid check_status.tsv", test_results, additional_files state, description = status[0][0], status[0][1] results_path = os.path.join(result_folder, "test_results.tsv") - 
test_results = list(csv.reader(open(results_path, 'r'), delimiter='\t')) + if os.path.exists(results_path): + with open(results_path, 'r', encoding='utf-8') as results_file: + test_results = list(csv.reader(results_file, delimiter='\t')) if len(test_results) == 0: - raise Exception("Empty results") + return "error", "Empty test_results.tsv", test_results, additional_files return state, description, test_results, additional_files @@ -62,13 +72,15 @@ def process_results(result_folder): if not os.path.exists(temp_path): os.makedirs(temp_path) - with open(os.getenv('GITHUB_EVENT_PATH'), 'r') as event_file: - event = json.load(event_file) - - pr_info = PRInfo(event) + pr_info = PRInfo(get_event()) gh = Github(get_best_robot_token()) + rerun_helper = RerunHelper(gh, pr_info, NAME) + if rerun_helper.is_already_finished_by_status(): + logging.info("Check is already finished according to github status, exiting") + sys.exit(0) + docker_image = get_image_with_version(temp_path, 'clickhouse/fasttest') s3_helper = S3Helper('https://s3.amazonaws.com') @@ -82,7 +94,12 @@ def process_results(result_folder): os.makedirs(output_path) cache_path = os.path.join(caches_path, "fasttest") + + logging.info("Will try to fetch cache for our build") + get_ccache_if_not_exists(cache_path, s3_helper, pr_info.number, temp_path) + if not os.path.exists(cache_path): + logging.info("cache was not fetched, will create empty dir") os.makedirs(cache_path) repo_path = os.path.join(temp_path, "fasttest-repo") @@ -97,8 +114,8 @@ def process_results(result_folder): os.makedirs(logs_path) run_log_path = os.path.join(logs_path, 'runlog.log') - with open(run_log_path, 'w') as log: - retcode = subprocess.Popen(run_cmd, shell=True, stderr=log, stdout=log).wait() + with TeePopen(run_cmd, run_log_path) as process: + retcode = process.wait() if retcode == 0: logging.info("Run successfully") else: @@ -133,6 +150,9 @@ def process_results(result_folder): else: state, description, test_results, additional_logs = process_results(output_path) + logging.info("Will upload cache") + upload_ccache(cache_path, s3_helper, pr_info.number, temp_path) + ch_helper = ClickHouseHelper() mark_flaky_tests(ch_helper, NAME, test_results) @@ -142,3 +162,10 @@ def process_results(result_folder): prepared_events = prepare_tests_results_for_clickhouse(pr_info, test_results, state, stopwatch.duration_seconds, stopwatch.start_time_str, report_url, NAME) ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events) + + # Refuse other checks to run if fast test failed + if state != 'success': + if 'force-tests' in pr_info.labels: + print("'force-tests' enabled, will report success") + else: + sys.exit(1) diff --git a/tests/ci/finish_check.py b/tests/ci/finish_check.py index c38b3c094485..576b97058c71 100644 --- a/tests/ci/finish_check.py +++ b/tests/ci/finish_check.py @@ -1,9 +1,8 @@ #!/usr/bin/env python3 import logging -import json import os from github import Github -from pr_info import PRInfo +from pr_info import PRInfo, get_event from get_robot_token import get_best_robot_token from commit_status_helper import get_commit @@ -26,10 +25,8 @@ def filter_statuses(statuses): if __name__ == "__main__": logging.basicConfig(level=logging.INFO) - with open(os.getenv('GITHUB_EVENT_PATH'), 'r') as event_file: - event = json.load(event_file) - pr_info = PRInfo(event, need_orgs=True) + pr_info = PRInfo(get_event(), need_orgs=True) gh = Github(get_best_robot_token()) commit = get_commit(gh, pr_info.sha) diff --git a/tests/ci/functional_test_check.py 
b/tests/ci/functional_test_check.py index dc91ec071639..e7d4d49f3e7f 100644 --- a/tests/ci/functional_test_check.py +++ b/tests/ci/functional_test_check.py @@ -4,21 +4,36 @@ import logging import subprocess import os -import json import sys from github import Github from s3_helper import S3Helper from get_robot_token import get_best_robot_token -from pr_info import PRInfo +from pr_info import PRInfo, get_event from build_download_helper import download_all_deb_packages from upload_result_helper import upload_results from docker_pull_helper import get_image_with_version from commit_status_helper import post_commit_status, get_commit from clickhouse_helper import ClickHouseHelper, mark_flaky_tests, prepare_tests_results_for_clickhouse from stopwatch import Stopwatch +from rerun_helper import RerunHelper +from tee_popen import TeePopen +def get_additional_envs(check_name, run_by_hash_num, run_by_hash_total): + result = [] + if 'DatabaseReplicated' in check_name: + result.append("USE_DATABASE_REPLICATED=1") + if 'DatabaseOrdinary' in check_name: + result.append("USE_DATABASE_ORDINARY=1") + if 'wide parts enabled' in check_name: + result.append("USE_POLYMORPHIC_PARTS=1") + + if run_by_hash_total != 0: + result.append(f"RUN_BY_HASH_NUM={run_by_hash_num}") + result.append(f"RUN_BY_HASH_TOTAL={run_by_hash_total}") + + return result def get_image_name(check_name): if 'stateless' in check_name.lower(): @@ -78,20 +93,30 @@ def process_results(result_folder, server_log_path): server_log_files = [f for f in os.listdir(server_log_path) if os.path.isfile(os.path.join(server_log_path, f))] additional_files = additional_files + [os.path.join(server_log_path, f) for f in server_log_files] + status = [] status_path = os.path.join(result_folder, "check_status.tsv") - logging.info("Found test_results.tsv") - with open(status_path, 'r', encoding='utf-8') as status_file: - status = list(csv.reader(status_file, delimiter='\t')) + if os.path.exists(status_path): + logging.info("Found test_results.tsv") + with open(status_path, 'r', encoding='utf-8') as status_file: + status = list(csv.reader(status_file, delimiter='\t')) if len(status) != 1 or len(status[0]) != 2: + logging.info("Files in result folder %s", os.listdir(result_folder)) return "error", "Invalid check_status.tsv", test_results, additional_files state, description = status[0][0], status[0][1] results_path = os.path.join(result_folder, "test_results.tsv") + + if os.path.exists(results_path): + logging.info("Found test_results.tsv") + else: + logging.info("Files in result folder %s", os.listdir(result_folder)) + return "error", "Not found test_results.tsv", test_results, additional_files + with open(results_path, 'r', encoding='utf-8') as results_file: test_results = list(csv.reader(results_file, delimiter='\t')) if len(test_results) == 0: - raise Exception("Empty results") + return "error", "Empty test_results.tsv", test_results, additional_files return state, description, test_results, additional_files @@ -107,25 +132,37 @@ def process_results(result_folder, server_log_path): check_name = sys.argv[1] kill_timeout = int(sys.argv[2]) + flaky_check = 'flaky' in check_name.lower() + gh = Github(get_best_robot_token()) + + pr_info = PRInfo(get_event(), need_changed_files=flaky_check) + + if 'RUN_BY_HASH_NUM' in os.environ: + run_by_hash_num = int(os.getenv('RUN_BY_HASH_NUM')) + run_by_hash_total = int(os.getenv('RUN_BY_HASH_TOTAL')) + check_name_with_group = check_name + f' [{run_by_hash_num + 1}/{run_by_hash_total}]' + else: + run_by_hash_num = 0 + 
run_by_hash_total = 0 + check_name_with_group = check_name + + rerun_helper = RerunHelper(gh, pr_info, check_name_with_group) + if rerun_helper.is_already_finished_by_status(): + logging.info("Check is already finished according to github status, exiting") + sys.exit(0) if not os.path.exists(temp_path): os.makedirs(temp_path) - with open(os.getenv('GITHUB_EVENT_PATH'), 'r', encoding='utf-8') as event_file: - event = json.load(event_file) - - gh = Github(get_best_robot_token()) - pr_info = PRInfo(event, need_changed_files=flaky_check) tests_to_run = [] if flaky_check: tests_to_run = get_tests_to_run(pr_info) if not tests_to_run: commit = get_commit(gh, pr_info.sha) - commit.create_status(context=check_name, description='Not found changed stateless tests', state='success') + commit.create_status(context=check_name_with_group, description='Not found changed stateless tests', state='success') sys.exit(0) - image_name = get_image_name(check_name) docker_image = get_image_with_version(reports_path, image_name) @@ -145,16 +182,16 @@ def process_results(result_folder, server_log_path): run_log_path = os.path.join(result_path, "runlog.log") - run_command = get_run_command(packages_path, result_path, server_log_path, kill_timeout, [], docker_image, flaky_check, tests_to_run) + additional_envs = get_additional_envs(check_name, run_by_hash_num, run_by_hash_total) + run_command = get_run_command(packages_path, result_path, server_log_path, kill_timeout, additional_envs, docker_image, flaky_check, tests_to_run) logging.info("Going to run func tests: %s", run_command) - with open(run_log_path, 'w', encoding='utf-8') as log: - with subprocess.Popen(run_command, shell=True, stderr=log, stdout=log) as process: - retcode = process.wait() - if retcode == 0: - logging.info("Run successfully") - else: - logging.info("Run failed") + with TeePopen(run_command, run_log_path) as process: + retcode = process.wait() + if retcode == 0: + logging.info("Run successfully") + else: + logging.info("Run failed") subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True) @@ -164,10 +201,16 @@ def process_results(result_folder, server_log_path): ch_helper = ClickHouseHelper() mark_flaky_tests(ch_helper, check_name, test_results) - report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, [run_log_path] + additional_logs, check_name) + report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, [run_log_path] + additional_logs, check_name_with_group) print(f"::notice ::Report url: {report_url}") - post_commit_status(gh, pr_info.sha, check_name, description, state, report_url) + post_commit_status(gh, pr_info.sha, check_name_with_group, description, state, report_url) - prepared_events = prepare_tests_results_for_clickhouse(pr_info, test_results, state, stopwatch.duration_seconds, stopwatch.start_time_str, report_url, check_name) + prepared_events = prepare_tests_results_for_clickhouse(pr_info, test_results, state, stopwatch.duration_seconds, stopwatch.start_time_str, report_url, check_name_with_group) ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events) + + if state != 'success': + if 'force-tests' in pr_info.labels: + print("'force-tests' enabled, will report success") + else: + sys.exit(1) diff --git a/tests/ci/integration_test_check.py b/tests/ci/integration_test_check.py index f6a46e72e848..e124635667e9 100644 --- a/tests/ci/integration_test_check.py +++ b/tests/ci/integration_test_check.py @@ -11,39 +11,41 @@ from s3_helper 
import S3Helper from get_robot_token import get_best_robot_token -from pr_info import PRInfo +from pr_info import PRInfo, get_event from build_download_helper import download_all_deb_packages from upload_result_helper import upload_results from docker_pull_helper import get_images_with_versions from commit_status_helper import post_commit_status from clickhouse_helper import ClickHouseHelper, mark_flaky_tests, prepare_tests_results_for_clickhouse from stopwatch import Stopwatch +from rerun_helper import RerunHelper +from tee_popen import TeePopen -DOWNLOAD_RETRIES_COUNT = 5 - IMAGES = [ - "yandex/clickhouse-integration-tests-runner", - "yandex/clickhouse-mysql-golang-client", - "yandex/clickhouse-mysql-java-client", - "yandex/clickhouse-mysql-js-client", - "yandex/clickhouse-mysql-php-client", - "yandex/clickhouse-postgresql-java-client", - "yandex/clickhouse-integration-test", - "yandex/clickhouse-kerberos-kdc", - "yandex/clickhouse-integration-helper", + "clickhouse/integration-tests-runner", + "clickhouse/mysql-golang-client", + "clickhouse/mysql-java-client", + "clickhouse/mysql-js-client", + "clickhouse/mysql-php-client", + "clickhouse/postgresql-java-client", + "clickhouse/integration-test", + "clickhouse/kerberos-kdc", + "clickhouse/integration-helper", ] -def get_json_params_dict(check_name, commit_sha, pr_number, docker_images): +def get_json_params_dict(check_name, pr_info, docker_images, run_by_hash_total, run_by_hash_num): return { 'context_name': check_name, - 'commit': commit_sha, - 'pull_request': pr_number, - 'pr_info': None, + 'commit': pr_info.sha, + 'pull_request': pr_info.number, + 'pr_info': {'changed_files' : list(pr_info.changed_files)}, 'docker_images_with_versions': docker_images, 'shuffle_test_groups': False, 'use_tmpfs': False, 'disable_net_host': True, + 'run_by_hash_total': run_by_hash_total, + 'run_by_hash_num': run_by_hash_num, } def get_env_for_runner(build_path, repo_path, result_path, work_path): @@ -74,23 +76,24 @@ def process_results(result_folder): test_files = [f for f in os.listdir(result_folder) if os.path.isfile(os.path.join(result_folder, f))] additional_files = [os.path.join(result_folder, f) for f in test_files] + status = [] status_path = os.path.join(result_folder, "check_status.tsv") if os.path.exists(status_path): logging.info("Found test_results.tsv") with open(status_path, 'r', encoding='utf-8') as status_file: status = list(csv.reader(status_file, delimiter='\t')) - else: - status = [] if len(status) != 1 or len(status[0]) != 2: + logging.info("Files in result folder %s", os.listdir(result_folder)) return "error", "Invalid check_status.tsv", test_results, additional_files state, description = status[0][0], status[0][1] results_path = os.path.join(result_folder, "test_results.tsv") - with open(results_path, 'r', encoding='utf-8') as results_file: - test_results = list(csv.reader(results_file, delimiter='\t')) + if os.path.exists(results_path): + with open(results_path, 'r', encoding='utf-8') as results_file: + test_results = list(csv.reader(results_file, delimiter='\t')) if len(test_results) == 0: - raise Exception("Empty results") + return "error", "Empty test_results.tsv", test_results, additional_files return state, description, test_results, additional_files @@ -105,17 +108,29 @@ def process_results(result_folder): check_name = sys.argv[1] + if 'RUN_BY_HASH_NUM' in os.environ: + run_by_hash_num = int(os.getenv('RUN_BY_HASH_NUM')) + run_by_hash_total = int(os.getenv('RUN_BY_HASH_TOTAL')) + check_name_with_group = check_name + f' 
[{run_by_hash_num + 1}/{run_by_hash_total}]' + else: + run_by_hash_num = 0 + run_by_hash_total = 0 + check_name_with_group = check_name + if not os.path.exists(temp_path): os.makedirs(temp_path) - with open(os.getenv('GITHUB_EVENT_PATH'), 'r', encoding='utf-8') as event_file: - event = json.load(event_file) - - pr_info = PRInfo(event) + is_flaky_check = 'flaky' in check_name + pr_info = PRInfo(get_event(), need_changed_files=is_flaky_check) gh = Github(get_best_robot_token()) - images = get_images_with_versions(temp_path, IMAGES) + rerun_helper = RerunHelper(gh, pr_info, check_name_with_group) + if rerun_helper.is_already_finished_by_status(): + logging.info("Check is already finished according to github status, exiting") + sys.exit(0) + + images = get_images_with_versions(reports_path, IMAGES) images_with_versions = {i.name: i.version for i in images} result_path = os.path.join(temp_path, "output_dir") if not os.path.exists(result_path): @@ -135,20 +150,19 @@ def process_results(result_folder): json_path = os.path.join(work_path, 'params.json') with open(json_path, 'w', encoding='utf-8') as json_params: - json_params.write(json.dumps(get_json_params_dict(check_name, pr_info.sha, pr_info.number, images_with_versions))) + json_params.write(json.dumps(get_json_params_dict(check_name, pr_info, images_with_versions, run_by_hash_total, run_by_hash_num))) output_path_log = os.path.join(result_path, "main_script_log.txt") runner_path = os.path.join(repo_path, "tests/integration", "ci-runner.py") run_command = f"sudo -E {runner_path} | tee {output_path_log}" - with open(output_path_log, 'w', encoding='utf-8') as log: - with subprocess.Popen(run_command, shell=True, stderr=log, stdout=log, env=my_env) as process: - retcode = process.wait() - if retcode == 0: - logging.info("Run tests successfully") - else: - logging.info("Some tests failed") + with TeePopen(run_command, output_path_log, my_env) as process: + retcode = process.wait() + if retcode == 0: + logging.info("Run tests successfully") + else: + logging.info("Some tests failed") subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True) @@ -158,9 +172,9 @@ def process_results(result_folder): mark_flaky_tests(ch_helper, check_name, test_results) s3_helper = S3Helper('https://s3.amazonaws.com') - report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, [output_path_log] + additional_logs, check_name, False) + report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, [output_path_log] + additional_logs, check_name_with_group, False) print(f"::notice ::Report url: {report_url}") - post_commit_status(gh, pr_info.sha, check_name, description, state, report_url) + post_commit_status(gh, pr_info.sha, check_name_with_group, description, state, report_url) - prepared_events = prepare_tests_results_for_clickhouse(pr_info, test_results, state, stopwatch.duration_seconds, stopwatch.start_time_str, report_url, check_name) + prepared_events = prepare_tests_results_for_clickhouse(pr_info, test_results, state, stopwatch.duration_seconds, stopwatch.start_time_str, report_url, check_name_with_group) ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events) diff --git a/tests/ci/metrics_lambda/app.py b/tests/ci/metrics_lambda/app.py index d776aa2be49f..dff0a7d715e3 100644 --- a/tests/ci/metrics_lambda/app.py +++ b/tests/ci/metrics_lambda/app.py @@ -7,6 +7,30 @@ import json import time from collections import namedtuple +import boto3 + +def 
get_dead_runners_in_ec2(runners): + ids = {runner.name: runner for runner in runners if runner.offline == True and runner.busy == False} + if not ids: + return [] + + client = boto3.client('ec2') + + print("Checking ids", list(ids.keys())) + instances_statuses = client.describe_instance_status(InstanceIds=list(ids.keys())) + found_instances = set([]) + print("Response", instances_statuses) + for instance_status in instances_statuses['InstanceStatuses']: + if instance_status['InstanceState']['Name'] in ('pending', 'running'): + found_instances.add(instance_status['InstanceId']) + + print("Found instances", found_instances) + result_to_delete = [] + for instance_id, runner in ids.items(): + if instance_id not in found_instances: + print("Instance", instance_id, "is not alive, going to remove it") + result_to_delete.append(runner) + return result_to_delete def get_key_and_app_from_aws(): import boto3 @@ -23,7 +47,7 @@ def get_key_and_app_from_aws(): def handler(event, context): private_key, app_id = get_key_and_app_from_aws() - main(private_key, app_id, True, False) + main(private_key, app_id, True, True) def get_installation_id(jwt_token): headers = { @@ -74,12 +98,13 @@ def list_runners(access_token): desc = RunnerDescription(id=runner['id'], name=runner['name'], tags=tags, offline=runner['status']=='offline', busy=runner['busy']) result.append(desc) + return result def group_runners_by_tag(listed_runners): result = {} - RUNNER_TYPE_LABELS = ['style-checker', 'builder', 'func-tester', 'stress-tester'] + RUNNER_TYPE_LABELS = ['style-checker', 'builder', 'func-tester', 'stress-tester', 'fuzzer-unit-tester'] for runner in listed_runners: for tag in runner.tags: if tag in RUNNER_TYPE_LABELS: @@ -95,10 +120,9 @@ def group_runners_by_tag(listed_runners): def push_metrics_to_cloudwatch(listed_runners, namespace): - import boto3 client = boto3.client('cloudwatch') metrics_data = [] - busy_runners = sum(1 for runner in listed_runners if runner.busy) + busy_runners = sum(1 for runner in listed_runners if runner.busy and not runner.offline) metrics_data.append({ 'MetricName': 'BusyRunners', 'Value': busy_runners, @@ -154,6 +178,7 @@ def main(github_secret_key, github_app_id, push_to_cloudwatch, delete_offline_ru grouped_runners = group_runners_by_tag(runners) for group, group_runners in grouped_runners.items(): if push_to_cloudwatch: + print(group) push_metrics_to_cloudwatch(group_runners, 'RunnersMetrics/' + group) else: print(group, f"({len(group_runners)})") @@ -162,12 +187,10 @@ def main(github_secret_key, github_app_id, push_to_cloudwatch, delete_offline_ru if delete_offline_runners: print("Going to delete offline runners") - for runner in runners: - if runner.offline and not runner.busy: - print("Deleting runner", runner) - delete_runner(access_token, runner) - - + dead_runners = get_dead_runners_in_ec2(runners) + for runner in dead_runners: + print("Deleting runner", runner) + delete_runner(access_token, runner) if __name__ == "__main__": parser = argparse.ArgumentParser(description='Get list of runners and their states') diff --git a/tests/ci/pr_info.py b/tests/ci/pr_info.py index 3df2b0909ef4..88d4595bc666 100644 --- a/tests/ci/pr_info.py +++ b/tests/ci/pr_info.py @@ -1,4 +1,5 @@ #!/usr/bin/env python3 +import json import os import urllib @@ -6,7 +7,7 @@ from unidiff import PatchSet -DIFF_IN_DOCUMENTATION_EXT = [".html", ".md", ".yml", ".txt", ".css", ".js", ".xml", ".ico", ".conf", ".svg", ".png", ".jpg", ".py", ".sh"] +DIFF_IN_DOCUMENTATION_EXT = [".html", ".md", ".yml", ".txt", ".css", 
".js", ".xml", ".ico", ".conf", ".svg", ".png", ".jpg", ".py", ".sh", ".json"] def get_pr_for_commit(sha, ref): try_get_pr_url = f"https://api.github.com/repos/{os.getenv('GITHUB_REPOSITORY', 'ClickHouse/ClickHouse')}/commits/{sha}/pulls" @@ -28,9 +29,14 @@ def get_pr_for_commit(sha, ref): print("Cannot fetch PR info from commit", ex) return None + +def get_event(): + with open(os.getenv('GITHUB_EVENT_PATH'), 'r', encoding='utf-8') as ef: + return json.load(ef) + + class PRInfo: def __init__(self, github_event, need_orgs=False, need_changed_files=False): - print("EVENT", github_event) if 'pull_request' in github_event: # pull request and other similar events self.number = github_event['number'] if 'after' in github_event: @@ -133,10 +139,53 @@ def has_changes_in_documentation(self): for f in self.changed_files: _, ext = os.path.splitext(f) - if ext in DIFF_IN_DOCUMENTATION_EXT or 'Dockerfile' in f: + path_in_docs = 'docs' in f + path_in_website = 'website' in f + if (ext in DIFF_IN_DOCUMENTATION_EXT and (path_in_docs or path_in_website)) or 'docker/docs' in f: return True return False + def can_skip_builds_and_use_version_from_master(self): + if 'force tests' in self.labels: + return False + + if self.changed_files is None or not self.changed_files: + return False + + for f in self.changed_files: + if (not f.startswith('tests/queries') + or not f.startswith('tests/integration') + or not f.startswith('tests/performance')): + return False + + return True + + def can_skip_integration_tests(self): + if 'force tests' in self.labels: + return False + + if self.changed_files is None or not self.changed_files: + return False + + for f in self.changed_files: + if not f.startswith('tests/queries') or not f.startswith('tests/performance'): + return False + + return True + + def can_skip_functional_tests(self): + if 'force tests' in self.labels: + return False + + if self.changed_files is None or not self.changed_files: + return False + + for f in self.changed_files: + if not f.startswith('tests/integration') or not f.startswith('tests/performance'): + return False + + return True + class FakePRInfo: def __init__(self): diff --git a/tests/ci/push_to_artifactory.py b/tests/ci/push_to_artifactory.py new file mode 100755 index 000000000000..f7ee495b7df1 --- /dev/null +++ b/tests/ci/push_to_artifactory.py @@ -0,0 +1,258 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +import argparse +import logging +import os +import re + +from artifactory import ArtifactorySaaSPath +from build_download_helper import dowload_build_with_progress + + +# Necessary ENV variables +def getenv(name, default=None): + env = os.getenv(name, default) + if env is not None: + return env + raise KeyError(f"Necessary {name} environment is not set") + + +TEMP_PATH = getenv("TEMP_PATH", ".") +# One of the following ENVs is necessary +JFROG_API_KEY = getenv("JFROG_API_KEY", "") +JFROG_TOKEN = getenv("JFROG_TOKEN", "") + + +class Packages: + rpm_arch = dict(all="noarch", amd64="x86_64") + packages = ( + ("clickhouse-client", "all"), + ("clickhouse-common-static", "amd64"), + ("clickhouse-common-static-dbg", "amd64"), + ("clickhouse-server", "all"), + ("clickhouse-test", "all"), + ) + + def __init__(self, version: str): + self.deb = tuple( + "_".join((name, version, arch + ".deb")) for name, arch in self.packages + ) + + rev = "2" + self.rpm = tuple( + "-".join((name, version, rev + "." 
+ self.rpm_arch[arch] + ".rpm")) + for name, arch in self.packages + ) + + def arch(self, deb_pkg: str) -> str: + if deb_pkg not in self.deb: + raise ValueError("{} not in {}".format(deb_pkg, self.deb)) + return deb_pkg.removesuffix(".deb").split("_")[-1] + + @staticmethod + def path(package): + return os.path.join(TEMP_PATH, package) + + +class S3: + template = ( + "https://s3.amazonaws.com/" + # "clickhouse-builds/" + "{bucket_name}/" + # "33333/" or "21.11/" from --release, if pull request is omitted + "{pr}/" + # "2bef313f75e4cacc6ea2ef2133e8849ecf0385ec/" + "{commit}/" + # "package_release/" + "{check_name}/" + # "clickhouse-common-static_21.11.5.0_amd64.deb" + "{package}" + ) + + def __init__( + self, + bucket_name: str, + pr: int, + commit: str, + check_name: str, + version: str, + ): + self._common = dict( + bucket_name=bucket_name, + pr=pr, + commit=commit, + check_name=check_name, + ) + self.packages = Packages(version) + + def download_package(self, package): + url = self.template.format_map({**self._common, "package": package}) + dowload_build_with_progress(url, Packages.path(package)) + + def download_deb(self): + for package in self.packages.deb: + self.download_package(package) + + def download_rpm(self): + for package in self.packages.rpm: + self.download_package(package) + + +class Release: + def __init__(self, name: str) -> str: + r = re.compile(r"^v\d{2}[.]\d+[.]\d+[.]\d+-(testing|prestable|stable|lts)$") + if not r.match(name): + raise argparse.ArgumentTypeError( + "release name does not match " + "v12.1.2.15-(testing|prestable|stable|lts) pattern" + ) + self._name = name + self._version = self._name.removeprefix("v") + self._version = self.version.split("-")[0] + self._version_parts = tuple(self.version.split(".")) + self._type = self._name.split("-")[-1] + + @property + def version(self) -> str: + return self._version + + @property + def version_parts(self) -> str: + return self._version_parts + + @property + def type(self) -> str: + return self._type + + +class Artifactory: + def __init__(self, url: str, release: str, deb_repo="deb", rpm_repo="rpm"): + self._url = url + self._release = release + self._deb_url = "/".join((self._url, deb_repo, "pool", self._release)) + "/" + self._rpm_url = "/".join((self._url, rpm_repo, self._release)) + "/" + # check the credentials ENVs for early exit + self.__path_helper("_deb", "") + + def deploy_deb(self, packages: Packages): + for package in packages.deb: + path = packages.path(package) + dist = self._release + comp = "main" + arch = packages.arch(package) + logging.info( + "Deploy %s(distribution=%s;component=%s;architecture=%s) to artifactory", + path, + dist, + comp, + arch, + ) + self.deb(package).deploy_deb(path, dist, comp, arch) + + def deploy_rpm(self, packages: Packages): + for package in packages.rpm: + path = packages.path(package) + logging.info("Deploy %s to artifactory", path) + self.rpm(package).deploy_file(path) + + def __path_helper(self, name, package) -> ArtifactorySaaSPath: + url = "/".join((getattr(self, name + "_url"), package)) + path = None + if JFROG_API_KEY: + path = ArtifactorySaaSPath(url, apikey=JFROG_API_KEY) + elif JFROG_TOKEN: + path = ArtifactorySaaSPath(url, token=JFROG_TOKEN) + else: + raise KeyError("Neither JFROG_API_KEY nor JFROG_TOKEN env are defined") + return path + + def deb(self, package) -> ArtifactorySaaSPath: + return self.__path_helper("_deb", package) + + def rpm(self, package) -> ArtifactorySaaSPath: + return self.__path_helper("_rpm", package) + + +def commit(name): + r = 
re.compile(r"^([0-9]|[a-f]){40}$") + if not r.match(name): + raise argparse.ArgumentTypeError( + "commit hash should contain exactly 40 hex characters" + ) + return name + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + description="Program to download artifacts from S3 and push them to " + "artifactory. ENV variables JFROG_API_KEY and JFROG_TOKEN are used " + "for authentication in the given order", + ) + parser.add_argument( + "--release", + required=True, + type=Release, + help="release name, e.g. v12.13.14.15-prestable", + ) + parser.add_argument( + "--pull-request", + type=int, + default=0, + help="pull request number; if PR is omitted, the first two numbers " + "from release will be used, e.g. 12.11", + ) + parser.add_argument( + "--commit", required=True, type=commit, help="commit hash for S3 bucket" + ) + parser.add_argument( + "--bucket-name", + default="clickhouse-builds", + help="AWS S3 bucket name", + ) + parser.add_argument( + "--check-name", + default="package_release", + help="check name, a part of bucket path, " + "will be converted to lower case with spaces->underscore", + ) + parser.add_argument( + "--deb", action="store_true", help="if Debian packages should be processed" + ) + parser.add_argument( + "--rpm", action="store_true", help="if RPM packages should be processed" + ) + parser.add_argument( + "--artifactory-url", default="https://clickhousedb.jfrog.io/artifactory" + ) + + args = parser.parse_args() + if not args.deb and not args.rpm: + parser.error("at least one of --deb and --rpm should be specified") + args.check_name = args.check_name.lower().replace(" ", "_") + if args.pull_request == 0: + args.pull_request = ".".join(args.release.version_parts[:2]) + return args + + +def main(): + logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s") + args = parse_args() + s3 = S3( + args.bucket_name, + args.pull_request, + args.commit, + args.check_name, + args.release.version, + ) + art_client = Artifactory(args.artifactory_url, args.release.type) + if args.deb: + s3.download_deb() + art_client.deploy_deb(s3.packages) + if args.rpm: + s3.download_rpm() + art_client.deploy_rpm(s3.packages) + + +if __name__ == "__main__": + main() diff --git a/tests/ci/pvs_check.py b/tests/ci/pvs_check.py index c55ef4dd5694..aa4a130902b0 100644 --- a/tests/ci/pvs_check.py +++ b/tests/ci/pvs_check.py @@ -2,19 +2,20 @@ # pylint: disable=line-too-long -import subprocess import os import json import logging import sys from github import Github from s3_helper import S3Helper -from pr_info import PRInfo +from pr_info import PRInfo, get_event from get_robot_token import get_best_robot_token, get_parameter_from_ssm from upload_result_helper import upload_results from commit_status_helper import get_commit from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse from stopwatch import Stopwatch +from rerun_helper import RerunHelper +from tee_popen import TeePopen NAME = 'PVS Studio (actions)' LICENCE_NAME = 'Free license: ClickHouse, Yandex' @@ -44,13 +45,15 @@ def _process_txt_report(path): repo_path = os.path.join(os.getenv("REPO_COPY", os.path.abspath("../../"))) temp_path = os.path.join(os.getenv("TEMP_PATH")) - with open(os.getenv('GITHUB_EVENT_PATH'), 'r') as event_file: - event = json.load(event_file) - pr_info = PRInfo(event) + pr_info = PRInfo(get_event()) # this check modify repository so copy it to the temp directory logging.info("Repo copy path %s", 
repo_path) gh = Github(get_best_robot_token()) + rerun_helper = RerunHelper(gh, pr_info, NAME) + if rerun_helper.is_already_finished_by_status(): + logging.info("Check is already finished according to github status, exiting") + sys.exit(0) images_path = os.path.join(temp_path, 'changed_images.json') docker_image = 'clickhouse/pvs-test' @@ -70,9 +73,16 @@ def _process_txt_report(path): cmd = f"docker run -u $(id -u ${{USER}}):$(id -g ${{USER}}) --volume={repo_path}:/repo_folder --volume={temp_path}:/test_output -e LICENCE_NAME='{LICENCE_NAME}' -e LICENCE_KEY='{licence_key}' {docker_image}" commit = get_commit(gh, pr_info.sha) - try: - subprocess.check_output(cmd, shell=True) - except: + run_log_path = os.path.join(temp_path, 'run_log.log') + + with TeePopen(cmd, run_log_path) as process: + retcode = process.wait() + if retcode != 0: + logging.info("Run failed") + else: + logging.info("Run Ok") + + if retcode != 0: commit.create_status(context=NAME, description='PVS report failed to build', state='failure', target_url=f"https://github.com/ClickHouse/ClickHouse/actions/runs/{os.getenv('GITHUB_RUN_ID')}") sys.exit(1) diff --git a/tests/ci/rerun_helper.py b/tests/ci/rerun_helper.py new file mode 100644 index 000000000000..0ba50334d284 --- /dev/null +++ b/tests/ci/rerun_helper.py @@ -0,0 +1,35 @@ +#!/usr/bin/env python3 + +from commit_status_helper import get_commit + +def _filter_statuses(statuses): + """ + Squash statuses to latest state + 1. context="first", state="success", update_time=1 + 2. context="second", state="success", update_time=2 + 3. context="first", stat="failure", update_time=3 + =========> + 1. context="second", state="success" + 2. context="first", stat="failure" + """ + filt = {} + for status in sorted(statuses, key=lambda x: x.updated_at): + filt[status.context] = status + return filt.values() + + +class RerunHelper: + + def __init__(self, gh, pr_info, check_name): + self.gh = gh + self.pr_info = pr_info + self.check_name = check_name + self.pygh_commit = get_commit(gh, self.pr_info.sha) + self.statuses = _filter_statuses(self.pygh_commit.get_statuses()) + + def is_already_finished_by_status(self): + # currently we agree even for failed statuses + for status in self.statuses: + if self.check_name in status.context and status.state in ('success', 'failure'): + return True + return False diff --git a/tests/ci/run_check.py b/tests/ci/run_check.py index 99a99ad30630..3739cf882483 100644 --- a/tests/ci/run_check.py +++ b/tests/ci/run_check.py @@ -1,10 +1,9 @@ #!/usr/bin/env python3 import os -import json import sys import logging from github import Github -from pr_info import PRInfo +from pr_info import PRInfo, get_event from get_robot_token import get_best_robot_token from commit_status_helper import get_commit @@ -37,7 +36,7 @@ "codyrobert", # Flickerbox engineer "damozhaeva", # DOCSUP "den-crane", - "gyuton", # DOCSUP + "flickerbox-tom", # Flickerbox "gyuton", # technical writer, Yandex "hagen1778", # Roman Khavronenko, seasoned contributor "hczhcz", @@ -105,10 +104,8 @@ def should_run_checks_for_pr(pr_info): if __name__ == "__main__": logging.basicConfig(level=logging.INFO) - with open(os.getenv('GITHUB_EVENT_PATH'), 'r') as event_file: - event = json.load(event_file) - pr_info = PRInfo(event, need_orgs=True) + pr_info = PRInfo(get_event(), need_orgs=True) can_run, description = should_run_checks_for_pr(pr_info) gh = Github(get_best_robot_token()) commit = get_commit(gh, pr_info.sha) diff --git a/tests/ci/s3_helper.py b/tests/ci/s3_helper.py index 82791234f1a7..7c1ee8ad9ee9 
100644 --- a/tests/ci/s3_helper.py +++ b/tests/ci/s3_helper.py @@ -49,7 +49,9 @@ def _upload_file_to_s3(self, bucket_name, file_path, s3_path): else: logging.info("No content type provied for %s", file_path) else: - if s3_path.endswith("txt") or s3_path.endswith("log") or s3_path.endswith("err") or s3_path.endswith("out"): + is_log = s3_path.endswith("log") or ".log." in s3_path + is_text = s3_path.endswith("txt") or is_log or s3_path.endswith("err") or s3_path.endswith("out") + if not s3_path.endswith('.gz') and (is_text or is_log): logging.info("Going to compress file log file %s to %s", file_path, file_path + ".gz") compress_file_fast(file_path, file_path + ".gz") file_path += ".gz" diff --git a/tests/ci/split_build_smoke_check.py b/tests/ci/split_build_smoke_check.py index 28eb554d90e2..57b4376eb113 100644 --- a/tests/ci/split_build_smoke_check.py +++ b/tests/ci/split_build_smoke_check.py @@ -2,20 +2,21 @@ import os import logging -import json import subprocess +import sys from github import Github from s3_helper import S3Helper from get_robot_token import get_best_robot_token -from pr_info import PRInfo +from pr_info import PRInfo, get_event from build_download_helper import download_shared_build from upload_result_helper import upload_results from docker_pull_helper import get_image_with_version from commit_status_helper import post_commit_status from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse from stopwatch import Stopwatch +from rerun_helper import RerunHelper DOCKER_IMAGE = "clickhouse/split-build-smoke-test" @@ -63,13 +64,15 @@ def get_run_command(build_path, result_folder, server_log_folder, docker_image): repo_path = os.getenv("REPO_COPY", os.path.abspath("../../")) reports_path = os.getenv("REPORTS_PATH", "./reports") - with open(os.getenv('GITHUB_EVENT_PATH'), 'r', encoding='utf-8') as event_file: - event = json.load(event_file) - - pr_info = PRInfo(event) + pr_info = PRInfo(get_event()) gh = Github(get_best_robot_token()) + rerun_helper = RerunHelper(gh, pr_info, CHECK_NAME) + if rerun_helper.is_already_finished_by_status(): + logging.info("Check is already finished according to github status, exiting") + sys.exit(0) + for root, _, files in os.walk(reports_path): for f in files: if f == 'changed_images.json': diff --git a/tests/ci/stress_check.py b/tests/ci/stress_check.py index 4b3adfad23f3..911d370a594b 100644 --- a/tests/ci/stress_check.py +++ b/tests/ci/stress_check.py @@ -4,24 +4,25 @@ import logging import subprocess import os -import json import sys from github import Github from s3_helper import S3Helper from get_robot_token import get_best_robot_token -from pr_info import PRInfo +from pr_info import PRInfo, get_event from build_download_helper import download_all_deb_packages from upload_result_helper import upload_results from docker_pull_helper import get_image_with_version from commit_status_helper import post_commit_status from clickhouse_helper import ClickHouseHelper, mark_flaky_tests, prepare_tests_results_for_clickhouse from stopwatch import Stopwatch +from rerun_helper import RerunHelper +from tee_popen import TeePopen def get_run_command(build_path, result_folder, server_log_folder, image): - cmd = "docker run -e S3_URL='https://clickhouse-datasets.s3.amazonaws.com' " + \ + cmd = "docker run --cap-add=SYS_PTRACE -e S3_URL='https://clickhouse-datasets.s3.amazonaws.com' " + \ f"--volume={build_path}:/package_folder " \ f"--volume={result_folder}:/test_output " \ f"--volume={server_log_folder}:/var/log/clickhouse-server 
{image}" @@ -77,13 +78,15 @@ def process_results(result_folder, server_log_path, run_log_path): if not os.path.exists(temp_path): os.makedirs(temp_path) - with open(os.getenv('GITHUB_EVENT_PATH'), 'r', encoding='utf-8') as event_file: - event = json.load(event_file) - - pr_info = PRInfo(event) + pr_info = PRInfo(get_event()) gh = Github(get_best_robot_token()) + rerun_helper = RerunHelper(gh, pr_info, check_name) + if rerun_helper.is_already_finished_by_status(): + logging.info("Check is already finished according to github status, exiting") + sys.exit(0) + docker_image = get_image_with_version(reports_path, 'clickhouse/stress-test') packages_path = os.path.join(temp_path, "packages") @@ -105,13 +108,12 @@ def process_results(result_folder, server_log_path, run_log_path): run_command = get_run_command(packages_path, result_path, server_log_path, docker_image) logging.info("Going to run func tests: %s", run_command) - with open(run_log_path, 'w', encoding='utf-8') as log: - with subprocess.Popen(run_command, shell=True, stderr=log, stdout=log) as process: - retcode = process.wait() - if retcode == 0: - logging.info("Run successfully") - else: - logging.info("Run failed") + with TeePopen(run_command, run_log_path) as process: + retcode = process.wait() + if retcode == 0: + logging.info("Run successfully") + else: + logging.info("Run failed") subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True) diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index 8e11b2958277..72ec58144a0f 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -3,17 +3,18 @@ import subprocess import os import csv -import json +import sys + from github import Github from s3_helper import S3Helper -from pr_info import PRInfo +from pr_info import PRInfo, get_event from get_robot_token import get_best_robot_token from upload_result_helper import upload_results from docker_pull_helper import get_image_with_version from commit_status_helper import post_commit_status from clickhouse_helper import ClickHouseHelper, mark_flaky_tests, prepare_tests_results_for_clickhouse from stopwatch import Stopwatch - +from rerun_helper import RerunHelper NAME = "Style Check (actions)" @@ -27,10 +28,14 @@ def process_result(result_folder): test_files = [f for f in os.listdir(result_folder) if os.path.isfile(os.path.join(result_folder, f))] additional_files = [os.path.join(result_folder, f) for f in test_files] + status = [] status_path = os.path.join(result_folder, "check_status.tsv") - logging.info("Found test_results.tsv") - status = list(csv.reader(open(status_path, 'r'), delimiter='\t')) + if os.path.exists(status_path): + logging.info("Found test_results.tsv") + with open(status_path, 'r', encoding='utf-8') as status_file: + status = list(csv.reader(status_file, delimiter='\t')) if len(status) != 1 or len(status[0]) != 2: + logging.info("Files in result folder %s", os.listdir(result_folder)) return "error", "Invalid check_status.tsv", test_results, additional_files state, description = status[0][0], status[0][1] @@ -46,6 +51,7 @@ def process_result(result_folder): state, description = "error", "Failed to read test_results.tsv" return state, description, test_results, additional_files + if __name__ == "__main__": logging.basicConfig(level=logging.INFO) @@ -54,15 +60,18 @@ def process_result(result_folder): repo_path = os.path.join(os.getenv("GITHUB_WORKSPACE", os.path.abspath("../../"))) temp_path = os.path.join(os.getenv("RUNNER_TEMP", os.path.abspath("./temp")), 'style_check') - with 
open(os.getenv('GITHUB_EVENT_PATH'), 'r') as event_file: - event = json.load(event_file) - pr_info = PRInfo(event) + pr_info = PRInfo(get_event()) + + gh = Github(get_best_robot_token()) + + rerun_helper = RerunHelper(gh, pr_info, NAME) + if rerun_helper.is_already_finished_by_status(): + logging.info("Check is already finished according to github status, exiting") + sys.exit(0) if not os.path.exists(temp_path): os.makedirs(temp_path) - gh = Github(get_best_robot_token()) - docker_image = get_image_with_version(temp_path, 'clickhouse/style-test') s3_helper = S3Helper('https://s3.amazonaws.com') diff --git a/tests/ci/tee_popen.py b/tests/ci/tee_popen.py new file mode 100644 index 000000000000..cbb915e6de74 --- /dev/null +++ b/tests/ci/tee_popen.py @@ -0,0 +1,38 @@ +#!/usr/bin/env python3 + +from subprocess import Popen, PIPE, STDOUT +import sys +import os + + +# Very simple tee logic implementation. You can specify shell command, output +# logfile and env variables. After TeePopen is created you can only wait until +# it finishes. stderr and stdout will be redirected both to specified file and +# stdout. +class TeePopen: + # pylint: disable=W0102 + def __init__(self, command, log_file, env=os.environ.copy()): + self.command = command + self.log_file = log_file + self.env = env + + def __enter__(self): + # pylint: disable=W0201 + self.process = Popen(self.command, shell=True, universal_newlines=True, env=self.env, stderr=STDOUT, stdout=PIPE, bufsize=1) + self.log_file = open(self.log_file, 'w', encoding='utf-8') + return self + + def __exit__(self, t, value, traceback): + for line in self.process.stdout: + sys.stdout.write(line) + self.log_file.write(line) + + self.process.wait() + self.log_file.close() + + def wait(self): + for line in self.process.stdout: + sys.stdout.write(line) + self.log_file.write(line) + + return self.process.wait() diff --git a/tests/ci/termination_lambda/app.py b/tests/ci/termination_lambda/app.py index cd7d51ae8eb5..5de3d1531f2c 100644 --- a/tests/ci/termination_lambda/app.py +++ b/tests/ci/termination_lambda/app.py @@ -139,7 +139,7 @@ def delete_runner(access_token, runner): response = requests.delete(f"https://api.github.com/orgs/ClickHouse/actions/runners/{runner.id}", headers=headers) response.raise_for_status() - print(f"Response code deleting {runner.name} is {response.status_code}") + print(f"Response code deleting {runner.name} with id {runner.id} is {response.status_code}") return response.status_code == 204 @@ -197,7 +197,7 @@ def main(github_secret_key, github_app_id, event): print("Going to delete runners:", ', '.join([runner.name for runner in to_delete_runners])) for runner in to_delete_runners: if delete_runner(access_token, runner): - print(f"Runner {runner.name} successfuly deleted from github") + print(f"Runner with name {runner.name} and id {runner.id} successfuly deleted from github") instances_to_kill.append(runner.name) else: print(f"Cannot delete {runner.name} from github") diff --git a/tests/ci/unit_tests_check.py b/tests/ci/unit_tests_check.py index 21aa63e3b19c..abccbcd4512d 100644 --- a/tests/ci/unit_tests_check.py +++ b/tests/ci/unit_tests_check.py @@ -4,19 +4,20 @@ import os import sys import subprocess -import json from github import Github from s3_helper import S3Helper from get_robot_token import get_best_robot_token -from pr_info import PRInfo +from pr_info import PRInfo, get_event from build_download_helper import download_unit_tests from upload_result_helper import upload_results from docker_pull_helper import 
get_image_with_version from commit_status_helper import post_commit_status from clickhouse_helper import ClickHouseHelper, mark_flaky_tests, prepare_tests_results_for_clickhouse from stopwatch import Stopwatch +from rerun_helper import RerunHelper +from tee_popen import TeePopen IMAGE_NAME = 'clickhouse/unit-test' @@ -102,13 +103,15 @@ def process_result(result_folder): if not os.path.exists(temp_path): os.makedirs(temp_path) - with open(os.getenv('GITHUB_EVENT_PATH'), 'r', encoding='utf-8') as event_file: - event = json.load(event_file) - - pr_info = PRInfo(event) + pr_info = PRInfo(get_event()) gh = Github(get_best_robot_token()) + rerun_helper = RerunHelper(gh, pr_info, check_name) + if rerun_helper.is_already_finished_by_status(): + logging.info("Check is already finished according to github status, exiting") + sys.exit(0) + docker_image = get_image_with_version(reports_path, IMAGE_NAME) download_unit_tests(check_name, reports_path, temp_path) @@ -126,13 +129,12 @@ def process_result(result_folder): logging.info("Going to run func tests: %s", run_command) - with open(run_log_path, 'w', encoding='utf-8') as log: - with subprocess.Popen(run_command, shell=True, stderr=log, stdout=log) as process: - retcode = process.wait() - if retcode == 0: - logging.info("Run successfully") - else: - logging.info("Run failed") + with TeePopen(run_command, run_log_path) as process: + retcode = process.wait() + if retcode == 0: + logging.info("Run successfully") + else: + logging.info("Run failed") subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True) diff --git a/tests/ci/worker/init_builder.sh b/tests/ci/worker/init_builder.sh index dc3f777bccaa..8fd00c1db0a6 100644 --- a/tests/ci/worker/init_builder.sh +++ b/tests/ci/worker/init_builder.sh @@ -1,20 +1,34 @@ #!/usr/bin/env bash -set -euo pipefail +set -uo pipefail echo "Running init script" export DEBIAN_FRONTEND=noninteractive export RUNNER_HOME=/home/ubuntu/actions-runner -echo "Receiving token" -export RUNNER_TOKEN=`/usr/local/bin/aws ssm get-parameter --name github_runner_registration_token --with-decryption --output text --query Parameter.Value` export RUNNER_URL="https://github.com/ClickHouse" # Funny fact, but metadata service has fixed IP export INSTANCE_ID=`curl -s http://169.254.169.254/latest/meta-data/instance-id` -cd $RUNNER_HOME +while true; do + runner_pid=`pgrep run.sh` + echo "Got runner pid $runner_pid" -echo "Going to configure runner" -sudo -u ubuntu ./config.sh --url $RUNNER_URL --token $RUNNER_TOKEN --name $INSTANCE_ID --runnergroup Default --labels 'self-hosted,Linux,X64,builder' --work _work + cd $RUNNER_HOME + if [ -z "$runner_pid" ]; then + echo "Receiving token" + RUNNER_TOKEN=`/usr/local/bin/aws ssm get-parameter --name github_runner_registration_token --with-decryption --output text --query Parameter.Value` -echo "Run" -sudo -u ubuntu ./run.sh + echo "Will try to remove runner" + sudo -u ubuntu ./config.sh remove --token $RUNNER_TOKEN ||: + + echo "Going to configure runner" + sudo -u ubuntu ./config.sh --url $RUNNER_URL --token $RUNNER_TOKEN --name $INSTANCE_ID --runnergroup Default --labels 'self-hosted,Linux,X64,builder' --work _work + + echo "Run" + sudo -u ubuntu ./run.sh & + sleep 15 + else + echo "Runner is working with pid $runner_pid, nothing to do" + sleep 10 + fi +done diff --git a/tests/ci/worker/init_func_tester.sh b/tests/ci/worker/init_func_tester.sh index b117f11556d7..d3ee3cb3d7fb 100644 --- a/tests/ci/worker/init_func_tester.sh +++ b/tests/ci/worker/init_func_tester.sh @@ -1,20 
+1,34 @@ #!/usr/bin/env bash -set -euo pipefail +set -uo pipefail echo "Running init script" export DEBIAN_FRONTEND=noninteractive export RUNNER_HOME=/home/ubuntu/actions-runner -echo "Receiving token" -export RUNNER_TOKEN=`/usr/local/bin/aws ssm get-parameter --name github_runner_registration_token --with-decryption --output text --query Parameter.Value` export RUNNER_URL="https://github.com/ClickHouse" # Funny fact, but metadata service has fixed IP export INSTANCE_ID=`curl -s http://169.254.169.254/latest/meta-data/instance-id` -cd $RUNNER_HOME +while true; do + runner_pid=`pgrep run.sh` + echo "Got runner pid $runner_pid" -echo "Going to configure runner" -sudo -u ubuntu ./config.sh --url $RUNNER_URL --token $RUNNER_TOKEN --name $INSTANCE_ID --runnergroup Default --labels 'self-hosted,Linux,X64,func-tester' --work _work + cd $RUNNER_HOME + if [ -z "$runner_pid" ]; then + echo "Receiving token" + RUNNER_TOKEN=`/usr/local/bin/aws ssm get-parameter --name github_runner_registration_token --with-decryption --output text --query Parameter.Value` -echo "Run" -sudo -u ubuntu ./run.sh + echo "Will try to remove runner" + sudo -u ubuntu ./config.sh remove --token $RUNNER_TOKEN ||: + + echo "Going to configure runner" + sudo -u ubuntu ./config.sh --url $RUNNER_URL --token $RUNNER_TOKEN --name $INSTANCE_ID --runnergroup Default --labels 'self-hosted,Linux,X64,func-tester' --work _work + + echo "Run" + sudo -u ubuntu ./run.sh & + sleep 15 + else + echo "Runner is working with pid $runner_pid, nothing to do" + sleep 10 + fi +done diff --git a/tests/ci/worker/init_fuzzer_unit_tester.sh b/tests/ci/worker/init_fuzzer_unit_tester.sh new file mode 100644 index 000000000000..2fbedba9e40b --- /dev/null +++ b/tests/ci/worker/init_fuzzer_unit_tester.sh @@ -0,0 +1,34 @@ +#!/usr/bin/env bash +set -uo pipefail + +echo "Running init script" +export DEBIAN_FRONTEND=noninteractive +export RUNNER_HOME=/home/ubuntu/actions-runner + +export RUNNER_URL="https://github.com/ClickHouse" +# Funny fact, but metadata service has fixed IP +export INSTANCE_ID=`curl -s http://169.254.169.254/latest/meta-data/instance-id` + +while true; do + runner_pid=`pgrep run.sh` + echo "Got runner pid $runner_pid" + + cd $RUNNER_HOME + if [ -z "$runner_pid" ]; then + echo "Receiving token" + RUNNER_TOKEN=`/usr/local/bin/aws ssm get-parameter --name github_runner_registration_token --with-decryption --output text --query Parameter.Value` + + echo "Will try to remove runner" + sudo -u ubuntu ./config.sh remove --token $RUNNER_TOKEN ||: + + echo "Going to configure runner" + sudo -u ubuntu ./config.sh --url $RUNNER_URL --token $RUNNER_TOKEN --name $INSTANCE_ID --runnergroup Default --labels 'self-hosted,Linux,X64,fuzzer-unit-tester' --work _work + + echo "Run" + sudo -u ubuntu ./run.sh & + sleep 15 + else + echo "Runner is working with pid $runner_pid, nothing to do" + sleep 10 + fi +done diff --git a/tests/ci/worker/init_stress_tester.sh b/tests/ci/worker/init_stress_tester.sh index 54ed944b2749..234f035e1eaf 100644 --- a/tests/ci/worker/init_stress_tester.sh +++ b/tests/ci/worker/init_stress_tester.sh @@ -1,20 +1,34 @@ #!/usr/bin/env bash -set -euo pipefail +set -uo pipefail echo "Running init script" export DEBIAN_FRONTEND=noninteractive export RUNNER_HOME=/home/ubuntu/actions-runner -echo "Receiving token" -export RUNNER_TOKEN=`/usr/local/bin/aws ssm get-parameter --name github_runner_registration_token --with-decryption --output text --query Parameter.Value` export RUNNER_URL="https://github.com/ClickHouse" # Funny fact, but metadata 
service has fixed IP export INSTANCE_ID=`curl -s http://169.254.169.254/latest/meta-data/instance-id` -cd $RUNNER_HOME +while true; do + runner_pid=`pgrep run.sh` + echo "Got runner pid $runner_pid" -echo "Going to configure runner" -sudo -u ubuntu ./config.sh --url $RUNNER_URL --token $RUNNER_TOKEN --name $INSTANCE_ID --runnergroup Default --labels 'self-hosted,Linux,X64,stress-tester' --work _work + cd $RUNNER_HOME + if [ -z "$runner_pid" ]; then + echo "Receiving token" + RUNNER_TOKEN=`/usr/local/bin/aws ssm get-parameter --name github_runner_registration_token --with-decryption --output text --query Parameter.Value` -echo "Run" -sudo -u ubuntu ./run.sh + echo "Will try to remove runner" + sudo -u ubuntu ./config.sh remove --token $RUNNER_TOKEN ||: + + echo "Going to configure runner" + sudo -u ubuntu ./config.sh --url $RUNNER_URL --token $RUNNER_TOKEN --name $INSTANCE_ID --runnergroup Default --labels 'self-hosted,Linux,X64,stress-tester' --work _work + + echo "Run" + sudo -u ubuntu ./run.sh & + sleep 15 + else + echo "Runner is working with pid $runner_pid, nothing to do" + sleep 10 + fi +done diff --git a/tests/ci/workflow_approve_rerun_lambda/Dockerfile b/tests/ci/workflow_approve_rerun_lambda/Dockerfile new file mode 100644 index 000000000000..f53be71a8931 --- /dev/null +++ b/tests/ci/workflow_approve_rerun_lambda/Dockerfile @@ -0,0 +1,13 @@ +FROM public.ecr.aws/lambda/python:3.9 + +# Copy function code +COPY app.py ${LAMBDA_TASK_ROOT} + +# Install the function's dependencies using file requirements.txt +# from your project folder. + +COPY requirements.txt . +RUN pip3 install -r requirements.txt --target "${LAMBDA_TASK_ROOT}" + +# Set the CMD to your handler (could also be done as a parameter override outside of the Dockerfile) +CMD [ "app.handler" ] diff --git a/tests/ci/workflow_approve_rerun_lambda/app.py b/tests/ci/workflow_approve_rerun_lambda/app.py new file mode 100644 index 000000000000..8c54414b63b3 --- /dev/null +++ b/tests/ci/workflow_approve_rerun_lambda/app.py @@ -0,0 +1,373 @@ +#!/usr/bin/env python3 + +import json +import time +import fnmatch +from collections import namedtuple +import jwt + +import requests +import boto3 + +API_URL = 'https://api.github.com/repos/ClickHouse/ClickHouse' + +SUSPICIOUS_CHANGED_FILES_NUMBER = 200 + +SUSPICIOUS_PATTERNS = [ + "tests/ci/*", + "docs/tools/*", + ".github/*", + "utils/release/*", + "docker/*", + "release", +] + +MAX_RETRY = 5 +MAX_WORKFLOW_RERUN = 5 + +WorkflowDescription = namedtuple('WorkflowDescription', + ['name', 'action', 'run_id', 'event', 'workflow_id', 'conclusion', 'status', 'api_url', + 'fork_owner_login', 'fork_branch', 'rerun_url', 'jobs_url', 'attempt', 'url']) + +TRUSTED_WORKFLOW_IDS = { + 14586616, # Cancel workflows, always trusted +} + +TRUSTED_ORG_IDS = { + 7409213, # yandex + 28471076, # altinity + 54801242, # clickhouse +} + +NEED_RERUN_WORKFLOWS = { + 13241696, # PR + 15834118, # Docs + 15522500, # MasterCI +} + +# Individual trusted contirbutors who are not in any trusted organization. +# Can be changed in runtime: we will append users that we learned to be in +# a trusted org, to save GitHub API calls. +TRUSTED_CONTRIBUTORS = { + "achimbab", + "adevyatova ", # DOCSUP + "Algunenano", # Raúl Marín, Tinybird + "AnaUvarova", # DOCSUP + "anauvarova", # technical writer, Yandex + "annvsh", # technical writer, Yandex + "atereh", # DOCSUP + "azat", + "bharatnc", # Newbie, but already with many contributions. 
+ "bobrik", # Seasoned contributor, CloundFlare + "BohuTANG", + "damozhaeva", # DOCSUP + "den-crane", + "gyuton", # DOCSUP + "hagen1778", # Roman Khavronenko, seasoned contributor + "hczhcz", + "hexiaoting", # Seasoned contributor + "ildus", # adjust, ex-pgpro + "javisantana", # a Spanish ClickHouse enthusiast, ex-Carto + "ka1bi4", # DOCSUP + "kirillikoff", # DOCSUP + "kreuzerkrieg", + "lehasm", # DOCSUP + "michon470", # DOCSUP + "MyroTk", # Tester in Altinity + "myrrc", # Michael Kot, Altinity + "nikvas0", + "nvartolomei", + "olgarev", # DOCSUP + "otrazhenia", # Yandex docs contractor + "pdv-ru", # DOCSUP + "podshumok", # cmake expert from QRator Labs + "s-mx", # Maxim Sabyanin, former employee, present contributor + "sevirov", # technical writer, Yandex + "spongedu", # Seasoned contributor + "ucasfl", # Amos Bird's friend + "vdimir", # Employee + "vzakaznikov", + "YiuRULE", + "zlobober" # Developer of YT +} + + +def get_installation_id(jwt_token): + headers = { + "Authorization": f"Bearer {jwt_token}", + "Accept": "application/vnd.github.v3+json", + } + response = requests.get("https://api.github.com/app/installations", headers=headers) + response.raise_for_status() + data = response.json() + return data[0]['id'] + +def get_access_token(jwt_token, installation_id): + headers = { + "Authorization": f"Bearer {jwt_token}", + "Accept": "application/vnd.github.v3+json", + } + response = requests.post(f"https://api.github.com/app/installations/{installation_id}/access_tokens", headers=headers) + response.raise_for_status() + data = response.json() + return data['token'] + +def get_key_and_app_from_aws(): + secret_name = "clickhouse_github_secret_key" + session = boto3.session.Session() + client = session.client( + service_name='secretsmanager', + ) + get_secret_value_response = client.get_secret_value( + SecretId=secret_name + ) + data = json.loads(get_secret_value_response['SecretString']) + return data['clickhouse-app-key'], int(data['clickhouse-app-id']) + + +def is_trusted_contributor(pr_user_login, pr_user_orgs): + if pr_user_login in TRUSTED_CONTRIBUTORS: + print(f"User '{pr_user_login}' is trusted") + return True + + print(f"User '{pr_user_login}' is not trusted") + + for org_id in pr_user_orgs: + if org_id in TRUSTED_ORG_IDS: + print(f"Org '{org_id}' is trusted; will mark user {pr_user_login} as trusted") + return True + print(f"Org '{org_id}' is not trusted") + + return False + +def _exec_get_with_retry(url): + for i in range(MAX_RETRY): + try: + response = requests.get(url) + response.raise_for_status() + return response.json() + except Exception as ex: + print("Got exception executing request", ex) + time.sleep(i + 1) + + raise Exception("Cannot execute GET request with retries") + +def _exec_post_with_retry(url, token, data=None): + headers = { + "Authorization": f"token {token}" + } + for i in range(MAX_RETRY): + try: + if data: + response = requests.post(url, headers=headers, json=data) + else: + response = requests.post(url, headers=headers) + if response.status_code == 403: + data = response.json() + if 'message' in data and data['message'] == 'This workflow run is not waiting for approval': + print("Workflow doesn't need approval") + return data + response.raise_for_status() + return response.json() + except Exception as ex: + print("Got exception executing request", ex) + time.sleep(i + 1) + + raise Exception("Cannot execute POST request with retry") + +def _get_pull_requests_from(owner, branch): + url = f"{API_URL}/pulls?head={owner}:{branch}" + return 
_exec_get_with_retry(url) + +def get_workflow_description_from_event(event): + action = event['action'] + run_id = event['workflow_run']['id'] + event_type = event['workflow_run']['event'] + fork_owner = event['workflow_run']['head_repository']['owner']['login'] + fork_branch = event['workflow_run']['head_branch'] + name = event['workflow_run']['name'] + workflow_id = event['workflow_run']['workflow_id'] + conclusion = event['workflow_run']['conclusion'] + attempt = event['workflow_run']['run_attempt'] + status = event['workflow_run']['status'] + jobs_url = event['workflow_run']['jobs_url'] + rerun_url = event['workflow_run']['rerun_url'] + url = event['workflow_run']['html_url'] + api_url = event['workflow_run']['url'] + return WorkflowDescription( + name=name, + action=action, + run_id=run_id, + event=event_type, + fork_owner_login=fork_owner, + fork_branch=fork_branch, + workflow_id=workflow_id, + conclusion=conclusion, + attempt=attempt, + status=status, + jobs_url=jobs_url, + rerun_url=rerun_url, + url=url, + api_url=api_url + ) + +def get_pr_author_and_orgs(pull_request): + author = pull_request['user']['login'] + orgs = _exec_get_with_retry(pull_request['user']['organizations_url']) + return author, [org['id'] for org in orgs] + +def get_changed_files_for_pull_request(pull_request): + number = pull_request['number'] + + changed_files = set([]) + for i in range(1, 31): + print("Requesting changed files page", i) + url = f"{API_URL}/pulls/{number}/files?page={i}&per_page=100" + data = _exec_get_with_retry(url) + print(f"Got {len(data)} changed files") + if len(data) == 0: + print("No more changed files") + break + + for change in data: + #print("Adding changed file", change['filename']) + changed_files.add(change['filename']) + + if len(changed_files) >= SUSPICIOUS_CHANGED_FILES_NUMBER: + print(f"More than {len(changed_files)} changed files. 
Will stop fetching new files.") + break + + return changed_files + +def check_suspicious_changed_files(changed_files): + if len(changed_files) >= SUSPICIOUS_CHANGED_FILES_NUMBER: + print(f"Too many files changed {len(changed_files)}, need manual approve") + return True + + for path in changed_files: + for pattern in SUSPICIOUS_PATTERNS: + if fnmatch.fnmatch(path, pattern): + print(f"File {path} matches suspicious pattern {pattern}, will not approve automatically") + return True + + print("No changed files match suspicious patterns, run will be approved") + return False + +def approve_run(run_id, token): + url = f"{API_URL}/actions/runs/{run_id}/approve" + _exec_post_with_retry(url, token) + +def label_manual_approve(pull_request, token): + number = pull_request['number'] + url = f"{API_URL}/issues/{number}/labels" + data = {"labels" : "manual approve"} + + _exec_post_with_retry(url, token, data) + +def get_token_from_aws(): + private_key, app_id = get_key_and_app_from_aws() + payload = { + "iat": int(time.time()) - 60, + "exp": int(time.time()) + (10 * 60), + "iss": app_id, + } + + encoded_jwt = jwt.encode(payload, private_key, algorithm="RS256") + installation_id = get_installation_id(encoded_jwt) + return get_access_token(encoded_jwt, installation_id) + +def get_workflow_jobs(workflow_description): + jobs_url = workflow_description.api_url + f"/attempts/{workflow_description.attempt}/jobs" + jobs = [] + i = 1 + while True: + got_jobs = _exec_get_with_retry(jobs_url + f"?page={i}") + if len(got_jobs['jobs']) == 0: + break + + jobs += got_jobs['jobs'] + i += 1 + + return jobs + +def check_need_to_rerun(workflow_description): + if workflow_description.attempt >= MAX_WORKFLOW_RERUN: + print("Not going to rerun workflow because it has already been retried the maximum number of times") + return False + print("Going to check jobs") + + jobs = get_workflow_jobs(workflow_description) + print("Got jobs", len(jobs)) + for job in jobs: + if job['conclusion'] not in ('success', 'skipped'): + print("Job", job['name'], "failed, checking steps") + for step in job['steps']: + # 'Complete job' is always the last step + if step['name'] == 'Complete job': + print("Found Complete job step for job", job['name']) + break + else: + print("Checked all steps and didn't find the Complete job step, going to rerun") + return True + + return False + +def rerun_workflow(workflow_description, token): + print("Going to rerun workflow") + _exec_post_with_retry(workflow_description.rerun_url, token) + +def main(event): + token = get_token_from_aws() + event_data = json.loads(event['body']) + workflow_description = get_workflow_description_from_event(event_data) + + print("Got workflow description", workflow_description) + if workflow_description.action == 'completed' and workflow_description.conclusion == 'failure': + print("Workflow", workflow_description.url, "completed and failed, let's check for rerun") + + if workflow_description.workflow_id not in NEED_RERUN_WORKFLOWS: + print("Workflow", workflow_description.workflow_id, "not in the list of rerunnable workflows") + return + + if check_need_to_rerun(workflow_description): + rerun_workflow(workflow_description, token) + return + + if workflow_description.action != "requested": + print("Exiting, event action is", workflow_description.action) + return + + if workflow_description.workflow_id in TRUSTED_WORKFLOW_IDS: + print("Workflow in trusted list, approving run") + approve_run(workflow_description.run_id, token) + return + + pull_requests = _get_pull_requests_from(workflow_description.fork_owner_login, 
workflow_description.fork_branch) + + print("Got pull requests for workflow", len(pull_requests)) + if len(pull_requests) > 1: + raise Exception("Received more than one PR for workflow run") + + if len(pull_requests) < 1: + raise Exception("Cannot find any pull requests for workflow run") + + pull_request = pull_requests[0] + print("Pull request for workflow number", pull_request['number']) + + author, author_orgs = get_pr_author_and_orgs(pull_request) + if is_trusted_contributor(author, author_orgs): + print("Contributor is trusted, approving run") + approve_run(workflow_description.run_id, token) + return + + changed_files = get_changed_files_for_pull_request(pull_request) + print(f"Totally have {len(changed_files)} changed files in PR:", changed_files) + if check_suspicious_changed_files(changed_files): + print(f"Pull Request {pull_request['number']} has suspicious changes, label it for manuall approve") + label_manual_approve(pull_request, token) + else: + print(f"Pull Request {pull_request['number']} has no suspicious changes") + approve_run(workflow_description.run_id, token) + +def handler(event, _): + main(event) diff --git a/tests/ci/workflow_approve_rerun_lambda/requirements.txt b/tests/ci/workflow_approve_rerun_lambda/requirements.txt new file mode 100644 index 000000000000..c0dcf4a4dde7 --- /dev/null +++ b/tests/ci/workflow_approve_rerun_lambda/requirements.txt @@ -0,0 +1,3 @@ +requests +PyJWT +cryptography From 27ab11f3d74268501d69669e34454d9d8fcfff5a Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 13 Dec 2021 15:01:55 +0300 Subject: [PATCH 243/472] Split tests for release branches --- .github/workflows/backport_branches.yml | 75 +++- .github/workflows/release_branches.yml | 458 +++++++++++++++++++++++- 2 files changed, 512 insertions(+), 21 deletions(-) diff --git a/.github/workflows/backport_branches.yml b/.github/workflows/backport_branches.yml index 859756f07af1..f990bc439a49 100644 --- a/.github/workflows/backport_branches.yml +++ b/.github/workflows/backport_branches.yml @@ -235,7 +235,7 @@ jobs: ############################################################################################## ########################### FUNCTIONAl STATELESS TESTS ####################################### ############################################################################################## - FunctionalStatelessTestAsan: + FunctionalStatelessTestAsan0: needs: [BuilderDebAsan] runs-on: [self-hosted, func-tester] steps: @@ -252,6 +252,39 @@ jobs: CHECK_NAME: 'Stateless tests (address, actions)' REPO_COPY: ${{runner.temp}}/stateless_debug/ClickHouse KILL_TIMEOUT: 10800 + RUN_BY_HASH_NUM: 0 + RUN_BY_HASH_TOTAL: 2 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + FunctionalStatelessTestAsan1: + needs: [BuilderDebAsan] + runs-on: [self-hosted, func-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + env: + TEMP_PATH: ${{runner.temp}}/stateless_debug + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Stateless tests (address, actions)' + REPO_COPY: ${{runner.temp}}/stateless_debug/ClickHouse + KILL_TIMEOUT: 10800 + RUN_BY_HASH_NUM: 1 + 
RUN_BY_HASH_TOTAL: 2 run: | sudo rm -fr $TEMP_PATH mkdir -p $TEMP_PATH @@ -330,7 +363,37 @@ jobs: ############################################################################################# ############################# INTEGRATION TESTS ############################################# ############################################################################################# - IntegrationTestsRelease: + IntegrationTestsRelease0: + needs: [BuilderDebRelease] + runs-on: [self-hosted, stress-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Integration test + env: + TEMP_PATH: ${{runner.temp}}/integration_tests_release + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Integration tests (release, actions)' + REPO_COPY: ${{runner.temp}}/integration_tests_release/ClickHouse + RUN_BY_HASH_NUM: 0 + RUN_BY_HASH_TOTAL: 2 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + IntegrationTestsRelease1: needs: [BuilderDebRelease] runs-on: [self-hosted, stress-tester] steps: @@ -346,6 +409,8 @@ jobs: REPORTS_PATH: ${{runner.temp}}/reports_dir CHECK_NAME: 'Integration tests (release, actions)' REPO_COPY: ${{runner.temp}}/integration_tests_release/ClickHouse + RUN_BY_HASH_NUM: 1 + RUN_BY_HASH_TOTAL: 2 run: | sudo rm -fr $TEMP_PATH mkdir -p $TEMP_PATH @@ -362,10 +427,12 @@ jobs: needs: - DockerHubPush - BuilderReport - - FunctionalStatelessTestAsan + - FunctionalStatelessTestAsan0 + - FunctionalStatelessTestAsan1 - FunctionalStatefulTestDebug - StressTestTsan - - IntegrationTestsRelease + - IntegrationTestsRelease0 + - IntegrationTestsRelease1 - CompatibilityCheck runs-on: [self-hosted, style-checker] steps: diff --git a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml index 4489585541bf..0cdcc5260fa0 100644 --- a/.github/workflows/release_branches.yml +++ b/.github/workflows/release_branches.yml @@ -345,7 +345,7 @@ jobs: docker kill $(docker ps -q) ||: docker rm -f $(docker ps -a -q) ||: sudo rm -fr $TEMP_PATH - FunctionalStatelessTestAsan: + FunctionalStatelessTestAsan0: needs: [BuilderDebAsan] runs-on: [self-hosted, func-tester] steps: @@ -362,6 +362,8 @@ jobs: CHECK_NAME: 'Stateless tests (address, actions)' REPO_COPY: ${{runner.temp}}/stateless_debug/ClickHouse KILL_TIMEOUT: 10800 + RUN_BY_HASH_NUM: 0 + RUN_BY_HASH_TOTAL: 2 run: | sudo rm -fr $TEMP_PATH mkdir -p $TEMP_PATH @@ -374,7 +376,38 @@ jobs: docker kill $(docker ps -q) ||: docker rm -f $(docker ps -a -q) ||: sudo rm -fr $TEMP_PATH - FunctionalStatelessTestTsan: + FunctionalStatelessTestAsan1: + needs: [BuilderDebAsan] + runs-on: [self-hosted, func-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + env: + TEMP_PATH: ${{runner.temp}}/stateless_debug + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Stateless tests (address, actions)' + REPO_COPY: ${{runner.temp}}/stateless_debug/ClickHouse + KILL_TIMEOUT: 10800 + RUN_BY_HASH_NUM: 1 + RUN_BY_HASH_TOTAL: 2 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r 
$GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + FunctionalStatelessTestTsan0: needs: [BuilderDebTsan] runs-on: [self-hosted, func-tester] steps: @@ -391,6 +424,70 @@ jobs: CHECK_NAME: 'Stateless tests (thread, actions)' REPO_COPY: ${{runner.temp}}/stateless_tsan/ClickHouse KILL_TIMEOUT: 10800 + RUN_BY_HASH_NUM: 0 + RUN_BY_HASH_TOTAL: 3 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + FunctionalStatelessTestTsan1: + needs: [BuilderDebTsan] + runs-on: [self-hosted, func-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + env: + TEMP_PATH: ${{runner.temp}}/stateless_tsan + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Stateless tests (thread, actions)' + REPO_COPY: ${{runner.temp}}/stateless_tsan/ClickHouse + KILL_TIMEOUT: 10800 + RUN_BY_HASH_NUM: 1 + RUN_BY_HASH_TOTAL: 3 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + FunctionalStatelessTestTsan2: + needs: [BuilderDebTsan] + runs-on: [self-hosted, func-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + env: + TEMP_PATH: ${{runner.temp}}/stateless_tsan + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Stateless tests (thread, actions)' + REPO_COPY: ${{runner.temp}}/stateless_tsan/ClickHouse + KILL_TIMEOUT: 10800 + RUN_BY_HASH_NUM: 2 + RUN_BY_HASH_TOTAL: 3 run: | sudo rm -fr $TEMP_PATH mkdir -p $TEMP_PATH @@ -432,7 +529,7 @@ jobs: docker kill $(docker ps -q) ||: docker rm -f $(docker ps -a -q) ||: sudo rm -fr $TEMP_PATH - FunctionalStatelessTestMsan: + FunctionalStatelessTestMsan0: needs: [BuilderDebMsan] runs-on: [self-hosted, func-tester] steps: @@ -449,6 +546,8 @@ jobs: CHECK_NAME: 'Stateless tests (memory, actions)' REPO_COPY: ${{runner.temp}}/stateless_memory/ClickHouse KILL_TIMEOUT: 10800 + RUN_BY_HASH_NUM: 0 + RUN_BY_HASH_TOTAL: 3 run: | sudo rm -fr $TEMP_PATH mkdir -p $TEMP_PATH @@ -461,7 +560,69 @@ jobs: docker kill $(docker ps -q) ||: docker rm -f $(docker ps -a -q) ||: sudo rm -fr $TEMP_PATH - FunctionalStatelessTestDebug: + FunctionalStatelessTestMsan1: + needs: [BuilderDebMsan] + runs-on: [self-hosted, func-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + env: + TEMP_PATH: ${{runner.temp}}/stateless_memory + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Stateless tests (memory, actions)' + REPO_COPY: ${{runner.temp}}/stateless_memory/ClickHouse + 
KILL_TIMEOUT: 10800 + RUN_BY_HASH_NUM: 1 + RUN_BY_HASH_TOTAL: 3 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + FunctionalStatelessTestMsan2: + needs: [BuilderDebMsan] + runs-on: [self-hosted, func-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + env: + TEMP_PATH: ${{runner.temp}}/stateless_memory + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Stateless tests (memory, actions)' + REPO_COPY: ${{runner.temp}}/stateless_memory/ClickHouse + KILL_TIMEOUT: 10800 + RUN_BY_HASH_NUM: 2 + RUN_BY_HASH_TOTAL: 3 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + FunctionalStatelessTestDebug0: needs: [BuilderDebDebug] runs-on: [self-hosted, func-tester] steps: @@ -478,6 +639,70 @@ jobs: CHECK_NAME: 'Stateless tests (debug, actions)' REPO_COPY: ${{runner.temp}}/stateless_debug/ClickHouse KILL_TIMEOUT: 10800 + RUN_BY_HASH_NUM: 0 + RUN_BY_HASH_TOTAL: 3 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + FunctionalStatelessTestDebug1: + needs: [BuilderDebDebug] + runs-on: [self-hosted, func-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + env: + TEMP_PATH: ${{runner.temp}}/stateless_debug + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Stateless tests (debug, actions)' + REPO_COPY: ${{runner.temp}}/stateless_debug/ClickHouse + KILL_TIMEOUT: 10800 + RUN_BY_HASH_NUM: 1 + RUN_BY_HASH_TOTAL: 3 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + FunctionalStatelessTestDebug2: + needs: [BuilderDebDebug] + runs-on: [self-hosted, func-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + env: + TEMP_PATH: ${{runner.temp}}/stateless_debug + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Stateless tests (debug, actions)' + REPO_COPY: ${{runner.temp}}/stateless_debug/ClickHouse + KILL_TIMEOUT: 10800 + RUN_BY_HASH_NUM: 2 + RUN_BY_HASH_TOTAL: 3 run: | sudo rm -fr $TEMP_PATH mkdir -p $TEMP_PATH @@ -813,8 +1038,68 @@ jobs: ############################################################################################# 
############################# INTEGRATION TESTS ############################################# ############################################################################################# - IntegrationTestsAsan: - needs: [BuilderDebAsan, FunctionalStatelessTestAsan] + IntegrationTestsAsan0: + needs: [BuilderDebAsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Integration test + env: + TEMP_PATH: ${{runner.temp}}/integration_tests_asan + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Integration tests (asan, actions)' + REPO_COPY: ${{runner.temp}}/integration_tests_asan/ClickHouse + RUN_BY_HASH_NUM: 0 + RUN_BY_HASH_TOTAL: 3 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + IntegrationTestsAsan1: + needs: [BuilderDebAsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Integration test + env: + TEMP_PATH: ${{runner.temp}}/integration_tests_asan + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Integration tests (asan, actions)' + REPO_COPY: ${{runner.temp}}/integration_tests_asan/ClickHouse + RUN_BY_HASH_NUM: 1 + RUN_BY_HASH_TOTAL: 3 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + IntegrationTestsAsan2: + needs: [BuilderDebAsan] runs-on: [self-hosted, stress-tester] steps: - name: Download json reports @@ -829,6 +1114,98 @@ jobs: REPORTS_PATH: ${{runner.temp}}/reports_dir CHECK_NAME: 'Integration tests (asan, actions)' REPO_COPY: ${{runner.temp}}/integration_tests_asan/ClickHouse + RUN_BY_HASH_NUM: 2 + RUN_BY_HASH_TOTAL: 3 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + IntegrationTestsTsan0: + needs: [BuilderDebTsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Integration test + env: + TEMP_PATH: ${{runner.temp}}/integration_tests_tsan + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Integration tests (thread, actions)' + REPO_COPY: ${{runner.temp}}/integration_tests_tsan/ClickHouse + RUN_BY_HASH_NUM: 0 + RUN_BY_HASH_TOTAL: 4 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + 
IntegrationTestsTsan1: + needs: [BuilderDebTsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Integration test + env: + TEMP_PATH: ${{runner.temp}}/integration_tests_tsan + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Integration tests (thread, actions)' + REPO_COPY: ${{runner.temp}}/integration_tests_tsan/ClickHouse + RUN_BY_HASH_NUM: 1 + RUN_BY_HASH_TOTAL: 4 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + IntegrationTestsTsan2: + needs: [BuilderDebTsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Integration test + env: + TEMP_PATH: ${{runner.temp}}/integration_tests_tsan + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Integration tests (thread, actions)' + REPO_COPY: ${{runner.temp}}/integration_tests_tsan/ClickHouse + RUN_BY_HASH_NUM: 2 + RUN_BY_HASH_TOTAL: 4 run: | sudo rm -fr $TEMP_PATH mkdir -p $TEMP_PATH @@ -841,8 +1218,8 @@ jobs: docker kill $(docker ps -q) ||: docker rm -f $(docker ps -a -q) ||: sudo rm -fr $TEMP_PATH - IntegrationTestsTsan: - needs: [BuilderDebTsan, FunctionalStatelessTestTsan] + IntegrationTestsTsan3: + needs: [BuilderDebTsan] runs-on: [self-hosted, stress-tester] steps: - name: Download json reports @@ -857,6 +1234,8 @@ jobs: REPORTS_PATH: ${{runner.temp}}/reports_dir CHECK_NAME: 'Integration tests (thread, actions)' REPO_COPY: ${{runner.temp}}/integration_tests_tsan/ClickHouse + RUN_BY_HASH_NUM: 3 + RUN_BY_HASH_TOTAL: 4 run: | sudo rm -fr $TEMP_PATH mkdir -p $TEMP_PATH @@ -869,8 +1248,38 @@ jobs: docker kill $(docker ps -q) ||: docker rm -f $(docker ps -a -q) ||: sudo rm -fr $TEMP_PATH - IntegrationTestsRelease: - needs: [BuilderDebRelease, FunctionalStatelessTestRelease] + IntegrationTestsRelease0: + needs: [BuilderDebRelease] + runs-on: [self-hosted, stress-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Integration test + env: + TEMP_PATH: ${{runner.temp}}/integration_tests_release + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Integration tests (release, actions)' + REPO_COPY: ${{runner.temp}}/integration_tests_release/ClickHouse + RUN_BY_HASH_NUM: 0 + RUN_BY_HASH_TOTAL: 2 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + IntegrationTestsRelease1: + needs: [BuilderDebRelease] runs-on: [self-hosted, stress-tester] steps: - name: Download json reports @@ -885,6 +1294,8 @@ jobs: REPORTS_PATH: ${{runner.temp}}/reports_dir CHECK_NAME: 'Integration tests (release, actions)' REPO_COPY: ${{runner.temp}}/integration_tests_release/ClickHouse + RUN_BY_HASH_NUM: 1 + RUN_BY_HASH_TOTAL: 2 
run: | sudo rm -fr $TEMP_PATH mkdir -p $TEMP_PATH @@ -901,11 +1312,18 @@ jobs: needs: - DockerHubPush - BuilderReport - - FunctionalStatelessTestDebug + - FunctionalStatelessTestDebug0 + - FunctionalStatelessTestDebug1 + - FunctionalStatelessTestDebug2 - FunctionalStatelessTestRelease - - FunctionalStatelessTestAsan - - FunctionalStatelessTestTsan - - FunctionalStatelessTestMsan + - FunctionalStatelessTestAsan0 + - FunctionalStatelessTestAsan1 + - FunctionalStatelessTestTsan0 + - FunctionalStatelessTestTsan1 + - FunctionalStatelessTestTsan2 + - FunctionalStatelessTestMsan0 + - FunctionalStatelessTestMsan1 + - FunctionalStatelessTestMsan2 - FunctionalStatelessTestUBsan - FunctionalStatefulTestDebug - FunctionalStatefulTestRelease @@ -918,9 +1336,15 @@ jobs: - StressTestTsan - StressTestMsan - StressTestUBsan - - IntegrationTestsAsan - - IntegrationTestsRelease - - IntegrationTestsTsan + - IntegrationTestsAsan0 + - IntegrationTestsAsan1 + - IntegrationTestsAsan2 + - IntegrationTestsRelease0 + - IntegrationTestsRelease1 + - IntegrationTestsTsan0 + - IntegrationTestsTsan1 + - IntegrationTestsTsan2 + - IntegrationTestsTsan3 - CompatibilityCheck runs-on: [self-hosted, style-checker] steps: From 6e38e67a078a940d0d3b485e2a6a87d937369136 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Mon, 13 Dec 2021 13:16:59 +0000 Subject: [PATCH 244/472] Backport #31656 to 21.9: Fix use quota bug --- src/Access/EnabledQuota.cpp | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/src/Access/EnabledQuota.cpp b/src/Access/EnabledQuota.cpp index 2945a205c182..f4ebd6f8546f 100644 --- a/src/Access/EnabledQuota.cpp +++ b/src/Access/EnabledQuota.cpp @@ -65,9 +65,7 @@ struct EnabledQuota::Impl end = end + duration * n; if (end_of_interval.compare_exchange_strong(end_loaded, end.time_since_epoch())) { - /// We reset counters only if the interval's end has been calculated before. - /// If it hasn't we just calculate the interval's end for the first time and don't reset counters yet. - need_reset_counters = (end_loaded.count() != 0); + need_reset_counters = true; break; } end = std::chrono::system_clock::time_point{end_loaded}; @@ -93,10 +91,18 @@ struct EnabledQuota::Impl { for (const auto & interval : intervals.intervals) { + if (!interval.end_of_interval.load().count()) + { + /// We need to calculate end of the interval if it hasn't been calculated before. + bool dummy; + getEndOfInterval(interval, current_time, dummy); + } + ResourceAmount used = (interval.used[resource_type] += amount); ResourceAmount max = interval.max[resource_type]; if (!max) continue; + if (used > max) { bool counters_were_reset = false; @@ -121,10 +127,18 @@ struct EnabledQuota::Impl { for (const auto & interval : intervals.intervals) { + if (!interval.end_of_interval.load().count()) + { + /// We need to calculate end of the interval if it hasn't been calculated before. 
+ bool dummy; + getEndOfInterval(interval, current_time, dummy); + } + ResourceAmount used = interval.used[resource_type]; ResourceAmount max = interval.max[resource_type]; if (!max) continue; + if (used > max) { bool counters_were_reset = false; From 1d91918a217e7c39477944ffccdcba5ce49d4695 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Tue, 30 Nov 2021 05:49:03 +0300 Subject: [PATCH 245/472] Merge pull request #31802 from pkit/fix_setcap fix setcap in docker (cherry picked from commit 42787cfa60b610f5a9f254e82df78c61371aff63) --- programs/install/Install.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/programs/install/Install.cpp b/programs/install/Install.cpp index 8013b6cf991d..56a15bc5c841 100644 --- a/programs/install/Install.cpp +++ b/programs/install/Install.cpp @@ -676,12 +676,13 @@ int mainEntryClickHouseInstall(int argc, char ** argv) #if defined(__linux__) fmt::print("Setting capabilities for clickhouse binary. This is optional.\n"); std::string command = fmt::format("command -v setcap >/dev/null" - " && echo > {0} && chmod a+x {0} && {0} && setcap 'cap_net_admin,cap_ipc_lock,cap_sys_nice+ep' {0} && {0} && rm {0}" - " && setcap 'cap_net_admin,cap_ipc_lock,cap_sys_nice+ep' {1}" + " && command -v capsh >/dev/null" + " && capsh --has-p=cap_net_admin,cap_ipc_lock,cap_sys_nice+ep >/dev/null 2>&1" + " && setcap 'cap_net_admin,cap_ipc_lock,cap_sys_nice+ep' {0}" " || echo \"Cannot set 'net_admin' or 'ipc_lock' or 'sys_nice' capability for clickhouse binary." " This is optional. Taskstats accounting will be disabled." " To enable taskstats accounting you may add the required capability later manually.\"", - "/tmp/test_setcap.sh", fs::canonical(main_bin_path).string()); + fs::canonical(main_bin_path).string()); executeScript(command); #endif From a8cd6fd67783c08f27538095c582209aff4121a8 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Mon, 13 Dec 2021 16:24:28 +0000 Subject: [PATCH 246/472] Backport #32359 to 21.9: Fix usage of non-materialized skip indexes --- src/Storages/MergeTree/MergeTreeIndices.h | 6 ++++-- .../02131_skip_index_not_materialized.reference | 1 + .../02131_skip_index_not_materialized.sql | 12 ++++++++++++ 3 files changed, 17 insertions(+), 2 deletions(-) create mode 100644 tests/queries/0_stateless/02131_skip_index_not_materialized.reference create mode 100644 tests/queries/0_stateless/02131_skip_index_not_materialized.sql diff --git a/src/Storages/MergeTree/MergeTreeIndices.h b/src/Storages/MergeTree/MergeTreeIndices.h index 557af891b746..768630ddbcbb 100644 --- a/src/Storages/MergeTree/MergeTreeIndices.h +++ b/src/Storages/MergeTree/MergeTreeIndices.h @@ -113,9 +113,11 @@ struct IMergeTreeIndex /// Returns extension for deserialization. /// /// Return pair. - virtual MergeTreeIndexFormat getDeserializedFormat(const DiskPtr, const std::string & /* relative_path_prefix */) const + virtual MergeTreeIndexFormat getDeserializedFormat(const DiskPtr disk, const std::string & relative_path_prefix) const { - return {1, ".idx"}; + if (disk->exists(relative_path_prefix + ".idx")) + return {1, ".idx"}; + return {0 /*unknown*/, ""}; } /// Checks whether the column is in data skipping index. 
diff --git a/tests/queries/0_stateless/02131_skip_index_not_materialized.reference b/tests/queries/0_stateless/02131_skip_index_not_materialized.reference new file mode 100644 index 000000000000..d00491fd7e5b --- /dev/null +++ b/tests/queries/0_stateless/02131_skip_index_not_materialized.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/02131_skip_index_not_materialized.sql b/tests/queries/0_stateless/02131_skip_index_not_materialized.sql new file mode 100644 index 000000000000..cae0b1d9fb39 --- /dev/null +++ b/tests/queries/0_stateless/02131_skip_index_not_materialized.sql @@ -0,0 +1,12 @@ +DROP TABLE IF EXISTS t_index_non_materialized; + +CREATE TABLE t_index_non_materialized (a UInt32) ENGINE = MergeTree ORDER BY tuple(); + +INSERT INTO t_index_non_materialized VALUES (1); + +ALTER TABLE t_index_non_materialized ADD INDEX ind_set (a) TYPE set(1) GRANULARITY 1; +ALTER TABLE t_index_non_materialized ADD INDEX ind_minmax (a) TYPE minmax() GRANULARITY 1; + +SELECT count() FROM t_index_non_materialized WHERE a = 1; + +DROP TABLE t_index_non_materialized; From 73179b3f47c288c29a35c2bad7d67dc1aa17f642 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Mon, 13 Dec 2021 16:29:03 +0000 Subject: [PATCH 247/472] Backport #27822 to 21.9: Fix data race in ProtobufSchemas --- src/Formats/ProtobufSchemas.cpp | 1 + src/Formats/ProtobufSchemas.h | 2 + .../message_with_repeated.proto | 19 ++ .../message_with_repeated_pb2.py | 180 ++++++++++++++++++ tests/integration/test_storage_kafka/test.py | 119 ++++++++++++ 5 files changed, 321 insertions(+) create mode 100644 tests/integration/test_storage_kafka/clickhouse_path/format_schemas/message_with_repeated.proto create mode 100644 tests/integration/test_storage_kafka/message_with_repeated_pb2.py diff --git a/src/Formats/ProtobufSchemas.cpp b/src/Formats/ProtobufSchemas.cpp index 9c6ed76ef275..6d1d48158a5d 100644 --- a/src/Formats/ProtobufSchemas.cpp +++ b/src/Formats/ProtobufSchemas.cpp @@ -73,6 +73,7 @@ ProtobufSchemas::~ProtobufSchemas() = default; const google::protobuf::Descriptor * ProtobufSchemas::getMessageTypeForFormatSchema(const FormatSchemaInfo & info) { + std::lock_guard lock(mutex); auto it = importers.find(info.schemaDirectory()); if (it == importers.end()) it = importers.emplace(info.schemaDirectory(), std::make_unique(info.schemaDirectory())).first; diff --git a/src/Formats/ProtobufSchemas.h b/src/Formats/ProtobufSchemas.h index 05778a853438..e21bffe952c4 100644 --- a/src/Formats/ProtobufSchemas.h +++ b/src/Formats/ProtobufSchemas.h @@ -4,6 +4,7 @@ #if USE_PROTOBUF #include +#include #include #include #include @@ -39,6 +40,7 @@ class ProtobufSchemas : private boost::noncopyable private: class ImporterWithSourceTree; std::unordered_map> importers; + std::mutex mutex; }; } diff --git a/tests/integration/test_storage_kafka/clickhouse_path/format_schemas/message_with_repeated.proto b/tests/integration/test_storage_kafka/clickhouse_path/format_schemas/message_with_repeated.proto new file mode 100644 index 000000000000..791a5086866b --- /dev/null +++ b/tests/integration/test_storage_kafka/clickhouse_path/format_schemas/message_with_repeated.proto @@ -0,0 +1,19 @@ +syntax = "proto3"; +option optimize_for = SPEED; +message Message { + uint32 tnow = 1; + string server = 2; + string clien = 3; + uint32 sPort = 4; + uint32 cPort = 5; + repeated dd r = 6; + string method = 7; +} + +message dd { + string name = 1; + uint32 class = 2; + uint32 type = 3; + uint64 ttl = 4; + bytes data = 5; +} \ No newline at end of file diff --git 
a/tests/integration/test_storage_kafka/message_with_repeated_pb2.py b/tests/integration/test_storage_kafka/message_with_repeated_pb2.py new file mode 100644 index 000000000000..69702307e7fd --- /dev/null +++ b/tests/integration/test_storage_kafka/message_with_repeated_pb2.py @@ -0,0 +1,180 @@ +# Generated by the protocol buffer compiler. DO NOT EDIT! +# source: clickhouse_path/format_schemas/message_with_repeated.proto + +import sys +_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) +from google.protobuf import descriptor as _descriptor +from google.protobuf import message as _message +from google.protobuf import reflection as _reflection +from google.protobuf import symbol_database as _symbol_database +# @@protoc_insertion_point(imports) + +_sym_db = _symbol_database.Default() + + + + +DESCRIPTOR = _descriptor.FileDescriptor( + name='clickhouse_path/format_schemas/message_with_repeated.proto', + package='', + syntax='proto3', + serialized_options=_b('H\001'), + serialized_pb=_b('\n:clickhouse_path/format_schemas/message_with_repeated.proto\"t\n\x07Message\x12\x0c\n\x04tnow\x18\x01 \x01(\r\x12\x0e\n\x06server\x18\x02 \x01(\t\x12\r\n\x05\x63lien\x18\x03 \x01(\t\x12\r\n\x05sPort\x18\x04 \x01(\r\x12\r\n\x05\x63Port\x18\x05 \x01(\r\x12\x0e\n\x01r\x18\x06 \x03(\x0b\x32\x03.dd\x12\x0e\n\x06method\x18\x07 \x01(\t\"J\n\x02\x64\x64\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\r\n\x05\x63lass\x18\x02 \x01(\r\x12\x0c\n\x04type\x18\x03 \x01(\r\x12\x0b\n\x03ttl\x18\x04 \x01(\x04\x12\x0c\n\x04\x64\x61ta\x18\x05 \x01(\x0c\x42\x02H\x01\x62\x06proto3') +) + + + + +_MESSAGE = _descriptor.Descriptor( + name='Message', + full_name='Message', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='tnow', full_name='Message.tnow', index=0, + number=1, type=13, cpp_type=3, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + serialized_options=None, file=DESCRIPTOR), + _descriptor.FieldDescriptor( + name='server', full_name='Message.server', index=1, + number=2, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=_b("").decode('utf-8'), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + serialized_options=None, file=DESCRIPTOR), + _descriptor.FieldDescriptor( + name='clien', full_name='Message.clien', index=2, + number=3, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=_b("").decode('utf-8'), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + serialized_options=None, file=DESCRIPTOR), + _descriptor.FieldDescriptor( + name='sPort', full_name='Message.sPort', index=3, + number=4, type=13, cpp_type=3, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + serialized_options=None, file=DESCRIPTOR), + _descriptor.FieldDescriptor( + name='cPort', full_name='Message.cPort', index=4, + number=5, type=13, cpp_type=3, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + serialized_options=None, file=DESCRIPTOR), + _descriptor.FieldDescriptor( + name='r', full_name='Message.r', index=5, + number=6, type=11, cpp_type=10, label=3, + has_default_value=False, default_value=[], + message_type=None, 
enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + serialized_options=None, file=DESCRIPTOR), + _descriptor.FieldDescriptor( + name='method', full_name='Message.method', index=6, + number=7, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=_b("").decode('utf-8'), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + serialized_options=None, file=DESCRIPTOR), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + serialized_options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=62, + serialized_end=178, +) + + +_DD = _descriptor.Descriptor( + name='dd', + full_name='dd', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='name', full_name='dd.name', index=0, + number=1, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=_b("").decode('utf-8'), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + serialized_options=None, file=DESCRIPTOR), + _descriptor.FieldDescriptor( + name='class', full_name='dd.class', index=1, + number=2, type=13, cpp_type=3, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + serialized_options=None, file=DESCRIPTOR), + _descriptor.FieldDescriptor( + name='type', full_name='dd.type', index=2, + number=3, type=13, cpp_type=3, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + serialized_options=None, file=DESCRIPTOR), + _descriptor.FieldDescriptor( + name='ttl', full_name='dd.ttl', index=3, + number=4, type=4, cpp_type=4, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + serialized_options=None, file=DESCRIPTOR), + _descriptor.FieldDescriptor( + name='data', full_name='dd.data', index=4, + number=5, type=12, cpp_type=9, label=1, + has_default_value=False, default_value=_b(""), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + serialized_options=None, file=DESCRIPTOR), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + serialized_options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=180, + serialized_end=254, +) + +_MESSAGE.fields_by_name['r'].message_type = _DD +DESCRIPTOR.message_types_by_name['Message'] = _MESSAGE +DESCRIPTOR.message_types_by_name['dd'] = _DD +_sym_db.RegisterFileDescriptor(DESCRIPTOR) + +Message = _reflection.GeneratedProtocolMessageType('Message', (_message.Message,), dict( + DESCRIPTOR = _MESSAGE, + __module__ = 'clickhouse_path.format_schemas.message_with_repeated_pb2' + # @@protoc_insertion_point(class_scope:Message) + )) +_sym_db.RegisterMessage(Message) + +dd = _reflection.GeneratedProtocolMessageType('dd', (_message.Message,), dict( + DESCRIPTOR = _DD, + __module__ = 'clickhouse_path.format_schemas.message_with_repeated_pb2' + # @@protoc_insertion_point(class_scope:dd) + )) +_sym_db.RegisterMessage(dd) + + +DESCRIPTOR._options = None +# @@protoc_insertion_point(module_scope) diff --git a/tests/integration/test_storage_kafka/test.py b/tests/integration/test_storage_kafka/test.py index 
21d6c7c10ab6..d0487ff0c7e0 100644 --- a/tests/integration/test_storage_kafka/test.py +++ b/tests/integration/test_storage_kafka/test.py @@ -35,6 +35,7 @@ from . import kafka_pb2 from . import social_pb2 +from . import message_with_repeated_pb2 # TODO: add test for run-time offset update in CH, if we manually update it on Kafka side. @@ -3040,6 +3041,124 @@ def test_kafka_consumer_failover(kafka_cluster): prev_count = wait_for_new_data('test.destination', prev_count) +# https://github.com/ClickHouse/ClickHouse/issues/26643 +def test_issue26643(kafka_cluster): + + # for backporting: + # admin_client = KafkaAdminClient(bootstrap_servers="localhost:9092") + admin_client = KafkaAdminClient(bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port)) + producer = KafkaProducer(bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port), value_serializer=producer_serializer) + + topic_list = [] + topic_list.append(NewTopic(name="test_issue26643", num_partitions=4, replication_factor=1)) + admin_client.create_topics(new_topics=topic_list, validate_only=False) + + msg = message_with_repeated_pb2.Message( + tnow=1629000000, + server='server1', + clien='host1', + sPort=443, + cPort=50000, + r=[ + message_with_repeated_pb2.dd(name='1', type=444, ttl=123123, data=b'adsfasd'), + message_with_repeated_pb2.dd(name='2') + ], + method='GET' + ) + + data = b'' + serialized_msg = msg.SerializeToString() + data = data + _VarintBytes(len(serialized_msg)) + serialized_msg + + msg = message_with_repeated_pb2.Message( + tnow=1629000002 + ) + + serialized_msg = msg.SerializeToString() + data = data + _VarintBytes(len(serialized_msg)) + serialized_msg + + producer.send(topic="test_issue26643", value=data) + + data = _VarintBytes(len(serialized_msg)) + serialized_msg + producer.send(topic="test_issue26643", value=data) + producer.flush() + + instance.query(''' + CREATE TABLE IF NOT EXISTS test.test_queue + ( + `tnow` UInt32, + `server` String, + `client` String, + `sPort` UInt16, + `cPort` UInt16, + `r.name` Array(String), + `r.class` Array(UInt16), + `r.type` Array(UInt16), + `r.ttl` Array(UInt32), + `r.data` Array(String), + `method` String + ) + ENGINE = Kafka + SETTINGS + kafka_broker_list = 'kafka1:19092', + kafka_topic_list = 'test_issue26643', + kafka_group_name = 'test_issue26643_group', + kafka_format = 'Protobuf', + kafka_schema = 'message_with_repeated.proto:Message', + kafka_num_consumers = 4, + kafka_skip_broken_messages = 10000; + + SET allow_suspicious_low_cardinality_types=1; + + CREATE TABLE test.log + ( + `tnow` DateTime CODEC(DoubleDelta, LZ4), + `server` LowCardinality(String), + `client` LowCardinality(String), + `sPort` LowCardinality(UInt16), + `cPort` UInt16 CODEC(T64, LZ4), + `r.name` Array(String), + `r.class` Array(LowCardinality(UInt16)), + `r.type` Array(LowCardinality(UInt16)), + `r.ttl` Array(LowCardinality(UInt32)), + `r.data` Array(String), + `method` LowCardinality(String) + ) + ENGINE = MergeTree + PARTITION BY toYYYYMMDD(tnow) + ORDER BY (tnow, server) + TTL toDate(tnow) + toIntervalMonth(1000) + SETTINGS index_granularity = 16384, merge_with_ttl_timeout = 7200; + + CREATE MATERIALIZED VIEW test.test_consumer TO test.log AS + SELECT + toDateTime(a.tnow) AS tnow, + a.server AS server, + a.client AS client, + a.sPort AS sPort, + a.cPort AS cPort, + a.`r.name` AS `r.name`, + a.`r.class` AS `r.class`, + a.`r.type` AS `r.type`, + a.`r.ttl` AS `r.ttl`, + a.`r.data` AS `r.data`, + a.method AS method + FROM test.test_queue AS a; + ''') + + instance.wait_for_log_line("Committed 
offset") + result = instance.query('SELECT * FROM test.log') + + expected = '''\ +2021-08-15 07:00:00 server1 443 50000 ['1','2'] [0,0] [444,0] [123123,0] ['adsfasd',''] GET +2021-08-15 07:00:02 0 0 [] [] [] [] [] +2021-08-15 07:00:02 0 0 [] [] [] [] [] +''' + assert TSV(result) == TSV(expected) + + # kafka_cluster.open_bash_shell('instance') + + if __name__ == '__main__': cluster.start() input("Cluster created, press any key to destroy...") From 06026d9a7e7924e29d32e4711c89b8e6591e7274 Mon Sep 17 00:00:00 2001 From: Neng Liu Date: Tue, 14 Dec 2021 10:49:22 +0800 Subject: [PATCH 248/472] add SparkColumnToCHColumn.h --- .../local-engine/Parser/CHColumnToSparkRow.h | 8 +- .../Parser/SerializedPlanParser.cpp | 4 + .../Parser/SerializedPlanParser.h | 2 + .../Parser/SparkColumnToCHColumn.cpp | 117 ++++++++++++++ .../Parser/SparkColumnToCHColumn.h | 148 ++++++++++++++++++ .../local-engine/tests/gtest_local_engine.cpp | 5 + 6 files changed, 282 insertions(+), 2 deletions(-) create mode 100644 utils/local-engine/Parser/SparkColumnToCHColumn.cpp create mode 100644 utils/local-engine/Parser/SparkColumnToCHColumn.h diff --git a/utils/local-engine/Parser/CHColumnToSparkRow.h b/utils/local-engine/Parser/CHColumnToSparkRow.h index 5eb4ed1c8727..7c080612bbbc 100644 --- a/utils/local-engine/Parser/CHColumnToSparkRow.h +++ b/utils/local-engine/Parser/CHColumnToSparkRow.h @@ -6,13 +6,18 @@ namespace local_engine { +int64_t calculateBitSetWidthInBytes(int32_t num_fields); + class CHColumnToSparkRow; +class SparkColumnToCHColumn; class SparkRowInfo { friend CHColumnToSparkRow; + friend SparkColumnToCHColumn; + public: - SparkRowInfo(DB::Block& block); + SparkRowInfo(DB::Block & block); int64_t getNullBitsetWidthInBytes() const; void setNullBitsetWidthInBytes(int64_t nullBitsetWidthInBytes); int64_t getNumCols() const; @@ -45,4 +50,3 @@ class CHColumnToSparkRow : private Allocator void freeMem(uint8_t * address, size_t size); }; } - diff --git a/utils/local-engine/Parser/SerializedPlanParser.cpp b/utils/local-engine/Parser/SerializedPlanParser.cpp index 040e6586bc1c..8ebdea194ca2 100644 --- a/utils/local-engine/Parser/SerializedPlanParser.cpp +++ b/utils/local-engine/Parser/SerializedPlanParser.cpp @@ -216,3 +216,7 @@ local_engine::SparkRowInfoPtr dbms::LocalExecutor::next() this->spark_buffer->size = row_info->getTotalBytes(); return row_info; } +DB::Block & dbms::LocalExecutor::getHeader() const +{ + return header; +} diff --git a/utils/local-engine/Parser/SerializedPlanParser.h b/utils/local-engine/Parser/SerializedPlanParser.h index 2a2645c5d61b..83cd3a71cbd5 100644 --- a/utils/local-engine/Parser/SerializedPlanParser.h +++ b/utils/local-engine/Parser/SerializedPlanParser.h @@ -92,6 +92,8 @@ class LocalExecutor } } + Block & getHeader() const; + private: std::unique_ptr writeBlockToSparkRow(DB::Block & block); QueryPipelinePtr query_pipeline; diff --git a/utils/local-engine/Parser/SparkColumnToCHColumn.cpp b/utils/local-engine/Parser/SparkColumnToCHColumn.cpp new file mode 100644 index 000000000000..68a4798fe48d --- /dev/null +++ b/utils/local-engine/Parser/SparkColumnToCHColumn.cpp @@ -0,0 +1,117 @@ +#include "SparkColumnToCHColumn.h" +#include +#include + +using namespace DB; + +namespace local_engine +{ +int64_t getStringColumnTotalSize(int ordinal, SparkRowInfo & spark_row_info) +{ + SparkRowReader reader(spark_row_info.getNumCols()); + int64_t size = 0; + for (int64_t i = 0; i < spark_row_info.getNumRows(); i++) + { + reader.pointTo(reinterpret_cast(spark_row_info.getBufferAddress() + 
spark_row_info.getOffsets()[i]), spark_row_info.getLengths()[i]); + size += reader.getStringSize(ordinal); + } + return size; +} + +static void writeRowToColumns(ColumnsWithTypeAndName & columns, SparkRowReader & spark_row_reader, SparkRowInfo & spark_row_info) +{ + int32_t num_fields = columns.size(); + for (int32_t i = 0; i < num_fields; i++) + { + auto column = columns[i]; + WhichDataType which(column.type); + if (which.isUInt8()) + { + auto & column_data = assert_cast &>(column.column).getData(); + column_data.emplace_back(spark_row_reader.getUnsignedByte(i)); + } + else if (which.isInt8()) + { + auto & column_data = assert_cast &>(column.column).getData(); + column_data.emplace_back(spark_row_reader.getByte(i)); + } + else if (which.isInt16()) + { + auto & column_data = assert_cast &>(column.column).getData(); + column_data.emplace_back(spark_row_reader.getShort(i)); } + else if (which.isInt32()) + { + auto & column_data = assert_cast &>(column.column).getData(); + column_data.emplace_back(spark_row_reader.getInt(i)); + } + else if (which.isInt64()) + { + auto & column_data = assert_cast &>(column.column).getData(); + column_data.emplace_back(spark_row_reader.getLong(i)); + } + else if (which.isFloat32()) + { + auto & column_data = assert_cast &>(column.column).getData(); + column_data.emplace_back(spark_row_reader.getFloat(i)); + } + else if (which.isFloat64()) + { + auto & column_data = assert_cast &>(column.column).getData(); + column_data.emplace_back(spark_row_reader.getDouble(i)); + } + else if (which.isDate()) + { + auto & column_data = assert_cast &>(column.column).getData(); + column_data.emplace_back(spark_row_reader.getUnsignedShort(i)); + } + else if (which.isString()) + { + PaddedPODArray & column_chars_t = assert_cast(column.column).getChars(); + PaddedPODArray & column_offsets = assert_cast(column.column).getOffsets(); + if (static_cast(column_offsets.capacity()) < spark_row_info.getNumRows()) + { + auto total_size = getStringColumnTotalSize(i, spark_row_info); + column_chars_t.reserve(total_size); + column_offsets.reserve(spark_row_info.getNumRows()); + } + std::string data = spark_row_reader.getString(i); + column_chars_t.insert_assume_reserved(data.data(), data.size()); + column_chars_t.emplace_back('\0'); + column_offsets.emplace_back(column_chars_t.size()); + } + else + { + throw std::runtime_error("doesn't support type " + std::string(getTypeName(column.type->getTypeId()))); + } + } +} + +std::unique_ptr +local_engine::SparkColumnToCHColumn::convertCHColumnToSparkRow(local_engine::SparkRowInfo & spark_row_info, DB::Block& header) +{ + auto columns_list = std::make_unique(); + columns_list->reserve(header.columns()); + for (size_t column_i = 0, columns = header.columns(); column_i < columns; ++column_i) + { + const ColumnWithTypeAndName & header_column = header.getByPosition(column_i); + MutableColumnPtr read_column = header_column.type->createColumn(); + read_column->reserve(spark_row_info.getNumRows()); + ColumnWithTypeAndName column; + column.name = header_column.name; + column.type = header_column.type; + column.column = std::move(read_column); + columns_list->push_back(std::move(column)); + } + SparkRowReader row_reader(header.columns()); + for (int64_t i = 0; i < spark_row_info.getNumRows(); i++) + { + row_reader.pointTo( + reinterpret_cast(spark_row_info.getBufferAddress() + spark_row_info.getOffsets()[i]), spark_row_info.getLengths()[i]); + writeRowToColumns(*columns_list, row_reader, spark_row_info); + } + return std::make_unique(*std::move(columns_list)); 
+} + + + +} diff --git a/utils/local-engine/Parser/SparkColumnToCHColumn.h b/utils/local-engine/Parser/SparkColumnToCHColumn.h new file mode 100644 index 000000000000..492562a2c5f1 --- /dev/null +++ b/utils/local-engine/Parser/SparkColumnToCHColumn.h @@ -0,0 +1,148 @@ +#pragma once + +#include +#include "CHColumnToSparkRow.h" + +namespace local_engine +{ +using namespace DB; + + + +class SparkColumnToCHColumn +{ +public: + std::unique_ptr convertCHColumnToSparkRow(SparkRowInfo & spark_row_info, Block& header); +}; +} + +class SparkRowReader +{ +public: + template Type getValue(int64_t address) + { + return *reinterpret_cast(address); + } + + template int64_t getValue(int64_t address); + template int32_t getValue(int64_t address); + template int16_t getValue(int64_t address); + template int8_t getValue(int64_t address); + template float_t getValue(int64_t address); + template double_t getValue(int64_t address); + + + bool isSet(int index) + { + assert(index >= 0); + int64_t mask = 1 << (index & 63); + int64_t word_offset = base_offset + static_cast(index >> 6) * 8L; + int64_t word = getValue(word_offset); + return (word & mask) != 0; + } + + inline void assertIndexIsValid(int index) const + { + assert(index >= 0); + assert(index < num_fields); + } + + bool isNullAt(int ordinal) + { + assertIndexIsValid(ordinal); + return isSet(ordinal); + } + + int8_t getByte(int ordinal) + { + assertIndexIsValid(ordinal); + return getValue(getFieldOffset(ordinal)); + } + + uint8_t getUnsignedByte(int ordinal) + { + assertIndexIsValid(ordinal); + return getValue(getFieldOffset(ordinal)); + } + + + int16_t getShort(int ordinal) + { + assertIndexIsValid(ordinal); + return getValue(getFieldOffset(ordinal)); + } + + uint16_t getUnsignedShort(int ordinal) + { + assertIndexIsValid(ordinal); + return getValue(getFieldOffset(ordinal)); + } + + int32_t getInt(int ordinal) + { + assertIndexIsValid(ordinal); + return getValue(getFieldOffset(ordinal)); + } + + uint32_t getUnsignedInt(int ordinal) + { + assertIndexIsValid(ordinal); + return getValue(getFieldOffset(ordinal)); + } + + int64_t getLong(int ordinal) + { + assertIndexIsValid(ordinal); + return getValue(getFieldOffset(ordinal)); + } + + float_t getFloat(int ordinal) + { + assertIndexIsValid(ordinal); + return getValue(getFieldOffset(ordinal)); + } + + double_t getDouble(int ordinal) + { + assertIndexIsValid(ordinal); + return getValue(getFieldOffset(ordinal)); + } + + std::string getString(int ordinal) + { + assertIndexIsValid(ordinal); + int64_t offset_and_size = getLong(ordinal); + int32_t offset = static_cast(offset_and_size >> 32); + int32_t size = static_cast(offset_and_size); + return std::string(reinterpret_cast(offset), size); + } + + int32_t getStringSize(int ordinal) + { + assertIndexIsValid(ordinal); + return static_cast(getLong(ordinal)); + } + + void pointTo(int64_t base_offset_, int32_t size_in_bytes_) + { + this->base_offset = base_offset_; + this->size_in_bytes = size_in_bytes_; + } + + explicit SparkRowReader(int32_t numFields) + : num_fields(numFields) + { + this->bit_set_width_in_bytes = local_engine::calculateBitSetWidthInBytes(numFields); + } + +private: + int64_t getFieldOffset(int ordinal) const + { + return base_offset + bit_set_width_in_bytes + ordinal * 8L; + } + + int64_t base_offset; + int32_t num_fields; + int32_t size_in_bytes; + int32_t bit_set_width_in_bytes; +}; diff --git a/utils/local-engine/tests/gtest_local_engine.cpp b/utils/local-engine/tests/gtest_local_engine.cpp index 07134e27dffb..833c8b9a90ad 100644 --- 
a/utils/local-engine/tests/gtest_local_engine.cpp +++ b/utils/local-engine/tests/gtest_local_engine.cpp @@ -5,8 +5,10 @@ #include #include "testConfig.h" #include +#include #include + TEST(TestSelect, ReadRel) { dbms::SerializedSchemaBuilder schema_builder; @@ -39,6 +41,9 @@ TEST(TestSelect, ReadRel) std::cout << "fetch batch" << std::endl; local_engine::SparkRowInfoPtr spark_row_info = local_executor.next(); ASSERT_GT(spark_row_info->getNumRows(), 0); + local_engine::SparkColumnToCHColumn converter; + auto block = converter.convertCHColumnToSparkRow(*spark_row_info, local_executor.getHeader()); + ASSERT_GT(spark_row_info->getNumRows(), block->rows()); } } From 936e2a76579884cfbf0ca8b2704bac6068521bbc Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 14 Dec 2021 13:47:55 +0300 Subject: [PATCH 249/472] Restore old logic --- tests/clickhouse-test | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index c627810a550a..f8c2cd27a0f2 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -21,6 +21,8 @@ from subprocess import TimeoutExpired from datetime import datetime from time import time, sleep from errno import ESRCH +# for crc32 +import zlib try: import termcolor @@ -61,6 +63,11 @@ MAX_RETRIES = 3 TEST_FILE_EXTENSIONS = ['.sql', '.sql.j2', '.sh', '.py', '.expect'] +def stringhash(s): + # default hash() function consistent + # only during process invocation https://stackoverflow.com/a/42089311 + return zlib.crc32(s.encode('utf-8')) + class Terminated(KeyboardInterrupt): pass @@ -985,7 +992,7 @@ def render_test_template(j2env, suite_dir, test_name): return test_name -def get_selected_tests(suite_dir, patterns): +def get_selected_tests(suite_dir, patterns, filter_func): """ Find all files with tests, filter, render templates """ @@ -1002,16 +1009,18 @@ def get_selected_tests(suite_dir, patterns): continue if USE_JINJA and test_name.endswith(".gen.sql"): continue + if not filter_func(test_name): + continue test_name = render_test_template(j2env, suite_dir, test_name) yield test_name -def get_tests_list(suite_dir, patterns, test_runs, sort_key): +def get_tests_list(suite_dir, patterns, test_runs, sort_key, filter_func): """ Return list of tests file names to run """ - all_tests = list(get_selected_tests(suite_dir, patterns)) + all_tests = list(get_selected_tests(suite_dir, patterns, filter_func)) all_tests = all_tests * test_runs all_tests.sort(key=sort_key) return all_tests @@ -1141,8 +1150,16 @@ def main(args): print("Won't run stateful tests because they were manually disabled.") continue + filter_func = lambda x: True + + if args.run_by_hash_num is not None and args.run_by_hash_total is not None: + if args.run_by_hash_num > args.run_by_hash_total: + raise Exception(f"Incorrect run by hash, value {args.run_by_hash_num} bigger than total {args.run_by_hash_total}") + + filter_func = lambda x: stringhash(x) % args.run_by_hash_total == args.run_by_hash_num + all_tests = get_tests_list( - suite_dir, args.test, args.test_runs, tests_in_suite_key_func) + suite_dir, args.test, args.test_runs, tests_in_suite_key_func, filter_func) jobs = args.jobs From e0de41c49cb47a8f02af04c550cd3890bd2ef9aa Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 14 Dec 2021 14:27:16 +0300 Subject: [PATCH 250/472] Fix ci runner --- tests/integration/ci-runner.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/tests/integration/ci-runner.py b/tests/integration/ci-runner.py index 
25d09a8c4c53..c8745294c5b6 100755 --- a/tests/integration/ci-runner.py +++ b/tests/integration/ci-runner.py @@ -10,6 +10,8 @@ import random import json import csv +# for crc32 +import zlib MAX_RETRY = 3 @@ -26,6 +28,9 @@ MAX_TIME_IN_SANDBOX = 20 * 60 # 20 minutes TASK_TIMEOUT = 8 * 60 * 60 # 8 hours +def stringhash(s): + return zlib.crc32(s.encode('utf-8')) + def get_tests_to_run(pr_info): result = set([]) changed_files = pr_info['changed_files'] @@ -183,6 +188,13 @@ def __init__(self, result_path, params): self.start_time = time.time() self.soft_deadline_time = self.start_time + (TASK_TIMEOUT - MAX_TIME_IN_SANDBOX) + if 'run_by_hash_total' in self.params: + self.run_by_hash_total = self.params['run_by_hash_total'] + self.run_by_hash_num = self.params['run_by_hash_num'] + else: + self.run_by_hash_total = 0 + self.run_by_hash_num = 0 + def path(self): return self.result_path @@ -576,6 +588,15 @@ def run_impl(self, repo_path, build_path): self._install_clickhouse(build_path) logging.info("Dump iptables before run %s", subprocess.check_output("sudo iptables -L", shell=True)) all_tests = self._get_all_tests(repo_path) + + if self.run_by_hash_total != 0: + grouped_tests = self.group_test_by_file(all_tests) + all_filtered_by_hash_tests = [] + for group, tests_in_group in grouped_tests.items(): + if stringhash(group) % self.run_by_hash_total == self.run_by_hash_num: + all_filtered_by_hash_tests += tests_in_group + all_tests = all_filtered_by_hash_tests + parallel_skip_tests = self._get_parallel_tests_skip_list(repo_path) logging.info("Found %s tests first 3 %s", len(all_tests), ' '.join(all_tests[:3])) filtered_sequential_tests = list(filter(lambda test: test in all_tests, parallel_skip_tests)) From 6f2571e89b597c0a89928dc5c68772a5e7de0067 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 14 Dec 2021 16:01:24 +0300 Subject: [PATCH 251/472] Add missed argument --- tests/clickhouse-test | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index f8c2cd27a0f2..6b106b98ed59 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -1330,6 +1330,9 @@ if __name__ == '__main__': parser.add_argument('--client-option', nargs='+', help='Specify additional client argument') parser.add_argument('--print-time', action='store_true', dest='print_time', help='Print test time') + parser.add_argument('--run-by-hash-num', type=int, help='Run tests matching crc32(test_name) % run_by_hash_total == run_by_hash_num') + parser.add_argument('--run-by-hash-total', type=int, help='Total test groups for crc32(test_name) % run_by_hash_total == run_by_hash_num') + group = parser.add_mutually_exclusive_group(required=False) group.add_argument('--zookeeper', action='store_true', default=None, dest='zookeeper', help='Run zookeeper related tests') group.add_argument('--no-zookeeper', action='store_false', default=None, dest='zookeeper', help='Do not run zookeeper related tests') From 5cc8c54c5d89d59d69e731825e274abf677d7262 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 14 Dec 2021 19:34:44 +0300 Subject: [PATCH 252/472] Forward args to client --- docker/test/stateless/run.sh | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index e5ef72e747ae..51e45d4a1a39 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -94,6 +94,13 @@ function run_tests() ADDITIONAL_OPTIONS+=('8') fi + if [[ -n "$RUN_BY_HASH_NUM" ]] && [[ -n "$RUN_BY_HASH_TOTAL" ]]; then + 
ADDITIONAL_OPTIONS+=('--run-by-hash-num') + ADDITIONAL_OPTIONS+=("$RUN_BY_HASH_NUM") + ADDITIONAL_OPTIONS+=('--run-by-hash-total') + ADDITIONAL_OPTIONS+=("$RUN_BY_HASH_TOTAL") + fi + clickhouse-test --testname --shard --zookeeper --hung-check --print-time \ --use-skip-list --test-runs "$NUM_TRIES" "${ADDITIONAL_OPTIONS[@]}" 2>&1 \ | ts '%Y-%m-%d %H:%M:%S' \ From 3bb99f5924ae3c5323dfa26a88a556a70c782950 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 15 Dec 2021 07:10:13 +0000 Subject: [PATCH 253/472] Backport #32755 to 21.9: fix crash fuzzbits with multiply same fixedstring --- src/Functions/fuzzBits.cpp | 13 ++++++++++++- .../queries/0_stateless/02148_issue_32737.reference | 0 tests/queries/0_stateless/02148_issue_32737.sql | 3 +++ 3 files changed, 15 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/02148_issue_32737.reference create mode 100644 tests/queries/0_stateless/02148_issue_32737.sql diff --git a/src/Functions/fuzzBits.cpp b/src/Functions/fuzzBits.cpp index 120655462389..07168dd5f9f2 100644 --- a/src/Functions/fuzzBits.cpp +++ b/src/Functions/fuzzBits.cpp @@ -18,6 +18,7 @@ namespace ErrorCodes extern const int ILLEGAL_COLUMN; extern const int DECIMAL_OVERFLOW; extern const int ARGUMENT_OUT_OF_BOUND; + extern const int LOGICAL_ERROR; } @@ -142,6 +143,7 @@ class FunctionFuzzBits : public IFunction else if (const ColumnFixedString * col_in_fixed = checkAndGetColumn(col_in_untyped.get())) { const auto n = col_in_fixed->getN(); + const auto col_in_rows = col_in_fixed->size(); auto col_to = ColumnFixedString::create(n); ColumnFixedString::Chars & chars_to = col_to->getChars(); @@ -153,7 +155,16 @@ class FunctionFuzzBits : public IFunction const auto * ptr_in = col_in_fixed->getChars().data(); auto * ptr_to = chars_to.data(); - fuzzBits(ptr_in, ptr_to, chars_to.size(), inverse_probability); + + if (col_in_rows >= input_rows_count) + fuzzBits(ptr_in, ptr_to, chars_to.size(), inverse_probability); + else if (col_in_rows != 1) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "1 != col_in_rows {} < input_rows_count {}", col_in_rows, input_rows_count); + else + for (size_t i = 0; i < input_rows_count; ++i) + fuzzBits(ptr_in, ptr_to + i * n, n, inverse_probability); return col_to; } diff --git a/tests/queries/0_stateless/02148_issue_32737.reference b/tests/queries/0_stateless/02148_issue_32737.reference new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/queries/0_stateless/02148_issue_32737.sql b/tests/queries/0_stateless/02148_issue_32737.sql new file mode 100644 index 000000000000..c8fbac457e73 --- /dev/null +++ b/tests/queries/0_stateless/02148_issue_32737.sql @@ -0,0 +1,3 @@ +SELECT fuzzBits(toFixedString('', 200), 0.99) from numbers(1) FORMAT Null; +SELECT fuzzBits(toFixedString('', 200), 0.99) from numbers(128) FORMAT Null; +SELECT fuzzBits(toFixedString('', 200), 0.99) from numbers(60000) FORMAT Null; From 324833a36c6ed2f1479538189a0166c6818e50a5 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Wed, 15 Dec 2021 11:34:57 +0300 Subject: [PATCH 254/472] Update Dockerfile --- docker/test/integration/runner/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/test/integration/runner/Dockerfile b/docker/test/integration/runner/Dockerfile index 5b77248427b2..470e4a83af08 100644 --- a/docker/test/integration/runner/Dockerfile +++ b/docker/test/integration/runner/Dockerfile @@ -74,7 +74,7 @@ RUN python3 -m pip install \ minio \ protobuf \ 
psycopg2-binary==2.8.6 \ - pymongo \ + pymongo==3.11.0 \ pytest \ pytest-timeout \ pytest-xdist \ From 7d607a7eafd01a52d25bbfca618ea1cdb9855b52 Mon Sep 17 00:00:00 2001 From: "neng.liu" Date: Wed, 15 Dec 2021 08:51:25 +0000 Subject: [PATCH 255/472] add benchmark code --- CMakeLists.txt | 4 +- contrib/benchmark | 1 + utils/local-engine/CMakeLists.txt | 2 +- .../Parser/SerializedPlanParser.cpp | 2 +- .../Parser/SerializedPlanParser.h | 2 +- .../Parser/SparkColumnToCHColumn.cpp | 54 +++--- .../Parser/SparkColumnToCHColumn.h | 5 +- utils/local-engine/tests/CMakeLists.txt | 41 +++-- .../tests/benchmark_local_engine.cpp | 171 ++++++++++++++++++ .../local-engine/tests/gtest_local_engine.cpp | 19 +- 10 files changed, 253 insertions(+), 48 deletions(-) create mode 160000 contrib/benchmark create mode 100644 utils/local-engine/tests/benchmark_local_engine.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 875a6d1ab618..2827c7104e53 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -595,8 +595,10 @@ macro (add_executable target) if (${type} STREQUAL EXECUTABLE) # disabled for TSAN and gcc since libtsan.a provides overrides too if (TARGET clickhouse_new_delete) + if (NOT ${target} STREQUAL Git::Git) # operator::new/delete for executables (MemoryTracker stuff) - target_link_libraries (${target} PRIVATE clickhouse_new_delete ${MALLOC_LIBRARIES}) + target_link_libraries (${target} PRIVATE clickhouse_new_delete ${MALLOC_LIBRARIES}) + endif() endif() endif() endmacro() diff --git a/contrib/benchmark b/contrib/benchmark new file mode 160000 index 000000000000..4a56d88aba3a --- /dev/null +++ b/contrib/benchmark @@ -0,0 +1 @@ +Subproject commit 4a56d88aba3ae3a793e7f113d61be938dba7a7c5 diff --git a/utils/local-engine/CMakeLists.txt b/utils/local-engine/CMakeLists.txt index fccaca4a87db..e89120b1ba79 100644 --- a/utils/local-engine/CMakeLists.txt +++ b/utils/local-engine/CMakeLists.txt @@ -7,7 +7,7 @@ cmake_minimum_required(VERSION 3.11) if(CMAKE_VERSION VERSION_LESS 3.11) message(FATAL_ERROR "Building local engine JNI bindings requires CMake version >= 3.11") endif() - +include (${ClickHouse_SOURCE_DIR}/cmake/warnings.cmake) # Find java/jni include(FindJava) include(UseJava) diff --git a/utils/local-engine/Parser/SerializedPlanParser.cpp b/utils/local-engine/Parser/SerializedPlanParser.cpp index 8ebdea194ca2..ca11f3c89098 100644 --- a/utils/local-engine/Parser/SerializedPlanParser.cpp +++ b/utils/local-engine/Parser/SerializedPlanParser.cpp @@ -216,7 +216,7 @@ local_engine::SparkRowInfoPtr dbms::LocalExecutor::next() this->spark_buffer->size = row_info->getTotalBytes(); return row_info; } -DB::Block & dbms::LocalExecutor::getHeader() const +DB::Block & dbms::LocalExecutor::getHeader() { return header; } diff --git a/utils/local-engine/Parser/SerializedPlanParser.h b/utils/local-engine/Parser/SerializedPlanParser.h index 83cd3a71cbd5..f497e95f4c8b 100644 --- a/utils/local-engine/Parser/SerializedPlanParser.h +++ b/utils/local-engine/Parser/SerializedPlanParser.h @@ -92,7 +92,7 @@ class LocalExecutor } } - Block & getHeader() const; + Block & getHeader(); private: std::unique_ptr writeBlockToSparkRow(DB::Block & block); diff --git a/utils/local-engine/Parser/SparkColumnToCHColumn.cpp b/utils/local-engine/Parser/SparkColumnToCHColumn.cpp index 68a4798fe48d..512ee4b3fef0 100644 --- a/utils/local-engine/Parser/SparkColumnToCHColumn.cpp +++ b/utils/local-engine/Parser/SparkColumnToCHColumn.cpp @@ -18,70 +18,71 @@ int64_t getStringColumnTotalSize(int ordinal, SparkRowInfo & spark_row_info) return size; } 
-static void writeRowToColumns(ColumnsWithTypeAndName & columns, SparkRowReader & spark_row_reader, SparkRowInfo & spark_row_info) +static void writeRowToColumns(std::vector & columns, SparkRowReader & spark_row_reader, SparkRowInfo & spark_row_info, int64_t row_num) { int32_t num_fields = columns.size(); for (int32_t i = 0; i < num_fields; i++) { - auto column = columns[i]; - WhichDataType which(column.type); +// auto column = columns[i]; + WhichDataType which(columns[i]->getDataType()); if (which.isUInt8()) { - auto & column_data = assert_cast &>(column.column).getData(); + auto & column_data = assert_cast &>(*columns[i]).getData(); column_data.emplace_back(spark_row_reader.getUnsignedByte(i)); } else if (which.isInt8()) { - auto & column_data = assert_cast &>(column.column).getData(); + auto & column_data = assert_cast &>(*columns[i]).getData(); column_data.emplace_back(spark_row_reader.getByte(i)); } else if (which.isInt16()) { - auto & column_data = assert_cast &>(column.column).getData(); + auto & column_data = assert_cast &>(*columns[i]).getData(); column_data.emplace_back(spark_row_reader.getShort(i)); } else if (which.isInt32()) { - auto & column_data = assert_cast &>(column.column).getData(); + auto & column_data = assert_cast &>(*columns[i]).getData(); column_data.emplace_back(spark_row_reader.getInt(i)); } else if (which.isInt64()) { - auto & column_data = assert_cast &>(column.column).getData(); + auto & column_data = assert_cast &>(*columns[i]).getData(); column_data.emplace_back(spark_row_reader.getLong(i)); } else if (which.isFloat32()) { - auto & column_data = assert_cast &>(column.column).getData(); + auto & column_data = assert_cast &>(*columns[i]).getData(); column_data.emplace_back(spark_row_reader.getFloat(i)); } else if (which.isFloat64()) { - auto & column_data = assert_cast &>(column.column).getData(); + auto & column_data = assert_cast &>(*columns[i]).getData(); column_data.emplace_back(spark_row_reader.getDouble(i)); } else if (which.isDate()) { - auto & column_data = assert_cast &>(column.column).getData(); + auto & column_data = assert_cast &>(*columns[i]).getData(); column_data.emplace_back(spark_row_reader.getUnsignedShort(i)); } else if (which.isString()) { - PaddedPODArray & column_chars_t = assert_cast(column.column).getChars(); - PaddedPODArray & column_offsets = assert_cast(column.column).getOffsets(); - if (static_cast(column_offsets.capacity()) < spark_row_info.getNumRows()) + PaddedPODArray & column_chars_t = assert_cast(*columns[i]).getChars(); + PaddedPODArray & column_offsets = assert_cast(*columns[i]).getOffsets(); +// auto capacity column_offsets.capacity(); + if (row_num == 0) { auto total_size = getStringColumnTotalSize(i, spark_row_info); column_chars_t.reserve(total_size); column_offsets.reserve(spark_row_info.getNumRows()); } - std::string data = spark_row_reader.getString(i); - column_chars_t.insert_assume_reserved(data.data(), data.size()); + StringRef data = spark_row_reader.getString(i); + column_chars_t.insert_assume_reserved(data.data, data.data+data.size); column_chars_t.emplace_back('\0'); column_offsets.emplace_back(column_chars_t.size()); } else { - throw std::runtime_error("doesn't support type " + std::string(getTypeName(column.type->getTypeId()))); + throw std::runtime_error("doesn't support type " + std::string(getTypeName(columns[i]->getDataType()))); } } } @@ -91,25 +92,30 @@ local_engine::SparkColumnToCHColumn::convertCHColumnToSparkRow(local_engine::Spa { auto columns_list = std::make_unique(); 
columns_list->reserve(header.columns()); + std::vector mutable_columns; for (size_t column_i = 0, columns = header.columns(); column_i < columns; ++column_i) { const ColumnWithTypeAndName & header_column = header.getByPosition(column_i); MutableColumnPtr read_column = header_column.type->createColumn(); read_column->reserve(spark_row_info.getNumRows()); - ColumnWithTypeAndName column; - column.name = header_column.name; - column.type = header_column.type; - column.column = std::move(read_column); - columns_list->push_back(std::move(column)); + mutable_columns.push_back(std::move(read_column)); } SparkRowReader row_reader(header.columns()); for (int64_t i = 0; i < spark_row_info.getNumRows(); i++) { row_reader.pointTo( reinterpret_cast(spark_row_info.getBufferAddress() + spark_row_info.getOffsets()[i]), spark_row_info.getLengths()[i]); - writeRowToColumns(*columns_list, row_reader, spark_row_info); + writeRowToColumns(mutable_columns, row_reader, spark_row_info, i); } - return std::make_unique(*std::move(columns_list)); + auto block = std::make_unique(*std::move(columns_list)); + for (size_t column_i = 0, columns = mutable_columns.size(); column_i < columns; ++column_i) + { + const ColumnWithTypeAndName & header_column = header.getByPosition(column_i); + ColumnWithTypeAndName column(std::move(mutable_columns[column_i]), header_column.type,header_column.name); + block->insert(column); + } + mutable_columns.clear(); + return block; } diff --git a/utils/local-engine/Parser/SparkColumnToCHColumn.h b/utils/local-engine/Parser/SparkColumnToCHColumn.h index 492562a2c5f1..8c68162cefc3 100644 --- a/utils/local-engine/Parser/SparkColumnToCHColumn.h +++ b/utils/local-engine/Parser/SparkColumnToCHColumn.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include "CHColumnToSparkRow.h" namespace local_engine @@ -108,13 +109,13 @@ class SparkRowReader return getValue(getFieldOffset(ordinal)); } - std::string getString(int ordinal) + StringRef getString(int ordinal) { assertIndexIsValid(ordinal); int64_t offset_and_size = getLong(ordinal); int32_t offset = static_cast(offset_and_size >> 32); int32_t size = static_cast(offset_and_size); - return std::string(reinterpret_cast(offset), size); + return StringRef(reinterpret_cast(this->base_offset + offset), size); } int32_t getStringSize(int ordinal) diff --git a/utils/local-engine/tests/CMakeLists.txt b/utils/local-engine/tests/CMakeLists.txt index 6eb5b6291c19..5c6e9e1db75a 100644 --- a/utils/local-engine/tests/CMakeLists.txt +++ b/utils/local-engine/tests/CMakeLists.txt @@ -1,8 +1,9 @@ set(USE_INTERNAL_GTEST_LIBRARY 0) enable_testing() include(CTest) + include (${PROJECT_SOURCE_DIR}/cmake/find/gtest.cmake) -message(GTEST_INCLUDE_DIRS:${GTEST_INCLUDE_DIRS}) +#include (${PROJECT_SOURCE_DIR}/cmake/warnings.cmake) include_directories(${GTEST_INCLUDE_DIRS}) macro (grep_gtest_sources BASE_DIR DST_VAR) # Cold match files that are not in tests/ directories @@ -14,23 +15,41 @@ configure_file( ${ClickHouse_SOURCE_DIR}/utils/local-engine/tests/testConfig.h.in ${ClickHouse_SOURCE_DIR}/utils/local-engine/tests/testConfig.h ) + +set(BENCHMARK_ENABLE_TESTING OFF) +#set(BENCHMARK_ENABLE_LIBPFM ON) +include(FetchContent) +FetchContent_Declare(googlebenchmark GIT_REPOSITORY https://github.com/google/benchmark GIT_TAG master) +FetchContent_MakeAvailable(googlebenchmark) +include_directories( + ${builder_headers} + ${parser_headers} +) # attach all dbms gtest sources + grep_gtest_sources("${ClickHouse_SOURCE_DIR}/utils/local_engine/tests" local_engine_gtest_sources) 
add_executable(unit_tests_local_engine ${local_engine_gtest_sources}) +add_executable(benchmark_local_engine benchmark_local_engine.cpp) target_compile_options(unit_tests_local_engine PRIVATE - -Wno-zero-as-null-pointer-constant - -Wno-covered-switch-default - -Wno-undef - -Wno-sign-compare - -Wno-used-but-marked-unused - -Wno-missing-noreturn - -Wno-gnu-zero-variadic-macro-arguments + -Wno-error + ) + +target_compile_options(benchmark_local_engine PUBLIC + -Wno-error ) +target_compile_options(benchmark PUBLIC + -Wno-error + ) + + target_include_directories(unit_tests_local_engine PRIVATE ${GTEST_INCLUDE_DIRS}/include - ${builder_headers} - ${parser_headers} ) +include_directories(benchmark_local_engine SYSTEM PUBLIC ${FETCH_CONTENT_SOURCE_DIR_GOOGLEBENCHMARK}/include) + target_link_libraries(unit_tests_local_engine ${CLICKHOUSE_SERVER_LINK} ${LOCALENGINE_SHARED_LIB} ${GTEST_BOTH_LIBRARIES}) -add_check(unit_tests_local_engine) \ No newline at end of file +target_link_libraries(benchmark_local_engine ${CLICKHOUSE_SERVER_LINK} ${LOCALENGINE_SHARED_LIB} benchmark::benchmark) + +add_check(unit_tests_local_engine) +add_check(benchmark_local_engine) \ No newline at end of file diff --git a/utils/local-engine/tests/benchmark_local_engine.cpp b/utils/local-engine/tests/benchmark_local_engine.cpp new file mode 100644 index 000000000000..857cc9e93f64 --- /dev/null +++ b/utils/local-engine/tests/benchmark_local_engine.cpp @@ -0,0 +1,171 @@ +#include +#include +#include +#include +#include +#include "testConfig.h" +#include +#include +#include + +// Define another benchmark +static void BM_CHColumnToSparkRow(benchmark::State& state) { + for (auto _: state) + { + state.PauseTiming(); + dbms::SerializedSchemaBuilder schema_builder; + auto schema = schema_builder.column("l_orderkey", "I64") + .column("l_partkey", "I64") + .column("l_suppkey", "I64") + .column("l_linenumber", "I32") + .column("l_quantity", "FP64") + .column("l_extendedprice", "FP64") + .column("l_discount", "FP64") + .column("l_tax", "FP64") + // .column("l_returnflag", "String") + // .column("l_linestatus", "String") + .column("l_shipdate_new", "FP64") + .column("l_commitdate_new", "FP64") + .column("l_receiptdate_new", "FP64") + // .column("l_shipinstruct", "String") + // .column("l_shipmode", "String") + // .column("l_comment", "String") + .build(); + dbms::SerializedPlanBuilder plan_builder; + auto plan = plan_builder.files("/home/kyligence/Documents/test-dataset/intel-gazelle-test-"+std::to_string(state.range(0))+".snappy.parquet", std::move(schema)).build(); + + auto query_plan = dbms::SerializedPlanParser::parse(std::move(plan)); + dbms::LocalExecutor local_executor; + state.ResumeTiming(); + local_executor.execute(std::move(query_plan)); + while (local_executor.hasNext()) + { + local_engine::SparkRowInfoPtr spark_row_info = local_executor.next(); + } + } +} + +static void BM_CHColumnToSparkRowWithString(benchmark::State& state) { + for (auto _: state) + { + state.PauseTiming(); + dbms::SerializedSchemaBuilder schema_builder; + auto schema = schema_builder.column("l_orderkey", "I64") + .column("l_partkey", "I64") + .column("l_suppkey", "I64") + .column("l_linenumber", "I32") + .column("l_quantity", "FP64") + .column("l_extendedprice", "FP64") + .column("l_discount", "FP64") + .column("l_tax", "FP64") + .column("l_returnflag", "String") + .column("l_linestatus", "String") + .column("l_shipdate_new", "FP64") + .column("l_commitdate_new", "FP64") + .column("l_receiptdate_new", "FP64") + .column("l_shipinstruct", "String") + 
.column("l_shipmode", "String") + .column("l_comment", "String") + .build(); + dbms::SerializedPlanBuilder plan_builder; + auto plan = plan_builder.files("/home/kyligence/Documents/test-dataset/intel-gazelle-test-"+std::to_string(state.range(0))+".snappy.parquet", std::move(schema)).build(); + + auto query_plan = dbms::SerializedPlanParser::parse(std::move(plan)); + dbms::LocalExecutor local_executor; + state.ResumeTiming(); + local_executor.execute(std::move(query_plan)); + while (local_executor.hasNext()) + { + local_engine::SparkRowInfoPtr spark_row_info = local_executor.next(); + } + } +} + +static void BM_SparkRowToCHColumn(benchmark::State& state) { + for (auto _: state) + { + state.PauseTiming(); + dbms::SerializedSchemaBuilder schema_builder; + auto schema = schema_builder.column("l_orderkey", "I64") + .column("l_partkey", "I64") + .column("l_suppkey", "I64") + .column("l_linenumber", "I32") + .column("l_quantity", "FP64") + .column("l_extendedprice", "FP64") + .column("l_discount", "FP64") + .column("l_tax", "FP64") + // .column("l_returnflag", "String") + // .column("l_linestatus", "String") + .column("l_shipdate_new", "FP64") + .column("l_commitdate_new", "FP64") + .column("l_receiptdate_new", "FP64") + // .column("l_shipinstruct", "String") + // .column("l_shipmode", "String") + // .column("l_comment", "String") + .build(); + dbms::SerializedPlanBuilder plan_builder; + auto plan = plan_builder.files("/home/kyligence/Documents/test-dataset/intel-gazelle-test-"+std::to_string(state.range(0))+".snappy.parquet", std::move(schema)).build(); + + auto query_plan = dbms::SerializedPlanParser::parse(std::move(plan)); + dbms::LocalExecutor local_executor; + + local_executor.execute(std::move(query_plan)); + local_engine::SparkColumnToCHColumn converter; + while (local_executor.hasNext()) + { + local_engine::SparkRowInfoPtr spark_row_info = local_executor.next(); + state.ResumeTiming(); + auto block = converter.convertCHColumnToSparkRow(*spark_row_info, local_executor.getHeader()); + state.PauseTiming(); + } + state.ResumeTiming(); + } +} + + +static void BM_SparkRowToCHColumnWithString(benchmark::State& state) { + for (auto _: state) + { + state.PauseTiming(); + dbms::SerializedSchemaBuilder schema_builder; + auto schema = schema_builder.column("l_orderkey", "I64") + .column("l_partkey", "I64") + .column("l_suppkey", "I64") + .column("l_linenumber", "I32") + .column("l_quantity", "FP64") + .column("l_extendedprice", "FP64") + .column("l_discount", "FP64") + .column("l_tax", "FP64") + .column("l_returnflag", "String") + .column("l_linestatus", "String") + .column("l_shipdate_new", "FP64") + .column("l_commitdate_new", "FP64") + .column("l_receiptdate_new", "FP64") + .column("l_shipinstruct", "String") + .column("l_shipmode", "String") + .column("l_comment", "String") + .build(); + dbms::SerializedPlanBuilder plan_builder; + auto plan = plan_builder.files("/home/kyligence/Documents/test-dataset/intel-gazelle-test-"+std::to_string(state.range(0))+".snappy.parquet", std::move(schema)).build(); + + auto query_plan = dbms::SerializedPlanParser::parse(std::move(plan)); + dbms::LocalExecutor local_executor; + + local_executor.execute(std::move(query_plan)); + local_engine::SparkColumnToCHColumn converter; + while (local_executor.hasNext()) + { + local_engine::SparkRowInfoPtr spark_row_info = local_executor.next(); + state.ResumeTiming(); + auto block = converter.convertCHColumnToSparkRow(*spark_row_info, local_executor.getHeader()); + state.PauseTiming(); + } + state.ResumeTiming(); + } +} + 
+BENCHMARK(BM_CHColumnToSparkRow)->Arg(1)->Arg(3)->Arg(30)->Arg(90)->Arg(150)->Unit(benchmark::kMillisecond)->Iterations(10); +BENCHMARK(BM_CHColumnToSparkRowWithString)->Arg(1)->Arg(3)->Arg(30)->Arg(90)->Arg(150)->Unit(benchmark::kMillisecond)->Iterations(10); +//BENCHMARK(BM_SparkRowToCHColumn)->Arg(1)->Arg(3)->Arg(30)->Arg(90)->Arg(150)->Unit(benchmark::kMillisecond)->Iterations(10); +//BENCHMARK(BM_SparkRowToCHColumnWithString)->Arg(1)->Arg(3)->Arg(30)->Arg(90)->Arg(150)->Unit(benchmark::kMillisecond)->Iterations(10); +BENCHMARK_MAIN(); diff --git a/utils/local-engine/tests/gtest_local_engine.cpp b/utils/local-engine/tests/gtest_local_engine.cpp index 833c8b9a90ad..f102198d3f27 100644 --- a/utils/local-engine/tests/gtest_local_engine.cpp +++ b/utils/local-engine/tests/gtest_local_engine.cpp @@ -7,6 +7,7 @@ #include #include #include +#include TEST(TestSelect, ReadRel) @@ -99,13 +100,17 @@ TEST(TestSelect, PerformanceTest) TEST(TestSelect, MergeTreeWriteTest) { - DB::StorageID id("default", "test"); - std::string relative_path = TEST_DATA(/data/mergetree); - DB::StorageInMemoryMetadata storage_in_memory_metadata; - auto shared = DB::Context::createShared(); - auto global = DB::Context::createGlobal(shared.get()); - auto merging_params = DB::MergeTreeData::MergingParams(); - auto storage_setting = std::make_unique(); +// DB::StorageID id("default", "test"); +// std::string relative_path = TEST_DATA(/data/mergetree); +// DB::StorageInMemoryMetadata storage_in_memory_metadata; +// auto shared = DB::Context::createShared(); +// auto global = DB::Context::createGlobal(shared.get()); +// auto merging_params = DB::MergeTreeData::MergingParams(); +// auto storage_setting = std::make_unique(); + + Poco::URI uri("hdfs://clusterB/test.txt"); + std::cout << uri.toString() << std::endl; + ASSERT_GT("clusterB", uri.getHost()); // DB::MergeTreeData(id, relative_path, storage_in_memory_metadata, global, "", merging_params, std::move(storage_setting), false, false, nullptr); } From 99c9b95a0175688466bd63c928ca88b4751164d6 Mon Sep 17 00:00:00 2001 From: "neng.liu" Date: Fri, 17 Dec 2021 06:39:18 +0000 Subject: [PATCH 256/472] fix memory error --- utils/local-engine/Parser/SparkColumnToCHColumn.cpp | 2 +- utils/local-engine/tests/benchmark_local_engine.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/utils/local-engine/Parser/SparkColumnToCHColumn.cpp b/utils/local-engine/Parser/SparkColumnToCHColumn.cpp index 512ee4b3fef0..31386607dd60 100644 --- a/utils/local-engine/Parser/SparkColumnToCHColumn.cpp +++ b/utils/local-engine/Parser/SparkColumnToCHColumn.cpp @@ -13,7 +13,7 @@ int64_t getStringColumnTotalSize(int ordinal, SparkRowInfo & spark_row_info) for (int64_t i = 0; i < spark_row_info.getNumRows(); i++) { reader.pointTo(reinterpret_cast(spark_row_info.getBufferAddress() + spark_row_info.getOffsets()[i]), spark_row_info.getLengths()[i]); - size += reader.getStringSize(ordinal); + size += (reader.getStringSize(ordinal) + 1); } return size; } diff --git a/utils/local-engine/tests/benchmark_local_engine.cpp b/utils/local-engine/tests/benchmark_local_engine.cpp index 857cc9e93f64..08f5773f749f 100644 --- a/utils/local-engine/tests/benchmark_local_engine.cpp +++ b/utils/local-engine/tests/benchmark_local_engine.cpp @@ -166,6 +166,6 @@ static void BM_SparkRowToCHColumnWithString(benchmark::State& state) { BENCHMARK(BM_CHColumnToSparkRow)->Arg(1)->Arg(3)->Arg(30)->Arg(90)->Arg(150)->Unit(benchmark::kMillisecond)->Iterations(10); 
BENCHMARK(BM_CHColumnToSparkRowWithString)->Arg(1)->Arg(3)->Arg(30)->Arg(90)->Arg(150)->Unit(benchmark::kMillisecond)->Iterations(10); -//BENCHMARK(BM_SparkRowToCHColumn)->Arg(1)->Arg(3)->Arg(30)->Arg(90)->Arg(150)->Unit(benchmark::kMillisecond)->Iterations(10); -//BENCHMARK(BM_SparkRowToCHColumnWithString)->Arg(1)->Arg(3)->Arg(30)->Arg(90)->Arg(150)->Unit(benchmark::kMillisecond)->Iterations(10); +BENCHMARK(BM_SparkRowToCHColumn)->Arg(1)->Arg(3)->Arg(30)->Arg(90)->Arg(150)->Unit(benchmark::kMillisecond)->Iterations(10); +BENCHMARK(BM_SparkRowToCHColumnWithString)->Arg(1)->Arg(3)->Arg(30)->Arg(90)->Arg(150)->Unit(benchmark::kMillisecond)->Iterations(10); BENCHMARK_MAIN(); From 0d74c6ea211862a4e30ccec59a6d6036b35d1f43 Mon Sep 17 00:00:00 2001 From: "neng.liu" Date: Wed, 22 Dec 2021 06:55:24 +0000 Subject: [PATCH 257/472] init code of Q6 --- .../Builder/SerializedPlanBuilder.cpp | 46 +++- .../Builder/SerializedPlanBuilder.h | 15 +- .../Parser/SerializedPlanParser.cpp | 239 +++++++++++++++--- .../Parser/SerializedPlanParser.h | 61 ++++- utils/local-engine/local_engine_jni.cpp | 3 +- .../local-engine/tests/gtest_local_engine.cpp | 13 +- 6 files changed, 314 insertions(+), 63 deletions(-) diff --git a/utils/local-engine/Builder/SerializedPlanBuilder.cpp b/utils/local-engine/Builder/SerializedPlanBuilder.cpp index 785cb9bcb4b6..7cf79863e867 100644 --- a/utils/local-engine/Builder/SerializedPlanBuilder.cpp +++ b/utils/local-engine/Builder/SerializedPlanBuilder.cpp @@ -2,7 +2,7 @@ namespace dbms { -std::unique_ptr SerializedSchemaBuilder::build() +SchemaPtr SerializedSchemaBuilder::build() { for (const auto & [name, type] : this->type_map) { @@ -68,16 +68,52 @@ SerializedSchemaBuilder & SerializedSchemaBuilder::column(std::string name, std: this->nullability_map.emplace(name, nullable); return *this; } -SerializedSchemaBuilder::SerializedSchemaBuilder():schema(std::make_unique()) +SerializedSchemaBuilder::SerializedSchemaBuilder():schema(new io::substrait::Type_NamedStruct()) { } -SerializedPlanBuilder & SerializedPlanBuilder::filter(std::string lhs, CompareOperator compareOperator, int value) +SerializedPlanBuilder& SerializedPlanBuilder::registerFunction(int id, std::string name) { - this->filters.push_back(std::make_tuple(lhs, compareOperator, value)); + auto *mapping = this->plan->mutable_mappings()->Add(); + auto *function_mapping = mapping->mutable_function_mapping(); + function_mapping->mutable_function_id()->set_id(id); + function_mapping->set_name(name); return *this; } -SerializedPlanBuilder & SerializedPlanBuilder::files(std::string path, SchemaPtr schema) + +void SerializedPlanBuilder::setInputToPrev(io::substrait::Rel * input) +{ + if (!this->prev_rel) return; + if (this->prev_rel->has_filter()) + { + this->prev_rel->mutable_filter()->set_allocated_input(input); + } + else if (this->prev_rel->has_aggregate()) + { + this->prev_rel->mutable_aggregate()->set_allocated_input(input); + } + else if (this->prev_rel->has_project()) + { + this->prev_rel->mutable_project()->set_allocated_input(input); + } + else + { + throw std::runtime_error("does support rel type"); + } +} + +SerializedPlanBuilder & SerializedPlanBuilder::filter(io::substrait::Expression * condition) +{ + io::substrait::Rel * filter = new io::substrait::Rel(); + filter->mutable_filter()->set_allocated_condition(condition); + setInputToPrev(filter); + this->prev_rel = filter; + return *this; +} + +SerializedPlanBuilder & SerializedPlanBuilder::read(std::string path, SchemaPtr schema) { + io::substrait::Rel * rel = new 
io::substrait::Rel(); + auto * read = rel->mutable_read(); this->source = path; this->data_schema = std::move(schema); return *this; diff --git a/utils/local-engine/Builder/SerializedPlanBuilder.h b/utils/local-engine/Builder/SerializedPlanBuilder.h index a45242703b3d..d74d13a22983 100644 --- a/utils/local-engine/Builder/SerializedPlanBuilder.h +++ b/utils/local-engine/Builder/SerializedPlanBuilder.h @@ -11,22 +11,27 @@ enum CompareOperator { EQUAL, GREATER }; -using SchemaPtr = std::unique_ptr; +using SchemaPtr = io::substrait::Type_NamedStruct *; using Filter = std::tuple; class SerializedPlanBuilder { public: SerializedPlanBuilder(); - SerializedPlanBuilder& filter(std::string lhs, CompareOperator compareOperator, int value); - SerializedPlanBuilder& files(std::string path, SchemaPtr schema); + SerializedPlanBuilder& registerFunction(int id, std::string name); + SerializedPlanBuilder& filter(io::substrait::Expression* condition); + SerializedPlanBuilder& read(std::string path, SchemaPtr schema); // SerializedPlanBuilder& aggregate(); // SerializedPlanBuilder& project(); std::unique_ptr build(); +private: + void setInputToPrev(io::substrait::Rel * input); + std::vector filters; std::string source; SchemaPtr data_schema; + io::substrait::Rel * prev_rel; std::unique_ptr plan; }; @@ -41,11 +46,11 @@ using Type = io::substrait::Type; class SerializedSchemaBuilder { public: SerializedSchemaBuilder(); - std::unique_ptr build(); + SchemaPtr build(); SerializedSchemaBuilder& column(std::string name, std::string type, bool nullable = false); private: std::map type_map; std::map nullability_map; - std::unique_ptr schema; + SchemaPtr schema; }; } diff --git a/utils/local-engine/Parser/SerializedPlanParser.cpp b/utils/local-engine/Parser/SerializedPlanParser.cpp index ca11f3c89098..6f0974c4fc54 100644 --- a/utils/local-engine/Parser/SerializedPlanParser.cpp +++ b/utils/local-engine/Parser/SerializedPlanParser.cpp @@ -1,20 +1,32 @@ #include "SerializedPlanParser.h" -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include #include -#include #include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include -DB::BatchParquetFileSourcePtr dbms::SerializedPlanParser::parseReadRealWithLocalFile(const io::substrait::ReadRel& rel) +namespace substrait = io::substrait; + +DB::BatchParquetFileSourcePtr dbms::SerializedPlanParser::parseReadRealWithLocalFile(const io::substrait::ReadRel & rel) { assert(rel.has_local_files()); assert(rel.has_base_schema()); auto files_info = std::make_shared(); - for (const auto &item : rel.local_files().items()) + for (const auto & item : rel.local_files().items()) { files_info->files.push_back(item.uri_path()); } @@ -27,14 +39,14 @@ DB::Block dbms::SerializedPlanParser::parseNameStruct(const io::substrait::Type_ internal_cols->reserve(struct_.names_size()); for (int i = 0; i < struct_.names_size(); ++i) { - const auto& name = struct_.names(i); - const auto& type = struct_.struct_().types(i); + const auto & name = struct_.names(i); + const auto & type = struct_.struct_().types(i); auto data_type = parseType(type); internal_cols->push_back(DB::ColumnWithTypeAndName(data_type->createColumn(), data_type, name)); } return DB::Block(*std::move(internal_cols)); } -DB::DataTypePtr dbms::SerializedPlanParser::parseType(const io::substrait::Type& type) +DB::DataTypePtr dbms::SerializedPlanParser::parseType(const io::substrait::Type & type) { auto & factory = 
DB::DataTypeFactory::instance(); if (type.has_bool_() || type.has_i8()) @@ -72,55 +84,197 @@ DB::DataTypePtr dbms::SerializedPlanParser::parseType(const io::substrait::Type& } DB::QueryPlanPtr dbms::SerializedPlanParser::parse(std::unique_ptr plan) { - auto query_plan = std::make_unique(); + if (plan->mappings_size() > 0) + { + for (auto mapping : plan->mappings()) + { + if (mapping.has_function_mapping()) + { + this->function_mapping.emplace(std::to_string(mapping.function_mapping().function_id().id()), mapping.function_mapping().name()); + } + } + } + if (plan->relations_size() == 1) { auto rel = plan->relations().at(0); - parse(*query_plan, rel); + return parseOp(rel); } else { throw std::runtime_error("too many relations found"); } - return query_plan; } -DB::QueryPlanPtr dbms::SerializedPlanParser::parse(std::string& plan) + +DB::QueryPlanPtr dbms::SerializedPlanParser::parseOp(const io::substrait::Rel & rel) { - auto plan_ptr = std::make_unique(); - plan_ptr->ParseFromString(plan); - return parse(std::move(plan_ptr)); + switch (rel.RelType_case()) + { + case substrait::Rel::RelTypeCase::kFetch: { + const auto & limit = rel.fetch(); + DB::QueryPlanPtr query_plan = parseOp(limit.input()); + auto limit_step = std::make_unique(query_plan->getCurrentDataStream(), limit.count(), limit.offset()); + query_plan->addStep(std::move(limit_step)); + return query_plan; + } + case substrait::Rel::RelTypeCase::kFilter: { + const auto & filter = rel.filter(); + DB::QueryPlanPtr query_plan = parseOp(filter.input()); + std::string filter_name; + auto actions_dag = parseFunction(query_plan->getCurrentDataStream(), filter.condition(), filter_name); + auto filter_step = std::make_unique(query_plan->getCurrentDataStream(), actions_dag, filter_name, true); + query_plan->addStep(std::move(filter_step)); + return query_plan; + } + case substrait::Rel::RelTypeCase::kProject: { + const auto & project = rel.project(); + DB::QueryPlanPtr query_plan = parseOp(project.input()); + const auto & expressions = project.expressions(); + for (const auto & expr : expressions) + { + std::string result_name; + auto expression_step = std::make_unique( + query_plan->getCurrentDataStream(), parseFunction(query_plan->getCurrentDataStream(), expr, result_name)); + query_plan->addStep(std::move(expression_step)); + } + return query_plan; + } + case substrait::Rel::RelTypeCase::kAggregate: { + const auto & aggregate = rel.aggregate(); + DB::QueryPlanPtr query_plan = parseOp(aggregate.input()); + auto aggregate_step = parseAggregate(aggregate); + query_plan->addStep(std::move(aggregate_step)); + return query_plan; + } + case substrait::Rel::RelTypeCase::kRead: { + const auto & read = rel.read(); + assert(read.has_local_files() && "Only support local files read rel"); + DB::QueryPlanPtr query_plan = std::make_unique(); + std::shared_ptr source = std::dynamic_pointer_cast(parseReadRealWithLocalFile(read)); + auto source_step = std::make_unique(Pipe(source), "Parquet"); + query_plan->addStep(std::move(source_step)); + // if (read.has_filter()) + // { + // const auto &filter_expr = read.filter(); + // auto filter_step = std::make_unique(query_plan->getCurrentDataStream(), parseExpr(filter_expr), "filter", true); + // query_plan->addStep(std::move(filter_step)); + // } + return query_plan; + } + default: + throw std::runtime_error("doesn't support relation type " + std::to_string(rel.RelType_case())); + } } -void dbms::SerializedPlanParser::parse(DB::QueryPlan & query_plan, const io::substrait::ReadRel & rel) + +DB::QueryPlanStepPtr 
dbms::SerializedPlanParser::parseAggregate(const io::substrait::AggregateRel & rel) { - std::shared_ptr source = std::dynamic_pointer_cast(SerializedPlanParser::parseReadRealWithLocalFile(rel)); - auto source_step = std::make_unique(Pipe(source), "Parquet"); - query_plan.addStep(std::move(source_step)); } -void dbms::SerializedPlanParser::parse(DB::QueryPlan & query_plan, const io::substrait::Rel& rel) + +DB::NamesAndTypesList dbms::SerializedPlanParser::blockToNameAndTypeList(const DB::Block & header) { - if (rel.has_read()) { - parse(query_plan, rel.read()); - } - else if (rel.has_project()) + DB::NamesAndTypesList types; + for (const auto & name : header.getNames()) { - parse(query_plan, rel.project()); + const auto * column = header.findByName(name); + types.push_back(DB::NameAndTypePair(column->name, column->type)); } - else + return types; +} + +void join(DB::ActionsDAG::NodeRawConstPtrs v, char c, std::string & s) +{ + s.clear(); + for (auto p = v.begin(); p != v.end(); ++p) { - throw std::runtime_error("unsupported relation"); + s += (*p)->result_name; + if (p != v.end() - 1) + s += c; } } -void dbms::SerializedPlanParser::parse(DB::QueryPlan & query_plan, const io::substrait::ProjectRel & rel) + + +DB::ActionsDAGPtr dbms::SerializedPlanParser::parseFunction( + const DataStream & input, const io::substrait::Expression & rel, std::string & result_name, DB::ActionsDAGPtr actions_dag) { - if (rel.has_input()) + assert(rel.has_scalar_function() && "the root of expression should be a scalar function"); + const auto & scalar_function = rel.scalar_function(); + if (!actions_dag) { - parse(query_plan, rel.input()); + actions_dag = std::make_shared(blockToNameAndTypeList(input.header)); } - else + DB::ActionsDAG::NodeRawConstPtrs args; + for (const auto & arg : scalar_function.args()) + { + if (arg.has_scalar_function()) + { + std::string arg_name; + parseFunction(input, arg, arg_name, actions_dag); + args.emplace_back(&actions_dag->getNodes().back()); + } + else + { + args.emplace_back(parseArgument(actions_dag, arg)); + } + } + auto function_name = this->function_mapping.at(std::to_string(rel.scalar_function().id().id())); + assert(SCALAR_FUNCTIONS.contains(function_name) && ("doesn't support function " + function_name).c_str()); + auto function_builder = DB::FunctionFactory::instance().get(SCALAR_FUNCTIONS.at(function_name), this->context); + std::string args_name; + join(args, ',', args_name); + result_name = function_name + "(" + args_name + ")"; + actions_dag->addFunction(function_builder, args, result_name); + return actions_dag; +} + +const DB::ActionsDAG::Node * dbms::SerializedPlanParser::parseArgument(DB::ActionsDAGPtr action_dag, const io::substrait::Expression & rel) +{ + switch (rel.rex_type_case()) { - throw std::runtime_error("project relation should contains a input relation"); + case io::substrait::Expression::RexTypeCase::kLiteral: { + const auto & literal = rel.literal(); + switch (literal.literal_type_case()) + { + case io::substrait::Expression_Literal::kFp64: { + auto type = std::make_shared(); + auto const_node = action_dag->addInput(ColumnWithTypeAndName( + type->createColumnConst(1, literal.fp64()), type, getUniqueName(std::to_string(literal.fp64())))); + return action_dag->tryFindInIndex(const_node.result_name); + } + case io::substrait::Expression_Literal::kString: { + auto type = std::make_shared(); + auto const_node = action_dag->addInput( + ColumnWithTypeAndName(type->createColumnConst(1, literal.string()), type, getUniqueName(literal.string()))); + return 
action_dag->tryFindInIndex(const_node.result_name); + } + case io::substrait::Expression_Literal::kI32: { + auto type = std::make_shared(); + auto const_node = action_dag->addInput(ColumnWithTypeAndName( + type->createColumnConst(1, literal.i32()), type, getUniqueName(std::to_string(literal.i32())))); + return action_dag->tryFindInIndex(const_node.result_name); + } + default: + throw std::runtime_error("unsupported constant type " + std::to_string(literal.literal_type_case())); + } + } + case io::substrait::Expression::RexTypeCase::kSelection: { + if (!rel.selection().has_direct_reference() || !rel.selection().direct_reference().has_struct_field()) + { + throw std::runtime_error("Can only have direct struct references in selections"); + } + const auto * field = action_dag->getInputs()[rel.selection().direct_reference().struct_field().field() - 1]; + return action_dag->tryFindInIndex(field->result_name); + } + default: + throw std::runtime_error("unsupported arg type " + std::to_string(rel.rex_type_case())); } - //TODO add project step +} + + +DB::QueryPlanPtr dbms::SerializedPlanParser::parse(std::string & plan) +{ + auto plan_ptr = std::make_unique(); + plan_ptr->ParseFromString(plan); + return parse(std::move(plan_ptr)); } DB::Chunk DB::BatchParquetFileSource::generate() { @@ -136,7 +290,9 @@ DB::Chunk DB::BatchParquetFileSource::generate() current_path = files_info->files[current_file]; std::unique_ptr nested_buffer; - struct stat file_stat{}; + struct stat file_stat + { + }; /// Check if file descriptor allows random reads (and reading it twice). if (0 != stat(current_path.c_str(), &file_stat)) @@ -173,8 +329,7 @@ DB::Chunk DB::BatchParquetFileSource::generate() return {}; } -DB::BatchParquetFileSource::BatchParquetFileSource( - FilesInfoPtr files, const DB::Block & sample) +DB::BatchParquetFileSource::BatchParquetFileSource(FilesInfoPtr files, const DB::Block & sample) : SourceWithProgress(sample), files_info(files), header(sample) { } @@ -186,7 +341,7 @@ void dbms::LocalExecutor::execute(DB::QueryPlanPtr query_plan) this->header = query_plan->getCurrentDataStream().header; this->ch_column_to_spark_row = std::make_unique(); } -std::unique_ptr dbms::LocalExecutor::writeBlockToSparkRow(DB::Block &block) +std::unique_ptr dbms::LocalExecutor::writeBlockToSparkRow(DB::Block & block) { return this->ch_column_to_spark_row->convertCHColumnToSparkRow(block); } @@ -197,7 +352,9 @@ bool dbms::LocalExecutor::hasNext() { this->current_chunk = std::make_unique(this->header); has_next = this->executor->pull(*this->current_chunk); - } else { + } + else + { has_next = true; } return has_next; diff --git a/utils/local-engine/Parser/SerializedPlanParser.h b/utils/local-engine/Parser/SerializedPlanParser.h index f497e95f4c8b..043b38bacb45 100644 --- a/utils/local-engine/Parser/SerializedPlanParser.h +++ b/utils/local-engine/Parser/SerializedPlanParser.h @@ -12,6 +12,7 @@ #include #include #include +#include #include "CHColumnToSparkRow.h" namespace DB @@ -57,18 +58,62 @@ namespace dbms using namespace DB; +static const std::map SCALAR_FUNCTIONS = { + {"IS_NOT_NULL","isNotNull"}, + {"GREATER_THAN_OR_EQUAL","greaterOrEquals"}, + {"AND", "and"}, + {"LESS_THAN_OR_EQUAL", "lessOrEquals"}, + {"LESS_THAN", "less"}, + {"MULTIPLY", "multiply"} +}; + class SerializedPlanParser { public: - static DB::QueryPlanPtr parse(std::string& plan); - static DB::QueryPlanPtr parse(std::unique_ptr plan); - static DB::BatchParquetFileSourcePtr parseReadRealWithLocalFile(const io::substrait::ReadRel& rel); - static DB::Block 
parseNameStruct(const io::substrait::Type_NamedStruct& struct_); - static DB::DataTypePtr parseType(const io::substrait::Type& type); + DB::QueryPlanPtr parse(std::string& plan); + DB::QueryPlanPtr parse(std::unique_ptr plan); + + DB::BatchParquetFileSourcePtr parseReadRealWithLocalFile(const io::substrait::ReadRel& rel); + DB::Block parseNameStruct(const io::substrait::Type_NamedStruct& struct_); + DB::DataTypePtr parseType(const io::substrait::Type& type); private: - static void parse(DB::QueryPlan & query_plan, const io::substrait::Rel& rel); - static void parse(DB::QueryPlan & query_plan, const io::substrait::ReadRel& rel); - static void parse(DB::QueryPlan & query_plan, const io::substrait::ProjectRel& rel); + static DB::NamesAndTypesList blockToNameAndTypeList(const DB::Block & header); + DB::QueryPlanPtr parseOp(const io::substrait::Rel &rel); + DB::ActionsDAGPtr parseFunction(const DataStream & input, const io::substrait::Expression &rel, std::string & result_name, DB::ActionsDAGPtr actions_dag = nullptr); + DB::QueryPlanStepPtr parseAggregate(const io::substrait::AggregateRel &rel); + const DB::ActionsDAG::Node * parseArgument(DB::ActionsDAGPtr action_dag, const io::substrait::Expression &rel); + std::string getUniqueName(std::string name) + { + return name + "_" + std::to_string(name_no++); + } + + Aggregator::Params getAggregateFunction(Block & header, ColumnNumbers & keys, AggregateDescriptions & aggregates) + { + Settings settings; + return Aggregator::Params( + header, + keys, + aggregates, + false, + settings.max_rows_to_group_by, + settings.group_by_overflow_mode, + settings.group_by_two_level_threshold, + settings.group_by_two_level_threshold_bytes, + settings.max_bytes_before_external_group_by, + settings.empty_result_for_aggregation_by_empty_set, + context->getTemporaryVolume(), + settings.max_threads, + settings.min_free_disk_space_for_temporary_data, + settings.compile_aggregate_expressions, + settings.min_count_to_compile_aggregate_expression); + } + + + int name_no = 0; + std::unordered_map function_mapping; + ContextPtr context; +// DB::QueryPlanPtr query_plan; + }; struct SparkBuffer diff --git a/utils/local-engine/local_engine_jni.cpp b/utils/local-engine/local_engine_jni.cpp index 94979f1bd719..38921ef462c8 100644 --- a/utils/local-engine/local_engine_jni.cpp +++ b/utils/local-engine/local_engine_jni.cpp @@ -107,7 +107,8 @@ void Java_io_kyligence_jni_engine_LocalEngine_execute(JNIEnv * env, jobject obj) jbyte * plan_address = env->GetByteArrayElements(*plan, nullptr); std::string plan_string; plan_string.assign(reinterpret_cast(plan_address), plan_size); - auto query_plan = dbms::SerializedPlanParser::parse(plan_string); + dbms::SerializedPlanParser parser; + auto query_plan = parser.parse(plan_string); dbms::LocalExecutor * executor = new dbms::LocalExecutor(); executor->execute(std::move(query_plan)); env->SetLongField(obj, local_engine_executor_field_id, reinterpret_cast(executor)); diff --git a/utils/local-engine/tests/gtest_local_engine.cpp b/utils/local-engine/tests/gtest_local_engine.cpp index f102198d3f27..3bac592ac243 100644 --- a/utils/local-engine/tests/gtest_local_engine.cpp +++ b/utils/local-engine/tests/gtest_local_engine.cpp @@ -34,7 +34,8 @@ TEST(TestSelect, ReadRel) ASSERT_EQ(plan->relations_size(), 1); std::cout << "start execute" <getNumRows(), 0); local_engine::SparkColumnToCHColumn converter; auto block = converter.convertCHColumnToSparkRow(*spark_row_info, local_executor.getHeader()); - ASSERT_GT(spark_row_info->getNumRows(), block->rows()); 
+ ASSERT_EQ(spark_row_info->getNumRows(), block->rows()); } } +TEST(TestSelect, TestFilter) +{ + +} + TEST(TestSelect, PerformanceTest) { @@ -79,7 +85,8 @@ TEST(TestSelect, PerformanceTest) ASSERT_TRUE(plan->relations(0).has_read()); ASSERT_EQ(plan->relations_size(), 1); - auto query_plan = dbms::SerializedPlanParser::parse(std::move(plan)); + dbms::SerializedPlanParser parser; + auto query_plan = parser.parse(std::move(plan)); std::cout << "start execute" << std::endl; dbms::LocalExecutor local_executor; From 0972cb41f9a9e4089955ad4c624a4a8d9e8a43b8 Mon Sep 17 00:00:00 2001 From: "neng.liu" Date: Mon, 27 Dec 2021 08:45:29 +0000 Subject: [PATCH 258/472] support filter and aggregate --- .../Builder/SerializedPlanBuilder.cpp | 136 ++++++++++++------ .../Builder/SerializedPlanBuilder.h | 47 ++++-- .../Parser/SerializedPlanParser.cpp | 94 ++++++++++-- .../Parser/SerializedPlanParser.h | 13 +- .../io/kyligence/jni/engine/SparkRowInfo.java | 4 +- utils/local-engine/local_engine_jni.cpp | 8 +- .../local-engine/tests/gtest_local_engine.cpp | 107 ++++++++++++-- 7 files changed, 324 insertions(+), 85 deletions(-) diff --git a/utils/local-engine/Builder/SerializedPlanBuilder.cpp b/utils/local-engine/Builder/SerializedPlanBuilder.cpp index 7cf79863e867..b350282aa13b 100644 --- a/utils/local-engine/Builder/SerializedPlanBuilder.cpp +++ b/utils/local-engine/Builder/SerializedPlanBuilder.cpp @@ -7,57 +7,58 @@ SchemaPtr SerializedSchemaBuilder::build() for (const auto & [name, type] : this->type_map) { this->schema->add_names(name); - auto *type_struct = this->schema->mutable_struct_(); + auto * type_struct = this->schema->mutable_struct_(); if (type == "I8") { - auto *t = type_struct->mutable_types()->Add(); - t->mutable_i8()->set_nullability( - this->nullability_map[name] ? io::substrait::Type_Nullability_NULLABLE : io::substrait::Type_Nullability_REQUIRED); + auto * t = type_struct->mutable_types()->Add(); + t->mutable_i8()->set_nullability( + this->nullability_map[name] ? io::substrait::Type_Nullability_NULLABLE : io::substrait::Type_Nullability_REQUIRED); } else if (type == "I32") { - auto *t = type_struct->mutable_types()->Add(); + auto * t = type_struct->mutable_types()->Add(); t->mutable_i32()->set_nullability( this->nullability_map[name] ? io::substrait::Type_Nullability_NULLABLE : io::substrait::Type_Nullability_REQUIRED); } else if (type == "I64") { - auto *t = type_struct->mutable_types()->Add(); + auto * t = type_struct->mutable_types()->Add(); t->mutable_i64()->set_nullability( this->nullability_map[name] ? io::substrait::Type_Nullability_NULLABLE : io::substrait::Type_Nullability_REQUIRED); } else if (type == "Boolean") { - auto *t = type_struct->mutable_types()->Add(); + auto * t = type_struct->mutable_types()->Add(); t->mutable_bool_()->set_nullability( this->nullability_map[name] ? io::substrait::Type_Nullability_NULLABLE : io::substrait::Type_Nullability_REQUIRED); } else if (type == "I16") { - auto *t = type_struct->mutable_types()->Add(); + auto * t = type_struct->mutable_types()->Add(); t->mutable_i16()->set_nullability( this->nullability_map[name] ? io::substrait::Type_Nullability_NULLABLE : io::substrait::Type_Nullability_REQUIRED); } else if (type == "String") { - auto *t = type_struct->mutable_types()->Add(); + auto * t = type_struct->mutable_types()->Add(); t->mutable_string()->set_nullability( this->nullability_map[name] ? 
io::substrait::Type_Nullability_NULLABLE : io::substrait::Type_Nullability_REQUIRED); } else if (type == "FP32") { - auto *t = type_struct->mutable_types()->Add(); + auto * t = type_struct->mutable_types()->Add(); t->mutable_fp32()->set_nullability( this->nullability_map[name] ? io::substrait::Type_Nullability_NULLABLE : io::substrait::Type_Nullability_REQUIRED); } else if (type == "FP64") { - auto *t = type_struct->mutable_types()->Add(); + auto * t = type_struct->mutable_types()->Add(); t->mutable_fp64()->set_nullability( this->nullability_map[name] ? io::substrait::Type_Nullability_NULLABLE : io::substrait::Type_Nullability_REQUIRED); } - else { - throw "doesn't support type "+ type; + else + { + throw "doesn't support type " + type; } } return std::move(this->schema); @@ -68,13 +69,13 @@ SerializedSchemaBuilder & SerializedSchemaBuilder::column(std::string name, std: this->nullability_map.emplace(name, nullable); return *this; } -SerializedSchemaBuilder::SerializedSchemaBuilder():schema(new io::substrait::Type_NamedStruct()) +SerializedSchemaBuilder::SerializedSchemaBuilder() : schema(new io::substrait::Type_NamedStruct()) { } -SerializedPlanBuilder& SerializedPlanBuilder::registerFunction(int id, std::string name) +SerializedPlanBuilder & SerializedPlanBuilder::registerFunction(int id, std::string name) { - auto *mapping = this->plan->mutable_mappings()->Add(); - auto *function_mapping = mapping->mutable_function_mapping(); + auto * mapping = this->plan->mutable_mappings()->Add(); + auto * function_mapping = mapping->mutable_function_mapping(); function_mapping->mutable_function_id()->set_id(id); function_mapping->set_name(name); return *this; @@ -82,7 +83,11 @@ SerializedPlanBuilder& SerializedPlanBuilder::registerFunction(int id, std::stri void SerializedPlanBuilder::setInputToPrev(io::substrait::Rel * input) { - if (!this->prev_rel) return; + if (!this->prev_rel) + { + this->plan->mutable_relations()->AddAllocated(input); + return; + } if (this->prev_rel->has_filter()) { this->prev_rel->mutable_filter()->set_allocated_input(input); @@ -95,7 +100,7 @@ void SerializedPlanBuilder::setInputToPrev(io::substrait::Rel * input) { this->prev_rel->mutable_project()->set_allocated_input(input); } - else + else { throw std::runtime_error("does support rel type"); } @@ -114,37 +119,80 @@ SerializedPlanBuilder & SerializedPlanBuilder::read(std::string path, SchemaPtr { io::substrait::Rel * rel = new io::substrait::Rel(); auto * read = rel->mutable_read(); - this->source = path; - this->data_schema = std::move(schema); + read->mutable_local_files()->add_items()->set_uri_path(path); + read->set_allocated_base_schema(schema); + setInputToPrev(rel); + this->prev_rel = rel; return *this; } std::unique_ptr SerializedPlanBuilder::build() { -// for (const auto & [lhs, compareOperator, value] : this->filters) -// { -// auto filter_rel = std::make_shared(); -// auto *function = filter_rel->mutable_condition()->mutable_scalar_function(); -// function->mutable_id()->set_id(1); -// auto *args = function->mutable_args(); -// -// auto arg1 = io::substrait::Expression(); -// arg1.literal().i32(); -// args->Add(std::move(arg1)); -// -// auto arg2 = io::substrait::Expression();co -// arg2.literal().i8() -// } -// -// filter_rel->mutable_input()->set_allocated_read(read_rel.get()) - auto *rel = this->plan->mutable_relations()->Add(); - auto *read_rel = rel->mutable_read(); - auto *local_files = read_rel->mutable_local_files(); - auto *file = local_files->mutable_items()->Add(); - file->set_uri_path(this->source); 
- read_rel->mutable_base_schema()->CopyFrom(*this->data_schema); return std::move(this->plan); } -SerializedPlanBuilder::SerializedPlanBuilder():plan(std::make_unique()) +SerializedPlanBuilder::SerializedPlanBuilder() : plan(std::make_unique()) +{ +} +SerializedPlanBuilder & SerializedPlanBuilder::aggregate(std::vector keys, std::vector aggregates) +{ + io::substrait::Rel * rel = new io::substrait::Rel(); + auto * agg = rel->mutable_aggregate(); + auto * grouping = agg->mutable_groupings()->Add(); + grouping->mutable_input_fields()->Add(keys.begin(), keys.end()); + auto * measures = agg->mutable_measures(); + for (auto * measure : aggregates) + { + measures->AddAllocated(measure); + } + setInputToPrev(rel); + this->prev_rel = rel; + return *this; +} + + +io::substrait::Expression * selection(int32_t field_id) +{ + io::substrait::Expression * rel = new io::substrait::Expression(); + auto * selection = rel->mutable_selection(); + selection->mutable_direct_reference()->mutable_struct_field()->set_field(field_id); + return rel; +} +io::substrait::Expression * scalarFunction(int32_t id, ExpressionList args) +{ + io::substrait::Expression * rel = new io::substrait::Expression(); + auto * function = rel->mutable_scalar_function(); + function->mutable_id()->set_id(id); + std::for_each(args.begin(), args.end(), [function](auto * expr) { function->mutable_args()->AddAllocated(expr); }); + return rel; +} +io::substrait::AggregateRel_Measure * measureFunction(int32_t id, ExpressionList args) +{ + io::substrait::AggregateRel_Measure * rel = new io::substrait::AggregateRel_Measure(); + auto * measure = rel->mutable_measure(); + measure->mutable_id()->set_id(id); + std::for_each(args.begin(), args.end(), [measure](auto * expr) { measure->mutable_args()->AddAllocated(expr); }); + return rel; +} +io::substrait::Expression * literal(double_t value) +{ + io::substrait::Expression * rel = new io::substrait::Expression(); + auto * literal = rel->mutable_literal(); + literal->set_fp64(value); + return rel; +} + +io::substrait::Expression * literal(int32_t value) +{ + io::substrait::Expression * rel = new io::substrait::Expression(); + auto * literal = rel->mutable_literal(); + literal->set_i32(value); + return rel; +} + +io::substrait::Expression * literal(std::string value) { + io::substrait::Expression * rel = new io::substrait::Expression(); + auto * literal = rel->mutable_literal(); + literal->set_string(value); + return rel; } } diff --git a/utils/local-engine/Builder/SerializedPlanBuilder.h b/utils/local-engine/Builder/SerializedPlanBuilder.h index d74d13a22983..83f211ffeafe 100644 --- a/utils/local-engine/Builder/SerializedPlanBuilder.h +++ b/utils/local-engine/Builder/SerializedPlanBuilder.h @@ -6,31 +6,42 @@ namespace dbms { -enum CompareOperator { - LESS, - EQUAL, - GREATER + +enum Function +{ + IS_NOT_NULL=1, + GREATER_THAN_OR_EQUAL, + AND, + LESS_THAN_OR_EQUAL, + LESS_THAN, + MULTIPLY, + SUM }; + using SchemaPtr = io::substrait::Type_NamedStruct *; -using Filter = std::tuple; class SerializedPlanBuilder { public: SerializedPlanBuilder(); + SerializedPlanBuilder& registerSupportedFunctions() { + this->registerFunction(IS_NOT_NULL, "IS_NOT_NULL") + .registerFunction(GREATER_THAN_OR_EQUAL, "GREATER_THAN_OR_EQUAL") + .registerFunction(AND, "AND") + .registerFunction(LESS_THAN_OR_EQUAL, "LESS_THAN_OR_EQUAL") + .registerFunction(LESS_THAN, "LESS_THAN") + .registerFunction(MULTIPLY, "MULTIPLY") + .registerFunction(SUM, "SUM"); + return *this; + } SerializedPlanBuilder& registerFunction(int id, std::string 
name); SerializedPlanBuilder& filter(io::substrait::Expression* condition); + SerializedPlanBuilder& aggregate(std::vector keys, std::vector aggregates); SerializedPlanBuilder& read(std::string path, SchemaPtr schema); -// SerializedPlanBuilder& aggregate(); -// SerializedPlanBuilder& project(); std::unique_ptr build(); private: void setInputToPrev(io::substrait::Rel * input); - - std::vector filters; - std::string source; - SchemaPtr data_schema; io::substrait::Rel * prev_rel; std::unique_ptr plan; }; @@ -53,4 +64,18 @@ class SerializedSchemaBuilder { std::map nullability_map; SchemaPtr schema; }; + +using ExpressionList = std::vector; +using MeasureList = std::vector; + + +io::substrait::Expression * scalarFunction(int32_t id, ExpressionList args); +io::substrait::AggregateRel_Measure * measureFunction(int32_t id, ExpressionList args); + +io::substrait::Expression* literal(double_t value); +io::substrait::Expression* literal(int32_t value); +io::substrait::Expression* literal(std::string value); + +io::substrait::Expression * selection(int32_t field_id); + } diff --git a/utils/local-engine/Parser/SerializedPlanParser.cpp b/utils/local-engine/Parser/SerializedPlanParser.cpp index 6f0974c4fc54..ec18ef99b11f 100644 --- a/utils/local-engine/Parser/SerializedPlanParser.cpp +++ b/utils/local-engine/Parser/SerializedPlanParser.cpp @@ -6,7 +6,6 @@ #include #include #include -#include #include #include #include @@ -17,6 +16,10 @@ #include #include #include +#include +#include +#include +#include #include namespace substrait = io::substrait; @@ -86,7 +89,7 @@ DB::QueryPlanPtr dbms::SerializedPlanParser::parse(std::unique_ptrmappings_size() > 0) { - for (auto mapping : plan->mappings()) + for (const auto& mapping : plan->mappings()) { if (mapping.has_function_mapping()) { @@ -121,7 +124,7 @@ DB::QueryPlanPtr dbms::SerializedPlanParser::parseOp(const io::substrait::Rel & const auto & filter = rel.filter(); DB::QueryPlanPtr query_plan = parseOp(filter.input()); std::string filter_name; - auto actions_dag = parseFunction(query_plan->getCurrentDataStream(), filter.condition(), filter_name); + auto actions_dag = parseFunction(query_plan->getCurrentDataStream(), filter.condition(), filter_name, nullptr, true); auto filter_step = std::make_unique(query_plan->getCurrentDataStream(), actions_dag, filter_name, true); query_plan->addStep(std::move(filter_step)); return query_plan; @@ -142,7 +145,7 @@ DB::QueryPlanPtr dbms::SerializedPlanParser::parseOp(const io::substrait::Rel & case substrait::Rel::RelTypeCase::kAggregate: { const auto & aggregate = rel.aggregate(); DB::QueryPlanPtr query_plan = parseOp(aggregate.input()); - auto aggregate_step = parseAggregate(aggregate); + auto aggregate_step = parseAggregate(*query_plan, aggregate); query_plan->addStep(std::move(aggregate_step)); return query_plan; } @@ -166,8 +169,65 @@ DB::QueryPlanPtr dbms::SerializedPlanParser::parseOp(const io::substrait::Rel & } } -DB::QueryPlanStepPtr dbms::SerializedPlanParser::parseAggregate(const io::substrait::AggregateRel & rel) +DB::AggregateFunctionPtr getAggregateFunction(const std::string & name, DB::DataTypes arg_types) { + auto & factory = DB::AggregateFunctionFactory::instance(); + DB::AggregateFunctionProperties properties; + return factory.get(name, arg_types, DB::Array{}, properties); +} + +DB::QueryPlanStepPtr dbms::SerializedPlanParser::parseAggregate(DB::QueryPlan & plan, const io::substrait::AggregateRel & rel) +{ + auto input = plan.getCurrentDataStream(); + DB::ActionsDAGPtr expression = 
std::make_shared(blockToNameAndTypeList(input.header)); + std::vector measure_names; + for (const auto& measure : rel.measures()) + { + assert(measure.measure().args_size() == 1 && "only support one argument aggregate function"); + auto arg = measure.measure().args(0); + if (arg.has_scalar_function()) { + std::string name; + parseFunction(input, arg, name, expression, true); + measure_names.emplace_back(name); + } + else if (arg.has_selection()) + { + auto name = input.header.getByPosition(arg.selection().direct_reference().struct_field().field()).name; + measure_names.emplace_back(name); + } + else + { + throw std::runtime_error("unsupported aggregate argument type."); + } + } + auto expression_before_aggregate = std::make_unique(input, expression); + plan.addStep(std::move(expression_before_aggregate)); + + // TODO need support grouping key + auto aggregates = DB::AggregateDescriptions(); + for (int i = 0; i < rel.measures_size(); ++i) + { + const auto& measure = rel.measures(i); + DB::AggregateDescription agg; + auto function_name = this->function_mapping.at(std::to_string(measure.measure().id().id())); + agg.column_name = function_name +"(" + measure_names.at(i) + ")"; + agg.arguments = DB::ColumnNumbers{plan.getCurrentDataStream().header.getPositionByName(measure_names.at(i))}; + agg.argument_names = DB::Names{measure_names.at(i)}; + agg.function = ::getAggregateFunction(function_name, {plan.getCurrentDataStream().header.getByName(measure_names.at(i)).type}); + aggregates.push_back(agg); + } + + auto aggregating_step = std::make_unique( + plan.getCurrentDataStream(), + this->getAggregateParam(plan.getCurrentDataStream().header, {}, aggregates), + true, + 1000000, + 1, + 1, + false, + nullptr, + DB::SortDescription()); + return aggregating_step; } DB::NamesAndTypesList dbms::SerializedPlanParser::blockToNameAndTypeList(const DB::Block & header) @@ -194,7 +254,7 @@ void join(DB::ActionsDAG::NodeRawConstPtrs v, char c, std::string & s) DB::ActionsDAGPtr dbms::SerializedPlanParser::parseFunction( - const DataStream & input, const io::substrait::Expression & rel, std::string & result_name, DB::ActionsDAGPtr actions_dag) + const DataStream & input, const io::substrait::Expression & rel, std::string & result_name, DB::ActionsDAGPtr actions_dag, bool keep_result) { assert(rel.has_scalar_function() && "the root of expression should be a scalar function"); const auto & scalar_function = rel.scalar_function(); @@ -222,7 +282,9 @@ DB::ActionsDAGPtr dbms::SerializedPlanParser::parseFunction( std::string args_name; join(args, ',', args_name); result_name = function_name + "(" + args_name + ")"; - actions_dag->addFunction(function_builder, args, result_name); + const auto* function_node = &actions_dag->addFunction(function_builder, args, result_name); + if (keep_result) + actions_dag->addOrReplaceInIndex(*function_node); return actions_dag; } @@ -236,21 +298,18 @@ const DB::ActionsDAG::Node * dbms::SerializedPlanParser::parseArgument(DB::Actio { case io::substrait::Expression_Literal::kFp64: { auto type = std::make_shared(); - auto const_node = action_dag->addInput(ColumnWithTypeAndName( + return &action_dag->addColumn(ColumnWithTypeAndName( type->createColumnConst(1, literal.fp64()), type, getUniqueName(std::to_string(literal.fp64())))); - return action_dag->tryFindInIndex(const_node.result_name); } case io::substrait::Expression_Literal::kString: { auto type = std::make_shared(); - auto const_node = action_dag->addInput( + return &action_dag->addColumn( 
ColumnWithTypeAndName(type->createColumnConst(1, literal.string()), type, getUniqueName(literal.string()))); - return action_dag->tryFindInIndex(const_node.result_name); } case io::substrait::Expression_Literal::kI32: { auto type = std::make_shared(); - auto const_node = action_dag->addInput(ColumnWithTypeAndName( + return &action_dag->addColumn(ColumnWithTypeAndName( type->createColumnConst(1, literal.i32()), type, getUniqueName(std::to_string(literal.i32())))); - return action_dag->tryFindInIndex(const_node.result_name); } default: throw std::runtime_error("unsupported constant type " + std::to_string(literal.literal_type_case())); @@ -276,6 +335,15 @@ DB::QueryPlanPtr dbms::SerializedPlanParser::parse(std::string & plan) plan_ptr->ParseFromString(plan); return parse(std::move(plan_ptr)); } +void dbms::SerializedPlanParser::initFunctionEnv() +{ + dbms::registerFunctions(); + dbms::registerAggregateFunctions(); +} +dbms::SerializedPlanParser::SerializedPlanParser(const DB::ContextPtr & context) : context(context) +{ +} +//dbms::ContextPtr dbms::SerializedPlanParser::context = dbms::Context::createGlobal(dbms::Context::createShared().get()); DB::Chunk DB::BatchParquetFileSource::generate() { while (!finished_generate) diff --git a/utils/local-engine/Parser/SerializedPlanParser.h b/utils/local-engine/Parser/SerializedPlanParser.h index 043b38bacb45..915a305e5d73 100644 --- a/utils/local-engine/Parser/SerializedPlanParser.h +++ b/utils/local-engine/Parser/SerializedPlanParser.h @@ -64,12 +64,15 @@ static const std::map SCALAR_FUNCTIONS = { {"AND", "and"}, {"LESS_THAN_OR_EQUAL", "lessOrEquals"}, {"LESS_THAN", "less"}, - {"MULTIPLY", "multiply"} + {"MULTIPLY", "multiply"}, + {"SUM", "sum"} }; class SerializedPlanParser { public: + SerializedPlanParser(const ContextPtr & context); + static void initFunctionEnv(); DB::QueryPlanPtr parse(std::string& plan); DB::QueryPlanPtr parse(std::unique_ptr plan); @@ -79,15 +82,15 @@ class SerializedPlanParser private: static DB::NamesAndTypesList blockToNameAndTypeList(const DB::Block & header); DB::QueryPlanPtr parseOp(const io::substrait::Rel &rel); - DB::ActionsDAGPtr parseFunction(const DataStream & input, const io::substrait::Expression &rel, std::string & result_name, DB::ActionsDAGPtr actions_dag = nullptr); - DB::QueryPlanStepPtr parseAggregate(const io::substrait::AggregateRel &rel); + DB::ActionsDAGPtr parseFunction(const DataStream & input, const io::substrait::Expression &rel, std::string & result_name, DB::ActionsDAGPtr actions_dag = nullptr, bool keep_result = false); + DB::QueryPlanStepPtr parseAggregate(DB::QueryPlan & plan, const io::substrait::AggregateRel &rel); const DB::ActionsDAG::Node * parseArgument(DB::ActionsDAGPtr action_dag, const io::substrait::Expression &rel); std::string getUniqueName(std::string name) { return name + "_" + std::to_string(name_no++); } - Aggregator::Params getAggregateFunction(Block & header, ColumnNumbers & keys, AggregateDescriptions & aggregates) + Aggregator::Params getAggregateParam(const Block & header, const ColumnNumbers & keys, const AggregateDescriptions & aggregates) { Settings settings; return Aggregator::Params( @@ -101,7 +104,7 @@ class SerializedPlanParser settings.group_by_two_level_threshold_bytes, settings.max_bytes_before_external_group_by, settings.empty_result_for_aggregation_by_empty_set, - context->getTemporaryVolume(), + nullptr, settings.max_threads, settings.min_free_disk_space_for_temporary_data, settings.compile_aggregate_expressions, diff --git 
a/utils/local-engine/java/src/main/java/io/kyligence/jni/engine/SparkRowInfo.java b/utils/local-engine/java/src/main/java/io/kyligence/jni/engine/SparkRowInfo.java index 387cc34bc6a0..1ed2e4e72ef6 100644 --- a/utils/local-engine/java/src/main/java/io/kyligence/jni/engine/SparkRowInfo.java +++ b/utils/local-engine/java/src/main/java/io/kyligence/jni/engine/SparkRowInfo.java @@ -4,10 +4,12 @@ public class SparkRowInfo { public long[] offsets; public long[] lengths; public long memoryAddress; + public long column_number; - public SparkRowInfo(long[] offsets, long[] lengths, long memoryAddress) { + public SparkRowInfo(long[] offsets, long[] lengths, long memoryAddress, long column_number) { this.offsets = offsets; this.lengths = lengths; this.memoryAddress = memoryAddress; + this.column_number = column_number; } } diff --git a/utils/local-engine/local_engine_jni.cpp b/utils/local-engine/local_engine_jni.cpp index 38921ef462c8..25a5edfe02b8 100644 --- a/utils/local-engine/local_engine_jni.cpp +++ b/utils/local-engine/local_engine_jni.cpp @@ -71,7 +71,7 @@ jint JNI_OnLoad(JavaVM * vm, void * reserved) local_engine_executor_field_id = env->GetFieldID(local_engine_class, "nativeExecutor", "J"); spark_row_info_class = CreateGlobalClassReference(env, "Lio/kyligence/jni/engine/SparkRowInfo;"); - spark_row_info_constructor = env->GetMethodID(spark_row_info_class, "", "([J[JJ)V"); + spark_row_info_constructor = env->GetMethodID(spark_row_info_class, "", "([J[JJJ)V"); return JNI_VERSION_1_8; } @@ -107,7 +107,8 @@ void Java_io_kyligence_jni_engine_LocalEngine_execute(JNIEnv * env, jobject obj) jbyte * plan_address = env->GetByteArrayElements(*plan, nullptr); std::string plan_string; plan_string.assign(reinterpret_cast(plan_address), plan_size); - dbms::SerializedPlanParser parser; + auto context = dbms::Context::createGlobal(dbms::Context::createShared().get()); + dbms::SerializedPlanParser parser(context); auto query_plan = parser.parse(plan_string); dbms::LocalExecutor * executor = new dbms::LocalExecutor(); executor->execute(std::move(query_plan)); @@ -132,10 +133,11 @@ jobject Java_io_kyligence_jni_engine_LocalEngine_next(JNIEnv * env, jobject obj) const auto *lengths_src = reinterpret_cast(spark_row_info->getLengths().data()); env->SetLongArrayRegion(lengths_arr, 0, spark_row_info->getNumRows(), lengths_src); int64_t address = reinterpret_cast(spark_row_info->getBufferAddress()); + int64_t column_number = reinterpret_cast(spark_row_info->getNumCols()); jobject spark_row_info_object = env->NewObject( spark_row_info_class, spark_row_info_constructor, - offsets_arr, lengths_arr, address); + offsets_arr, lengths_arr, address, column_number); return spark_row_info_object; } diff --git a/utils/local-engine/tests/gtest_local_engine.cpp b/utils/local-engine/tests/gtest_local_engine.cpp index 3bac592ac243..9a777d88405d 100644 --- a/utils/local-engine/tests/gtest_local_engine.cpp +++ b/utils/local-engine/tests/gtest_local_engine.cpp @@ -13,7 +13,7 @@ TEST(TestSelect, ReadRel) { dbms::SerializedSchemaBuilder schema_builder; - auto schema = schema_builder + auto* schema = schema_builder .column("sepal_length", "FP64") .column("sepal_width", "FP64") .column("petal_length", "FP64") @@ -21,7 +21,7 @@ TEST(TestSelect, ReadRel) .column("type", "I64").column("type_string", "String") .build(); dbms::SerializedPlanBuilder plan_builder; - auto plan = plan_builder.files( TEST_DATA(/data/iris.parquet), std::move(schema)).build(); + auto plan = plan_builder.read( TEST_DATA(/data/iris.parquet), std::move(schema)).build(); 
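// How the fluent builder chains in these tests compose: SerializedPlanBuilder declares
// relations root-first, and setInputToPrev() wires each newly created relation in as the
// input of the relation declared just before it, so read() always comes last as the leaf.
// A hypothetical filter-over-read plan, sketched with placeholders (schema stands for a
// SerializedSchemaBuilder::build() result; the path and predicate are illustrative only):
//
//     dbms::SerializedPlanBuilder builder;
//     auto plan = builder
//         .registerSupportedFunctions()                       // function id -> name mapping used by the parser
//         .filter(dbms::scalarFunction(dbms::LESS_THAN,
//                 {dbms::selection(3), dbms::literal(5.0)}))  // placeholder predicate on an input column
//         .read("/path/to/some.parquet", schema)              // leaf scan; becomes the filter's input
//         .build();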
std::ofstream output; output.open(TEST_DATA(/../java/src/test/resources/plan.txt), std::fstream::in | std::fstream::out | std::fstream::trunc); @@ -34,7 +34,9 @@ TEST(TestSelect, ReadRel) ASSERT_EQ(plan->relations_size(), 1); std::cout << "start execute" <getNumRows(), block->rows()); } } - +bool inside_main=true; TEST(TestSelect, TestFilter) { - + dbms::SerializedSchemaBuilder schema_builder; + auto* schema = schema_builder + .column("sepal_length", "FP64") + .column("sepal_width", "FP64") + .column("petal_length", "FP64") + .column("petal_width", "FP64") + .column("type", "I64").column("type_string", "String") + .build(); + dbms::SerializedPlanBuilder plan_builder; + auto * mul_exp = dbms::scalarFunction(dbms::MULTIPLY, + {dbms::selection(3), + dbms::literal(0.8)}); + auto * less_exp = dbms::scalarFunction(dbms::LESS_THAN, { + mul_exp, + dbms::literal(5.0) + }); + + auto plan = plan_builder + .registerSupportedFunctions() + .filter(less_exp) + .read(TEST_DATA(/data/iris.parquet), std::move(schema)).build(); +// ASSERT_TRUE(plan->relations(0).has_read()); + ASSERT_EQ(plan->relations_size(), 1); + std::cout << "start execute" <getNumRows(), 99); + local_engine::SparkColumnToCHColumn converter; + auto block = converter.convertCHColumnToSparkRow(*spark_row_info, local_executor.getHeader()); + ASSERT_EQ(spark_row_info->getNumRows(), block->rows()); + } +} + +TEST(TestSelect, TestAgg) +{ + dbms::SerializedSchemaBuilder schema_builder; + auto* schema = schema_builder + .column("sepal_length", "FP64") + .column("sepal_width", "FP64") + .column("petal_length", "FP64") + .column("petal_width", "FP64") + .column("type", "I64").column("type_string", "String") + .build(); + dbms::SerializedPlanBuilder plan_builder; + auto * mul_exp = dbms::scalarFunction(dbms::MULTIPLY, + {dbms::selection(3), + dbms::literal(0.8)}); + auto * less_exp = dbms::scalarFunction(dbms::LESS_THAN, { + mul_exp, + dbms::literal(5.0) + }); + auto * mul_exp2 = dbms::scalarFunction(dbms::MULTIPLY, + {dbms::selection(3), + dbms::literal(1.1)}); + auto * measure = dbms::measureFunction(dbms::SUM, {dbms::selection(3)}); + auto plan = plan_builder + .registerSupportedFunctions() + .aggregate({}, {measure}) + .filter(less_exp) + .read(TEST_DATA(/data/iris.parquet), std::move(schema)).build(); + // ASSERT_TRUE(plan->relations(0).has_read()); + ASSERT_EQ(plan->relations_size(), 1); + std::cout << "start execute" <getNumRows(), 1); + ASSERT_EQ(spark_row_info->getNumCols(), 1); + local_engine::SparkColumnToCHColumn converter; + auto block = converter.convertCHColumnToSparkRow(*spark_row_info, local_executor.getHeader()); + ASSERT_EQ(spark_row_info->getNumRows(), block->rows()); + } } TEST(TestSelect, PerformanceTest) @@ -62,7 +152,7 @@ TEST(TestSelect, PerformanceTest) for (int i=0; i < 10; i++) { dbms::SerializedSchemaBuilder schema_builder; - auto schema = schema_builder + auto *schema = schema_builder .column("l_orderkey", "I64") .column("l_partkey", "I64") .column("l_suppkey", "I64") @@ -81,11 +171,12 @@ TEST(TestSelect, PerformanceTest) // .column("l_comment", "String") .build(); dbms::SerializedPlanBuilder plan_builder; - auto plan = plan_builder.files("/home/kyligence/Documents/intel-gazelle-test.snappy.parquet", std::move(schema)).build(); + auto plan = plan_builder.read("/home/kyligence/Documents/intel-gazelle-test.snappy.parquet", std::move(schema)).build(); ASSERT_TRUE(plan->relations(0).has_read()); ASSERT_EQ(plan->relations_size(), 1); - dbms::SerializedPlanParser parser; + auto context = 
dbms::Context::createGlobal(dbms::Context::createShared().get()); + dbms::SerializedPlanParser parser(context); auto query_plan = parser.parse(std::move(plan)); std::cout << "start execute" << std::endl; dbms::LocalExecutor local_executor; From 6ddfb923cfea3ffc4ec277cf5dd877d75931e6e9 Mon Sep 17 00:00:00 2001 From: "neng.liu" Date: Mon, 27 Dec 2021 08:52:04 +0000 Subject: [PATCH 259/472] fix benchmark error --- .../tests/benchmark_local_engine.cpp | 20 +++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/utils/local-engine/tests/benchmark_local_engine.cpp b/utils/local-engine/tests/benchmark_local_engine.cpp index 08f5773f749f..cad1e67305fb 100644 --- a/utils/local-engine/tests/benchmark_local_engine.cpp +++ b/utils/local-engine/tests/benchmark_local_engine.cpp @@ -32,9 +32,10 @@ static void BM_CHColumnToSparkRow(benchmark::State& state) { // .column("l_comment", "String") .build(); dbms::SerializedPlanBuilder plan_builder; - auto plan = plan_builder.files("/home/kyligence/Documents/test-dataset/intel-gazelle-test-"+std::to_string(state.range(0))+".snappy.parquet", std::move(schema)).build(); - - auto query_plan = dbms::SerializedPlanParser::parse(std::move(plan)); + auto plan = plan_builder.read("/home/kyligence/Documents/test-dataset/intel-gazelle-test-"+std::to_string(state.range(0))+".snappy.parquet", std::move(schema)).build(); + auto context = dbms::Context::createGlobal(dbms::Context::createShared().get()); + dbms::SerializedPlanParser parser(context); + auto query_plan = parser.parse(std::move(plan)); dbms::LocalExecutor local_executor; state.ResumeTiming(); local_executor.execute(std::move(query_plan)); @@ -68,9 +69,10 @@ static void BM_CHColumnToSparkRowWithString(benchmark::State& state) { .column("l_comment", "String") .build(); dbms::SerializedPlanBuilder plan_builder; - auto plan = plan_builder.files("/home/kyligence/Documents/test-dataset/intel-gazelle-test-"+std::to_string(state.range(0))+".snappy.parquet", std::move(schema)).build(); - - auto query_plan = dbms::SerializedPlanParser::parse(std::move(plan)); + auto plan = plan_builder.read("/home/kyligence/Documents/test-dataset/intel-gazelle-test-"+std::to_string(state.range(0))+".snappy.parquet", std::move(schema)).build(); + auto context = dbms::Context::createGlobal(dbms::Context::createShared().get()); + dbms::SerializedPlanParser parser(context); + auto query_plan = parser.parse(std::move(plan)); dbms::LocalExecutor local_executor; state.ResumeTiming(); local_executor.execute(std::move(query_plan)); @@ -104,9 +106,11 @@ static void BM_SparkRowToCHColumn(benchmark::State& state) { // .column("l_comment", "String") .build(); dbms::SerializedPlanBuilder plan_builder; - auto plan = plan_builder.files("/home/kyligence/Documents/test-dataset/intel-gazelle-test-"+std::to_string(state.range(0))+".snappy.parquet", std::move(schema)).build(); + auto plan = plan_builder.read("/home/kyligence/Documents/test-dataset/intel-gazelle-test-"+std::to_string(state.range(0))+".snappy.parquet", std::move(schema)).build(); - auto query_plan = dbms::SerializedPlanParser::parse(std::move(plan)); + auto context = dbms::Context::createGlobal(dbms::Context::createShared().get()); + dbms::SerializedPlanParser parser(context); + auto query_plan = parser.parse(std::move(plan)); dbms::LocalExecutor local_executor; local_executor.execute(std::move(query_plan)); From 643dd25453cf486e643ee01e077b9fb022509a60 Mon Sep 17 00:00:00 2001 From: "neng.liu" Date: Mon, 27 Dec 2021 10:35:41 +0000 Subject: [PATCH 260/472] add agg 
benchmark --- .../Builder/SerializedPlanBuilder.h | 2 +- .../Parser/SerializedPlanParser.cpp | 2 +- .../Parser/SerializedPlanParser.h | 3 + utils/local-engine/local_engine_jni.cpp | 3 +- .../tests/benchmark_local_engine.cpp | 73 +++++++++++++++---- .../local-engine/tests/gtest_local_engine.cpp | 10 +-- 6 files changed, 70 insertions(+), 23 deletions(-) diff --git a/utils/local-engine/Builder/SerializedPlanBuilder.h b/utils/local-engine/Builder/SerializedPlanBuilder.h index 83f211ffeafe..917e142e88df 100644 --- a/utils/local-engine/Builder/SerializedPlanBuilder.h +++ b/utils/local-engine/Builder/SerializedPlanBuilder.h @@ -42,7 +42,7 @@ class SerializedPlanBuilder private: void setInputToPrev(io::substrait::Rel * input); - io::substrait::Rel * prev_rel; + io::substrait::Rel * prev_rel = nullptr; std::unique_ptr plan; }; diff --git a/utils/local-engine/Parser/SerializedPlanParser.cpp b/utils/local-engine/Parser/SerializedPlanParser.cpp index ec18ef99b11f..e8f8aa5f9603 100644 --- a/utils/local-engine/Parser/SerializedPlanParser.cpp +++ b/utils/local-engine/Parser/SerializedPlanParser.cpp @@ -343,7 +343,7 @@ void dbms::SerializedPlanParser::initFunctionEnv() dbms::SerializedPlanParser::SerializedPlanParser(const DB::ContextPtr & context) : context(context) { } -//dbms::ContextPtr dbms::SerializedPlanParser::context = dbms::Context::createGlobal(dbms::Context::createShared().get()); +dbms::ContextPtr dbms::SerializedPlanParser::global_context = dbms::Context::createGlobal(dbms::Context::createShared().get()); DB::Chunk DB::BatchParquetFileSource::generate() { while (!finished_generate) diff --git a/utils/local-engine/Parser/SerializedPlanParser.h b/utils/local-engine/Parser/SerializedPlanParser.h index 915a305e5d73..f19a4927fbb4 100644 --- a/utils/local-engine/Parser/SerializedPlanParser.h +++ b/utils/local-engine/Parser/SerializedPlanParser.h @@ -79,6 +79,8 @@ class SerializedPlanParser DB::BatchParquetFileSourcePtr parseReadRealWithLocalFile(const io::substrait::ReadRel& rel); DB::Block parseNameStruct(const io::substrait::Type_NamedStruct& struct_); DB::DataTypePtr parseType(const io::substrait::Type& type); + + static ContextPtr global_context; private: static DB::NamesAndTypesList blockToNameAndTypeList(const DB::Block & header); DB::QueryPlanPtr parseOp(const io::substrait::Rel &rel); @@ -115,6 +117,7 @@ class SerializedPlanParser int name_no = 0; std::unordered_map function_mapping; ContextPtr context; + // DB::QueryPlanPtr query_plan; }; diff --git a/utils/local-engine/local_engine_jni.cpp b/utils/local-engine/local_engine_jni.cpp index 25a5edfe02b8..00a3f374e238 100644 --- a/utils/local-engine/local_engine_jni.cpp +++ b/utils/local-engine/local_engine_jni.cpp @@ -107,8 +107,7 @@ void Java_io_kyligence_jni_engine_LocalEngine_execute(JNIEnv * env, jobject obj) jbyte * plan_address = env->GetByteArrayElements(*plan, nullptr); std::string plan_string; plan_string.assign(reinterpret_cast(plan_address), plan_size); - auto context = dbms::Context::createGlobal(dbms::Context::createShared().get()); - dbms::SerializedPlanParser parser(context); + dbms::SerializedPlanParser parser(dbms::SerializedPlanParser::global_context); auto query_plan = parser.parse(plan_string); dbms::LocalExecutor * executor = new dbms::LocalExecutor(); executor->execute(std::move(query_plan)); diff --git a/utils/local-engine/tests/benchmark_local_engine.cpp b/utils/local-engine/tests/benchmark_local_engine.cpp index cad1e67305fb..85ea39bd0dde 100644 --- a/utils/local-engine/tests/benchmark_local_engine.cpp +++ 
b/utils/local-engine/tests/benchmark_local_engine.cpp @@ -8,6 +8,10 @@ #include #include +using namespace dbms; + +bool inside_main=true; + // Define another benchmark static void BM_CHColumnToSparkRow(benchmark::State& state) { for (auto _: state) @@ -33,8 +37,45 @@ static void BM_CHColumnToSparkRow(benchmark::State& state) { .build(); dbms::SerializedPlanBuilder plan_builder; auto plan = plan_builder.read("/home/kyligence/Documents/test-dataset/intel-gazelle-test-"+std::to_string(state.range(0))+".snappy.parquet", std::move(schema)).build(); - auto context = dbms::Context::createGlobal(dbms::Context::createShared().get()); - dbms::SerializedPlanParser parser(context); + + dbms::SerializedPlanParser parser(SerializedPlanParser::global_context); + auto query_plan = parser.parse(std::move(plan)); + dbms::LocalExecutor local_executor; + state.ResumeTiming(); + local_executor.execute(std::move(query_plan)); + while (local_executor.hasNext()) + { + local_engine::SparkRowInfoPtr spark_row_info = local_executor.next(); + } + } +} + +static void BM_SimpleAggregate(benchmark::State& state) { + for (auto _: state) + { + state.PauseTiming(); + dbms::SerializedSchemaBuilder schema_builder; + auto schema = schema_builder.column("l_orderkey", "I64") + .column("l_partkey", "I64") + .column("l_suppkey", "I64") + .column("l_linenumber", "I32") + .column("l_quantity", "FP64") + .column("l_extendedprice", "FP64") + .column("l_discount", "FP64") + .column("l_tax", "FP64") + // .column("l_returnflag", "String") + // .column("l_linestatus", "String") + .column("l_shipdate_new", "FP64") + .column("l_commitdate_new", "FP64") + .column("l_receiptdate_new", "FP64") + // .column("l_shipinstruct", "String") + // .column("l_shipmode", "String") + // .column("l_comment", "String") + .build(); + dbms::SerializedPlanBuilder plan_builder; + auto * measure = dbms::measureFunction(dbms::SUM, {dbms::selection(5)}); + auto plan = plan_builder.registerSupportedFunctions().aggregate({}, {measure}).read("/home/kyligence/Documents/test-dataset/intel-gazelle-test-"+std::to_string(state.range(0))+".snappy.parquet", std::move(schema)).build(); + dbms::SerializedPlanParser parser(SerializedPlanParser::global_context); auto query_plan = parser.parse(std::move(plan)); dbms::LocalExecutor local_executor; state.ResumeTiming(); @@ -70,8 +111,7 @@ static void BM_CHColumnToSparkRowWithString(benchmark::State& state) { .build(); dbms::SerializedPlanBuilder plan_builder; auto plan = plan_builder.read("/home/kyligence/Documents/test-dataset/intel-gazelle-test-"+std::to_string(state.range(0))+".snappy.parquet", std::move(schema)).build(); - auto context = dbms::Context::createGlobal(dbms::Context::createShared().get()); - dbms::SerializedPlanParser parser(context); + dbms::SerializedPlanParser parser(SerializedPlanParser::global_context); auto query_plan = parser.parse(std::move(plan)); dbms::LocalExecutor local_executor; state.ResumeTiming(); @@ -108,8 +148,7 @@ static void BM_SparkRowToCHColumn(benchmark::State& state) { dbms::SerializedPlanBuilder plan_builder; auto plan = plan_builder.read("/home/kyligence/Documents/test-dataset/intel-gazelle-test-"+std::to_string(state.range(0))+".snappy.parquet", std::move(schema)).build(); - auto context = dbms::Context::createGlobal(dbms::Context::createShared().get()); - dbms::SerializedPlanParser parser(context); + dbms::SerializedPlanParser parser(SerializedPlanParser::global_context); auto query_plan = parser.parse(std::move(plan)); dbms::LocalExecutor local_executor; @@ -150,9 +189,9 @@ static 
void BM_SparkRowToCHColumnWithString(benchmark::State& state) { .column("l_comment", "String") .build(); dbms::SerializedPlanBuilder plan_builder; - auto plan = plan_builder.files("/home/kyligence/Documents/test-dataset/intel-gazelle-test-"+std::to_string(state.range(0))+".snappy.parquet", std::move(schema)).build(); - - auto query_plan = dbms::SerializedPlanParser::parse(std::move(plan)); + auto plan = plan_builder.read("/home/kyligence/Documents/test-dataset/intel-gazelle-test-"+std::to_string(state.range(0))+".snappy.parquet", std::move(schema)).build(); + dbms::SerializedPlanParser parser(SerializedPlanParser::global_context); + auto query_plan = parser.parse(std::move(plan)); dbms::LocalExecutor local_executor; local_executor.execute(std::move(query_plan)); @@ -169,7 +208,15 @@ static void BM_SparkRowToCHColumnWithString(benchmark::State& state) { } BENCHMARK(BM_CHColumnToSparkRow)->Arg(1)->Arg(3)->Arg(30)->Arg(90)->Arg(150)->Unit(benchmark::kMillisecond)->Iterations(10); -BENCHMARK(BM_CHColumnToSparkRowWithString)->Arg(1)->Arg(3)->Arg(30)->Arg(90)->Arg(150)->Unit(benchmark::kMillisecond)->Iterations(10); -BENCHMARK(BM_SparkRowToCHColumn)->Arg(1)->Arg(3)->Arg(30)->Arg(90)->Arg(150)->Unit(benchmark::kMillisecond)->Iterations(10); -BENCHMARK(BM_SparkRowToCHColumnWithString)->Arg(1)->Arg(3)->Arg(30)->Arg(90)->Arg(150)->Unit(benchmark::kMillisecond)->Iterations(10); -BENCHMARK_MAIN(); +//BENCHMARK(BM_SimpleAggregate)->Arg(1)->Arg(3)->Arg(30)->Arg(90)->Arg(150)->Unit(benchmark::kMillisecond)->Iterations(10); +//BENCHMARK(BM_CHColumnToSparkRowWithString)->Arg(1)->Arg(3)->Arg(30)->Arg(90)->Arg(150)->Unit(benchmark::kMillisecond)->Iterations(10); +//BENCHMARK(BM_SparkRowToCHColumn)->Arg(1)->Arg(3)->Arg(30)->Arg(90)->Arg(150)->Unit(benchmark::kMillisecond)->Iterations(10); +//BENCHMARK(BM_SparkRowToCHColumnWithString)->Arg(1)->Arg(3)->Arg(30)->Arg(90)->Arg(150)->Unit(benchmark::kMillisecond)->Iterations(10); +int main(int argc, char** argv) { + dbms::SerializedPlanParser::initFunctionEnv(); + ::benchmark::Initialize(&argc, argv); + if (::benchmark::ReportUnrecognizedArguments(argc, argv)) return 1; + ::benchmark::RunSpecifiedBenchmarks(); + ::benchmark::Shutdown(); + return 0; +} diff --git a/utils/local-engine/tests/gtest_local_engine.cpp b/utils/local-engine/tests/gtest_local_engine.cpp index 9a777d88405d..6d13cfb5f180 100644 --- a/utils/local-engine/tests/gtest_local_engine.cpp +++ b/utils/local-engine/tests/gtest_local_engine.cpp @@ -9,6 +9,7 @@ #include #include +using namespace dbms; TEST(TestSelect, ReadRel) { @@ -79,8 +80,7 @@ TEST(TestSelect, TestFilter) std::cout << "start execute" <relations(0).has_read()); ASSERT_EQ(plan->relations_size(), 1); - auto context = dbms::Context::createGlobal(dbms::Context::createShared().get()); - dbms::SerializedPlanParser parser(context); + dbms::SerializedPlanParser parser(SerializedPlanParser::global_context); auto query_plan = parser.parse(std::move(plan)); std::cout << "start execute" << std::endl; dbms::LocalExecutor local_executor; From 1cbe87e626752775a48a1d74980287308d21d7b3 Mon Sep 17 00:00:00 2001 From: "neng.liu" Date: Tue, 28 Dec 2021 03:14:40 +0000 Subject: [PATCH 261/472] support Q6 benchmark --- .../Builder/SerializedPlanBuilder.cpp | 11 +++ .../Builder/SerializedPlanBuilder.h | 1 + .../Parser/SerializedPlanParser.cpp | 23 +++++-- .../io/kyligence/jni/engine/SparkRowInfo.java | 6 +- .../tests/benchmark_local_engine.cpp | 69 ++++++++++++++++++- 5 files changed, 102 insertions(+), 8 deletions(-) diff --git 
a/utils/local-engine/Builder/SerializedPlanBuilder.cpp b/utils/local-engine/Builder/SerializedPlanBuilder.cpp index b350282aa13b..631ce8d07b3b 100644 --- a/utils/local-engine/Builder/SerializedPlanBuilder.cpp +++ b/utils/local-engine/Builder/SerializedPlanBuilder.cpp @@ -147,6 +147,17 @@ SerializedPlanBuilder & SerializedPlanBuilder::aggregate(std::vector ke this->prev_rel = rel; return *this; } +SerializedPlanBuilder & SerializedPlanBuilder::project(std::vector projections) +{ + io::substrait::Rel * project = new io::substrait::Rel(); + for (auto * expr : projections) + { + project->mutable_project()->mutable_expressions()->AddAllocated(expr); + } + setInputToPrev(project); + this->prev_rel = project; + return *this; +} io::substrait::Expression * selection(int32_t field_id) diff --git a/utils/local-engine/Builder/SerializedPlanBuilder.h b/utils/local-engine/Builder/SerializedPlanBuilder.h index 917e142e88df..ff2153384530 100644 --- a/utils/local-engine/Builder/SerializedPlanBuilder.h +++ b/utils/local-engine/Builder/SerializedPlanBuilder.h @@ -36,6 +36,7 @@ class SerializedPlanBuilder } SerializedPlanBuilder& registerFunction(int id, std::string name); SerializedPlanBuilder& filter(io::substrait::Expression* condition); + SerializedPlanBuilder& project(std::vector projections); SerializedPlanBuilder& aggregate(std::vector keys, std::vector aggregates); SerializedPlanBuilder& read(std::string path, SchemaPtr schema); std::unique_ptr build(); diff --git a/utils/local-engine/Parser/SerializedPlanParser.cpp b/utils/local-engine/Parser/SerializedPlanParser.cpp index e8f8aa5f9603..b4571d2b83e4 100644 --- a/utils/local-engine/Parser/SerializedPlanParser.cpp +++ b/utils/local-engine/Parser/SerializedPlanParser.cpp @@ -80,6 +80,10 @@ DB::DataTypePtr dbms::SerializedPlanParser::parseType(const io::substrait::Type { return factory.get("Float64"); } + else if (type.has_date()) + { + return factory.get("Date"); + } else { throw std::runtime_error("doesn't support type " + type.DebugString()); @@ -133,13 +137,24 @@ DB::QueryPlanPtr dbms::SerializedPlanParser::parseOp(const io::substrait::Rel & const auto & project = rel.project(); DB::QueryPlanPtr query_plan = parseOp(project.input()); const auto & expressions = project.expressions(); + auto actions_dag = std::make_shared(blockToNameAndTypeList(query_plan->getCurrentDataStream().header)); + DB::NamesWithAliases required_columns; for (const auto & expr : expressions) { - std::string result_name; - auto expression_step = std::make_unique( - query_plan->getCurrentDataStream(), parseFunction(query_plan->getCurrentDataStream(), expr, result_name)); - query_plan->addStep(std::move(expression_step)); + if (expr.has_selection()) + { + const auto * field = actions_dag->getInputs()[expr.selection().direct_reference().struct_field().field() - 1]; + required_columns.emplace_back(DB::NameWithAlias (field->result_name, field->result_name)); + } + else + { + throw std::runtime_error("unsupported projection type"); + } } + actions_dag->project(required_columns); + auto expression_step = std::make_unique( + query_plan->getCurrentDataStream(), actions_dag); + query_plan->addStep(std::move(expression_step)); return query_plan; } case substrait::Rel::RelTypeCase::kAggregate: { diff --git a/utils/local-engine/java/src/main/java/io/kyligence/jni/engine/SparkRowInfo.java b/utils/local-engine/java/src/main/java/io/kyligence/jni/engine/SparkRowInfo.java index 1ed2e4e72ef6..d6e2ccf7a9aa 100644 --- 
a/utils/local-engine/java/src/main/java/io/kyligence/jni/engine/SparkRowInfo.java +++ b/utils/local-engine/java/src/main/java/io/kyligence/jni/engine/SparkRowInfo.java @@ -4,12 +4,12 @@ public class SparkRowInfo { public long[] offsets; public long[] lengths; public long memoryAddress; - public long column_number; + public long fieldsNum; - public SparkRowInfo(long[] offsets, long[] lengths, long memoryAddress, long column_number) { + public SparkRowInfo(long[] offsets, long[] lengths, long memoryAddress, long fieldsNum) { this.offsets = offsets; this.lengths = lengths; this.memoryAddress = memoryAddress; - this.column_number = column_number; + this.fieldsNum = fieldsNum; } } diff --git a/utils/local-engine/tests/benchmark_local_engine.cpp b/utils/local-engine/tests/benchmark_local_engine.cpp index 85ea39bd0dde..01d589781a8d 100644 --- a/utils/local-engine/tests/benchmark_local_engine.cpp +++ b/utils/local-engine/tests/benchmark_local_engine.cpp @@ -87,6 +87,72 @@ static void BM_SimpleAggregate(benchmark::State& state) { } } +static void BM_TPCH_Q6(benchmark::State& state) { + for (auto _: state) + { + state.PauseTiming(); + dbms::SerializedSchemaBuilder schema_builder; + auto schema = schema_builder +// .column("l_orderkey", "I64") +// .column("l_partkey", "I64") +// .column("l_suppkey", "I64") +// .column("l_linenumber", "I32") + .column("l_quantity", "FP64") + .column("l_extendedprice", "FP64") + .column("l_discount", "FP64") +// .column("l_tax", "FP64") + // .column("l_returnflag", "String") + // .column("l_linestatus", "String") + .column("l_shipdate_new", "FP64") +// .column("l_commitdate_new", "FP64") +// .column("l_receiptdate_new", "FP64") + // .column("l_shipinstruct", "String") + // .column("l_shipmode", "String") + // .column("l_comment", "String") + .build(); + dbms::SerializedPlanBuilder plan_builder; + auto *agg_mul = dbms::scalarFunction(dbms::MULTIPLY, {dbms::selection(2), dbms::selection(3)}); + auto * measure1 = dbms::measureFunction(dbms::SUM, {agg_mul}); + auto * measure2 = dbms::measureFunction(dbms::SUM, {dbms::selection(2)}); + auto * measure3 = dbms::measureFunction(dbms::SUM, {dbms::selection(1)}); + auto plan = plan_builder.registerSupportedFunctions() + .aggregate({}, {measure1, measure2, measure3}) + .project({dbms::selection(1), dbms::selection(2), dbms::selection(3)}) + .filter(dbms::scalarFunction(dbms::AND, { + dbms::scalarFunction(AND, { + dbms::scalarFunction(AND, { + dbms::scalarFunction(AND, { + dbms::scalarFunction(AND, { + dbms::scalarFunction(AND, { + dbms::scalarFunction(AND, { + scalarFunction(IS_NOT_NULL, {selection(4)}), + scalarFunction(IS_NOT_NULL, {selection(3)}) + }), + scalarFunction(IS_NOT_NULL, {selection(1)}) + }), + dbms::scalarFunction(GREATER_THAN_OR_EQUAL, {selection(4), literal(8766.0)}) + }), + scalarFunction(LESS_THAN, {selection(4), literal(9131.0)}) + }), + scalarFunction(GREATER_THAN_OR_EQUAL, {selection(3), literal(0.05)}) + }), + scalarFunction(LESS_THAN_OR_EQUAL, {selection(3), literal(0.07)}) + }), + scalarFunction(LESS_THAN, {selection(1), literal(24.0)}) + })) + .read("/home/kyligence/Documents/test-dataset/intel-gazelle-test-"+std::to_string(state.range(0))+".snappy.parquet", std::move(schema)).build(); + dbms::SerializedPlanParser parser(SerializedPlanParser::global_context); + auto query_plan = parser.parse(std::move(plan)); + dbms::LocalExecutor local_executor; + state.ResumeTiming(); + local_executor.execute(std::move(query_plan)); + while (local_executor.hasNext()) + { + local_engine::SparkRowInfoPtr spark_row_info = 
local_executor.next(); + } + } +} + static void BM_CHColumnToSparkRowWithString(benchmark::State& state) { for (auto _: state) { @@ -207,8 +273,9 @@ static void BM_SparkRowToCHColumnWithString(benchmark::State& state) { } } -BENCHMARK(BM_CHColumnToSparkRow)->Arg(1)->Arg(3)->Arg(30)->Arg(90)->Arg(150)->Unit(benchmark::kMillisecond)->Iterations(10); +//BENCHMARK(BM_CHColumnToSparkRow)->Arg(1)->Arg(3)->Arg(30)->Arg(90)->Arg(150)->Unit(benchmark::kMillisecond)->Iterations(10); //BENCHMARK(BM_SimpleAggregate)->Arg(1)->Arg(3)->Arg(30)->Arg(90)->Arg(150)->Unit(benchmark::kMillisecond)->Iterations(10); +BENCHMARK(BM_TPCH_Q6)->Arg(1)->Arg(3)->Arg(30)->Arg(90)->Arg(150)->Unit(benchmark::kMillisecond)->Iterations(10); //BENCHMARK(BM_CHColumnToSparkRowWithString)->Arg(1)->Arg(3)->Arg(30)->Arg(90)->Arg(150)->Unit(benchmark::kMillisecond)->Iterations(10); //BENCHMARK(BM_SparkRowToCHColumn)->Arg(1)->Arg(3)->Arg(30)->Arg(90)->Arg(150)->Unit(benchmark::kMillisecond)->Iterations(10); //BENCHMARK(BM_SparkRowToCHColumnWithString)->Arg(1)->Arg(3)->Arg(30)->Arg(90)->Arg(150)->Unit(benchmark::kMillisecond)->Iterations(10); From bdca24736209df352a0ca93b0a541ca294269d7f Mon Sep 17 00:00:00 2001 From: "neng.liu" Date: Tue, 28 Dec 2021 03:49:37 +0000 Subject: [PATCH 262/472] fix ut errors --- .../tests/benchmark_local_engine.cpp | 2 +- .../local-engine/tests/gtest_local_engine.cpp | 62 +------------------ 2 files changed, 3 insertions(+), 61 deletions(-) diff --git a/utils/local-engine/tests/benchmark_local_engine.cpp b/utils/local-engine/tests/benchmark_local_engine.cpp index 01d589781a8d..d658528b1a2a 100644 --- a/utils/local-engine/tests/benchmark_local_engine.cpp +++ b/utils/local-engine/tests/benchmark_local_engine.cpp @@ -274,7 +274,7 @@ static void BM_SparkRowToCHColumnWithString(benchmark::State& state) { } //BENCHMARK(BM_CHColumnToSparkRow)->Arg(1)->Arg(3)->Arg(30)->Arg(90)->Arg(150)->Unit(benchmark::kMillisecond)->Iterations(10); -//BENCHMARK(BM_SimpleAggregate)->Arg(1)->Arg(3)->Arg(30)->Arg(90)->Arg(150)->Unit(benchmark::kMillisecond)->Iterations(10); +BENCHMARK(BM_SimpleAggregate)->Arg(1)->Arg(3)->Arg(30)->Arg(90)->Arg(150)->Unit(benchmark::kMillisecond)->Iterations(10); BENCHMARK(BM_TPCH_Q6)->Arg(1)->Arg(3)->Arg(30)->Arg(90)->Arg(150)->Unit(benchmark::kMillisecond)->Iterations(10); //BENCHMARK(BM_CHColumnToSparkRowWithString)->Arg(1)->Arg(3)->Arg(30)->Arg(90)->Arg(150)->Unit(benchmark::kMillisecond)->Iterations(10); //BENCHMARK(BM_SparkRowToCHColumn)->Arg(1)->Arg(3)->Arg(30)->Arg(90)->Arg(150)->Unit(benchmark::kMillisecond)->Iterations(10); diff --git a/utils/local-engine/tests/gtest_local_engine.cpp b/utils/local-engine/tests/gtest_local_engine.cpp index 6d13cfb5f180..3a29165f8e3f 100644 --- a/utils/local-engine/tests/gtest_local_engine.cpp +++ b/utils/local-engine/tests/gtest_local_engine.cpp @@ -35,9 +35,7 @@ TEST(TestSelect, ReadRel) ASSERT_EQ(plan->relations_size(), 1); std::cout << "start execute" <relations_size(), 1); std::cout << "start execute" <relations_size(), 1); std::cout << "start execute" <relations(0).has_read()); - ASSERT_EQ(plan->relations_size(), 1); - dbms::SerializedPlanParser parser(SerializedPlanParser::global_context); - auto query_plan = parser.parse(std::move(plan)); - std::cout << "start execute" << std::endl; - dbms::LocalExecutor local_executor; - - local_executor.execute(std::move(query_plan)); - ASSERT_TRUE(local_executor.hasNext()); - while (local_executor.hasNext()) - { - local_engine::SparkRowInfoPtr spark_row_info = local_executor.next(); - 
ASSERT_GT(spark_row_info->getNumRows(), 0); - std::cout << "fetch batch" << spark_row_info->getNumRows() << " rows" - << "" - << "" << std::endl; - } - } - auto duration = stopwatch.elapsedMilliseconds(); - std::cout <<"duration:" << duration << std::endl; -} - TEST(TestSelect, MergeTreeWriteTest) { // DB::StorageID id("default", "test"); @@ -204,15 +149,12 @@ TEST(TestSelect, MergeTreeWriteTest) // auto merging_params = DB::MergeTreeData::MergingParams(); // auto storage_setting = std::make_unique(); - Poco::URI uri("hdfs://clusterB/test.txt"); - std::cout << uri.toString() << std::endl; - ASSERT_GT("clusterB", uri.getHost()); - // DB::MergeTreeData(id, relative_path, storage_in_memory_metadata, global, "", merging_params, std::move(storage_setting), false, false, nullptr); } int main(int argc, char **argv) { + dbms::SerializedPlanParser::initFunctionEnv(); ::testing::InitGoogleTest(&argc,argv); return RUN_ALL_TESTS(); } From 387e53bc4e760c54206e7775436666b0fc174e3a Mon Sep 17 00:00:00 2001 From: "neng.liu" Date: Tue, 28 Dec 2021 06:06:14 +0000 Subject: [PATCH 263/472] support type date --- .../Builder/SerializedPlanBuilder.cpp | 14 ++++++++ .../Builder/SerializedPlanBuilder.h | 7 ++-- .../Parser/SerializedPlanParser.cpp | 32 +++++++++++++------ .../Parser/SerializedPlanParser.h | 3 +- .../tests/benchmark_local_engine.cpp | 8 ++--- 5 files changed, 48 insertions(+), 16 deletions(-) diff --git a/utils/local-engine/Builder/SerializedPlanBuilder.cpp b/utils/local-engine/Builder/SerializedPlanBuilder.cpp index 631ce8d07b3b..9d0a773f3f0e 100644 --- a/utils/local-engine/Builder/SerializedPlanBuilder.cpp +++ b/utils/local-engine/Builder/SerializedPlanBuilder.cpp @@ -56,6 +56,12 @@ SchemaPtr SerializedSchemaBuilder::build() t->mutable_fp64()->set_nullability( this->nullability_map[name] ? io::substrait::Type_Nullability_NULLABLE : io::substrait::Type_Nullability_REQUIRED); } + else if (type == "Date") + { + auto * t = type_struct->mutable_types()->Add(); + t->mutable_date()->set_nullability( + this->nullability_map[name] ? 
io::substrait::Type_Nullability_NULLABLE : io::substrait::Type_Nullability_REQUIRED); + } else { throw "doesn't support type " + type; @@ -206,4 +212,12 @@ io::substrait::Expression * literal(std::string value) literal->set_string(value); return rel; } + +io::substrait::Expression* literalDate(int32_t value) +{ + io::substrait::Expression * rel = new io::substrait::Expression(); + auto * literal = rel->mutable_literal(); + literal->set_date(value); + return rel; +} } diff --git a/utils/local-engine/Builder/SerializedPlanBuilder.h b/utils/local-engine/Builder/SerializedPlanBuilder.h index ff2153384530..0164ac37f752 100644 --- a/utils/local-engine/Builder/SerializedPlanBuilder.h +++ b/utils/local-engine/Builder/SerializedPlanBuilder.h @@ -15,7 +15,8 @@ enum Function LESS_THAN_OR_EQUAL, LESS_THAN, MULTIPLY, - SUM + SUM, + TO_DATE }; using SchemaPtr = io::substrait::Type_NamedStruct *; @@ -31,7 +32,8 @@ class SerializedPlanBuilder .registerFunction(LESS_THAN_OR_EQUAL, "LESS_THAN_OR_EQUAL") .registerFunction(LESS_THAN, "LESS_THAN") .registerFunction(MULTIPLY, "MULTIPLY") - .registerFunction(SUM, "SUM"); + .registerFunction(SUM, "SUM") + .registerFunction(TO_DATE, "TO_DATE"); return *this; } SerializedPlanBuilder& registerFunction(int id, std::string name); @@ -76,6 +78,7 @@ io::substrait::AggregateRel_Measure * measureFunction(int32_t id, ExpressionList io::substrait::Expression* literal(double_t value); io::substrait::Expression* literal(int32_t value); io::substrait::Expression* literal(std::string value); +io::substrait::Expression* literalDate(int32_t value); io::substrait::Expression * selection(int32_t field_id); diff --git a/utils/local-engine/Parser/SerializedPlanParser.cpp b/utils/local-engine/Parser/SerializedPlanParser.cpp index b4571d2b83e4..7a931c828d2b 100644 --- a/utils/local-engine/Parser/SerializedPlanParser.cpp +++ b/utils/local-engine/Parser/SerializedPlanParser.cpp @@ -1,12 +1,17 @@ #include "SerializedPlanParser.h" +#include +#include #include #include #include +#include #include #include #include +#include #include #include +#include #include #include #include @@ -16,10 +21,7 @@ #include #include #include -#include -#include -#include -#include +#include #include namespace substrait = io::substrait; @@ -307,30 +309,42 @@ const DB::ActionsDAG::Node * dbms::SerializedPlanParser::parseArgument(DB::Actio { switch (rel.rex_type_case()) { - case io::substrait::Expression::RexTypeCase::kLiteral: { + case io::substrait::Expression::RexTypeCase::kLiteral: + { const auto & literal = rel.literal(); switch (literal.literal_type_case()) { - case io::substrait::Expression_Literal::kFp64: { + case io::substrait::Expression_Literal::kFp64: + { auto type = std::make_shared(); return &action_dag->addColumn(ColumnWithTypeAndName( type->createColumnConst(1, literal.fp64()), type, getUniqueName(std::to_string(literal.fp64())))); } - case io::substrait::Expression_Literal::kString: { + case io::substrait::Expression_Literal::kString: + { auto type = std::make_shared(); return &action_dag->addColumn( ColumnWithTypeAndName(type->createColumnConst(1, literal.string()), type, getUniqueName(literal.string()))); } - case io::substrait::Expression_Literal::kI32: { + case io::substrait::Expression_Literal::kI32: + { auto type = std::make_shared(); return &action_dag->addColumn(ColumnWithTypeAndName( type->createColumnConst(1, literal.i32()), type, getUniqueName(std::to_string(literal.i32())))); } + case io::substrait::Expression_Literal::kDate: + { + + auto type = std::make_shared(); + return 
&action_dag->addColumn(ColumnWithTypeAndName( + type->createColumnConst(1, literal.date()), type, getUniqueName(std::to_string(literal.date())))); + } default: throw std::runtime_error("unsupported constant type " + std::to_string(literal.literal_type_case())); } } - case io::substrait::Expression::RexTypeCase::kSelection: { + case io::substrait::Expression::RexTypeCase::kSelection: + { if (!rel.selection().has_direct_reference() || !rel.selection().direct_reference().has_struct_field()) { throw std::runtime_error("Can only have direct struct references in selections"); diff --git a/utils/local-engine/Parser/SerializedPlanParser.h b/utils/local-engine/Parser/SerializedPlanParser.h index f19a4927fbb4..620730dc3a48 100644 --- a/utils/local-engine/Parser/SerializedPlanParser.h +++ b/utils/local-engine/Parser/SerializedPlanParser.h @@ -65,7 +65,8 @@ static const std::map SCALAR_FUNCTIONS = { {"LESS_THAN_OR_EQUAL", "lessOrEquals"}, {"LESS_THAN", "less"}, {"MULTIPLY", "multiply"}, - {"SUM", "sum"} + {"SUM", "sum"}, + {"TO_DATE", "toDate"} }; class SerializedPlanParser diff --git a/utils/local-engine/tests/benchmark_local_engine.cpp b/utils/local-engine/tests/benchmark_local_engine.cpp index d658528b1a2a..7eea9b92c200 100644 --- a/utils/local-engine/tests/benchmark_local_engine.cpp +++ b/utils/local-engine/tests/benchmark_local_engine.cpp @@ -103,7 +103,7 @@ static void BM_TPCH_Q6(benchmark::State& state) { // .column("l_tax", "FP64") // .column("l_returnflag", "String") // .column("l_linestatus", "String") - .column("l_shipdate_new", "FP64") + .column("l_shipdate_new", "Date") // .column("l_commitdate_new", "FP64") // .column("l_receiptdate_new", "FP64") // .column("l_shipinstruct", "String") @@ -130,9 +130,9 @@ static void BM_TPCH_Q6(benchmark::State& state) { }), scalarFunction(IS_NOT_NULL, {selection(1)}) }), - dbms::scalarFunction(GREATER_THAN_OR_EQUAL, {selection(4), literal(8766.0)}) + dbms::scalarFunction(GREATER_THAN_OR_EQUAL, {selection(4), literalDate(8766)}) }), - scalarFunction(LESS_THAN, {selection(4), literal(9131.0)}) + scalarFunction(LESS_THAN, {selection(4), literalDate(9131)}) }), scalarFunction(GREATER_THAN_OR_EQUAL, {selection(3), literal(0.05)}) }), @@ -274,7 +274,7 @@ static void BM_SparkRowToCHColumnWithString(benchmark::State& state) { } //BENCHMARK(BM_CHColumnToSparkRow)->Arg(1)->Arg(3)->Arg(30)->Arg(90)->Arg(150)->Unit(benchmark::kMillisecond)->Iterations(10); -BENCHMARK(BM_SimpleAggregate)->Arg(1)->Arg(3)->Arg(30)->Arg(90)->Arg(150)->Unit(benchmark::kMillisecond)->Iterations(10); +//BENCHMARK(BM_SimpleAggregate)->Arg(1)->Arg(3)->Arg(30)->Arg(90)->Arg(150)->Unit(benchmark::kMillisecond)->Iterations(10); BENCHMARK(BM_TPCH_Q6)->Arg(1)->Arg(3)->Arg(30)->Arg(90)->Arg(150)->Unit(benchmark::kMillisecond)->Iterations(10); //BENCHMARK(BM_CHColumnToSparkRowWithString)->Arg(1)->Arg(3)->Arg(30)->Arg(90)->Arg(150)->Unit(benchmark::kMillisecond)->Iterations(10); //BENCHMARK(BM_SparkRowToCHColumn)->Arg(1)->Arg(3)->Arg(30)->Arg(90)->Arg(150)->Unit(benchmark::kMillisecond)->Iterations(10); From 45ebf02b827cdca35151be2928a4be8b05c833ec Mon Sep 17 00:00:00 2001 From: Neng Liu Date: Wed, 29 Dec 2021 10:06:43 +0800 Subject: [PATCH 264/472] upgrade substrait --- .../local-engine/Substrait/capabilities.proto | 28 + utils/local-engine/Substrait/expression.proto | 484 ++++++++++++------ utils/local-engine/Substrait/extensions.proto | 127 ++--- .../Substrait/extensions/extensions.proto | 81 +++ utils/local-engine/Substrait/function.proto | 214 ++++---- .../Substrait/parameterized_types.proto | 
211 ++++---- utils/local-engine/Substrait/plan.proto | 40 +- utils/local-engine/Substrait/relations.proto | 303 ++++++----- utils/local-engine/Substrait/selection.proto | 114 ----- utils/local-engine/Substrait/type.proto | 359 ++++++------- .../Substrait/type_expressions.proto | 260 +++++----- 11 files changed, 1237 insertions(+), 984 deletions(-) create mode 100644 utils/local-engine/Substrait/capabilities.proto create mode 100644 utils/local-engine/Substrait/extensions/extensions.proto delete mode 100644 utils/local-engine/Substrait/selection.proto diff --git a/utils/local-engine/Substrait/capabilities.proto b/utils/local-engine/Substrait/capabilities.proto new file mode 100644 index 000000000000..711f0ccbba7d --- /dev/null +++ b/utils/local-engine/Substrait/capabilities.proto @@ -0,0 +1,28 @@ +syntax = "proto3"; + +package substrait; + +option java_multiple_files = true; +option java_package = "io.substrait.proto"; +option csharp_namespace = "Substrait.Protobuf"; + +// Defines a set of Capabilities that a system (producer or consumer) supports. +message Capabilities { + + // List of Substrait versions this system supports + repeated string substrait_versions = 1; + + // list of com.google.Any message types this system supports for advanced + // extensions. + repeated string advanced_extension_type_urls = 2; + + // list of simple extensions this system supports. + repeated SimpleExtension simple_extensions = 3; + + message SimpleExtension { + string uri = 1; + repeated string function_keys = 2; + repeated string type_keys = 3; + repeated string type_variation_keys = 4; + } +} diff --git a/utils/local-engine/Substrait/expression.proto b/utils/local-engine/Substrait/expression.proto index 2744991a9b82..02693a3addb0 100644 --- a/utils/local-engine/Substrait/expression.proto +++ b/utils/local-engine/Substrait/expression.proto @@ -1,226 +1,372 @@ syntax = "proto3"; -package io.substrait; +package substrait; -import "type.proto"; -import "selection.proto"; -import "extensions.proto"; +import "substrait/type.proto"; option java_multiple_files = true; +option java_package = "io.substrait.proto"; +option csharp_namespace = "Substrait.Protobuf"; message Expression { - oneof rex_type { - Literal literal = 1; - FieldReference selection = 2; - ScalarFunction scalar_function = 3; - WindowFunction window_function = 5; - IfThen if_then = 6; - SwitchExpression switch_expression = 7; - SingularOrList singular_or_list = 8; - MultiOrList multi_or_list = 9; - Enum enum = 10; - } - - message Enum { - oneof enum_kind { - string specified = 1; - Empty unspecified = 2; - } + oneof rex_type { + Literal literal = 1; + FieldReference selection = 2; + ScalarFunction scalar_function = 3; + WindowFunction window_function = 5; + IfThen if_then = 6; + SwitchExpression switch_expression = 7; + SingularOrList singular_or_list = 8; + MultiOrList multi_or_list = 9; + Enum enum = 10; + Cast cast = 11; + } + + message Enum { + oneof enum_kind { + string specified = 1; + Empty unspecified = 2; + } - message Empty {} - } - - message Literal { - oneof literal_type { - bool boolean = 1; - int32 i8 = 2; - int32 i16 = 3; - int32 i32 = 5; - int64 i64 = 7; - float fp32 = 10; - double fp64 = 11; - string string = 12; - bytes binary = 13; - fixed64 timestamp = 14; - fixed32 date = 16; - uint64 time = 17; - IntervalYearToMonth interval_year_to_month = 19; - IntervalDayToSecond interval_day_to_second = 20; - string fixed_char = 21; - string var_char = 22; - bytes fixed_binary = 23; - bytes decimal = 24; - Struct struct = 25; - Map map = 
26; - fixed64 timestamp_tz = 27; - bytes uuid = 28; - Type null = 29; // a typed null literal - List list = 30; - } + message Empty {} + } + + message Literal { + oneof literal_type { + bool boolean = 1; + int32 i8 = 2; + int32 i16 = 3; + int32 i32 = 5; + int64 i64 = 7; + float fp32 = 10; + double fp64 = 11; + string string = 12; + bytes binary = 13; + // Timestamp in units of microseconds since the UNIX epoch. + int64 timestamp = 14; + // Date in units of days since the UNIX epoch. + int32 date = 16; + // Time in units of microseconds past midnight + int64 time = 17; + IntervalYearToMonth interval_year_to_month = 19; + IntervalDayToSecond interval_day_to_second = 20; + string fixed_char = 21; + VarChar var_char = 22; + bytes fixed_binary = 23; + Decimal decimal = 24; + Struct struct = 25; + Map map = 26; + // Timestamp in units of microseconds since the UNIX epoch. + int64 timestamp_tz = 27; + bytes uuid = 28; + Type null = 29; // a typed null literal + List list = 30; + Type.List empty_list = 31; + Type.Map empty_map = 32; + } - message Map { - message KeyValue { - Literal key = 1; - Literal value = 2; - } + // whether the literal type should be treated as a nullable type. Applies to + // all members of union other than the Typed null (which should directly + // declare nullability). + bool nullable = 50; - repeated KeyValue key_values = 1; - } + message VarChar { + string value = 1; + uint32 length = 2; + } - message IntervalYearToMonth { - int32 years = 1; - int32 months = 2; - } + message Decimal { + // little-endian twos-complement integer representation of complete value + // (ignoring precision) Always 16 bytes in length + bytes value = 1; + // The maximum number of digits allowed in the value. + // the maximum precision is 38. + int32 precision = 2; + // declared scale of decimal literal + int32 scale = 3; + } - message IntervalDayToSecond { - int32 days = 1; - int32 seconds = 2; - } + message Map { + message KeyValue { + Literal key = 1; + Literal value = 2; + } - message Struct { - // A possibly heterogeneously typed list of literals - repeated Literal fields = 1; - } + repeated KeyValue key_values = 1; + } - message List { - // A homogeneously typed list of literals - repeated Literal values = 1; - } + message IntervalYearToMonth { + int32 years = 1; + int32 months = 2; } - message ScalarFunction { - Extensions.FunctionId id = 1; - repeated Expression args = 2; - Type output_type = 3; + message IntervalDayToSecond { + int32 days = 1; + int32 seconds = 2; } - message AggregateFunction { - Extensions.FunctionId id = 1; - repeated Expression args = 2; - repeated SortField sorts = 3; - AggregationPhase phase = 4; - Type output_type = 5; + message Struct { + // A possibly heterogeneously typed list of literals + repeated Literal fields = 1; } - enum AggregationPhase { - UNKNOWN = 0; - INITIAL_TO_INTERMEDIATE = 1; - INTERMEDIATE_TO_INTERMEDIATE = 2; - INITIAL_TO_RESULT = 3; - INTERMEDIATE_TO_RESULT = 4; + message List { + // A homogeneously typed list of literals + repeated Literal values = 1; + } + } + + message ScalarFunction { + // points to a function_anchor defined in this plan + uint32 function_reference = 1; + repeated Expression args = 2; + Type output_type = 3; + } + + message WindowFunction { + // points to a function_anchor defined in this plan + uint32 function_reference = 1; + repeated Expression partitions = 2; + repeated SortField sorts = 3; + Bound upper_bound = 4; + Bound lower_bound = 5; + AggregationPhase phase = 6; + Type output_type = 7; + repeated Expression 
args = 8; + + message Bound { + + message Preceding { int64 offset = 1; } + + message Following { int64 offset = 1; } + + message CurrentRow {} + + message Unbounded {} + + oneof kind { + Preceding preceding = 1; + Following following = 2; + CurrentRow current_row = 3; + Unbounded unbounded = 4; + } } + } + message IfThen { - message WindowFunction { - Extensions.FunctionId id = 1; - repeated Expression partitions = 2; - repeated SortField sorts = 3; - Bound upper_bound = 4; - Bound lower_bound = 5; - AggregationPhase phase = 6; - Type output_type = 7; - repeated Expression args = 8; + repeated IfClause ifs = 1; + Expression else = 2; - message Bound { + message IfClause { + Expression if = 1; + Expression then = 2; + } + } - message Preceding { - int64 offset = 1; - } + message Cast { + Type type = 1; + Expression input = 2; + } - message Following { - int64 offset = 1; - } + message SwitchExpression { + repeated IfValue ifs = 1; + Expression else = 2; - message CurrentRow {} + message IfValue { + Literal if = 1; + Expression then = 2; + } + } + + message SingularOrList { + Expression value = 1; + repeated Expression options = 2; + } + + message MultiOrList { + repeated Expression value = 1; + repeated Record options = 2; + + message Record { repeated Expression fields = 1; } + } + + message EmbeddedFunction { + repeated Expression arguments = 1; + Type output_type = 2; + oneof kind { + PythonPickleFunction python_pickle_function = 3; + WebAssemblyFunction web_assembly_function = 4; + } - message Unbounded {} + message PythonPickleFunction { + bytes function = 1; + repeated string prerequisite = 2; + } - oneof kind { - Preceding preceding = 1; - Following following = 2; - CurrentRow current_row = 3; - Unbounded unbounded = 4; - } + message WebAssemblyFunction { + bytes script = 1; + repeated string prerequisite = 2; + } + } + + // A way to reference the inner property of a complex record. Can reference + // either a map key by literal, a struct field by the ordinal position of + // the desired field or a particular element in an array. Supports + // expressions that would roughly translate to something similar to: + // a.b[2].c['my_map_key'].x where a,b,c and x are struct field references + // (ordinalized in the internal representation here), [2] is a list offset + // and ['my_map_key'] is a reference into a map field. + message ReferenceSegment { + + oneof reference_type { + MapKey map_key = 1; + StructField struct_field = 2; + ListElement list_element = 3; + } - } + message MapKey { + // literal based reference to specific possible value in map. + Literal map_key = 1; + // Optional child segment + ReferenceSegment child = 2; } - message SortField { - Expression expr = 1; - - oneof sort_kind { - SortType formal = 2; - Extensions.FunctionId comparison_function = 3; - } - enum SortType { - UNKNOWN = 0; - ASC_NULLS_FIRST = 1; - ASC_NULLS_LAST = 2; - DESC_NULLS_FIRST = 3; - DESC_NULLS_LAST = 4; - CLUSTERED = 5; - } + message StructField { + // zero-indexed ordinal position of field in struct + int32 field = 1; + // Optional child segment + ReferenceSegment child = 2; } - message IfThen { + message ListElement { + // zero-indexed ordinal position of element in list + int32 offset = 1; - repeated IfClause ifs = 1; - Expression else = 2; + // Optional child segment + ReferenceSegment child = 2; + } - message IfClause { - Expression if = 1; - Expression then = 2; - } + } + + // A reference that takes an existing subtype and selectively removes fields from + // it. 
For example, one might initially have an inner struct with 100 fields but a + // a particular operation only needs to interact with only 2 of those 100 fields. + // In this situation, one would use a mask expression to eliminate the 98 fields that + // are not relevant to the rest of the operation pipeline. + // + // Note that this does not fundamentally alter the structure of data beyond the + // elimination of unecessary elements. + message MaskExpression { + + StructSelect select = 1; + bool maintain_singular_struct = 2; + + message Select { + oneof type { + StructSelect struct = 1; + ListSelect list = 2; + MapSelect map = 3; + } + } + + message StructSelect { repeated StructItem struct_items = 1; } + message StructItem { + int32 field = 1; + Select child = 2; } - message SwitchExpression { - repeated IfValue ifs = 1; - Expression else = 2; + message ListSelect { + + repeated ListSelectItem selection = 1; + Select child = 2; - message IfValue { - Expression if = 1; - Expression then = 2; + message ListSelectItem { + oneof type { + ListElement item = 1; + ListSlice slice = 2; } - } - message SingularOrList { - Expression value = 1; - repeated Expression options = 2; + message ListElement { int32 field = 1; } + + message ListSlice { + int32 start = 1; + int32 end = 2; + } + } } - message MultiOrList { - repeated Expression value = 1; - repeated Record options = 2; + message MapSelect { + oneof select { + MapKey key = 1; + MapKeyExpression expression = 2; + } - message Record { - repeated Expression fields = 1; - } + Select child = 3; + + message MapKey { string map_key = 1; } + message MapKeyExpression { string map_key_expression = 1; } } + } - message EmbeddedFunction { - repeated Expression arguments = 1; - Type output_type = 2; - oneof kind { - PythonPickleFunction python_pickle_function = 3; - WebAssemblyFunction web_assembly_function = 4; - } + // A reference to an inner part of a complex object. Can reference reference a single + // element or a masked version of elements + message FieldReference { - message PythonPickleFunction { - bytes function = 1; - repeated string prerequisite = 2; - } + // Whether this is composed of a single element reference or a masked element subtree + oneof reference_type { + ReferenceSegment direct_reference = 1; + MaskExpression masked_reference = 2; + } - message WebAssemblyFunction { - bytes script = 1; - repeated string prerequisite = 2; - } + // Whether this reference has an origin of a root struct or is based on the ouput + // of an expression. When this is a RootReference and direct_reference above is used, + // the direct_reference must be of a type StructField. 
+ oneof root_type { + Expression expression = 3; + RootReference root_reference = 4; } -} + // Singleton that expresses this FieldReference is rooted off the root incoming record type + message RootReference {} + } +} +message SortField { + Expression expr = 1; + + oneof sort_kind { + SortDirection direction = 2; + uint32 comparison_function_reference = 3; + } + enum SortDirection { + SORT_DIRECTION_UNSPECIFIED = 0; + SORT_DIRECTION_ASC_NULLS_FIRST = 1; + SORT_DIRECTION_ASC_NULLS_LAST = 2; + SORT_DIRECTION_DESC_NULLS_FIRST = 3; + SORT_DIRECTION_DESC_NULLS_LAST = 4; + SORT_DIRECTION_CLUSTERED = 5; + } +} +enum AggregationPhase { + AGGREGATION_PHASE_UNSPECIFIED = 0; + AGGREGATION_PHASE_INITIAL_TO_INTERMEDIATE = 1; + AGGREGATION_PHASE_INTERMEDIATE_TO_INTERMEDIATE = 2; + AGGREGATION_PHASE_INITIAL_TO_RESULT = 3; + AGGREGATION_PHASE_INTERMEDIATE_TO_RESULT = 4; +} + +message AggregateFunction { + // points to a function_anchor defined in this plan + uint32 function_reference = 1; + repeated Expression args = 2; + repeated SortField sorts = 3; + AggregationPhase phase = 4; + Type output_type = 5; +} diff --git a/utils/local-engine/Substrait/extensions.proto b/utils/local-engine/Substrait/extensions.proto index 8c5793c04578..3e8450b5c127 100644 --- a/utils/local-engine/Substrait/extensions.proto +++ b/utils/local-engine/Substrait/extensions.proto @@ -1,70 +1,81 @@ syntax = "proto3"; -package io.substrait; +package substrait.extensions; option java_multiple_files = true; +option java_package = "io.substrait.proto"; +option csharp_namespace = "Substrait.Protobuf"; +import "google/protobuf/any.proto"; -message Extensions { +message SimpleExtensionURI { + // A surrogate key used in the context of a single plan used to reference the + // URI associated with an extension. + uint32 extension_uri_anchor = 1; + // The URI where this extension YAML can be retrieved. This is the "namespace" + // of this extension. + string uri = 2; +} - message Extension { - - // unique that describes a particular source for (and type of) extensions. - ExtensionId extension_id = 1; - - oneof extension_type { - // git uri for extension types information - TypeExtension type_extension = 2; - FunctionExtension function_extension = 3; - } - - message TypeExtension { - string git_uri = 1; - } - - message FunctionExtension { - string git_uri = 1; - } - - } - - message Mapping { - - oneof mapping_type { - TypeMapping type_mapping = 1; - FunctionMapping function_mapping = 2; - } - - message TypeMapping { - TypeId type_id = 1; - ExtensionId extension_id = 2; - string name = 3; - } - - message FunctionMapping { - FunctionId function_id = 1; - ExtensionId extension_id = 2; - string name = 3; - uint32 index = 4; - repeated Option options = 5; - message Option { - string key = 1; - string value = 2; - } - } - } +// Describes a mapping between a specific extension entity and the uri where +// that extension can be found. +message SimpleExtensionDeclaration { + + oneof mapping_type { + ExtensionType extension_type = 1; + ExtensionTypeVariation extension_type_variation = 2; + ExtensionFunction extension_function = 3; + } + + // Describes a Type + message ExtensionType { + // references the extension_uri_anchor defined for a specific extension URI. + uint32 extension_uri_reference = 1; + + // A surrogate key used in the context of a single plan to reference a + // specific extension type + uint32 type_anchor = 2; + + // the name of the type in the defined extension YAML. 
+ string name = 3; + } + + message ExtensionTypeVariation { + // references the extension_uri_anchor defined for a specific extension URI. + uint32 extension_uri_reference = 1; + + // A surrogate key used in the context of a single plan to reference a + // specific type variation + uint32 type_variation_anchor = 2; + + // the name of the type in the defined extension YAML. + string name = 3; + } + + message ExtensionFunction { + // references the extension_uri_anchor defined for a specific extension URI. + uint32 extension_uri_reference = 1; + + // A surrogate key used in the context of a single plan to reference a + // specific function + uint32 function_anchor = 2; + + // A simple name if there is only one impl for the function within the YAML. + // A compound name, referencing that includes type short names if there is + // more than one impl per name in the YAML. + string name = 3; + } +} - message ExtensionId { - uint32 id = 1; - } +// A generic object that can be used to embed additional extension information +// into the serialized substrait plan. +message AdvancedExtension { - message FunctionId { - uint64 id = 1; - } + // An optimization is helpful information that don't influence semantics. May + // be ignored by a consumer. + google.protobuf.Any optimization = 1; - message TypeId { - uint64 id = 1; - } + // An enhancement alter semantics. Cannot be ignored by a consumer. + google.protobuf.Any enhancement = 2; } - diff --git a/utils/local-engine/Substrait/extensions/extensions.proto b/utils/local-engine/Substrait/extensions/extensions.proto new file mode 100644 index 000000000000..3e8450b5c127 --- /dev/null +++ b/utils/local-engine/Substrait/extensions/extensions.proto @@ -0,0 +1,81 @@ +syntax = "proto3"; + +package substrait.extensions; + +option java_multiple_files = true; +option java_package = "io.substrait.proto"; +option csharp_namespace = "Substrait.Protobuf"; + +import "google/protobuf/any.proto"; + +message SimpleExtensionURI { + // A surrogate key used in the context of a single plan used to reference the + // URI associated with an extension. + uint32 extension_uri_anchor = 1; + + // The URI where this extension YAML can be retrieved. This is the "namespace" + // of this extension. + string uri = 2; +} + +// Describes a mapping between a specific extension entity and the uri where +// that extension can be found. +message SimpleExtensionDeclaration { + + oneof mapping_type { + ExtensionType extension_type = 1; + ExtensionTypeVariation extension_type_variation = 2; + ExtensionFunction extension_function = 3; + } + + // Describes a Type + message ExtensionType { + // references the extension_uri_anchor defined for a specific extension URI. + uint32 extension_uri_reference = 1; + + // A surrogate key used in the context of a single plan to reference a + // specific extension type + uint32 type_anchor = 2; + + // the name of the type in the defined extension YAML. + string name = 3; + } + + message ExtensionTypeVariation { + // references the extension_uri_anchor defined for a specific extension URI. + uint32 extension_uri_reference = 1; + + // A surrogate key used in the context of a single plan to reference a + // specific type variation + uint32 type_variation_anchor = 2; + + // the name of the type in the defined extension YAML. + string name = 3; + } + + message ExtensionFunction { + // references the extension_uri_anchor defined for a specific extension URI. 
+ uint32 extension_uri_reference = 1; + + // A surrogate key used in the context of a single plan to reference a + // specific function + uint32 function_anchor = 2; + + // A simple name if there is only one impl for the function within the YAML. + // A compound name, referencing that includes type short names if there is + // more than one impl per name in the YAML. + string name = 3; + } +} + +// A generic object that can be used to embed additional extension information +// into the serialized substrait plan. +message AdvancedExtension { + + // An optimization is helpful information that don't influence semantics. May + // be ignored by a consumer. + google.protobuf.Any optimization = 1; + + // An enhancement alter semantics. Cannot be ignored by a consumer. + google.protobuf.Any enhancement = 2; +} diff --git a/utils/local-engine/Substrait/function.proto b/utils/local-engine/Substrait/function.proto index 7e2142687946..af3b898cb0f6 100644 --- a/utils/local-engine/Substrait/function.proto +++ b/utils/local-engine/Substrait/function.proto @@ -1,152 +1,146 @@ syntax = "proto3"; -package io.substrait; +package substrait; -import "type.proto"; -import "parameterized_types.proto"; -import "type_expressions.proto"; -import "extensions.proto"; +import "substrait/type.proto"; +import "substrait/parameterized_types.proto"; +import "substrait/type_expressions.proto"; option java_multiple_files = true; - +option java_package = "io.substrait.proto"; +option csharp_namespace = "Substrait.Protobuf"; // List of function signatures available. message FunctionSignature { - message FinalArgVariadic { - // the minimum number of arguments allowed for the list of final arguments (inclusive). - int64 min_args = 1; + message FinalArgVariadic { + // the minimum number of arguments allowed for the list of final arguments + // (inclusive). + int64 min_args = 1; - // the maximum number of arguments allowed for the list of final arguments (exclusive) - int64 max_args = 2; + // the maximum number of arguments allowed for the list of final arguments + // (exclusive) + int64 max_args = 2; - // the type of parameterized type consistency - ParameterConsistency consistency = 3; + // the type of parameterized type consistency + ParameterConsistency consistency = 3; - enum ParameterConsistency { - UNKNOWN = 0; + enum ParameterConsistency { + PARAMETER_CONSISTENCY_UNSPECIFIED = 0; - // All argument must be the same concrete type. - CONSISTENT = 1; + // All argument must be the same concrete type. + PARAMETER_CONSISTENCY_CONSISTENT = 1; - // Each argument can be any possible concrete type afforded by the bounds of any parameter defined in - // the arguments specification. - INCONSISTENT = 2; - } + // Each argument can be any possible concrete type afforded by the bounds + // of any parameter defined in the arguments specification. 
+ PARAMETER_CONSISTENCY_INCONSISTENT = 2; } + } - message FinalArgNormal {} - - message Scalar { - Extensions.FunctionId id = 1; - repeated Argument arguments = 2; - repeated string name = 3; - Description description = 4; + message FinalArgNormal {} - bool deterministic = 7; - bool session_dependent = 8; + message Scalar { + repeated Argument arguments = 2; + repeated string name = 3; + Description description = 4; - DerivationExpression output_type = 9; + bool deterministic = 7; + bool session_dependent = 8; - oneof final_variable_behavior { - FinalArgVariadic variadic = 10; - FinalArgNormal normal = 11; - } + DerivationExpression output_type = 9; - repeated Implementation implementations = 12; + oneof final_variable_behavior { + FinalArgVariadic variadic = 10; + FinalArgNormal normal = 11; } - message Aggregate { - Extensions.FunctionId id = 1; - repeated Argument arguments = 2; - repeated string name = 3; - Description description = 4; - - bool deterministic = 7; - bool session_dependent = 8; + repeated Implementation implementations = 12; + } - DerivationExpression output_type = 9; + message Aggregate { + repeated Argument arguments = 2; + string name = 3; + Description description = 4; - oneof final_variable_behavior { - FinalArgVariadic variadic = 10; - FinalArgNormal normal = 11; - } + bool deterministic = 7; + bool session_dependent = 8; - bool ordered = 14; - uint64 max_set = 12; - Type intermediate_type = 13; + DerivationExpression output_type = 9; - repeated Implementation implementations = 15; + oneof final_variable_behavior { + FinalArgVariadic variadic = 10; + FinalArgNormal normal = 11; } - message Window { - Extensions.FunctionId id = 1; - repeated Argument arguments = 2; - repeated string name = 3; - Description description = 4; - - bool deterministic = 7; - bool session_dependent = 8; - - DerivationExpression intermediate_type = 9; - DerivationExpression output_type = 10; - oneof final_variable_behavior { - FinalArgVariadic variadic = 16; - FinalArgNormal normal = 17; - } - bool ordered = 11; - uint64 max_set = 12; - WindowType window_type = 14; - repeated Implementation implementations = 15; - - enum WindowType { - UNKNOWN = 0; - STREAMING = 1; - PARTITION = 2; - } - } + bool ordered = 14; + uint64 max_set = 12; + Type intermediate_type = 13; - message Description { - string language = 1; - string body = 2; - } + repeated Implementation implementations = 15; + } - message Implementation { + message Window { + repeated Argument arguments = 2; + repeated string name = 3; + Description description = 4; - Type type = 1; - string uri = 2; + bool deterministic = 7; + bool session_dependent = 8; - enum Type { - UNKNOWN = 0; - WEB_ASSEMBLY = 1; - TRINO_JAR = 2; - } + DerivationExpression intermediate_type = 9; + DerivationExpression output_type = 10; + oneof final_variable_behavior { + FinalArgVariadic variadic = 16; + FinalArgNormal normal = 17; + } + bool ordered = 11; + uint64 max_set = 12; + WindowType window_type = 14; + repeated Implementation implementations = 15; + + enum WindowType { + WINDOW_TYPE_UNSPECIFIED = 0; + WINDOW_TYPE_STREAMING = 1; + WINDOW_TYPE_PARTITION = 2; } + } - message Argument { - string name = 1; + message Description { + string language = 1; + string body = 2; + } - oneof argument_kind { - ValueArgument value = 2; - TypeArgument type = 3; - EnumArgument enum = 4; - } + message Implementation { - message ValueArgument { - ParameterizedType type = 1; - bool constant = 2; - } + Type type = 1; + string uri = 2; - message TypeArgument { - 
ParameterizedType type = 1; - } + enum Type { + TYPE_UNSPECIFIED = 0; + TYPE_WEB_ASSEMBLY = 1; + TYPE_TRINO_JAR = 2; + } + } - message EnumArgument { - repeated string options = 1; - bool optional = 2; - } + message Argument { + string name = 1; + oneof argument_kind { + ValueArgument value = 2; + TypeArgument type = 3; + EnumArgument enum = 4; } -} + message ValueArgument { + ParameterizedType type = 1; + bool constant = 2; + } + message TypeArgument { ParameterizedType type = 1; } + + message EnumArgument { + repeated string options = 1; + bool optional = 2; + } + } +} diff --git a/utils/local-engine/Substrait/parameterized_types.proto b/utils/local-engine/Substrait/parameterized_types.proto index ffcd9f933228..a6b2bfbf21f5 100644 --- a/utils/local-engine/Substrait/parameterized_types.proto +++ b/utils/local-engine/Substrait/parameterized_types.proto @@ -1,116 +1,113 @@ syntax = "proto3"; -package io.substrait; +package substrait; -import "type.proto"; -import "extensions.proto"; +import "substrait/type.proto"; option java_multiple_files = true; +option java_package = "io.substrait.proto"; +option csharp_namespace = "Substrait.Protobuf"; message ParameterizedType { - oneof kind { - Type.Boolean bool = 1; - Type.I8 i8 = 2; - Type.I16 i16 = 3; - Type.I32 i32 = 5; - Type.I64 i64 = 7; - Type.FP32 fp32 = 10; - Type.FP64 fp64 = 11; - Type.String string = 12; - Type.Binary binary = 13; - Type.Timestamp timestamp = 14; - Type.Date date = 16; - Type.Time time = 17; - Type.IntervalYear interval_year = 19; - Type.IntervalDay interval_day = 20; - Type.TimestampTZ timestamp_tz = 29; - Type.UUID uuid = 32; - - ParameterizedFixedChar fixed_char = 21; - ParameterizedVarChar varchar = 22; - ParameterizedFixedBinary fixed_binary = 23; - ParameterizedDecimal decimal = 24; - - ParameterizedStruct struct = 25; - ParameterizedList list = 27; - ParameterizedMap map = 28; - - Extensions.TypeId user_defined = 31; - - TypeParameter type_parameter = 33; - - } - - message TypeParameter { - string name = 1; - repeated ParameterizedType bounds = 2; - } - - message IntegerParameter { - string name = 1; - NullableInteger range_start_inclusive = 2; - NullableInteger range_end_exclusive = 3; - } - - message NullableInteger { - int64 value = 1; - } - - message ParameterizedFixedChar { - IntegerOption length = 1; - Type.Variation variation = 2; - Type.Nullability nullability = 3; - } - - message ParameterizedVarChar { - IntegerOption length = 1; - Type.Variation variation = 2; - Type.Nullability nullability = 3; - } - - message ParameterizedFixedBinary { - IntegerOption length = 1; - Type.Variation variation = 2; - Type.Nullability nullability = 3; - } - - message ParameterizedDecimal { - IntegerOption scale = 1; - IntegerOption precision = 2; - Type.Variation variation = 3; - Type.Nullability nullability = 4; - } - - message ParameterizedStruct { - repeated ParameterizedType types = 1; - Type.Variation variation = 2; - Type.Nullability nullability = 3; - } - - message ParameterizedNamedStruct { - // list of names in dfs order - repeated string names = 1; - ParameterizedStruct struct = 2; - } - - message ParameterizedList { - ParameterizedType type = 1; - Type.Variation variation = 2; - Type.Nullability nullability = 3; - } - - message ParameterizedMap { - ParameterizedType key = 1; - ParameterizedType value = 2; - Type.Variation variation = 3; - Type.Nullability nullability = 4; - } - - message IntegerOption { - oneof integer_type { - int32 literal = 1; - IntegerParameter parameter = 2; - } + oneof kind { + Type.Boolean 
bool = 1; + Type.I8 i8 = 2; + Type.I16 i16 = 3; + Type.I32 i32 = 5; + Type.I64 i64 = 7; + Type.FP32 fp32 = 10; + Type.FP64 fp64 = 11; + Type.String string = 12; + Type.Binary binary = 13; + Type.Timestamp timestamp = 14; + Type.Date date = 16; + Type.Time time = 17; + Type.IntervalYear interval_year = 19; + Type.IntervalDay interval_day = 20; + Type.TimestampTZ timestamp_tz = 29; + Type.UUID uuid = 32; + + ParameterizedFixedChar fixed_char = 21; + ParameterizedVarChar varchar = 22; + ParameterizedFixedBinary fixed_binary = 23; + ParameterizedDecimal decimal = 24; + + ParameterizedStruct struct = 25; + ParameterizedList list = 27; + ParameterizedMap map = 28; + + uint32 user_defined_pointer = 31; + + TypeParameter type_parameter = 33; + } + + message TypeParameter { + string name = 1; + repeated ParameterizedType bounds = 2; + } + + message IntegerParameter { + string name = 1; + NullableInteger range_start_inclusive = 2; + NullableInteger range_end_exclusive = 3; + } + + message NullableInteger { int64 value = 1; } + + message ParameterizedFixedChar { + IntegerOption length = 1; + uint32 variation_pointer = 2; + Type.Nullability nullability = 3; + } + + message ParameterizedVarChar { + IntegerOption length = 1; + uint32 variation_pointer = 2; + Type.Nullability nullability = 3; + } + + message ParameterizedFixedBinary { + IntegerOption length = 1; + uint32 variation_pointer = 2; + Type.Nullability nullability = 3; + } + + message ParameterizedDecimal { + IntegerOption scale = 1; + IntegerOption precision = 2; + uint32 variation_pointer = 3; + Type.Nullability nullability = 4; + } + + message ParameterizedStruct { + repeated ParameterizedType types = 1; + uint32 variation_pointer = 2; + Type.Nullability nullability = 3; + } + + message ParameterizedNamedStruct { + // list of names in dfs order + repeated string names = 1; + ParameterizedStruct struct = 2; + } + + message ParameterizedList { + ParameterizedType type = 1; + uint32 variation_pointer = 2; + Type.Nullability nullability = 3; + } + + message ParameterizedMap { + ParameterizedType key = 1; + ParameterizedType value = 2; + uint32 variation_pointer = 3; + Type.Nullability nullability = 4; + } + + message IntegerOption { + oneof integer_type { + int32 literal = 1; + IntegerParameter parameter = 2; } + } } - diff --git a/utils/local-engine/Substrait/plan.proto b/utils/local-engine/Substrait/plan.proto index 3981583c9730..95978bdf9720 100644 --- a/utils/local-engine/Substrait/plan.proto +++ b/utils/local-engine/Substrait/plan.proto @@ -1,20 +1,44 @@ syntax = "proto3"; -package io.substrait; +package substrait; -import "relations.proto"; -import "extensions.proto"; +import "substrait/relations.proto"; +import "substrait/extensions/extensions.proto"; option java_multiple_files = true; - +option java_package = "io.substrait.proto"; +option csharp_namespace = "Substrait.Protobuf"; + +// Either a relation or root relation +message PlanRel { + oneof rel_type { + // Any relation + Rel rel = 1; + // The root of a relation tree + RelRoot root = 2; + } +} // Describe a set of operations to complete. // For compactness sake, identifiers are normalized at the plan level. 
message Plan { - repeated Extensions.Extension extensions = 1; - repeated Extensions.Mapping mappings = 2; - repeated Rel relations = 3; + // a list of yaml specifications this plan may depend on + repeated substrait.extensions.SimpleExtensionURI extension_uris = 1; -} + // a list of extensions this plan may depend on + repeated substrait.extensions.SimpleExtensionDeclaration extensions = 2; + // one or more relation trees that are associated with this plan. + repeated PlanRel relations = 3; + + // additional extensions associated with this plan. + substrait.extensions.AdvancedExtension advanced_extensions = 4; + + // A list of com.google.Any entities that this plan may use. Can be used to + // warn if some embedded message types are unknown. Note that this list may + // include message types that are ignorable (optimizations) or that are + // unused. In many cases, a consumer may be able to work with a plan even if + // one or more message types defined here are unknown. + repeated string expected_type_urls = 5; +} diff --git a/utils/local-engine/Substrait/relations.proto b/utils/local-engine/Substrait/relations.proto index 4ba08184244b..d0395e42d91f 100644 --- a/utils/local-engine/Substrait/relations.proto +++ b/utils/local-engine/Substrait/relations.proto @@ -1,175 +1,238 @@ syntax = "proto3"; -package io.substrait; +package substrait; -import "type.proto"; -import "expression.proto"; -import "selection.proto"; +import "substrait/type.proto"; +import "substrait/expression.proto"; +import "substrait/extensions/extensions.proto"; +import "google/protobuf/any.proto"; option java_multiple_files = true; +option java_package = "io.substrait.proto"; +option csharp_namespace = "Substrait.Protobuf"; message RelCommon { - oneof kind { - Direct direct = 1; - Emit emit = 2; - } - - Hint hint = 3; - RuntimeConstraint constraint = 4; - - message Direct {} - message Emit { - repeated int32 output_mapping = 1; - } + oneof emit_kind { + Direct direct = 1; + Emit emit = 2; + } - message Hint { - repeated HintKeyValue hint_key_values = 1; - Stats stats = 2; + Hint hint = 3; + substrait.extensions.AdvancedExtension advanced_extension = 4; - message Stats { - double row_count = 1; - double record_size = 2; - } + message Direct {} + message Emit { repeated int32 output_mapping = 1; } - message HintKeyValue { - string key = 1; - bytes value = 2; - } + // Changes to the operation that can influence efficiency/performance but + // should not impact correctness. + message Hint { + Stats stats = 1; + RuntimeConstraint constraint = 2; + substrait.extensions.AdvancedExtension advanced_extension = 10; + message Stats { + double row_count = 1; + double record_size = 2; + substrait.extensions.AdvancedExtension advanced_extension = 10; } message RuntimeConstraint { - // TODO: nodes, cpu threads/%, memory, iops, etc. - } + // TODO: nodes, cpu threads/%, memory, iops, etc. 
+ substrait.extensions.AdvancedExtension advanced_extension = 10; + } + } } message ReadRel { - RelCommon common = 1; - Type.NamedStruct base_schema = 2; - Expression filter = 3; - MaskExpression projection = 4; - - oneof read_type { - VirtualTable virtual_table = 5; - LocalFiles local_files = 6; - NamedTable named_table = 7; - } + RelCommon common = 1; + NamedStruct base_schema = 2; + Expression filter = 3; + Expression.MaskExpression projection = 4; + substrait.extensions.AdvancedExtension advanced_extension = 10; + + oneof read_type { + VirtualTable virtual_table = 5; + LocalFiles local_files = 6; + NamedTable named_table = 7; + ExtensionTable extension_table = 8; + } - message NamedTable { - repeated string names = 1; - } + message NamedTable { + repeated string names = 1; + substrait.extensions.AdvancedExtension advanced_extension = 10; + } + // a table composed of literals. + message VirtualTable { repeated Expression.Literal.Struct values = 1; } - message VirtualTable { - repeated Expression.Literal.Struct values = 1; - } + // a stub type that can be used to extend/introduce new table types outside + // the specification. + message ExtensionTable { google.protobuf.Any detail = 1; } - message LocalFiles { + message LocalFiles { - repeated FileOrFiles items = 1; + repeated FileOrFiles items = 1; + substrait.extensions.AdvancedExtension advanced_extension = 10; - message FileOrFiles { - oneof path_type { - string uri_path = 1; - string uri_path_glob = 2; - } + message FileOrFiles { + oneof path_type { + string uri_path = 1; + string uri_path_glob = 2; + string uri_file = 3; + string uri_folder = 4; + } - Format format = 3; + FileFormat format = 5; - enum Format { - UNKNOWN = 0; - PARQUET = 1; - } - } + // the index of the partition this item belongs to + uint64 partition_index = 6; - } + // the start position in byte to read from this item + uint64 start = 7; + + // the length in byte to read from this item + uint64 length = 8; + enum FileFormat { + FILE_FORMAT_UNSPECIFIED = 0; + FILE_FORMAT_PARQUET = 1; + } + } + } } message ProjectRel { - RelCommon common = 1; - Rel input = 2; - repeated Expression expressions = 3; + RelCommon common = 1; + Rel input = 2; + repeated Expression expressions = 3; + substrait.extensions.AdvancedExtension advanced_extension = 10; } message JoinRel { - RelCommon common = 1; - Rel left = 2; - Rel right = 3; - Expression expression = 4; - Expression post_join_filter = 5; - - enum JoinType { - UNKNOWN = 0; - INNER = 1; - OUTER = 2; - LEFT = 3; - RIGHT = 4; - } + RelCommon common = 1; + Rel left = 2; + Rel right = 3; + Expression expression = 4; + Expression post_join_filter = 5; + + JoinType type = 6; + + enum JoinType { + JOIN_TYPE_UNSPECIFIED = 0; + JOIN_TYPE_INNER = 1; + JOIN_TYPE_OUTER = 2; + JOIN_TYPE_LEFT = 3; + JOIN_TYPE_RIGHT = 4; + JOIN_TYPE_SEMI = 5; + JOIN_TYPE_ANTI = 6; + } + + substrait.extensions.AdvancedExtension advanced_extension = 10; } message FetchRel { - RelCommon common = 1; - Rel input = 2; - int64 offset = 3; - int64 count = 4; + RelCommon common = 1; + Rel input = 2; + int64 offset = 3; + int64 count = 4; + substrait.extensions.AdvancedExtension advanced_extension = 10; } message AggregateRel { - RelCommon common = 1; - Rel input = 2; - repeated Grouping groupings = 3; - repeated Measure measures = 4; - Expression.AggregationPhase phase = 5; - - message Grouping { - repeated int32 input_fields = 1; - } + RelCommon common = 1; + Rel input = 2; + repeated Grouping groupings = 3; + repeated Measure measures = 4; - message Measure { - 
Expression.AggregateFunction measure = 1; - } + substrait.extensions.AdvancedExtension advanced_extension = 10; + + message Grouping { repeated Expression grouping_expressions = 1; } + + message Measure { + AggregateFunction measure = 1; + + // An optional boolean expression that acts to filter which records are + // included in the measure. True means include this record for calculation + // within the measure. + Expression filter = 2; + } } message SortRel { - RelCommon common = 1; - Rel input = 2; - repeated Expression.SortField sorts = 3; + RelCommon common = 1; + Rel input = 2; + repeated SortField sorts = 3; + substrait.extensions.AdvancedExtension advanced_extension = 10; } message FilterRel { - RelCommon common = 1; - Rel input = 2; - Expression condition = 3; + RelCommon common = 1; + Rel input = 2; + Expression condition = 3; + substrait.extensions.AdvancedExtension advanced_extension = 10; } message SetRel { - RelCommon common = 1; - repeated Rel inputs = 2; - SetOp op = 3; - - enum SetOp { - UNKNOWN = 0; - MINUS_PRIMARY = 1; - MINUS_MULTISET = 2; - INTERSECTION_PRIMARY = 3; - INTERSECTION_MULTISET = 4; - UNION_DISTINCT = 5; - UNION_ALL = 6; - } + RelCommon common = 1; + repeated Rel inputs = 2; + SetOp op = 3; + substrait.extensions.AdvancedExtension advanced_extension = 10; + + enum SetOp { + SET_OP_UNSPECIFIED = 0; + SET_OP_MINUS_PRIMARY = 1; + SET_OP_MINUS_MULTISET = 2; + SET_OP_INTERSECTION_PRIMARY = 3; + SET_OP_INTERSECTION_MULTISET = 4; + SET_OP_UNION_DISTINCT = 5; + SET_OP_UNION_ALL = 6; + } +} + +// Stub to support extension with a single input +message ExtensionSingleRel { + RelCommon common = 1; + Rel input = 2; + google.protobuf.Any detail = 3; +} + +// Stub to support extension with a zero inputs +message ExtensionLeafRel { + RelCommon common = 1; + google.protobuf.Any detail = 2; +} + +// Stub to support extension with multiple inputs +message ExtensionMultiRel { + RelCommon common = 1; + repeated Rel inputs = 2; + google.protobuf.Any detail = 3; +} + +// A relation with output field names. +// +// This is for use at the root of a `Rel` tree. 
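// A minimal sketch of how a producer wires this root, following the
// SerializedPlanBuilder code elsewhere in this tree (variable and output
// names below are hypothetical):
//
//   auto * root = plan->mutable_relations()->Add()->mutable_root();
//   root->set_allocated_input(top_level_rel);  // RelRoot takes ownership of the Rel*
//   root->add_names("result");                 // output field names, depth-first order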
+message RelRoot { + // A relation + Rel input = 1; + // Field names in depth-first order + repeated string names = 2; } message Rel { - oneof RelType { - ReadRel read = 1; - FilterRel filter = 2; - FetchRel fetch = 3; - AggregateRel aggregate = 4; - SortRel sort = 5; - JoinRel join = 6; - ProjectRel project = 7; - SetRel set = 8; + oneof rel_type { + ReadRel read = 1; + FilterRel filter = 2; + FetchRel fetch = 3; + AggregateRel aggregate = 4; + SortRel sort = 5; + JoinRel join = 6; + ProjectRel project = 7; + SetRel set = 8; + ExtensionSingleRel extension_single = 9; + ExtensionMultiRel extension_multi = 10; + ExtensionLeafRel extension_leaf = 11; } -} \ No newline at end of file +} diff --git a/utils/local-engine/Substrait/selection.proto b/utils/local-engine/Substrait/selection.proto deleted file mode 100644 index 83bc8aad3ecd..000000000000 --- a/utils/local-engine/Substrait/selection.proto +++ /dev/null @@ -1,114 +0,0 @@ -syntax = "proto3"; - -package io.substrait; - -option java_multiple_files = true; - -message ReferenceSegment { - - oneof reference_type { - MapKey map_key = 1; - MapKeyExpression expression = 2; - StructField struct_field = 3; - ListElement list_element = 4; - ListRange list_range = 5; - } - - message MapKey { - string map_key = 1; - ReferenceSegment child = 2; - } - - message MapKeyExpression { - string map_key_expression = 1; - ReferenceSegment child = 2; - } - - message StructField { - int32 field = 1; - ReferenceSegment child = 2; - } - - message ListElement { - int32 offset = 1; - ReferenceSegment child = 2; - } - - message ListRange { - int32 start = 1; - int32 end = 2; - ReferenceSegment child = 3; - } -} - -message MaskExpression { - - StructSelect select = 1; - bool maintain_singular_struct = 2; - - message Select { - oneof type { - StructSelect struct = 1; - ListSelect list = 2; - MapSelect map = 3; - } - } - - message StructSelect { - repeated StructItem struct_items = 1; - } - - message StructItem { - int32 field = 1; - Select child = 2; - } - - message ListSelect { - - repeated ListSelectItem selection = 1; - Select child = 2; - - message ListSelectItem { - oneof type { - ListElement item = 1; - ListSlice slice = 2; - } - - message ListElement { - int32 field = 1; - } - - message ListSlice { - int32 start = 1; - int32 end = 2; - } - - } - } - - message MapSelect { - oneof select { - MapKey key = 1; - MapKeyExpression expression = 2; - } - - Select child = 3; - - message MapKey { - string map_key = 1; - } - - message MapKeyExpression { - string map_key_expression = 1; - } - } -} - -message FieldReference { - - oneof reference_type { - ReferenceSegment direct_reference = 1; - MaskExpression masked_reference = 2; - } - -} diff --git a/utils/local-engine/Substrait/type.proto b/utils/local-engine/Substrait/type.proto index 62c8e7f64e5b..a1c56caf084b 100644 --- a/utils/local-engine/Substrait/type.proto +++ b/utils/local-engine/Substrait/type.proto @@ -1,181 +1,194 @@ syntax = "proto3"; -package io.substrait; - -import "extensions.proto"; +package substrait; option java_multiple_files = true; +option java_package = "io.substrait.proto"; +option csharp_namespace = "Substrait.Protobuf"; message Type { - oneof kind { - Boolean bool = 1; - I8 i8 = 2; - I16 i16 = 3; - I32 i32 = 5; - I64 i64 = 7; - FP32 fp32 = 10; - FP64 fp64 = 11; - String string = 12; - Binary binary = 13; - Timestamp timestamp = 14; - Date date = 16; - Time time = 17; - IntervalYear interval_year = 19; - IntervalDay interval_day = 20; - TimestampTZ timestamp_tz = 29; - UUID uuid = 32; - - 
FixedChar fixed_char = 21; - VarChar varchar = 22; - FixedBinary fixed_binary = 23; - Decimal decimal = 24; - - Struct struct = 25; - List list = 27; - Map map = 28; - - Extensions.TypeId user_defined = 31; - } - - enum Nullability { - NULLABLE = 0; - REQUIRED = 1; - } - - message Boolean { - Variation variation = 1; - Nullability nullability = 2; - } - message I8 { - Variation variation = 1; - Nullability nullability = 2; - } - - message I16 { - Variation variation = 1; - Nullability nullability = 2; - } - - message I32 { - Variation variation = 1; - Nullability nullability = 2; - } - - message I64 { - Variation variation = 1; - Nullability nullability = 2; - } - - message FP32 { - Variation variation = 1; - Nullability nullability = 2; - } - - message FP64 { - Variation variation = 1; - Nullability nullability = 2; - } - - message String { - Variation variation = 1; - Nullability nullability = 2; - } - - message Binary { - Variation variation = 1; - Nullability nullability = 2; - } - - message Timestamp { - Variation variation = 1; - Nullability nullability = 2; - } - - message Date { - Variation variation = 1; - Nullability nullability = 2; - } - - message Time { - Variation variation = 1; - Nullability nullability = 2; - } - - message TimestampTZ { - Variation variation = 1; - Nullability nullability = 2; - } - - message IntervalYear { - Variation variation = 1; - Nullability nullability = 2; - } - - message IntervalDay { - Variation variation = 1; - Nullability nullability = 2; - } - - message UUID { - Variation variation = 1; - Nullability nullability = 2; - } - - // Start compound types. - message FixedChar { - int32 length = 1; - Variation variation = 2; - Nullability nullability = 3; - } - - message VarChar { - int32 length = 1; - Variation variation = 2; - Nullability nullability = 3; - } - - message FixedBinary { - int32 length = 1; - Variation variation = 2; - Nullability nullability = 3; - } - - message Decimal { - int32 scale = 1; - int32 precision = 2; - Variation variation = 3; - Nullability nullability = 4; - } - - message Struct { - repeated Type types = 1; - Variation variation = 2; - Nullability nullability = 3; - } - - message NamedStruct { - // list of names in dfs order - repeated string names = 1; - Struct struct = 2; - } - - message List { - Type type = 1; - Variation variation = 2; - Nullability nullability = 3; - } - - message Map { - Type key = 1; - Type value = 2; - Variation variation = 3; - Nullability nullability = 4; - } - - message Variation { - int32 organization = 1; - string name = 2; - } - + oneof kind { + Boolean bool = 1; + I8 i8 = 2; + I16 i16 = 3; + I32 i32 = 5; + I64 i64 = 7; + FP32 fp32 = 10; + FP64 fp64 = 11; + String string = 12; + Binary binary = 13; + Timestamp timestamp = 14; + Date date = 16; + Time time = 17; + IntervalYear interval_year = 19; + IntervalDay interval_day = 20; + TimestampTZ timestamp_tz = 29; + UUID uuid = 32; + + FixedChar fixed_char = 21; + VarChar varchar = 22; + FixedBinary fixed_binary = 23; + Decimal decimal = 24; + + Struct struct = 25; + List list = 27; + Map map = 28; + + uint32 user_defined_type_reference = 31; + } + + enum Nullability { + NULLABILITY_UNSPECIFIED = 0; + NULLABILITY_NULLABLE = 1; + NULLABILITY_REQUIRED = 2; + } + + message Boolean { + uint32 type_variation_reference = 1; + Nullability nullability = 2; + } + message I8 { + uint32 type_variation_reference = 1; + Nullability nullability = 2; + } + + message I16 { + uint32 type_variation_reference = 1; + Nullability nullability = 2; + } + + message I32 
{ + uint32 type_variation_reference = 1; + Nullability nullability = 2; + } + + message I64 { + uint32 type_variation_reference = 1; + Nullability nullability = 2; + } + + message FP32 { + uint32 type_variation_reference = 1; + Nullability nullability = 2; + } + + message FP64 { + uint32 type_variation_reference = 1; + Nullability nullability = 2; + } + + message String { + uint32 type_variation_reference = 1; + Nullability nullability = 2; + } + + message Binary { + uint32 type_variation_reference = 1; + Nullability nullability = 2; + } + + message Timestamp { + uint32 type_variation_reference = 1; + Nullability nullability = 2; + } + + message Date { + uint32 type_variation_reference = 1; + Nullability nullability = 2; + } + + message Time { + uint32 type_variation_reference = 1; + Nullability nullability = 2; + } + + message TimestampTZ { + uint32 type_variation_reference = 1; + Nullability nullability = 2; + } + + message IntervalYear { + uint32 type_variation_reference = 1; + Nullability nullability = 2; + } + + message IntervalDay { + uint32 type_variation_reference = 1; + Nullability nullability = 2; + } + + message UUID { + uint32 type_variation_reference = 1; + Nullability nullability = 2; + } + + // Start compound types. + message FixedChar { + int32 length = 1; + uint32 type_variation_reference = 2; + Nullability nullability = 3; + } + + message VarChar { + int32 length = 1; + uint32 type_variation_reference = 2; + Nullability nullability = 3; + } + + message FixedBinary { + int32 length = 1; + uint32 type_variation_reference = 2; + Nullability nullability = 3; + } + + message Decimal { + int32 scale = 1; + int32 precision = 2; + uint32 type_variation_reference = 3; + Nullability nullability = 4; + } + + message Struct { + repeated Type types = 1; + uint32 type_variation_reference = 2; + Nullability nullability = 3; + } + + message List { + Type type = 1; + uint32 type_variation_reference = 2; + Nullability nullability = 3; + } + + message Map { + Type key = 1; + Type value = 2; + uint32 type_variation_reference = 3; + Nullability nullability = 4; + } } +// A message for modeling name/type pairs. +// +// Useful for representing relation schemas. +// +// Notes: +// +// * The names field is in depth-first order. 
+// +// For example a schema such as: +// +// a: int64 +// b: struct +// +// would have a `names` field that looks like: +// +// ["a", "b", "c", "d"] +// +// * Only struct fields are contained in this field's elements, +// * Map keys should be traversed first, then values when producing/consuming +message NamedStruct { + // list of names in dfs order + repeated string names = 1; + Type.Struct struct = 2; +} diff --git a/utils/local-engine/Substrait/type_expressions.proto b/utils/local-engine/Substrait/type_expressions.proto index 87fe5d762fda..e089e8daea85 100644 --- a/utils/local-engine/Substrait/type_expressions.proto +++ b/utils/local-engine/Substrait/type_expressions.proto @@ -1,138 +1,148 @@ syntax = "proto3"; -package io.substrait; +package substrait; -import "type.proto"; -import "extensions.proto"; +import "substrait/type.proto"; option java_multiple_files = true; +option java_package = "io.substrait.proto"; +option csharp_namespace = "Substrait.Protobuf"; message DerivationExpression { - oneof kind { - Type.Boolean bool = 1; - Type.I8 i8 = 2; - Type.I16 i16 = 3; - Type.I32 i32 = 5; - Type.I64 i64 = 7; - Type.FP32 fp32 = 10; - Type.FP64 fp64 = 11; - Type.String string = 12; - Type.Binary binary = 13; - Type.Timestamp timestamp = 14; - Type.Date date = 16; - Type.Time time = 17; - Type.IntervalYear interval_year = 19; - Type.IntervalDay interval_day = 20; - Type.TimestampTZ timestamp_tz = 29; - Type.UUID uuid = 32; - - ExpressionFixedChar fixed_char = 21; - ExpressionVarChar varchar = 22; - ExpressionFixedBinary fixed_binary = 23; - ExpressionDecimal decimal = 24; - - ExpressionStruct struct = 25; - ExpressionList list = 27; - ExpressionMap map = 28; - - Extensions.TypeId user_defined = 31; - - string type_parameter_name = 33; - string integer_parameter_name = 34; - - int32 integer_literal = 35; - UnaryOp unary_op = 36; - BinaryOp binary_op = 37; - IfElse if_else = 38; + oneof kind { + Type.Boolean bool = 1; + Type.I8 i8 = 2; + Type.I16 i16 = 3; + Type.I32 i32 = 5; + Type.I64 i64 = 7; + Type.FP32 fp32 = 10; + Type.FP64 fp64 = 11; + Type.String string = 12; + Type.Binary binary = 13; + Type.Timestamp timestamp = 14; + Type.Date date = 16; + Type.Time time = 17; + Type.IntervalYear interval_year = 19; + Type.IntervalDay interval_day = 20; + Type.TimestampTZ timestamp_tz = 29; + Type.UUID uuid = 32; + + ExpressionFixedChar fixed_char = 21; + ExpressionVarChar varchar = 22; + ExpressionFixedBinary fixed_binary = 23; + ExpressionDecimal decimal = 24; + + ExpressionStruct struct = 25; + ExpressionList list = 27; + ExpressionMap map = 28; + + uint32 user_defined_pointer = 31; + + string type_parameter_name = 33; + string integer_parameter_name = 34; + + int32 integer_literal = 35; + UnaryOp unary_op = 36; + BinaryOp binary_op = 37; + IfElse if_else = 38; + ReturnProgram return_program = 39; + } + + message ExpressionFixedChar { + DerivationExpression length = 1; + uint32 variation_pointer = 2; + Type.Nullability nullability = 3; + } + + message ExpressionVarChar { + DerivationExpression length = 1; + uint32 variation_pointer = 2; + Type.Nullability nullability = 3; + } + + message ExpressionFixedBinary { + DerivationExpression length = 1; + uint32 variation_pointer = 2; + Type.Nullability nullability = 3; + } + + message ExpressionDecimal { + DerivationExpression scale = 1; + DerivationExpression precision = 2; + uint32 variation_pointer = 3; + Type.Nullability nullability = 4; + } + + message ExpressionStruct { + repeated DerivationExpression types = 1; + uint32 variation_pointer = 2; + 
Type.Nullability nullability = 3; + } + + message ExpressionNamedStruct { + repeated string names = 1; + ExpressionStruct struct = 2; + } + + message ExpressionList { + DerivationExpression type = 1; + uint32 variation_pointer = 2; + Type.Nullability nullability = 3; + } + + message ExpressionMap { + DerivationExpression key = 1; + DerivationExpression value = 2; + uint32 variation_pointer = 3; + Type.Nullability nullability = 4; + } + + message IfElse { + DerivationExpression if_condition = 1; + DerivationExpression if_return = 2; + DerivationExpression else_return = 3; + } + + message UnaryOp { + UnaryOpType op_type = 1; + DerivationExpression arg = 2; + + enum UnaryOpType { + UNARY_OP_TYPE_UNSPECIFIED = 0; + UNARY_OP_TYPE_BOOLEAN_NOT = 1; } - - message ExpressionFixedChar { - DerivationExpression length = 1; - Type.Variation variation = 2; - Type.Nullability nullability = 3; - } - - message ExpressionVarChar { - DerivationExpression length = 1; - Type.Variation variation = 2; - Type.Nullability nullability = 3; - } - - message ExpressionFixedBinary { - DerivationExpression length = 1; - Type.Variation variation = 2; - Type.Nullability nullability = 3; - } - - message ExpressionDecimal { - DerivationExpression scale = 1; - DerivationExpression precision = 2; - Type.Variation variation = 3; - Type.Nullability nullability = 4; - } - - message ExpressionStruct { - repeated DerivationExpression types = 1; - Type.Variation variation = 2; - Type.Nullability nullability = 3; - } - - message ExpressionNamedStruct { - repeated string names = 1; - ExpressionStruct struct = 2; - } - - message ExpressionList { - DerivationExpression type = 1; - Type.Variation variation = 2; - Type.Nullability nullability = 3; - } - - message ExpressionMap { - DerivationExpression key = 1; - DerivationExpression value = 2; - Type.Variation variation = 3; - Type.Nullability nullability = 4; - } - - - message IfElse { - DerivationExpression if_condition = 1; - DerivationExpression if_return = 2; - DerivationExpression else_return = 3; - } - - message UnaryOp { - OpType op_type = 1; - DerivationExpression arg = 2; - - enum OpType { - UNKNOWN = 0; - BOOLEAN_NOT = 1; - } + } + + message BinaryOp { + + BinaryOpType op_type = 1; + DerivationExpression arg1 = 2; + DerivationExpression arg2 = 3; + + enum BinaryOpType { + BINARY_OP_TYPE_UNSPECIFIED = 0; + BINARY_OP_TYPE_PLUS = 1; + BINARY_OP_TYPE_MINUS = 2; + BINARY_OP_TYPE_MULTIPLY = 3; + BINARY_OP_TYPE_DIVIDE = 4; + BINARY_OP_TYPE_MIN = 5; + BINARY_OP_TYPE_MAX = 6; + BINARY_OP_TYPE_GREATER_THAN = 7; + BINARY_OP_TYPE_LESS_THAN = 8; + BINARY_OP_TYPE_AND = 9; + BINARY_OP_TYPE_OR = 10; + BINARY_OP_TYPE_EQUALS = 11; + BINARY_OP_TYPE_COVERS = 12; } + } - message BinaryOp { - - OpType op_type = 1; - DerivationExpression arg1 = 2; - DerivationExpression arg2 = 3; - - enum OpType { - UNKNOWN = 0; - PLUS = 1; - MINUS = 2; - MULTIPLY = 3; - DIVIDE = 4; - MIN = 5; - MAX = 6; - GREATER_THAN = 7; - LESS_THAN = 8; - AND = 9; - OR = 10; - EQUALS = 11; - COVERS = 12; - } + message ReturnProgram { + message Assignment { + string name = 1; + DerivationExpression expression = 2; } + repeated Assignment assignments = 1; + DerivationExpression final_expression = 2; + } } From 8a9e4a12b298e1ee2f660d19de8fbbc268ce7e89 Mon Sep 17 00:00:00 2001 From: "neng.liu" Date: Wed, 29 Dec 2021 02:09:11 +0000 Subject: [PATCH 265/472] print test result --- utils/local-engine/tests/gtest_local_engine.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/utils/local-engine/tests/gtest_local_engine.cpp 
b/utils/local-engine/tests/gtest_local_engine.cpp index 3a29165f8e3f..abb9e256ddea 100644 --- a/utils/local-engine/tests/gtest_local_engine.cpp +++ b/utils/local-engine/tests/gtest_local_engine.cpp @@ -136,6 +136,9 @@ TEST(TestSelect, TestAgg) local_engine::SparkColumnToCHColumn converter; auto block = converter.convertCHColumnToSparkRow(*spark_row_info, local_executor.getHeader()); ASSERT_EQ(spark_row_info->getNumRows(), block->rows()); + auto reader = SparkRowReader(spark_row_info->getNumCols()); + reader.pointTo(reinterpret_cast(spark_row_info->getBufferAddress() + spark_row_info->getOffsets()[1]), spark_row_info->getLengths()[0]); + std::cout << "result: " << reader.getDouble(0) << std::endl; } } From 2aff0153f1da7fab22b7174d054e43fd64185fa6 Mon Sep 17 00:00:00 2001 From: "neng.liu" Date: Thu, 30 Dec 2021 02:56:44 +0000 Subject: [PATCH 266/472] upgrade substrait --- .../Builder/SerializedFunctionBuilder.cpp | 103 ------------------ .../Builder/SerializedFunctionBuilder.h | 34 ------ .../Builder/SerializedPlanBuilder.cpp | 86 +++++++-------- .../Builder/SerializedPlanBuilder.h | 38 +++---- utils/local-engine/CMakeLists.txt | 5 +- .../Parser/SerializedPlanParser.cpp | 55 +++++----- .../Parser/SerializedPlanParser.h | 18 +-- utils/local-engine/Substrait/CMakeLists.txt | 9 -- .../Substrait/extensions/extensions.proto | 81 -------------- utils/local-engine/proto/CMakeLists.txt | 28 +++++ .../substrait}/capabilities.proto | 0 .../substrait}/expression.proto | 0 .../substrait/extensions}/extensions.proto | 0 .../substrait}/function.proto | 0 .../substrait}/parameterized_types.proto | 0 .../{Substrait => proto/substrait}/plan.proto | 0 .../substrait}/relations.proto | 0 .../{Substrait => proto/substrait}/type.proto | 0 .../substrait}/type_expressions.proto | 0 .../local-engine/tests/gtest_local_engine.cpp | 2 +- 20 files changed, 129 insertions(+), 330 deletions(-) delete mode 100644 utils/local-engine/Builder/SerializedFunctionBuilder.cpp delete mode 100644 utils/local-engine/Builder/SerializedFunctionBuilder.h delete mode 100644 utils/local-engine/Substrait/CMakeLists.txt delete mode 100644 utils/local-engine/Substrait/extensions/extensions.proto create mode 100644 utils/local-engine/proto/CMakeLists.txt rename utils/local-engine/{Substrait => proto/substrait}/capabilities.proto (100%) rename utils/local-engine/{Substrait => proto/substrait}/expression.proto (100%) rename utils/local-engine/{Substrait => proto/substrait/extensions}/extensions.proto (100%) rename utils/local-engine/{Substrait => proto/substrait}/function.proto (100%) rename utils/local-engine/{Substrait => proto/substrait}/parameterized_types.proto (100%) rename utils/local-engine/{Substrait => proto/substrait}/plan.proto (100%) rename utils/local-engine/{Substrait => proto/substrait}/relations.proto (100%) rename utils/local-engine/{Substrait => proto/substrait}/type.proto (100%) rename utils/local-engine/{Substrait => proto/substrait}/type_expressions.proto (100%) diff --git a/utils/local-engine/Builder/SerializedFunctionBuilder.cpp b/utils/local-engine/Builder/SerializedFunctionBuilder.cpp deleted file mode 100644 index 185ac4cc0ece..000000000000 --- a/utils/local-engine/Builder/SerializedFunctionBuilder.cpp +++ /dev/null @@ -1,103 +0,0 @@ -#include "SerializedFunctionBuilder.h" -dbms::SerializedScalarFunctionBuilder::SerializedScalarFunctionBuilder( - int functionId, - const DB::NamesAndTypesList & args, - const std::string & func_name, - bool is_deterministic, - const DB::DataTypePtr & outputType) - : 
function_id(functionId), arguments(args), name(func_name), deterministic(is_deterministic), output_type(outputType) -{ -} -std::unique_ptr dbms::SerializedScalarFunctionBuilder::build() -{ - this->function = std::make_unique(); - function->mutable_name()->Add(std::move(this->name)); - function->mutable_id()->set_id(this->function_id); - function->set_deterministic(this->deterministic); - convertDataTypeToDerivationExpression(function->mutable_output_type(), this->output_type); - function->mutable_normal(); - for (const auto &arg : this->arguments) { - auto *s_arg = function->mutable_arguments()->Add(); - convertNameAndTypeToArgument(s_arg, arg); - } - return std::move(function); -} -void dbms::convertDataTypeToDerivationExpression(io::substrait::DerivationExpression * expression, DB::DataTypePtr type) -{ - DB::WhichDataType which(type); - if (which.isDate()) - { - auto * date = expression->mutable_date(); - date->set_nullability(io::substrait::Type_Nullability_REQUIRED); - } - else if (which.isInt32()) - { - auto * int_32 = expression->mutable_i32(); - int_32->set_nullability(io::substrait::Type_Nullability_REQUIRED); - } - else if (which.isInt64()) - { - auto * int_64 = expression->mutable_i64(); - int_64->set_nullability(io::substrait::Type_Nullability_REQUIRED); - } - else if (which.isFloat32()) - { - auto * float_32 = expression->mutable_fp32(); - float_32->set_nullability(io::substrait::Type_Nullability_REQUIRED); - } - else if (which.isFloat64()) - { - auto * float_64 = expression->mutable_fp64(); - float_64->set_nullability(io::substrait::Type_Nullability_REQUIRED); - } - else if (which.isInt8()) - { - auto * boolean = expression->mutable_bool_(); - boolean->set_nullability(io::substrait::Type_Nullability_REQUIRED); - } - else - { - throw std::runtime_error("unsupported data type " + std::string(type->getFamilyName())); - } -} - -void dbms::convertNameAndTypeToArgument(io::substrait::FunctionSignature_Argument *argument, DB::NameAndTypePair arg) -{ - argument->set_name(arg.name); - DB::WhichDataType which(arg.type); - auto * p_type = argument->mutable_type()->mutable_type(); - if (which.isDate()) - { - auto * date = p_type->mutable_date(); - date->set_nullability(io::substrait::Type_Nullability_REQUIRED); - } - else if (which.isInt32()) - { - auto * int_32 = p_type->mutable_i32(); - int_32->set_nullability(io::substrait::Type_Nullability_REQUIRED); - } - else if (which.isInt64()) - { - auto * int_64 = p_type->mutable_i64(); - int_64->set_nullability(io::substrait::Type_Nullability_REQUIRED); - } - else if (which.isFloat32()) - { - auto * float_32 = p_type->mutable_fp32(); - float_32->set_nullability(io::substrait::Type_Nullability_REQUIRED); - } - else if (which.isFloat64()) - { - auto * float_64 = p_type->mutable_fp64(); - float_64->set_nullability(io::substrait::Type_Nullability_REQUIRED); - } - else if (which.isInt8()) - { - auto * boolean = p_type->mutable_bool_(); - boolean->set_nullability(io::substrait::Type_Nullability_REQUIRED); - } - else - { - throw std::runtime_error("unsupported data type " + std::string(arg.type->getFamilyName())); - } -} diff --git a/utils/local-engine/Builder/SerializedFunctionBuilder.h b/utils/local-engine/Builder/SerializedFunctionBuilder.h deleted file mode 100644 index c260c47778a1..000000000000 --- a/utils/local-engine/Builder/SerializedFunctionBuilder.h +++ /dev/null @@ -1,34 +0,0 @@ -#pragma once -#include -#include - -namespace dbms -{ - -void convertDataTypeToDerivationExpression(io::substrait::DerivationExpression* expression, 
DB::DataTypePtr type); - -void convertNameAndTypeToArgument(io::substrait::FunctionSignature_Argument* argument, DB::NameAndTypePair args); - -class SerializedFunctionBuilder -{ -}; - -class SerializedScalarFunctionBuilder -{ -public: - SerializedScalarFunctionBuilder( - int functionId, - const DB::NamesAndTypesList & args, - const std::string & func_name, - bool is_deterministic, - const DB::DataTypePtr & outputType); - std::unique_ptr build(); -private: - int function_id; - DB::NamesAndTypesList arguments; - std::string name; - bool deterministic; - DB::DataTypePtr output_type; - std::unique_ptr function; -}; -} diff --git a/utils/local-engine/Builder/SerializedPlanBuilder.cpp b/utils/local-engine/Builder/SerializedPlanBuilder.cpp index 9d0a773f3f0e..c427ab62840b 100644 --- a/utils/local-engine/Builder/SerializedPlanBuilder.cpp +++ b/utils/local-engine/Builder/SerializedPlanBuilder.cpp @@ -12,59 +12,59 @@ SchemaPtr SerializedSchemaBuilder::build() { auto * t = type_struct->mutable_types()->Add(); t->mutable_i8()->set_nullability( - this->nullability_map[name] ? io::substrait::Type_Nullability_NULLABLE : io::substrait::Type_Nullability_REQUIRED); + this->nullability_map[name] ? substrait::Type_Nullability_NULLABILITY_NULLABLE : substrait::Type_Nullability_NULLABILITY_REQUIRED); } else if (type == "I32") { auto * t = type_struct->mutable_types()->Add(); t->mutable_i32()->set_nullability( - this->nullability_map[name] ? io::substrait::Type_Nullability_NULLABLE : io::substrait::Type_Nullability_REQUIRED); + this->nullability_map[name] ? substrait::Type_Nullability_NULLABILITY_NULLABLE : substrait::Type_Nullability_NULLABILITY_REQUIRED); } else if (type == "I64") { auto * t = type_struct->mutable_types()->Add(); t->mutable_i64()->set_nullability( - this->nullability_map[name] ? io::substrait::Type_Nullability_NULLABLE : io::substrait::Type_Nullability_REQUIRED); + this->nullability_map[name] ? substrait::Type_Nullability_NULLABILITY_NULLABLE : substrait::Type_Nullability_NULLABILITY_REQUIRED); } else if (type == "Boolean") { auto * t = type_struct->mutable_types()->Add(); t->mutable_bool_()->set_nullability( - this->nullability_map[name] ? io::substrait::Type_Nullability_NULLABLE : io::substrait::Type_Nullability_REQUIRED); + this->nullability_map[name] ? substrait::Type_Nullability_NULLABILITY_NULLABLE : substrait::Type_Nullability_NULLABILITY_REQUIRED); } else if (type == "I16") { auto * t = type_struct->mutable_types()->Add(); t->mutable_i16()->set_nullability( - this->nullability_map[name] ? io::substrait::Type_Nullability_NULLABLE : io::substrait::Type_Nullability_REQUIRED); + this->nullability_map[name] ? substrait::Type_Nullability_NULLABILITY_NULLABLE : substrait::Type_Nullability_NULLABILITY_REQUIRED); } else if (type == "String") { auto * t = type_struct->mutable_types()->Add(); t->mutable_string()->set_nullability( - this->nullability_map[name] ? io::substrait::Type_Nullability_NULLABLE : io::substrait::Type_Nullability_REQUIRED); + this->nullability_map[name] ? substrait::Type_Nullability_NULLABILITY_NULLABLE : substrait::Type_Nullability_NULLABILITY_REQUIRED); } else if (type == "FP32") { auto * t = type_struct->mutable_types()->Add(); t->mutable_fp32()->set_nullability( - this->nullability_map[name] ? io::substrait::Type_Nullability_NULLABLE : io::substrait::Type_Nullability_REQUIRED); + this->nullability_map[name] ? 
substrait::Type_Nullability_NULLABILITY_NULLABLE : substrait::Type_Nullability_NULLABILITY_REQUIRED); } else if (type == "FP64") { auto * t = type_struct->mutable_types()->Add(); t->mutable_fp64()->set_nullability( - this->nullability_map[name] ? io::substrait::Type_Nullability_NULLABLE : io::substrait::Type_Nullability_REQUIRED); + this->nullability_map[name] ? substrait::Type_Nullability_NULLABILITY_NULLABLE : substrait::Type_Nullability_NULLABILITY_REQUIRED); } else if (type == "Date") { auto * t = type_struct->mutable_types()->Add(); t->mutable_date()->set_nullability( - this->nullability_map[name] ? io::substrait::Type_Nullability_NULLABLE : io::substrait::Type_Nullability_REQUIRED); + this->nullability_map[name] ? substrait::Type_Nullability_NULLABILITY_NULLABLE : substrait::Type_Nullability_NULLABILITY_REQUIRED); } else { - throw "doesn't support type " + type; + throw std::runtime_error("doesn't support type " + type); } } return std::move(this->schema); @@ -75,23 +75,24 @@ SerializedSchemaBuilder & SerializedSchemaBuilder::column(std::string name, std: this->nullability_map.emplace(name, nullable); return *this; } -SerializedSchemaBuilder::SerializedSchemaBuilder() : schema(new io::substrait::Type_NamedStruct()) +SerializedSchemaBuilder::SerializedSchemaBuilder() : schema(new substrait::NamedStruct()) { } SerializedPlanBuilder & SerializedPlanBuilder::registerFunction(int id, std::string name) { - auto * mapping = this->plan->mutable_mappings()->Add(); - auto * function_mapping = mapping->mutable_function_mapping(); - function_mapping->mutable_function_id()->set_id(id); + auto * extension = this->plan->mutable_extensions()->Add(); + auto * function_mapping = extension->mutable_extension_function(); + function_mapping->set_function_anchor(id); function_mapping->set_name(name); return *this; } -void SerializedPlanBuilder::setInputToPrev(io::substrait::Rel * input) +void SerializedPlanBuilder::setInputToPrev(substrait::Rel * input) { if (!this->prev_rel) { - this->plan->mutable_relations()->AddAllocated(input); + auto * root = this->plan->mutable_relations()->Add()->mutable_root(); + root->set_allocated_input(input); return; } if (this->prev_rel->has_filter()) @@ -112,9 +113,9 @@ void SerializedPlanBuilder::setInputToPrev(io::substrait::Rel * input) } } -SerializedPlanBuilder & SerializedPlanBuilder::filter(io::substrait::Expression * condition) +SerializedPlanBuilder & SerializedPlanBuilder::filter(substrait::Expression * condition) { - io::substrait::Rel * filter = new io::substrait::Rel(); + substrait::Rel * filter = new substrait::Rel(); filter->mutable_filter()->set_allocated_condition(condition); setInputToPrev(filter); this->prev_rel = filter; @@ -123,7 +124,7 @@ SerializedPlanBuilder & SerializedPlanBuilder::filter(io::substrait::Expression SerializedPlanBuilder & SerializedPlanBuilder::read(std::string path, SchemaPtr schema) { - io::substrait::Rel * rel = new io::substrait::Rel(); + substrait::Rel * rel = new substrait::Rel(); auto * read = rel->mutable_read(); read->mutable_local_files()->add_items()->set_uri_path(path); read->set_allocated_base_schema(schema); @@ -131,19 +132,18 @@ SerializedPlanBuilder & SerializedPlanBuilder::read(std::string path, SchemaPtr this->prev_rel = rel; return *this; } -std::unique_ptr SerializedPlanBuilder::build() +std::unique_ptr SerializedPlanBuilder::build() { return std::move(this->plan); } -SerializedPlanBuilder::SerializedPlanBuilder() : plan(std::make_unique()) +SerializedPlanBuilder::SerializedPlanBuilder() : plan(std::make_unique()) 
{ } -SerializedPlanBuilder & SerializedPlanBuilder::aggregate(std::vector keys, std::vector aggregates) +SerializedPlanBuilder & SerializedPlanBuilder::aggregate(std::vector keys, std::vector aggregates) { - io::substrait::Rel * rel = new io::substrait::Rel(); + substrait::Rel * rel = new substrait::Rel(); auto * agg = rel->mutable_aggregate(); - auto * grouping = agg->mutable_groupings()->Add(); - grouping->mutable_input_fields()->Add(keys.begin(), keys.end()); + // TODO support group auto * measures = agg->mutable_measures(); for (auto * measure : aggregates) { @@ -153,9 +153,9 @@ SerializedPlanBuilder & SerializedPlanBuilder::aggregate(std::vector ke this->prev_rel = rel; return *this; } -SerializedPlanBuilder & SerializedPlanBuilder::project(std::vector projections) +SerializedPlanBuilder & SerializedPlanBuilder::project(std::vector projections) { - io::substrait::Rel * project = new io::substrait::Rel(); + substrait::Rel * project = new substrait::Rel(); for (auto * expr : projections) { project->mutable_project()->mutable_expressions()->AddAllocated(expr); @@ -166,56 +166,56 @@ SerializedPlanBuilder & SerializedPlanBuilder::project(std::vectormutable_selection(); selection->mutable_direct_reference()->mutable_struct_field()->set_field(field_id); return rel; } -io::substrait::Expression * scalarFunction(int32_t id, ExpressionList args) +substrait::Expression * scalarFunction(int32_t id, ExpressionList args) { - io::substrait::Expression * rel = new io::substrait::Expression(); + substrait::Expression * rel = new substrait::Expression(); auto * function = rel->mutable_scalar_function(); - function->mutable_id()->set_id(id); + function->set_function_reference(id); std::for_each(args.begin(), args.end(), [function](auto * expr) { function->mutable_args()->AddAllocated(expr); }); return rel; } -io::substrait::AggregateRel_Measure * measureFunction(int32_t id, ExpressionList args) +substrait::AggregateRel_Measure * measureFunction(int32_t id, ExpressionList args) { - io::substrait::AggregateRel_Measure * rel = new io::substrait::AggregateRel_Measure(); + substrait::AggregateRel_Measure * rel = new substrait::AggregateRel_Measure(); auto * measure = rel->mutable_measure(); - measure->mutable_id()->set_id(id); + measure->set_function_reference(id); std::for_each(args.begin(), args.end(), [measure](auto * expr) { measure->mutable_args()->AddAllocated(expr); }); return rel; } -io::substrait::Expression * literal(double_t value) +substrait::Expression * literal(double_t value) { - io::substrait::Expression * rel = new io::substrait::Expression(); + substrait::Expression * rel = new substrait::Expression(); auto * literal = rel->mutable_literal(); literal->set_fp64(value); return rel; } -io::substrait::Expression * literal(int32_t value) +substrait::Expression * literal(int32_t value) { - io::substrait::Expression * rel = new io::substrait::Expression(); + substrait::Expression * rel = new substrait::Expression(); auto * literal = rel->mutable_literal(); literal->set_i32(value); return rel; } -io::substrait::Expression * literal(std::string value) +substrait::Expression * literal(std::string value) { - io::substrait::Expression * rel = new io::substrait::Expression(); + substrait::Expression * rel = new substrait::Expression(); auto * literal = rel->mutable_literal(); literal->set_string(value); return rel; } -io::substrait::Expression* literalDate(int32_t value) +substrait::Expression* literalDate(int32_t value) { - io::substrait::Expression * rel = new io::substrait::Expression(); + 
substrait::Expression * rel = new substrait::Expression(); auto * literal = rel->mutable_literal(); literal->set_date(value); return rel; diff --git a/utils/local-engine/Builder/SerializedPlanBuilder.h b/utils/local-engine/Builder/SerializedPlanBuilder.h index 0164ac37f752..05cfe07fb64a 100644 --- a/utils/local-engine/Builder/SerializedPlanBuilder.h +++ b/utils/local-engine/Builder/SerializedPlanBuilder.h @@ -1,6 +1,6 @@ #pragma once -#include +#include namespace dbms @@ -19,7 +19,7 @@ enum Function TO_DATE }; -using SchemaPtr = io::substrait::Type_NamedStruct *; +using SchemaPtr = substrait::NamedStruct *; class SerializedPlanBuilder { @@ -37,20 +37,20 @@ class SerializedPlanBuilder return *this; } SerializedPlanBuilder& registerFunction(int id, std::string name); - SerializedPlanBuilder& filter(io::substrait::Expression* condition); - SerializedPlanBuilder& project(std::vector projections); - SerializedPlanBuilder& aggregate(std::vector keys, std::vector aggregates); + SerializedPlanBuilder& filter(substrait::Expression* condition); + SerializedPlanBuilder& project(std::vector projections); + SerializedPlanBuilder& aggregate(std::vector keys, std::vector aggregates); SerializedPlanBuilder& read(std::string path, SchemaPtr schema); - std::unique_ptr build(); + std::unique_ptr build(); private: - void setInputToPrev(io::substrait::Rel * input); - io::substrait::Rel * prev_rel = nullptr; - std::unique_ptr plan; + void setInputToPrev(substrait::Rel * input); + substrait::Rel * prev_rel = nullptr; + std::unique_ptr plan; }; -using Type = io::substrait::Type; +using Type = substrait::Type; /** * build a schema, need define column name and column. * 1. column name @@ -68,18 +68,18 @@ class SerializedSchemaBuilder { SchemaPtr schema; }; -using ExpressionList = std::vector; -using MeasureList = std::vector; +using ExpressionList = std::vector; +using MeasureList = std::vector; -io::substrait::Expression * scalarFunction(int32_t id, ExpressionList args); -io::substrait::AggregateRel_Measure * measureFunction(int32_t id, ExpressionList args); +substrait::Expression * scalarFunction(int32_t id, ExpressionList args); +substrait::AggregateRel_Measure * measureFunction(int32_t id, ExpressionList args); -io::substrait::Expression* literal(double_t value); -io::substrait::Expression* literal(int32_t value); -io::substrait::Expression* literal(std::string value); -io::substrait::Expression* literalDate(int32_t value); +substrait::Expression* literal(double_t value); +substrait::Expression* literal(int32_t value); +substrait::Expression* literal(std::string value); +substrait::Expression* literalDate(int32_t value); -io::substrait::Expression * selection(int32_t field_id); +substrait::Expression * selection(int32_t field_id); } diff --git a/utils/local-engine/CMakeLists.txt b/utils/local-engine/CMakeLists.txt index e89120b1ba79..af5a02a85db0 100644 --- a/utils/local-engine/CMakeLists.txt +++ b/utils/local-engine/CMakeLists.txt @@ -62,12 +62,12 @@ function(add_cxx_compile_options option) add_compile_options("$<$,CXX>:${option}>") endfunction() add_cxx_compile_options(-Wzero-as-null-pointer-constant) -add_subdirectory(Substrait) +add_subdirectory(proto) add_subdirectory(Builder) add_headers_and_sources(builder Builder) add_headers_and_sources(parser Parser) #include (../../cmake/find/parquet.cmake) -include_directories(${CMAKE_CURRENT_BINARY_DIR}) +include_directories(${CMAKE_CURRENT_BINARY_DIR}/proto) include_directories(${ClickHouse_SOURCE_DIR}/utils/local-engine) #add_executable (local_engine # 
local_engine.cpp @@ -84,7 +84,6 @@ include_directories(${ClickHouse_SOURCE_DIR}/utils/local-engine) include_directories( ${RAPIDJSON_INCLUDE_DIR} - ${SUBSTRAIT_HEADERS} ${ARROW_INCLUDE_DIR} ) diff --git a/utils/local-engine/Parser/SerializedPlanParser.cpp b/utils/local-engine/Parser/SerializedPlanParser.cpp index 7a931c828d2b..068eaeb6cae8 100644 --- a/utils/local-engine/Parser/SerializedPlanParser.cpp +++ b/utils/local-engine/Parser/SerializedPlanParser.cpp @@ -21,12 +21,10 @@ #include #include #include -#include #include -namespace substrait = io::substrait; -DB::BatchParquetFileSourcePtr dbms::SerializedPlanParser::parseReadRealWithLocalFile(const io::substrait::ReadRel & rel) +DB::BatchParquetFileSourcePtr dbms::SerializedPlanParser::parseReadRealWithLocalFile(const substrait::ReadRel & rel) { assert(rel.has_local_files()); assert(rel.has_base_schema()); @@ -38,7 +36,7 @@ DB::BatchParquetFileSourcePtr dbms::SerializedPlanParser::parseReadRealWithLocal return std::make_shared(files_info, parseNameStruct(rel.base_schema())); } -DB::Block dbms::SerializedPlanParser::parseNameStruct(const io::substrait::Type_NamedStruct & struct_) +DB::Block dbms::SerializedPlanParser::parseNameStruct(const substrait::NamedStruct & struct_) { auto internal_cols = std::make_unique>(); internal_cols->reserve(struct_.names_size()); @@ -51,7 +49,7 @@ DB::Block dbms::SerializedPlanParser::parseNameStruct(const io::substrait::Type_ } return DB::Block(*std::move(internal_cols)); } -DB::DataTypePtr dbms::SerializedPlanParser::parseType(const io::substrait::Type & type) +DB::DataTypePtr dbms::SerializedPlanParser::parseType(const substrait::Type & type) { auto & factory = DB::DataTypeFactory::instance(); if (type.has_bool_() || type.has_i8()) @@ -91,23 +89,24 @@ DB::DataTypePtr dbms::SerializedPlanParser::parseType(const io::substrait::Type throw std::runtime_error("doesn't support type " + type.DebugString()); } } -DB::QueryPlanPtr dbms::SerializedPlanParser::parse(std::unique_ptr plan) +DB::QueryPlanPtr dbms::SerializedPlanParser::parse(std::unique_ptr plan) { - if (plan->mappings_size() > 0) + if (plan->extensions_size() > 0) { - for (const auto& mapping : plan->mappings()) + for (const auto& extension : plan->extensions()) { - if (mapping.has_function_mapping()) + if (extension.has_extension_function()) { - this->function_mapping.emplace(std::to_string(mapping.function_mapping().function_id().id()), mapping.function_mapping().name()); + this->function_mapping.emplace(std::to_string(extension.extension_function().function_anchor()), extension.extension_function().name()); } } } - if (plan->relations_size() == 1) { - auto rel = plan->relations().at(0); - return parseOp(rel); + + auto root_rel = plan->relations().at(0); + assert(root_rel.has_root() && "must have root rel!"); + return parseOp(root_rel.root().input()); } else { @@ -115,9 +114,9 @@ DB::QueryPlanPtr dbms::SerializedPlanParser::parse(std::unique_ptr(blockToNameAndTypeList(input.header)); @@ -226,7 +225,7 @@ DB::QueryPlanStepPtr dbms::SerializedPlanParser::parseAggregate(DB::QueryPlan & { const auto& measure = rel.measures(i); DB::AggregateDescription agg; - auto function_name = this->function_mapping.at(std::to_string(measure.measure().id().id())); + auto function_name = this->function_mapping.at(std::to_string(measure.measure().function_reference())); agg.column_name = function_name +"(" + measure_names.at(i) + ")"; agg.arguments = DB::ColumnNumbers{plan.getCurrentDataStream().header.getPositionByName(measure_names.at(i))}; agg.argument_names = 
DB::Names{measure_names.at(i)}; @@ -271,7 +270,7 @@ void join(DB::ActionsDAG::NodeRawConstPtrs v, char c, std::string & s) DB::ActionsDAGPtr dbms::SerializedPlanParser::parseFunction( - const DataStream & input, const io::substrait::Expression & rel, std::string & result_name, DB::ActionsDAGPtr actions_dag, bool keep_result) + const DataStream & input, const substrait::Expression & rel, std::string & result_name, DB::ActionsDAGPtr actions_dag, bool keep_result) { assert(rel.has_scalar_function() && "the root of expression should be a scalar function"); const auto & scalar_function = rel.scalar_function(); @@ -293,7 +292,7 @@ DB::ActionsDAGPtr dbms::SerializedPlanParser::parseFunction( args.emplace_back(parseArgument(actions_dag, arg)); } } - auto function_name = this->function_mapping.at(std::to_string(rel.scalar_function().id().id())); + auto function_name = this->function_mapping.at(std::to_string(rel.scalar_function().function_reference())); assert(SCALAR_FUNCTIONS.contains(function_name) && ("doesn't support function " + function_name).c_str()); auto function_builder = DB::FunctionFactory::instance().get(SCALAR_FUNCTIONS.at(function_name), this->context); std::string args_name; @@ -305,34 +304,34 @@ DB::ActionsDAGPtr dbms::SerializedPlanParser::parseFunction( return actions_dag; } -const DB::ActionsDAG::Node * dbms::SerializedPlanParser::parseArgument(DB::ActionsDAGPtr action_dag, const io::substrait::Expression & rel) +const DB::ActionsDAG::Node * dbms::SerializedPlanParser::parseArgument(DB::ActionsDAGPtr action_dag, const substrait::Expression & rel) { switch (rel.rex_type_case()) { - case io::substrait::Expression::RexTypeCase::kLiteral: + case substrait::Expression::RexTypeCase::kLiteral: { const auto & literal = rel.literal(); switch (literal.literal_type_case()) { - case io::substrait::Expression_Literal::kFp64: + case substrait::Expression_Literal::kFp64: { auto type = std::make_shared(); return &action_dag->addColumn(ColumnWithTypeAndName( type->createColumnConst(1, literal.fp64()), type, getUniqueName(std::to_string(literal.fp64())))); } - case io::substrait::Expression_Literal::kString: + case substrait::Expression_Literal::kString: { auto type = std::make_shared(); return &action_dag->addColumn( ColumnWithTypeAndName(type->createColumnConst(1, literal.string()), type, getUniqueName(literal.string()))); } - case io::substrait::Expression_Literal::kI32: + case substrait::Expression_Literal::kI32: { auto type = std::make_shared(); return &action_dag->addColumn(ColumnWithTypeAndName( type->createColumnConst(1, literal.i32()), type, getUniqueName(std::to_string(literal.i32())))); } - case io::substrait::Expression_Literal::kDate: + case substrait::Expression_Literal::kDate: { auto type = std::make_shared(); @@ -343,7 +342,7 @@ const DB::ActionsDAG::Node * dbms::SerializedPlanParser::parseArgument(DB::Actio throw std::runtime_error("unsupported constant type " + std::to_string(literal.literal_type_case())); } } - case io::substrait::Expression::RexTypeCase::kSelection: + case substrait::Expression::RexTypeCase::kSelection: { if (!rel.selection().has_direct_reference() || !rel.selection().direct_reference().has_struct_field()) { @@ -360,7 +359,7 @@ const DB::ActionsDAG::Node * dbms::SerializedPlanParser::parseArgument(DB::Actio DB::QueryPlanPtr dbms::SerializedPlanParser::parse(std::string & plan) { - auto plan_ptr = std::make_unique(); + auto plan_ptr = std::make_unique(); plan_ptr->ParseFromString(plan); return parse(std::move(plan_ptr)); } diff --git 
a/utils/local-engine/Parser/SerializedPlanParser.h b/utils/local-engine/Parser/SerializedPlanParser.h index 620730dc3a48..a8ce2aa243e5 100644 --- a/utils/local-engine/Parser/SerializedPlanParser.h +++ b/utils/local-engine/Parser/SerializedPlanParser.h @@ -1,6 +1,6 @@ #pragma once -#include +#include #include #include #include @@ -75,19 +75,19 @@ class SerializedPlanParser SerializedPlanParser(const ContextPtr & context); static void initFunctionEnv(); DB::QueryPlanPtr parse(std::string& plan); - DB::QueryPlanPtr parse(std::unique_ptr plan); + DB::QueryPlanPtr parse(std::unique_ptr plan); - DB::BatchParquetFileSourcePtr parseReadRealWithLocalFile(const io::substrait::ReadRel& rel); - DB::Block parseNameStruct(const io::substrait::Type_NamedStruct& struct_); - DB::DataTypePtr parseType(const io::substrait::Type& type); + DB::BatchParquetFileSourcePtr parseReadRealWithLocalFile(const substrait::ReadRel& rel); + DB::Block parseNameStruct(const substrait::NamedStruct& struct_); + DB::DataTypePtr parseType(const substrait::Type& type); static ContextPtr global_context; private: static DB::NamesAndTypesList blockToNameAndTypeList(const DB::Block & header); - DB::QueryPlanPtr parseOp(const io::substrait::Rel &rel); - DB::ActionsDAGPtr parseFunction(const DataStream & input, const io::substrait::Expression &rel, std::string & result_name, DB::ActionsDAGPtr actions_dag = nullptr, bool keep_result = false); - DB::QueryPlanStepPtr parseAggregate(DB::QueryPlan & plan, const io::substrait::AggregateRel &rel); - const DB::ActionsDAG::Node * parseArgument(DB::ActionsDAGPtr action_dag, const io::substrait::Expression &rel); + DB::QueryPlanPtr parseOp(const substrait::Rel &rel); + DB::ActionsDAGPtr parseFunction(const DataStream & input, const substrait::Expression &rel, std::string & result_name, DB::ActionsDAGPtr actions_dag = nullptr, bool keep_result = false); + DB::QueryPlanStepPtr parseAggregate(DB::QueryPlan & plan, const substrait::AggregateRel &rel); + const DB::ActionsDAG::Node * parseArgument(DB::ActionsDAGPtr action_dag, const substrait::Expression &rel); std::string getUniqueName(std::string name) { return name + "_" + std::to_string(name_no++); diff --git a/utils/local-engine/Substrait/CMakeLists.txt b/utils/local-engine/Substrait/CMakeLists.txt deleted file mode 100644 index 69fe6d8d85f2..000000000000 --- a/utils/local-engine/Substrait/CMakeLists.txt +++ /dev/null @@ -1,9 +0,0 @@ -set(protobuf_generate_PROTOC_OUT_DIR "${ClickHouse_SOURCE_DIR}/utils/local-engine/Substrait") -file(GLOB PROTOBUF_DEFINITION_FILES "${ClickHouse_SOURCE_DIR}/utils/local-engine/Substrait/*.proto") -include_directories(${Protobuf_INCLUDE_DIRS}) -include_directories(${CMAKE_CURRENT_BINARY_DIR}) -PROTOBUF_GENERATE_CPP(SUBSTRAIT_SRCS SUBSTRAIT_HEADERS ${PROTOBUF_DEFINITION_FILES}) -add_library(substrait ${SUBSTRAIT_SRCS}) -target_include_directories(substrait PRIVATE ${PROTOBUF_INCLUDE_DIR}) -target_link_libraries(substrait libprotobuf) - diff --git a/utils/local-engine/Substrait/extensions/extensions.proto b/utils/local-engine/Substrait/extensions/extensions.proto deleted file mode 100644 index 3e8450b5c127..000000000000 --- a/utils/local-engine/Substrait/extensions/extensions.proto +++ /dev/null @@ -1,81 +0,0 @@ -syntax = "proto3"; - -package substrait.extensions; - -option java_multiple_files = true; -option java_package = "io.substrait.proto"; -option csharp_namespace = "Substrait.Protobuf"; - -import "google/protobuf/any.proto"; - -message SimpleExtensionURI { - // A surrogate key used in the context of a single 
plan used to reference the - // URI associated with an extension. - uint32 extension_uri_anchor = 1; - - // The URI where this extension YAML can be retrieved. This is the "namespace" - // of this extension. - string uri = 2; -} - -// Describes a mapping between a specific extension entity and the uri where -// that extension can be found. -message SimpleExtensionDeclaration { - - oneof mapping_type { - ExtensionType extension_type = 1; - ExtensionTypeVariation extension_type_variation = 2; - ExtensionFunction extension_function = 3; - } - - // Describes a Type - message ExtensionType { - // references the extension_uri_anchor defined for a specific extension URI. - uint32 extension_uri_reference = 1; - - // A surrogate key used in the context of a single plan to reference a - // specific extension type - uint32 type_anchor = 2; - - // the name of the type in the defined extension YAML. - string name = 3; - } - - message ExtensionTypeVariation { - // references the extension_uri_anchor defined for a specific extension URI. - uint32 extension_uri_reference = 1; - - // A surrogate key used in the context of a single plan to reference a - // specific type variation - uint32 type_variation_anchor = 2; - - // the name of the type in the defined extension YAML. - string name = 3; - } - - message ExtensionFunction { - // references the extension_uri_anchor defined for a specific extension URI. - uint32 extension_uri_reference = 1; - - // A surrogate key used in the context of a single plan to reference a - // specific function - uint32 function_anchor = 2; - - // A simple name if there is only one impl for the function within the YAML. - // A compound name, referencing that includes type short names if there is - // more than one impl per name in the YAML. - string name = 3; - } -} - -// A generic object that can be used to embed additional extension information -// into the serialized substrait plan. -message AdvancedExtension { - - // An optimization is helpful information that don't influence semantics. May - // be ignored by a consumer. - google.protobuf.Any optimization = 1; - - // An enhancement alter semantics. Cannot be ignored by a consumer. 
- google.protobuf.Any enhancement = 2; -} diff --git a/utils/local-engine/proto/CMakeLists.txt b/utils/local-engine/proto/CMakeLists.txt new file mode 100644 index 000000000000..124200dde4df --- /dev/null +++ b/utils/local-engine/proto/CMakeLists.txt @@ -0,0 +1,28 @@ +file(GLOB protobuf_files + substrait/*.proto + substrait/extensions/*.proto + ) +FOREACH(FIL ${protobuf_files}) + file(RELATIVE_PATH FIL_RELATIVE ${ClickHouse_SOURCE_DIR}/utils/local-engine/proto/ ${FIL}) + string(REGEX REPLACE "\\.proto" "" FILE_NAME ${FIL_RELATIVE}) + LIST(APPEND SUBSTRAIT_SRCS "${CMAKE_CURRENT_BINARY_DIR}/${FILE_NAME}.pb.cc") + LIST(APPEND SUBSTRAIT_HEADERS "${CMAKE_CURRENT_BINARY_DIR}/${FILE_NAME}.pb.h") +ENDFOREACH() + +add_custom_target( + generate_substrait + COMMAND protobuf::protoc -I${CMAKE_CURRENT_SOURCE_DIR} -I${ClickHouse_SOURCE_DIR}/contrib/protobuf/src --cpp_out=${CMAKE_CURRENT_BINARY_DIR}/ ${protobuf_files} +# ARGS + DEPENDS protobuf::protoc + COMMENT "Running cpp protocol buffer compiler" + VERBATIM ) + +include_directories(${Protobuf_INCLUDE_DIRS}) + +set_source_files_properties(${SUBSTRAIT_SRCS} PROPERTIES GENERATED TRUE) +add_library(substrait ${SUBSTRAIT_SRCS}) +add_dependencies(substrait generate_substrait) +target_include_directories(substrait PRIVATE ${PROTOBUF_INCLUDE_DIR}) +target_include_directories(substrait PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/) +target_link_libraries(substrait libprotobuf) + diff --git a/utils/local-engine/Substrait/capabilities.proto b/utils/local-engine/proto/substrait/capabilities.proto similarity index 100% rename from utils/local-engine/Substrait/capabilities.proto rename to utils/local-engine/proto/substrait/capabilities.proto diff --git a/utils/local-engine/Substrait/expression.proto b/utils/local-engine/proto/substrait/expression.proto similarity index 100% rename from utils/local-engine/Substrait/expression.proto rename to utils/local-engine/proto/substrait/expression.proto diff --git a/utils/local-engine/Substrait/extensions.proto b/utils/local-engine/proto/substrait/extensions/extensions.proto similarity index 100% rename from utils/local-engine/Substrait/extensions.proto rename to utils/local-engine/proto/substrait/extensions/extensions.proto diff --git a/utils/local-engine/Substrait/function.proto b/utils/local-engine/proto/substrait/function.proto similarity index 100% rename from utils/local-engine/Substrait/function.proto rename to utils/local-engine/proto/substrait/function.proto diff --git a/utils/local-engine/Substrait/parameterized_types.proto b/utils/local-engine/proto/substrait/parameterized_types.proto similarity index 100% rename from utils/local-engine/Substrait/parameterized_types.proto rename to utils/local-engine/proto/substrait/parameterized_types.proto diff --git a/utils/local-engine/Substrait/plan.proto b/utils/local-engine/proto/substrait/plan.proto similarity index 100% rename from utils/local-engine/Substrait/plan.proto rename to utils/local-engine/proto/substrait/plan.proto diff --git a/utils/local-engine/Substrait/relations.proto b/utils/local-engine/proto/substrait/relations.proto similarity index 100% rename from utils/local-engine/Substrait/relations.proto rename to utils/local-engine/proto/substrait/relations.proto diff --git a/utils/local-engine/Substrait/type.proto b/utils/local-engine/proto/substrait/type.proto similarity index 100% rename from utils/local-engine/Substrait/type.proto rename to utils/local-engine/proto/substrait/type.proto diff --git a/utils/local-engine/Substrait/type_expressions.proto 
b/utils/local-engine/proto/substrait/type_expressions.proto similarity index 100% rename from utils/local-engine/Substrait/type_expressions.proto rename to utils/local-engine/proto/substrait/type_expressions.proto diff --git a/utils/local-engine/tests/gtest_local_engine.cpp b/utils/local-engine/tests/gtest_local_engine.cpp index abb9e256ddea..b8e1822fac4c 100644 --- a/utils/local-engine/tests/gtest_local_engine.cpp +++ b/utils/local-engine/tests/gtest_local_engine.cpp @@ -31,7 +31,7 @@ TEST(TestSelect, ReadRel) output.flush(); output.close(); - ASSERT_TRUE(plan->relations(0).has_read()); + ASSERT_TRUE(plan->relations(0).root().input().has_read()); ASSERT_EQ(plan->relations_size(), 1); std::cout << "start execute" < Date: Thu, 30 Dec 2021 05:34:37 +0000 Subject: [PATCH 267/472] fix read rel --- utils/local-engine/Builder/SerializedPlanBuilder.cpp | 2 +- utils/local-engine/Parser/SerializedPlanParser.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/utils/local-engine/Builder/SerializedPlanBuilder.cpp b/utils/local-engine/Builder/SerializedPlanBuilder.cpp index c427ab62840b..8f8666618978 100644 --- a/utils/local-engine/Builder/SerializedPlanBuilder.cpp +++ b/utils/local-engine/Builder/SerializedPlanBuilder.cpp @@ -126,7 +126,7 @@ SerializedPlanBuilder & SerializedPlanBuilder::read(std::string path, SchemaPtr { substrait::Rel * rel = new substrait::Rel(); auto * read = rel->mutable_read(); - read->mutable_local_files()->add_items()->set_uri_path(path); + read->mutable_local_files()->add_items()->set_uri_file(path); read->set_allocated_base_schema(schema); setInputToPrev(rel); this->prev_rel = rel; diff --git a/utils/local-engine/Parser/SerializedPlanParser.cpp b/utils/local-engine/Parser/SerializedPlanParser.cpp index 068eaeb6cae8..e3d744ccd374 100644 --- a/utils/local-engine/Parser/SerializedPlanParser.cpp +++ b/utils/local-engine/Parser/SerializedPlanParser.cpp @@ -31,7 +31,7 @@ DB::BatchParquetFileSourcePtr dbms::SerializedPlanParser::parseReadRealWithLocal auto files_info = std::make_shared(); for (const auto & item : rel.local_files().items()) { - files_info->files.push_back(item.uri_path()); + files_info->files.push_back(item.uri_file()); } return std::make_shared(files_info, parseNameStruct(rel.base_schema())); } From 6c04e84508e40533df71feaa459aef6e84f4da73 Mon Sep 17 00:00:00 2001 From: "neng.liu" Date: Thu, 30 Dec 2021 09:58:49 +0000 Subject: [PATCH 268/472] add rpm config --- utils/local-engine/CMakeLists.txt | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/utils/local-engine/CMakeLists.txt b/utils/local-engine/CMakeLists.txt index af5a02a85db0..748deb3bcea0 100644 --- a/utils/local-engine/CMakeLists.txt +++ b/utils/local-engine/CMakeLists.txt @@ -48,7 +48,7 @@ else () endif() set (CLICKHOUSE_SERVER_LINK - PRIVATE + PUBLIC # dbms clickhouse_aggregate_functions clickhouse_common_io @@ -106,6 +106,28 @@ set_property(TARGET ${LOCALENGINE_SHARED_LIB} PROPERTY POSITION_INDEPENDENT_CODE add_dependencies(${LOCALENGINE_SHARED_LIB} local_engine_headers) target_link_libraries(${LOCALENGINE_SHARED_LIB} ${CLICKHOUSE_SERVER_LINK} ) +install(TARGETS ${LOCALENGINE_SHARED_LIB} + RUNTIME DESTINATION bin + LIBRARY DESTINATION lib + ARCHIVE DESTINATION lib + PUBLIC_HEADER DESTINATION include + ) + +install(TARGETS ${LOCALENGINE_SHARED_LIB} RUNTIME_DEPENDENCY_SET myset) +install(RUNTIME_DEPENDENCY_SET myset + DIRECTORIES lib + ) + +set(CPACK_PACKAGE_VERSION 0.1.0) +set(CPACK_GENERATOR "RPM") +set(CPACK_PACKAGE_NAME "local_engine_jni") 
+set(CPACK_PACKAGE_RELEASE 1) +set(CPACK_PACKAGE_CONTACT "neng.liu@kyligence.io") +set(CPACK_PACKAGE_VENDOR "Kyligence") +set(CPACK_PACKAGING_INSTALL_PREFIX ${CMAKE_INSTALL_PREFIX}) +set(CPACK_PACKAGE_FILE_NAME "${CPACK_PACKAGE_NAME}-${CPACK_PACKAGE_VERSION}-${CPACK_PACKAGE_RELEASE}.${CMAKE_SYSTEM_PROCESSOR}") +include(CPack) + if (ENABLE_TESTS) add_subdirectory(tests) endif () From e97fa5c290d3689c5ae62d2ab3759c1535d0de5c Mon Sep 17 00:00:00 2001 From: "neng.liu" Date: Thu, 30 Dec 2021 10:18:12 +0000 Subject: [PATCH 269/472] add rpm config --- utils/local-engine/CMakeLists.txt | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/utils/local-engine/CMakeLists.txt b/utils/local-engine/CMakeLists.txt index 748deb3bcea0..1dc70ba63beb 100644 --- a/utils/local-engine/CMakeLists.txt +++ b/utils/local-engine/CMakeLists.txt @@ -107,10 +107,7 @@ add_dependencies(${LOCALENGINE_SHARED_LIB} local_engine_headers) target_link_libraries(${LOCALENGINE_SHARED_LIB} ${CLICKHOUSE_SERVER_LINK} ) install(TARGETS ${LOCALENGINE_SHARED_LIB} - RUNTIME DESTINATION bin LIBRARY DESTINATION lib - ARCHIVE DESTINATION lib - PUBLIC_HEADER DESTINATION include ) install(TARGETS ${LOCALENGINE_SHARED_LIB} RUNTIME_DEPENDENCY_SET myset) @@ -122,6 +119,7 @@ set(CPACK_PACKAGE_VERSION 0.1.0) set(CPACK_GENERATOR "RPM") set(CPACK_PACKAGE_NAME "local_engine_jni") set(CPACK_PACKAGE_RELEASE 1) +set(CPACK_CMAKE_GENERATOR Ninja) set(CPACK_PACKAGE_CONTACT "neng.liu@kyligence.io") set(CPACK_PACKAGE_VENDOR "Kyligence") set(CPACK_PACKAGING_INSTALL_PREFIX ${CMAKE_INSTALL_PREFIX}) From 72348edd58f4fba9c9b0ff8f65e4a405fd8621cc Mon Sep 17 00:00:00 2001 From: "neng.liu" Date: Tue, 4 Jan 2022 03:44:43 +0000 Subject: [PATCH 270/472] add rpm config --- utils/local-engine/CMakeLists.txt | 190 +++++++++++++++++++++++++++++- 1 file changed, 188 insertions(+), 2 deletions(-) diff --git a/utils/local-engine/CMakeLists.txt b/utils/local-engine/CMakeLists.txt index 1dc70ba63beb..a6ded28d0e54 100644 --- a/utils/local-engine/CMakeLists.txt +++ b/utils/local-engine/CMakeLists.txt @@ -110,10 +110,196 @@ install(TARGETS ${LOCALENGINE_SHARED_LIB} LIBRARY DESTINATION lib ) -install(TARGETS ${LOCALENGINE_SHARED_LIB} RUNTIME_DEPENDENCY_SET myset) -install(RUNTIME_DEPENDENCY_SET myset +#install(TARGETS ${LOCALENGINE_SHARED_LIB} RUNTIME_DEPENDENCY_SET myset) +#install(RUNTIME_DEPENDENCY_SET myset +# DIRECTORIES lib +# POST_INCLUDE_REGEXES ".*" +# POST_INCLUDE_FILES ".*" +# POST_EXCLUDE_FILES_STRICT "nononono" +# ) +install(TARGETS ${LOCALENGINE_SHARED_LIB} RUNTIME_DEPENDENCIES DIRECTORIES lib ) +install( FILES + ${CMAKE_CURRENT_BINARY_DIR}/../../src/AggregateFunctions/libclickhouse_aggregate_functions.so + ${CMAKE_CURRENT_BINARY_DIR}/../../src/Functions/libclickhouse_functions.so + ${CMAKE_CURRENT_BINARY_DIR}/../../src/Storages/System/libclickhouse_storages_system.so + ${CMAKE_CURRENT_BINARY_DIR}/../../utils/local-engine/proto/libsubstrait.so + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/base64-cmake/libbase64.so + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/libfarmhash/libfarmhash.so + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/fastops-cmake/libfastops.so + ${CMAKE_CURRENT_BINARY_DIR}/../../src/Dictionaries/libclickhouse_dictionaries.so + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/consistent-hashing/libconsistent-hashing.so + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/libmetrohash/libmetrohash.so + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/murmurhash/libmurmurhash.so + ${CMAKE_CURRENT_BINARY_DIR}/../../src/libdbms.so + 
${CMAKE_CURRENT_BINARY_DIR}/../../src/libclickhouse_access.so + ${CMAKE_CURRENT_BINARY_DIR}/../../src/libclickhouse_core.so + ${CMAKE_CURRENT_BINARY_DIR}/../../src/libclickhouse_core_mysql.so + ${CMAKE_CURRENT_BINARY_DIR}/../../src/libclickhouse_compression.so + ${CMAKE_CURRENT_BINARY_DIR}/../../src/libclickhouse_datastreams.so + ${CMAKE_CURRENT_BINARY_DIR}/../../src/libclickhouse_datatypes.so + ${CMAKE_CURRENT_BINARY_DIR}/../../src/libclickhouse_datatypes_serializations.so + ${CMAKE_CURRENT_BINARY_DIR}/../../src/libclickhouse_databases.so + ${CMAKE_CURRENT_BINARY_DIR}/../../src/libclickhouse_databases_mysql.so + ${CMAKE_CURRENT_BINARY_DIR}/../../src/libclickhouse_disks.so + ${CMAKE_CURRENT_BINARY_DIR}/../../src/libclickhouse_interpreters.so + ${CMAKE_CURRENT_BINARY_DIR}/../../src/libclickhouse_interpreters_mysql.so + ${CMAKE_CURRENT_BINARY_DIR}/../../src/libclickhouse_interpreters_clusterproxy.so + ${CMAKE_CURRENT_BINARY_DIR}/../../src/libclickhouse_interpreters_jit.so + ${CMAKE_CURRENT_BINARY_DIR}/../../src/libclickhouse_columns.so + ${CMAKE_CURRENT_BINARY_DIR}/../../src/libclickhouse_storages.so + ${CMAKE_CURRENT_BINARY_DIR}/../../src/libclickhouse_storages_mysql.so + ${CMAKE_CURRENT_BINARY_DIR}/../../src/libclickhouse_storages_distributed.so + ${CMAKE_CURRENT_BINARY_DIR}/../../src/libclickhouse_storages_mergetree.so + ${CMAKE_CURRENT_BINARY_DIR}/../../src/libclickhouse_storages_liveview.so + ${CMAKE_CURRENT_BINARY_DIR}/../../src/libclickhouse_client.so + ${CMAKE_CURRENT_BINARY_DIR}/../../src/libclickhouse_bridge.so + ${CMAKE_CURRENT_BINARY_DIR}/../../src/libclickhouse_server.so + ${CMAKE_CURRENT_BINARY_DIR}/../../src/libclickhouse_server_http.so + ${CMAKE_CURRENT_BINARY_DIR}/../../src/libclickhouse_formats.so + ${CMAKE_CURRENT_BINARY_DIR}/../../src/libclickhouse_processors.so + ${CMAKE_CURRENT_BINARY_DIR}/../../src/libclickhouse_processors_executors.so + ${CMAKE_CURRENT_BINARY_DIR}/../../src/libclickhouse_processors_formats.so + ${CMAKE_CURRENT_BINARY_DIR}/../../src/libclickhouse_processors_formats_impl.so + ${CMAKE_CURRENT_BINARY_DIR}/../../src/libclickhouse_processors_transforms.so + ${CMAKE_CURRENT_BINARY_DIR}/../../src/libclickhouse_processors_sources.so + ${CMAKE_CURRENT_BINARY_DIR}/../../src/libclickhouse_processors_merges.so + ${CMAKE_CURRENT_BINARY_DIR}/../../src/libclickhouse_processors_merges_algorithms.so + ${CMAKE_CURRENT_BINARY_DIR}/../../src/libclickhouse_processors_queryplan.so + ${CMAKE_CURRENT_BINARY_DIR}/../../src/libclickhouse_processors_queryplan_optimizations.so + ${CMAKE_CURRENT_BINARY_DIR}/../../src/libclickhouse_coordination.so + ${CMAKE_CURRENT_BINARY_DIR}/../../src/Dictionaries/Embedded/libclickhouse_dictionaries_embedded.so + ${CMAKE_CURRENT_BINARY_DIR}/../../src/Parsers/libclickhouse_parsers.so + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/nuraft-cmake/libnuraft.so + ${CMAKE_CURRENT_BINARY_DIR}/../../base/mysqlxx/libmysqlxx.so + ${CMAKE_CURRENT_BINARY_DIR}/../../src/Server/grpc_protos/libclickhouse_grpc_protos.so + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/grpc/libgrpc++.so.1 + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/grpc/libgrpc.so.13 + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/abseil-cpp/absl/hash/libabsl_hash.so + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/abseil-cpp/absl/types/libabsl_bad_variant_access.so + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/abseil-cpp/absl/hash/libabsl_city.so + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/abseil-cpp/absl/container/libabsl_raw_hash_set.so + 
${CMAKE_CURRENT_BINARY_DIR}/../../contrib/abseil-cpp/absl/container/libabsl_hashtablez_sampler.so + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/abseil-cpp/absl/base/libabsl_exponential_biased.so + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/grpc/third_party/cares/cares/lib/libcares.so.2 + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/abseil-cpp/absl/status/libabsl_status.so + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/abseil-cpp/absl/types/libabsl_bad_optional_access.so + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/abseil-cpp/absl/strings/libabsl_cord.so + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/grpc/libaddress_sorting.so.13 + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/grpc/libupb.so.13 + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/grpc/libgpr.so.13 + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/abseil-cpp/absl/synchronization/libabsl_synchronization.so + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/abseil-cpp/absl/debugging/libabsl_stacktrace.so + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/abseil-cpp/absl/debugging/libabsl_symbolize.so + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/abseil-cpp/absl/debugging/libabsl_debugging_internal.so + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/abseil-cpp/absl/debugging/libabsl_demangle_internal.so + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/abseil-cpp/absl/synchronization/libabsl_graphcycles_internal.so + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/abseil-cpp/absl/time/libabsl_time.so + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/abseil-cpp/absl/time/libabsl_civil_time.so + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/abseil-cpp/absl/time/libabsl_time_zone.so + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/abseil-cpp/absl/base/libabsl_malloc_internal.so + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/abseil-cpp/absl/strings/libabsl_str_format_internal.so + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/abseil-cpp/absl/strings/libabsl_strings.so + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/abseil-cpp/absl/strings/libabsl_strings_internal.so + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/abseil-cpp/absl/numeric/libabsl_int128.so + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/abseil-cpp/absl/base/libabsl_throw_delegate.so + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/abseil-cpp/absl/base/libabsl_base.so + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/abseil-cpp/absl/base/libabsl_raw_logging_internal.so + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/abseil-cpp/absl/base/libabsl_dynamic_annotations.so + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/abseil-cpp/absl/base/libabsl_log_severity.so + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/abseil-cpp/absl/base/libabsl_spinlock_wait.so + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/s2geometry-cmake/libs2.so + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/amqpcpp-cmake/libamqp-cpp.so + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/sqlite-cmake/libsqlite.so + ${CMAKE_CURRENT_BINARY_DIR}/../../libcassandra.so.2 + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/libuv/libuv.so.1 + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/libpqxx-cmake/liblibpqxx.so + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/libpq-cmake/liblibpq.so + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/arrow-cmake/libarrow_shared.so + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/lz4-cmake/liblz4.so + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/snappy/libsnappy.so.1 + ${CMAKE_CURRENT_BINARY_DIR}/../../src/libclickhouse_common_io.so + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/double-conversion-cmake/libdouble-conversion.so + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/dragonbox-cmake/libdragonbox_to_chars.so + 
${CMAKE_CURRENT_BINARY_DIR}/../../contrib/re2/libre2.so.9 + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/re2_st/libre2_st.so + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/boost-cmake/lib_boost_program_options.so + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/croaring-cmake/libroaring.so + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/zstd-cmake/libzstd.so + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/xz/liblzma.so.5 + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/aws-s3-cmake/libaws_s3.so + ${CMAKE_CURRENT_BINARY_DIR}/../../base/common/libcommon.so + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/cityhash102/libcityhash.so + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/boost-cmake/lib_boost_system.so + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/poco-cmake/Net/SSL/lib_poco_net_ssl.so + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/poco-cmake/Net/lib_poco_net.so + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/poco-cmake/Crypto/lib_poco_crypto.so + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/boringssl-cmake/libssl.so + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/boringssl-cmake/libcrypto.so + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/poco-cmake/Util/lib_poco_util.so + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/poco-cmake/JSON/lib_poco_json.so + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/poco-cmake/JSON/lib_poco_json_pdjson.so + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/poco-cmake/XML/lib_poco_xml.so + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/poco-cmake/Foundation/lib_poco_foundation.so + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/zlib-ng/libz.so.1 + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/poco-cmake/XML/lib_poco_xml_expat.so + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/replxx-cmake/libreplxx.so + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/cctz-cmake/libcctz.so + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/fmtlib-cmake/libfmt.so + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/protobuf/libprotobuf.so.3.13.0.0 + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/libcxx-cmake/libcxx.so + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/libcxxabi-cmake/libcxxabi.so + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/libunwind-cmake/libunwind.so + ${CMAKE_CURRENT_BINARY_DIR}/../../src/Functions/divide/libdivide_impl.so + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/h3-cmake/libh3.so + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/simdjson-cmake/libsimdjson.so + ${CMAKE_CURRENT_BINARY_DIR}/../../src/Functions/URL/libclickhouse_functions_url.so + ${CMAKE_CURRENT_BINARY_DIR}/../../src/Functions/array/libclickhouse_functions_array.so + ${CMAKE_CURRENT_BINARY_DIR}/../../src/Functions/JSONPath/libclickhouse_functions_jsonpath.so + ${CMAKE_CURRENT_BINARY_DIR}/../../src/Functions/divide/libdivide_impl_sse2.so + ${CMAKE_CURRENT_BINARY_DIR}/../../src/Functions/divide/libdivide_impl_avx2.so + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/hyperscan-cmake/libhyperscan.so + ${CMAKE_CURRENT_BINARY_DIR}/../../src/Functions/GatherUtils/libclickhouse_functions_gatherutils.so + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/icu-cmake/libicui18n.so + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/icu-cmake/libicuuc.so + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/icu-cmake/libicudata.so + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/boost-cmake/lib_boost_filesystem.so + ${CMAKE_CURRENT_BINARY_DIR}/../../src/Common/ZooKeeper/libclickhouse_common_zookeeper.so + ${CMAKE_CURRENT_BINARY_DIR}/../../src/Common/StringUtils/libstring_utils.so + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/poco-cmake/Data/ODBC/lib_poco_data_odbc.so + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/poco-cmake/Redis/lib_poco_redis.so + 
${CMAKE_CURRENT_BINARY_DIR}/../../contrib/poco-cmake/MongoDB/lib_poco_mongodb.so + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/poco-cmake/Data/lib_poco_data.so + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/unixodbc-cmake/libunixodbc.so + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/cppkafka-cmake/libcppkafka.so + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/librdkafka-cmake/librdkafka.so + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/cyrus-sasl-cmake/libsasl2.so + ${CMAKE_CURRENT_BINARY_DIR}/../../src/Common/Config/libclickhouse_common_config.so + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/capnproto-cmake/libcapnpc.so + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/arrow-cmake/libparquet_shared.so + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/avro-cmake/libavrocpp.so.1.10.0-SNAPSHOT.0 + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/libhdfs3-cmake/libhdfs3.so + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/capnproto-cmake/libcapnp.so + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/capnproto-cmake/libkj.so + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/krb5-cmake/libkrb5.so + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/boost-cmake/lib_boost_context.so + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/boost-cmake/lib_boost_coroutine.so + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/flatbuffers/libflatbuffers.so.2 + ${CMAKE_CURRENT_BINARY_DIR}/../../base/widechar_width/libwidechar_width.so + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/libcpuid-cmake/libcpuid.so + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/brotli-cmake/libbrotli.so + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/aws-s3-cmake/libaws_s3_checksums.so + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/poco-cmake/Foundation/lib_poco_foundation_pcre.so + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/unixodbc-cmake/libltdl.so + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/yaml-cpp-cmake/libyaml-cpp.so + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/arrow-cmake/libthrift.so + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/boost-cmake/lib_boost_regex.so + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/boost-cmake/lib_boost_iostreams.so + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/libxml2-cmake/liblibxml2.so + DESTINATION lib) + + set(CPACK_PACKAGE_VERSION 0.1.0) set(CPACK_GENERATOR "RPM") From 76b662333bf5b71ab099d0f3bf2151db89268903 Mon Sep 17 00:00:00 2001 From: "neng.liu" Date: Tue, 4 Jan 2022 07:40:37 +0000 Subject: [PATCH 271/472] add rpm config --- utils/local-engine/CMakeLists.txt | 22 ++++++++++++++----- .../kyligence/jni/engine/LocalEngineTest.java | 3 ++- 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/utils/local-engine/CMakeLists.txt b/utils/local-engine/CMakeLists.txt index a6ded28d0e54..29fc4ecf1f09 100644 --- a/utils/local-engine/CMakeLists.txt +++ b/utils/local-engine/CMakeLists.txt @@ -110,12 +110,10 @@ install(TARGETS ${LOCALENGINE_SHARED_LIB} LIBRARY DESTINATION lib ) -#install(TARGETS ${LOCALENGINE_SHARED_LIB} RUNTIME_DEPENDENCY_SET myset) +#install(TARGETS ${LOCALENGINE_SHARED_LIB} RUNTIME_DEPENDENCY_SET myset +# LIBRARY DESTINATION lib) #install(RUNTIME_DEPENDENCY_SET myset -# DIRECTORIES lib -# POST_INCLUDE_REGEXES ".*" -# POST_INCLUDE_FILES ".*" -# POST_EXCLUDE_FILES_STRICT "nononono" +# LIBRARY DESTINATION lib # ) install(TARGETS ${LOCALENGINE_SHARED_LIB} RUNTIME_DEPENDENCIES DIRECTORIES lib @@ -175,7 +173,9 @@ install( FILES ${CMAKE_CURRENT_BINARY_DIR}/../../base/mysqlxx/libmysqlxx.so ${CMAKE_CURRENT_BINARY_DIR}/../../src/Server/grpc_protos/libclickhouse_grpc_protos.so ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/grpc/libgrpc++.so.1 + 
${CMAKE_CURRENT_BINARY_DIR}/../../contrib/grpc/libgrpc++.so.1.33.2 ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/grpc/libgrpc.so.13 + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/grpc/libgrpc.so.13.0.0 ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/abseil-cpp/absl/hash/libabsl_hash.so ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/abseil-cpp/absl/types/libabsl_bad_variant_access.so ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/abseil-cpp/absl/hash/libabsl_city.so @@ -183,12 +183,16 @@ install( FILES ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/abseil-cpp/absl/container/libabsl_hashtablez_sampler.so ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/abseil-cpp/absl/base/libabsl_exponential_biased.so ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/grpc/third_party/cares/cares/lib/libcares.so.2 + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/grpc/third_party/cares/cares/lib/libcares.so.2.3.0 ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/abseil-cpp/absl/status/libabsl_status.so ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/abseil-cpp/absl/types/libabsl_bad_optional_access.so ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/abseil-cpp/absl/strings/libabsl_cord.so ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/grpc/libaddress_sorting.so.13 + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/grpc/libaddress_sorting.so.13.0.0 ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/grpc/libupb.so.13 + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/grpc/libupb.so.13.0.0 ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/grpc/libgpr.so.13 + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/grpc/libgpr.so.13.0.0 ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/abseil-cpp/absl/synchronization/libabsl_synchronization.so ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/abseil-cpp/absl/debugging/libabsl_stacktrace.so ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/abseil-cpp/absl/debugging/libabsl_symbolize.so @@ -213,21 +217,26 @@ install( FILES ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/amqpcpp-cmake/libamqp-cpp.so ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/sqlite-cmake/libsqlite.so ${CMAKE_CURRENT_BINARY_DIR}/../../libcassandra.so.2 + ${CMAKE_CURRENT_BINARY_DIR}/../../libcassandra.so.2.15.3 ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/libuv/libuv.so.1 + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/libuv/libuv.so.1.0.0 ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/libpqxx-cmake/liblibpqxx.so ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/libpq-cmake/liblibpq.so ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/arrow-cmake/libarrow_shared.so ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/lz4-cmake/liblz4.so ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/snappy/libsnappy.so.1 + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/snappy/libsnappy.so.1.1.7 ${CMAKE_CURRENT_BINARY_DIR}/../../src/libclickhouse_common_io.so ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/double-conversion-cmake/libdouble-conversion.so ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/dragonbox-cmake/libdragonbox_to_chars.so ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/re2/libre2.so.9 + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/re2/libre2.so.9.0.0 ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/re2_st/libre2_st.so ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/boost-cmake/lib_boost_program_options.so ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/croaring-cmake/libroaring.so ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/zstd-cmake/libzstd.so ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/xz/liblzma.so.5 + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/xz/liblzma.so.5.3.1 ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/aws-s3-cmake/libaws_s3.so ${CMAKE_CURRENT_BINARY_DIR}/../../base/common/libcommon.so 
${CMAKE_CURRENT_BINARY_DIR}/../../contrib/cityhash102/libcityhash.so @@ -243,6 +252,7 @@ install( FILES ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/poco-cmake/XML/lib_poco_xml.so ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/poco-cmake/Foundation/lib_poco_foundation.so ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/zlib-ng/libz.so.1 + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/zlib-ng/libz.so.1.2.11.zlib-ng ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/poco-cmake/XML/lib_poco_xml_expat.so ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/replxx-cmake/libreplxx.so ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/cctz-cmake/libcctz.so @@ -286,6 +296,7 @@ install( FILES ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/boost-cmake/lib_boost_context.so ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/boost-cmake/lib_boost_coroutine.so ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/flatbuffers/libflatbuffers.so.2 + ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/flatbuffers/libflatbuffers.so.2.0.0 ${CMAKE_CURRENT_BINARY_DIR}/../../base/widechar_width/libwidechar_width.so ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/libcpuid-cmake/libcpuid.so ${CMAKE_CURRENT_BINARY_DIR}/../../contrib/brotli-cmake/libbrotli.so @@ -309,6 +320,7 @@ set(CPACK_CMAKE_GENERATOR Ninja) set(CPACK_PACKAGE_CONTACT "neng.liu@kyligence.io") set(CPACK_PACKAGE_VENDOR "Kyligence") set(CPACK_PACKAGING_INSTALL_PREFIX ${CMAKE_INSTALL_PREFIX}) +set(CPACK_RPM_PACKAGE_AUTOREQPROV "no") set(CPACK_PACKAGE_FILE_NAME "${CPACK_PACKAGE_NAME}-${CPACK_PACKAGE_VERSION}-${CPACK_PACKAGE_RELEASE}.${CMAKE_SYSTEM_PROCESSOR}") include(CPack) diff --git a/utils/local-engine/java/src/test/java/io/kyligence/jni/engine/LocalEngineTest.java b/utils/local-engine/java/src/test/java/io/kyligence/jni/engine/LocalEngineTest.java index a926f99b8626..19771d11dfeb 100644 --- a/utils/local-engine/java/src/test/java/io/kyligence/jni/engine/LocalEngineTest.java +++ b/utils/local-engine/java/src/test/java/io/kyligence/jni/engine/LocalEngineTest.java @@ -18,7 +18,8 @@ public class LocalEngineTest { @Before public void setup() { System.out.println("start load"); - System.load("/home/kyligence/Documents/code/ClickHouse/cmake-build-release/utils/local-engine/liblocal_engine_jni.so"); +// System.setProperty("LD_LIBRARY_PATH" , "/usr/local/lib/"); + System.load("/usr/local/lib/liblocal_engine_jni.so"); System.out.println("load success"); } From f2ad0e32f40f1a57313e67fb15ed83747ecfa09a Mon Sep 17 00:00:00 2001 From: "neng.liu" Date: Wed, 5 Jan 2022 10:43:22 +0000 Subject: [PATCH 272/472] remove runtime dependency --- utils/local-engine/CMakeLists.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/utils/local-engine/CMakeLists.txt b/utils/local-engine/CMakeLists.txt index 29fc4ecf1f09..cb5ba8287a9e 100644 --- a/utils/local-engine/CMakeLists.txt +++ b/utils/local-engine/CMakeLists.txt @@ -115,9 +115,9 @@ install(TARGETS ${LOCALENGINE_SHARED_LIB} #install(RUNTIME_DEPENDENCY_SET myset # LIBRARY DESTINATION lib # ) -install(TARGETS ${LOCALENGINE_SHARED_LIB} RUNTIME_DEPENDENCIES - DIRECTORIES lib - ) +#install(TARGETS ${LOCALENGINE_SHARED_LIB} RUNTIME_DEPENDENCIES +# DIRECTORIES lib +# ) install( FILES ${CMAKE_CURRENT_BINARY_DIR}/../../src/AggregateFunctions/libclickhouse_aggregate_functions.so ${CMAKE_CURRENT_BINARY_DIR}/../../src/Functions/libclickhouse_functions.so From d8365fbdfc35fc188a6f594b46bcee08cc0c8a1f Mon Sep 17 00:00:00 2001 From: "neng.liu" Date: Mon, 10 Jan 2022 03:17:46 +0000 Subject: [PATCH 273/472] support equal to --- utils/local-engine/Builder/SerializedPlanBuilder.h | 6 ++++-- 
utils/local-engine/Parser/SerializedPlanParser.cpp | 5 +++-- utils/local-engine/Parser/SerializedPlanParser.h | 3 ++- utils/local-engine/tests/gtest_local_engine.cpp | 5 ++++- utils/local-engine/tests/testConfig.h.in | 2 +- 5 files changed, 14 insertions(+), 7 deletions(-) diff --git a/utils/local-engine/Builder/SerializedPlanBuilder.h b/utils/local-engine/Builder/SerializedPlanBuilder.h index 05cfe07fb64a..575a224e51ff 100644 --- a/utils/local-engine/Builder/SerializedPlanBuilder.h +++ b/utils/local-engine/Builder/SerializedPlanBuilder.h @@ -16,7 +16,8 @@ enum Function LESS_THAN, MULTIPLY, SUM, - TO_DATE + TO_DATE, + EQUAL_TO }; using SchemaPtr = substrait::NamedStruct *; @@ -33,7 +34,8 @@ class SerializedPlanBuilder .registerFunction(LESS_THAN, "LESS_THAN") .registerFunction(MULTIPLY, "MULTIPLY") .registerFunction(SUM, "SUM") - .registerFunction(TO_DATE, "TO_DATE"); + .registerFunction(TO_DATE, "TO_DATE") + .registerFunction(EQUAL_TO, "EQUAL_TO"); return *this; } SerializedPlanBuilder& registerFunction(int id, std::string name); diff --git a/utils/local-engine/Parser/SerializedPlanParser.cpp b/utils/local-engine/Parser/SerializedPlanParser.cpp index e3d744ccd374..23c31e2e83c0 100644 --- a/utils/local-engine/Parser/SerializedPlanParser.cpp +++ b/utils/local-engine/Parser/SerializedPlanParser.cpp @@ -22,7 +22,7 @@ #include #include #include - +#include DB::BatchParquetFileSourcePtr dbms::SerializedPlanParser::parseReadRealWithLocalFile(const substrait::ReadRel & rel) { @@ -31,7 +31,8 @@ DB::BatchParquetFileSourcePtr dbms::SerializedPlanParser::parseReadRealWithLocal auto files_info = std::make_shared(); for (const auto & item : rel.local_files().items()) { - files_info->files.push_back(item.uri_file()); + Poco::URI uri(item.uri_file()); + files_info->files.push_back(uri.getPath()); } return std::make_shared(files_info, parseNameStruct(rel.base_schema())); } diff --git a/utils/local-engine/Parser/SerializedPlanParser.h b/utils/local-engine/Parser/SerializedPlanParser.h index a8ce2aa243e5..92dfb8a3ef4a 100644 --- a/utils/local-engine/Parser/SerializedPlanParser.h +++ b/utils/local-engine/Parser/SerializedPlanParser.h @@ -66,7 +66,8 @@ static const std::map SCALAR_FUNCTIONS = { {"LESS_THAN", "less"}, {"MULTIPLY", "multiply"}, {"SUM", "sum"}, - {"TO_DATE", "toDate"} + {"TO_DATE", "toDate"}, + {"EQUAL_TO", "equals"} }; class SerializedPlanParser diff --git a/utils/local-engine/tests/gtest_local_engine.cpp b/utils/local-engine/tests/gtest_local_engine.cpp index b8e1822fac4c..8d7bf7458676 100644 --- a/utils/local-engine/tests/gtest_local_engine.cpp +++ b/utils/local-engine/tests/gtest_local_engine.cpp @@ -68,10 +68,13 @@ TEST(TestSelect, TestFilter) mul_exp, dbms::literal(5.0) }); + auto * type_0 = dbms::scalarFunction(dbms::EQUAL_TO, {dbms::selection(6), + dbms::literal("0")}); + auto * filter = dbms::scalarFunction(dbms::AND, {less_exp, type_0}); auto plan = plan_builder .registerSupportedFunctions() - .filter(less_exp) + .filter(filter) .read(TEST_DATA(/data/iris.parquet), std::move(schema)).build(); // ASSERT_TRUE(plan->relations(0).has_read()); ASSERT_EQ(plan->relations_size(), 1); diff --git a/utils/local-engine/tests/testConfig.h.in b/utils/local-engine/tests/testConfig.h.in index d7e6a8680b31..b68f7778bc79 100644 --- a/utils/local-engine/tests/testConfig.h.in +++ b/utils/local-engine/tests/testConfig.h.in @@ -1 +1 @@ -#define TEST_DATA(file) "@TEST_DATA_DIR@"#file \ No newline at end of file +#define TEST_DATA(file) "file://@TEST_DATA_DIR@"#file \ No newline at end of file From 
c1c365fe547bab9aa9a6f1e83a30e0e704877a0a Mon Sep 17 00:00:00 2001 From: "neng.liu" Date: Mon, 10 Jan 2022 06:26:43 +0000 Subject: [PATCH 274/472] fix ut fail --- utils/local-engine/tests/gtest_local_engine.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/utils/local-engine/tests/gtest_local_engine.cpp b/utils/local-engine/tests/gtest_local_engine.cpp index 8d7bf7458676..2261adf40c9d 100644 --- a/utils/local-engine/tests/gtest_local_engine.cpp +++ b/utils/local-engine/tests/gtest_local_engine.cpp @@ -7,7 +7,7 @@ #include #include #include -#include +//#include using namespace dbms; @@ -69,7 +69,7 @@ TEST(TestSelect, TestFilter) dbms::literal(5.0) }); auto * type_0 = dbms::scalarFunction(dbms::EQUAL_TO, {dbms::selection(6), - dbms::literal("0")}); + dbms::literal("类型0")}); auto * filter = dbms::scalarFunction(dbms::AND, {less_exp, type_0}); auto plan = plan_builder @@ -88,7 +88,7 @@ TEST(TestSelect, TestFilter) { std::cout << "fetch batch" << std::endl; local_engine::SparkRowInfoPtr spark_row_info = local_executor.next(); - ASSERT_EQ(spark_row_info->getNumRows(), 99); + ASSERT_EQ(spark_row_info->getNumRows(), 50); local_engine::SparkColumnToCHColumn converter; auto block = converter.convertCHColumnToSparkRow(*spark_row_info, local_executor.getHeader()); ASSERT_EQ(spark_row_info->getNumRows(), block->rows()); From 7e18c9a1f5ea13ffa84d4fac525b0096d9db51ee Mon Sep 17 00:00:00 2001 From: Zhichao Zhang Date: Thu, 13 Jan 2022 23:06:01 +0800 Subject: [PATCH 275/472] Fix start index --- .../Builder/SerializedPlanBuilder.h | 2 +- .../Parser/SerializedPlanParser.cpp | 4 +-- .../tests/benchmark_local_engine.cpp | 31 ++++++++++--------- .../local-engine/tests/gtest_local_engine.cpp | 25 +++++++++------ 4 files changed, 34 insertions(+), 28 deletions(-) diff --git a/utils/local-engine/Builder/SerializedPlanBuilder.h b/utils/local-engine/Builder/SerializedPlanBuilder.h index 575a224e51ff..4054f9f728e0 100644 --- a/utils/local-engine/Builder/SerializedPlanBuilder.h +++ b/utils/local-engine/Builder/SerializedPlanBuilder.h @@ -9,7 +9,7 @@ namespace dbms enum Function { - IS_NOT_NULL=1, + IS_NOT_NULL=0, GREATER_THAN_OR_EQUAL, AND, LESS_THAN_OR_EQUAL, diff --git a/utils/local-engine/Parser/SerializedPlanParser.cpp b/utils/local-engine/Parser/SerializedPlanParser.cpp index 23c31e2e83c0..6d6469369e69 100644 --- a/utils/local-engine/Parser/SerializedPlanParser.cpp +++ b/utils/local-engine/Parser/SerializedPlanParser.cpp @@ -145,7 +145,7 @@ DB::QueryPlanPtr dbms::SerializedPlanParser::parseOp(const substrait::Rel & rel) { if (expr.has_selection()) { - const auto * field = actions_dag->getInputs()[expr.selection().direct_reference().struct_field().field() - 1]; + const auto * field = actions_dag->getInputs()[expr.selection().direct_reference().struct_field().field()]; required_columns.emplace_back(DB::NameWithAlias (field->result_name, field->result_name)); } else @@ -349,7 +349,7 @@ const DB::ActionsDAG::Node * dbms::SerializedPlanParser::parseArgument(DB::Actio { throw std::runtime_error("Can only have direct struct references in selections"); } - const auto * field = action_dag->getInputs()[rel.selection().direct_reference().struct_field().field() - 1]; + const auto * field = action_dag->getInputs()[rel.selection().direct_reference().struct_field().field()]; return action_dag->tryFindInIndex(field->result_name); } default: diff --git a/utils/local-engine/tests/benchmark_local_engine.cpp b/utils/local-engine/tests/benchmark_local_engine.cpp index 7eea9b92c200..314c39cabd75 100644 --- 
a/utils/local-engine/tests/benchmark_local_engine.cpp +++ b/utils/local-engine/tests/benchmark_local_engine.cpp @@ -73,7 +73,8 @@ static void BM_SimpleAggregate(benchmark::State& state) { // .column("l_comment", "String") .build(); dbms::SerializedPlanBuilder plan_builder; - auto * measure = dbms::measureFunction(dbms::SUM, {dbms::selection(5)}); + // sum(l_quantity) + auto * measure = dbms::measureFunction(dbms::SUM, {dbms::selection(6)}); auto plan = plan_builder.registerSupportedFunctions().aggregate({}, {measure}).read("/home/kyligence/Documents/test-dataset/intel-gazelle-test-"+std::to_string(state.range(0))+".snappy.parquet", std::move(schema)).build(); dbms::SerializedPlanParser parser(SerializedPlanParser::global_context); auto query_plan = parser.parse(std::move(plan)); @@ -97,9 +98,9 @@ static void BM_TPCH_Q6(benchmark::State& state) { // .column("l_partkey", "I64") // .column("l_suppkey", "I64") // .column("l_linenumber", "I32") - .column("l_quantity", "FP64") - .column("l_extendedprice", "FP64") .column("l_discount", "FP64") + .column("l_extendedprice", "FP64") + .column("l_quantity", "FP64") // .column("l_tax", "FP64") // .column("l_returnflag", "String") // .column("l_linestatus", "String") @@ -111,13 +112,13 @@ static void BM_TPCH_Q6(benchmark::State& state) { // .column("l_comment", "String") .build(); dbms::SerializedPlanBuilder plan_builder; - auto *agg_mul = dbms::scalarFunction(dbms::MULTIPLY, {dbms::selection(2), dbms::selection(3)}); + auto *agg_mul = dbms::scalarFunction(dbms::MULTIPLY, {dbms::selection(1), dbms::selection(0)}); auto * measure1 = dbms::measureFunction(dbms::SUM, {agg_mul}); - auto * measure2 = dbms::measureFunction(dbms::SUM, {dbms::selection(2)}); - auto * measure3 = dbms::measureFunction(dbms::SUM, {dbms::selection(1)}); + auto * measure2 = dbms::measureFunction(dbms::SUM, {dbms::selection(1)}); + auto * measure3 = dbms::measureFunction(dbms::SUM, {dbms::selection(2)}); auto plan = plan_builder.registerSupportedFunctions() .aggregate({}, {measure1, measure2, measure3}) - .project({dbms::selection(1), dbms::selection(2), dbms::selection(3)}) + .project({dbms::selection(2), dbms::selection(1), dbms::selection(0)}) .filter(dbms::scalarFunction(dbms::AND, { dbms::scalarFunction(AND, { dbms::scalarFunction(AND, { @@ -125,20 +126,20 @@ static void BM_TPCH_Q6(benchmark::State& state) { dbms::scalarFunction(AND, { dbms::scalarFunction(AND, { dbms::scalarFunction(AND, { - scalarFunction(IS_NOT_NULL, {selection(4)}), - scalarFunction(IS_NOT_NULL, {selection(3)}) + scalarFunction(IS_NOT_NULL, {selection(3)}), + scalarFunction(IS_NOT_NULL, {selection(0)}) }), - scalarFunction(IS_NOT_NULL, {selection(1)}) + scalarFunction(IS_NOT_NULL, {selection(2)}) }), - dbms::scalarFunction(GREATER_THAN_OR_EQUAL, {selection(4), literalDate(8766)}) + dbms::scalarFunction(GREATER_THAN_OR_EQUAL, {selection(3), literalDate(8766)}) }), - scalarFunction(LESS_THAN, {selection(4), literalDate(9131)}) + scalarFunction(LESS_THAN, {selection(3), literalDate(9131)}) }), - scalarFunction(GREATER_THAN_OR_EQUAL, {selection(3), literal(0.05)}) + scalarFunction(GREATER_THAN_OR_EQUAL, {selection(0), literal(0.05)}) }), - scalarFunction(LESS_THAN_OR_EQUAL, {selection(3), literal(0.07)}) + scalarFunction(LESS_THAN_OR_EQUAL, {selection(0), literal(0.07)}) }), - scalarFunction(LESS_THAN, {selection(1), literal(24.0)}) + scalarFunction(LESS_THAN, {selection(2), literal(24.0)}) })) .read("/home/kyligence/Documents/test-dataset/intel-gazelle-test-"+std::to_string(state.range(0))+".snappy.parquet", 
std::move(schema)).build(); dbms::SerializedPlanParser parser(SerializedPlanParser::global_context); diff --git a/utils/local-engine/tests/gtest_local_engine.cpp b/utils/local-engine/tests/gtest_local_engine.cpp index 2261adf40c9d..c954fad6d4b6 100644 --- a/utils/local-engine/tests/gtest_local_engine.cpp +++ b/utils/local-engine/tests/gtest_local_engine.cpp @@ -53,6 +53,7 @@ bool inside_main=true; TEST(TestSelect, TestFilter) { dbms::SerializedSchemaBuilder schema_builder; + // sorted by key auto* schema = schema_builder .column("sepal_length", "FP64") .column("sepal_width", "FP64") @@ -61,15 +62,18 @@ TEST(TestSelect, TestFilter) .column("type", "I64").column("type_string", "String") .build(); dbms::SerializedPlanBuilder plan_builder; + // sepal_length * 0.8 auto * mul_exp = dbms::scalarFunction(dbms::MULTIPLY, - {dbms::selection(3), + {dbms::selection(2), dbms::literal(0.8)}); + // sepal_length * 0.8 < 4.0 auto * less_exp = dbms::scalarFunction(dbms::LESS_THAN, { mul_exp, - dbms::literal(5.0) + dbms::literal(4.0) }); - auto * type_0 = dbms::scalarFunction(dbms::EQUAL_TO, {dbms::selection(6), - dbms::literal("类型0")}); + // type_string = '类型1' + auto * type_0 = dbms::scalarFunction(dbms::EQUAL_TO, {dbms::selection(5), + dbms::literal("类型1")}); auto * filter = dbms::scalarFunction(dbms::AND, {less_exp, type_0}); auto plan = plan_builder @@ -88,7 +92,7 @@ TEST(TestSelect, TestFilter) { std::cout << "fetch batch" << std::endl; local_engine::SparkRowInfoPtr spark_row_info = local_executor.next(); - ASSERT_EQ(spark_row_info->getNumRows(), 50); + ASSERT_EQ(spark_row_info->getNumRows(), 1); local_engine::SparkColumnToCHColumn converter; auto block = converter.convertCHColumnToSparkRow(*spark_row_info, local_executor.getHeader()); ASSERT_EQ(spark_row_info->getNumRows(), block->rows()); @@ -98,6 +102,7 @@ TEST(TestSelect, TestFilter) TEST(TestSelect, TestAgg) { dbms::SerializedSchemaBuilder schema_builder; + // sorted by key auto* schema = schema_builder .column("sepal_length", "FP64") .column("sepal_width", "FP64") @@ -107,16 +112,16 @@ TEST(TestSelect, TestAgg) .build(); dbms::SerializedPlanBuilder plan_builder; auto * mul_exp = dbms::scalarFunction(dbms::MULTIPLY, - {dbms::selection(3), + {dbms::selection(2), dbms::literal(0.8)}); auto * less_exp = dbms::scalarFunction(dbms::LESS_THAN, { mul_exp, - dbms::literal(5.0) + dbms::literal(4.0) }); auto * mul_exp2 = dbms::scalarFunction(dbms::MULTIPLY, - {dbms::selection(3), + {dbms::selection(2), dbms::literal(1.1)}); - auto * measure = dbms::measureFunction(dbms::SUM, {dbms::selection(3)}); + auto * measure = dbms::measureFunction(dbms::SUM, {dbms::selection(2)}); auto plan = plan_builder .registerSupportedFunctions() .aggregate({}, {measure}) @@ -141,7 +146,7 @@ TEST(TestSelect, TestAgg) ASSERT_EQ(spark_row_info->getNumRows(), block->rows()); auto reader = SparkRowReader(spark_row_info->getNumCols()); reader.pointTo(reinterpret_cast(spark_row_info->getBufferAddress() + spark_row_info->getOffsets()[1]), spark_row_info->getLengths()[0]); - std::cout << "result: " << reader.getDouble(0) << std::endl; + ASSERT_EQ(reader.getDouble(0), 103.2); } } From ddc9ee3951214fe75fb27d7a92617d77824fd09c Mon Sep 17 00:00:00 2001 From: "neng.liu" Date: Fri, 14 Jan 2022 05:37:56 +0000 Subject: [PATCH 276/472] support duckdb parquet reader --- src/Storages/HDFS/StorageHDFS.cpp | 3 + utils/local-engine/CMakeLists.txt | 30 +- .../Parser/SerializedPlanParser.cpp | 3 +- utils/local-engine/Storages/CMakeLists.txt | 0 .../Storages/InputStreamFileSystem.h | 212 + 
.../Storages/ParquetRowInputFormat.cpp | 198 + .../Storages/ParquetRowInputFormat.h | 47 + utils/local-engine/Storages/duckdb.cpp | 255103 +++++++++++++++ utils/local-engine/Storages/duckdb.hpp | 22137 ++ .../Storages/parquet-amalgamation.cpp | 41986 +++ .../Storages/parquet-amalgamation.hpp | 7676 + .../tests/benchmark_local_engine.cpp | 4 +- .../local-engine/tests/gtest_local_engine.cpp | 13 + 13 files changed, 327382 insertions(+), 30 deletions(-) create mode 100644 utils/local-engine/Storages/CMakeLists.txt create mode 100644 utils/local-engine/Storages/InputStreamFileSystem.h create mode 100644 utils/local-engine/Storages/ParquetRowInputFormat.cpp create mode 100644 utils/local-engine/Storages/ParquetRowInputFormat.h create mode 100644 utils/local-engine/Storages/duckdb.cpp create mode 100644 utils/local-engine/Storages/duckdb.hpp create mode 100644 utils/local-engine/Storages/parquet-amalgamation.cpp create mode 100644 utils/local-engine/Storages/parquet-amalgamation.hpp diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp index 9600eb975b4c..11c0a5c8a92b 100644 --- a/src/Storages/HDFS/StorageHDFS.cpp +++ b/src/Storages/HDFS/StorageHDFS.cpp @@ -236,6 +236,9 @@ Strings LSWithRegexpMatching(const String & path_for_ls, const HDFSFSPtr & fs, c HDFSFileInfo ls; ls.file_info = hdfsListDirectory(fs.get(), prefix_without_globs.data(), &ls.length); + if (ls.file_info == NULL) { + throw std::runtime_error("Unable to connect to HDFS: " + String(hdfsGetLastError())); + } Strings result; for (int i = 0; i < ls.length; ++i) { diff --git a/utils/local-engine/CMakeLists.txt b/utils/local-engine/CMakeLists.txt index cb5ba8287a9e..34b380df19e5 100644 --- a/utils/local-engine/CMakeLists.txt +++ b/utils/local-engine/CMakeLists.txt @@ -66,32 +66,16 @@ add_subdirectory(proto) add_subdirectory(Builder) add_headers_and_sources(builder Builder) add_headers_and_sources(parser Parser) -#include (../../cmake/find/parquet.cmake) +add_headers_and_sources(storages Storages) include_directories(${CMAKE_CURRENT_BINARY_DIR}/proto) include_directories(${ClickHouse_SOURCE_DIR}/utils/local-engine) -#add_executable (local_engine -# local_engine.cpp -# ${builder_headers} -# ${builder_sources} -# ${parser_headers} -# ${parser_sources} -# ) -#target_include_directories(local_engine PRIVATE -# ${RAPIDJSON_INCLUDE_DIR} -# ${SUBSTRAIT_HEADERS} -# ${ARROW_INCLUDE_DIR} -# ) + include_directories( ${RAPIDJSON_INCLUDE_DIR} ${ARROW_INCLUDE_DIR} ) - - -#target_link_libraries(local_engine ${CLICKHOUSE_SERVER_LINK} ) -#create_javah() - set(LOCALENGINE_SHARED_LIB local_engine_jni) add_library(${LOCALENGINE_SHARED_LIB} SHARED @@ -100,9 +84,9 @@ add_library(${LOCALENGINE_SHARED_LIB} SHARED ${builder_sources} ${parser_headers} ${parser_sources} + ${storages_sources} ) set_property(TARGET ${LOCALENGINE_SHARED_LIB} PROPERTY POSITION_INDEPENDENT_CODE ON) -#add_executable(${LOCALENGINE_SHARED_LIB} ${JNI_NATIVE_SOURCES}) add_dependencies(${LOCALENGINE_SHARED_LIB} local_engine_headers) target_link_libraries(${LOCALENGINE_SHARED_LIB} ${CLICKHOUSE_SERVER_LINK} ) @@ -110,14 +94,6 @@ install(TARGETS ${LOCALENGINE_SHARED_LIB} LIBRARY DESTINATION lib ) -#install(TARGETS ${LOCALENGINE_SHARED_LIB} RUNTIME_DEPENDENCY_SET myset -# LIBRARY DESTINATION lib) -#install(RUNTIME_DEPENDENCY_SET myset -# LIBRARY DESTINATION lib -# ) -#install(TARGETS ${LOCALENGINE_SHARED_LIB} RUNTIME_DEPENDENCIES -# DIRECTORIES lib -# ) install( FILES ${CMAKE_CURRENT_BINARY_DIR}/../../src/AggregateFunctions/libclickhouse_aggregate_functions.so 
${CMAKE_CURRENT_BINARY_DIR}/../../src/Functions/libclickhouse_functions.so diff --git a/utils/local-engine/Parser/SerializedPlanParser.cpp b/utils/local-engine/Parser/SerializedPlanParser.cpp index 6d6469369e69..d9b152f57050 100644 --- a/utils/local-engine/Parser/SerializedPlanParser.cpp +++ b/utils/local-engine/Parser/SerializedPlanParser.cpp @@ -23,6 +23,7 @@ #include #include #include +#include DB::BatchParquetFileSourcePtr dbms::SerializedPlanParser::parseReadRealWithLocalFile(const substrait::ReadRel & rel) { @@ -402,7 +403,7 @@ DB::Chunk DB::BatchParquetFileSource::generate() read_buf = std::move(nested_buffer); - auto format = DB::ParquetBlockInputFormat::getParquetFormat(*read_buf, header); + ProcessorPtr format = std::make_shared(*read_buf, header); pipeline = std::make_unique(); pipeline->init(Pipe(format)); diff --git a/utils/local-engine/Storages/CMakeLists.txt b/utils/local-engine/Storages/CMakeLists.txt new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/utils/local-engine/Storages/InputStreamFileSystem.h b/utils/local-engine/Storages/InputStreamFileSystem.h new file mode 100644 index 000000000000..5c41759783b0 --- /dev/null +++ b/utils/local-engine/Storages/InputStreamFileSystem.h @@ -0,0 +1,212 @@ +#pragma once + +#include "duckdb.hpp" +#include +#include +#include + +using namespace DB; + +namespace local_engine +{ +class InputStreamFileSystem; + +class InputStreamFileHandle : public duckdb::FileHandle +{ +public: + InputStreamFileHandle(::duckdb::FileSystem& fileSystem, int id) + : FileHandle(fileSystem, std::to_string(id)), + stream_id(id), + offset_(0) {} + + int getStreamId() + { + return stream_id; + } + + uint64_t offset() const { + return offset_; + } + + void setOffset(uint64_t newOffset) { + offset_ = newOffset; + } + +protected: + void Close() override + { + // do nothing + } + +private: + int stream_id; + uint64_t offset_; +}; + +class InputStreamFileSystem : public ::duckdb::FileSystem +{ +public: + InputStreamFileSystem() {} + ~InputStreamFileSystem() override = default; + + std::unique_ptr<::duckdb::FileHandle> OpenFile( + const std::string& path, + uint8_t /*flags*/, + ::duckdb::FileLockType /*lock = ::duckdb::FileLockType::NO_LOCK*/, + ::duckdb::FileCompressionType /*compression = + ::duckdb::FileCompressionType::UNCOMPRESSED*/ + , + ::duckdb::FileOpener* /*opener = nullptr*/) override { + int stream_id = std::stoi(path); + + std::lock_guard streams_lock(streamsMutex_); + auto it = streams_.find(stream_id); + if (it == streams_.end()) + { + throw std::runtime_error("Unknown stream with ID " + path); + } + ++it->second.first; + return std::make_unique(*this, stream_id); + } + + std::unique_ptr<::duckdb::FileHandle> openStream( + ReadBuffer & stream) { + std::lock_guard lock(streamsMutex_); + auto stream_id = nextStreamId_++; + streams_.emplace( + std::make_pair(stream_id, std::make_pair(1, &stream))); + return std::make_unique(*this, stream_id); + } + + void Read( + ::duckdb::FileHandle& handle, + void* buffer, + int64_t nr_bytes, + uint64_t location) override { + + auto& stream_handle = dynamic_cast(handle); + auto it = streams_.find(stream_handle.getStreamId()); + if (it == streams_.end()) + { + throw std::runtime_error("Unknown stream with ID " + std::to_string(stream_handle.getStreamId())); + } + auto * read_buffer = dynamic_cast(it->second.second); + if (stream_handle.offset() != location) + { + read_buffer->seek(location, SEEK_SET); + } + read_buffer->read(static_cast(buffer), nr_bytes); + stream_handle.setOffset(location + nr_bytes); + 
} + + void CloseStream(int stream_id) { + std::lock_guard lock(streamsMutex_); + auto it = streams_.find(stream_id); + if (it == streams_.end()) + { + throw std::runtime_error("Unknown stream with ID " + std::to_string(stream_id)); + } + if (it->second.first == 1) { + streams_.erase(it); + } else { + --it->second.first; + } + } + + void Write( + ::duckdb::FileHandle& /*handle*/, + void* /*buffer*/, + int64_t /*nr_bytes*/, + uint64_t /*location*/) override { + throw std::runtime_error("Unexpected call to InputStreamFileSystem::Write"); + } + + int64_t Read( + ::duckdb::FileHandle& /*handle*/, + void* /*buffer*/, + int64_t /*nr_bytes*/) override { + throw std::runtime_error("Unexpected call to InputStreamFileSystem::Read"); + } + + int64_t Write( + ::duckdb::FileHandle& /*handle*/, + void* /*buffer*/, + int64_t /*nr_bytes*/) override { + throw std::runtime_error("Unexpected call to InputStreamFileSystem::Write"); + } + + int64_t GetFileSize(::duckdb::FileHandle& handle) override { + auto& stream_handle = dynamic_cast(handle); + auto it = streams_.find(stream_handle.getStreamId()); + if (it == streams_.end()) + { + throw std::runtime_error("Unknown stream with ID " + std::to_string(stream_handle.getStreamId())); + } + if (auto * file_stream = dynamic_cast(it->second.second)) + { + return file_stream->size(); + } + throw std::runtime_error("Unexpected call to InputStreamFileSystem::GetFileSize"); + } + + time_t GetLastModifiedTime(::duckdb::FileHandle& /*handle*/) override { + throw std::runtime_error("Unexpected call to InputStreamFileSystem::GetLastModifiedTime"); + } + + void Truncate(::duckdb::FileHandle& /*handle*/, int64_t /*new_size*/) + override { + throw std::runtime_error("Unexpected call to InputStreamFileSystem::Truncate"); + } + + bool DirectoryExists(const std::string& /*directory*/) override { + throw std::runtime_error("Unexpected call to InputStreamFileSystem::DirectoryExists"); + } + + void CreateDirectory(const std::string& /*directory*/) override { + throw std::runtime_error("Unexpected call to InputStreamFileSystem::CreateDirectory"); + } + + void RemoveDirectory(const std::string& /*directory*/) override { + throw std::runtime_error("Unexpected call to InputStreamFileSystem::RemoveDirectory"); + } + + bool ListFiles( + const std::string& /*directory*/, + const std::function& /*callback*/) override { + throw std::runtime_error("Unexpected call to InputStreamFileSystem::ListFiles"); + } + + void MoveFile(const std::string& /*source*/, const std::string& /*target*/) + override { + throw std::runtime_error("Unexpected call to InputStreamFileSystem::MoveFile"); + } + + bool FileExists(const std::string& /*filename*/) override { + throw std::runtime_error("Unexpected call to InputStreamFileSystem::FileExists"); + } + + void RemoveFile(const std::string& /*filename*/) override { + throw std::runtime_error("Unexpected call to InputStreamFileSystem::RemoveFile"); + } + + void FileSync(::duckdb::FileHandle& /*handle*/) override { + throw std::runtime_error("Unexpected call to InputStreamFileSystem::FileSync"); + } + + std::vector Glob(const std::string& /*path*/) override { + throw std::runtime_error("Unexpected call to InputStreamFileSystem::Glob"); + } + + virtual std::string GetName() const override { + throw std::runtime_error("Unexpected call to InputStreamFileSystem::GetName"); + } +private: + int nextStreamId_ = 1; + // Maps stream ID to file handle counter and input stream + std::unordered_map< + int, + std::pair> + streams_; + std::mutex streamsMutex_; +}; +} diff --git 
a/utils/local-engine/Storages/ParquetRowInputFormat.cpp b/utils/local-engine/Storages/ParquetRowInputFormat.cpp new file mode 100644 index 000000000000..99e06a49152e --- /dev/null +++ b/utils/local-engine/Storages/ParquetRowInputFormat.cpp @@ -0,0 +1,198 @@ +#include "ParquetRowInputFormat.h" + + +#include +#include + +local_engine::ParquetRowInputFormat::ParquetRowInputFormat(ReadBuffer & in_, Block header_) : IInputFormat(std::move(header_), in_) +{ +} +std::unique_ptr local_engine::ParquetRowInputFormat::inputStreamFileSystem = std::make_unique(); +void local_engine::ParquetRowInputFormat::prepareReader() +{ + if (!reader) + reader = std::make_unique(this->allocator, inputStreamFileSystem->openStream(in)); + int index = 0; + int cols = this->getPort().getHeader().columns(); + column_indices.reserve(cols); + row_type.reserve(cols); + column_indices.assign(cols, 0); + row_type.assign(cols, duckdb::LogicalType(duckdb::LogicalType::SQLNULL)); + for (const auto& col : reader->names) + { + if (this->getPort().getHeader().has(col)) + { + int position = this->getPort().getHeader().getPositionByName(col); + column_indices[position] = index; + row_type[position] = convertCHTypeToDuckDbType(this->getPort().getHeader().getByName(col).type); + } + index++; + } + for (duckdb::idx_t i = 0; i < reader->NumRowGroups(); ++i) + { + row_group_ids.push_back(i); + } + state = std::make_unique(); + reader->InitializeScan(*state, column_indices, row_group_ids, nullptr); +} + +Chunk local_engine::ParquetRowInputFormat::generate() +{ + Chunk res; + + if (!reader) + prepareReader(); + ::duckdb::DataChunk output; + output.Initialize(row_type); + reader->Scan(*state, output); + if (output.size() > 0) + { + duckDbChunkToCHChunk(output, res); + } + return res; +} +duckdb::LogicalType local_engine::ParquetRowInputFormat::convertCHTypeToDuckDbType(DataTypePtr type) +{ + WhichDataType which(type); + if (which.isInt8()) + { + return duckdb::LogicalType(duckdb::LogicalType::TINYINT); + } + else if (which.isInt16()) + { + return duckdb::LogicalType(duckdb::LogicalType::SMALLINT); + } + else if (which.isInt32()) + { + return duckdb::LogicalType(duckdb::LogicalType::INTEGER); + } + else if (which.isInt64()) + { + return duckdb::LogicalType(duckdb::LogicalType::BIGINT); + } + else if (which.isString()) + { + return duckdb::LogicalType(duckdb::LogicalType::VARCHAR); + } + else if (which.isFloat32()) + { + return duckdb::LogicalType(duckdb::LogicalType::FLOAT); + } + else if (which.isFloat64()) + { + return duckdb::LogicalType(duckdb::LogicalType::DOUBLE); + } + else if (which.isDate()) + { + return duckdb::LogicalType(duckdb::LogicalType::DATE); + } + else { + throw std::runtime_error("doesn't support CH type " + type->getName()); + } +} +void local_engine::ParquetRowInputFormat::duckDbChunkToCHChunk(duckdb::DataChunk & dataChunk, Chunk & chunk) +{ + Columns columns_list; + UInt64 num_rows = 0; + auto header = this->getPort().getHeader(); + + columns_list.reserve(header.columns()); + + for (size_t column_i = 0, columns = header.columns(); column_i < columns; ++column_i) + { + const ColumnWithTypeAndName & header_column = header.getByPosition(column_i); + MutableColumnPtr read_column = header_column.type->createColumn(); + readColumnFromDuckVector(*read_column, dataChunk.data[column_i], dataChunk.size()); + ColumnWithTypeAndName column; + column.name = header_column.name; + column.type = header_column.type; + column.column = std::move(read_column); + + num_rows = column.column->size(); + 
columns_list.push_back(std::move(column.column)); + } + chunk.setColumns(columns_list, num_rows); +} +void local_engine::ParquetRowInputFormat::readColumnFromDuckVector(IColumn & internal_column, duckdb::Vector & vector, idx_t num_rows) +{ + switch(vector.GetType().id()) + { + + case duckdb::LogicalTypeId::TINYINT: + fillColumnWithNumericData>(vector, internal_column, num_rows); + break; + case duckdb::LogicalTypeId::SMALLINT: + fillColumnWithNumericData>(vector, internal_column, num_rows); + break; + case duckdb::LogicalTypeId::INTEGER: + fillColumnWithNumericData>(vector, internal_column, num_rows); + break; + case duckdb::LogicalTypeId::BIGINT: + fillColumnWithNumericData>(vector, internal_column, num_rows); + break; + case duckdb::LogicalTypeId::DATE: + fillColumnWithDate32Data(vector, internal_column, num_rows); + break; + case duckdb::LogicalTypeId::FLOAT: + fillColumnWithNumericData>(vector, internal_column, num_rows); + break; + case duckdb::LogicalTypeId::DOUBLE: + fillColumnWithNumericData>(vector, internal_column, num_rows); + break; + case duckdb::LogicalTypeId::VARCHAR: + fillColumnWithStringData(vector, internal_column, num_rows); + break; + default: + throw std::runtime_error("unsupported type " + LogicalTypeIdToString(vector.GetType().id())); + } +} +template +void local_engine::ParquetRowInputFormat::fillColumnWithNumericData(duckdb::Vector & vector, IColumn & internal_column, idx_t num_rows) +{ + auto & column_data = static_cast(internal_column).getData(); + column_data.reserve(num_rows); + const auto * raw_data = reinterpret_cast(vector.GetData()); + column_data.insert_assume_reserved(raw_data, raw_data + num_rows); +} +void local_engine::ParquetRowInputFormat::fillColumnWithStringData(duckdb::Vector & vector, IColumn & internal_column, idx_t num_rows) +{ + assert(vector.GetVectorType() == duckdb::VectorType::FLAT_VECTOR); + auto* duck_data = duckdb::FlatVector::GetData(vector); + PaddedPODArray & column_chars_t = assert_cast(internal_column).getChars(); + PaddedPODArray & column_offsets = assert_cast(internal_column).getOffsets(); + column_offsets.reserve(num_rows); + size_t chars_t_size = 0; + + for (idx_t i = 0; i < num_rows; ++i) + { + chars_t_size += duck_data[i].GetSize(); + } + chars_t_size += num_rows; + column_chars_t.reserve(chars_t_size); + column_offsets.reserve(num_rows); + for (idx_t i = 0; i < num_rows; ++i) + { + column_chars_t.insert_assume_reserved(duck_data[i].GetDataUnsafe(), duck_data[i].GetDataUnsafe() + duck_data[i].GetSize()); + column_chars_t.emplace_back('\0'); + column_offsets.emplace_back(column_chars_t.size()); + } +} +void local_engine::ParquetRowInputFormat::fillColumnWithDate32Data(duckdb::Vector & vector, IColumn & internal_column, idx_t num_rows) +{ + PaddedPODArray & column_data = assert_cast &>(internal_column).getData(); + column_data.reserve(num_rows); + auto* duck_data = duckdb::FlatVector::GetData(vector); + for (idx_t i = 0; i < num_rows; ++i) + { + UInt32 days_num = static_cast(duck_data[i].days); + if (days_num > DATE_LUT_MAX_DAY_NUM) + throw std::runtime_error("data is out of range (ClickHouse Date)"); + column_data.emplace_back(days_num); + } +} +void local_engine::ParquetRowInputFormat::resetParser() +{ + IInputFormat::resetParser(); + state.reset(); + state = std::make_unique(); +} diff --git a/utils/local-engine/Storages/ParquetRowInputFormat.h b/utils/local-engine/Storages/ParquetRowInputFormat.h new file mode 100644 index 000000000000..1a3b934c8cc4 --- /dev/null +++ b/utils/local-engine/Storages/ParquetRowInputFormat.h @@ 
-0,0 +1,47 @@ +#pragma once + +#include +#include + +#include "parquet-amalgamation.hpp" +#include "duckdb.hpp" +#include "InputStreamFileSystem.h" + +using namespace DB; + +namespace local_engine +{ +class ParquetRowInputFormat : public IInputFormat +{ +public: + ParquetRowInputFormat(ReadBuffer & in_, Block header_); + + void resetParser() override; + + String getName() const override { return "ParquetBlockInputFormat"; } + +protected: + Chunk generate() override; + +private: + void prepareReader(); + static duckdb::LogicalType convertCHTypeToDuckDbType(DataTypePtr type); + void duckDbChunkToCHChunk(duckdb::DataChunk & dataChunk, Chunk & chunk); + void readColumnFromDuckVector(IColumn & internal_column, duckdb::Vector & vector, idx_t num_rows); + template > + static void fillColumnWithNumericData(duckdb::Vector & vector, IColumn & internal_column, idx_t num_rows); + static void fillColumnWithStringData(duckdb::Vector & vector, IColumn & internal_column, idx_t num_rows); + static void fillColumnWithDate32Data(duckdb::Vector & vector, IColumn & internal_column, idx_t num_rows); + + static std::unique_ptr inputStreamFileSystem; + duckdb::Allocator allocator; + std::unique_ptr reader; + std::unique_ptr state; + std::vector column_indices; + std::vector row_group_ids; + std::vector<::duckdb::LogicalType> row_type; +}; +} + + + diff --git a/utils/local-engine/Storages/duckdb.cpp b/utils/local-engine/Storages/duckdb.cpp new file mode 100644 index 000000000000..dc8e7f729040 --- /dev/null +++ b/utils/local-engine/Storages/duckdb.cpp @@ -0,0 +1,255103 @@ +#include "duckdb.hpp" + +#ifndef DUCKDB_AMALGAMATION +#error header mismatch +#endif + +#if (!defined(DEBUG) && !defined NDEBUG) +#define NDEBUG +#endif + + + +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/catalog/catalog_search_path.hpp +// +// +//===----------------------------------------------------------------------===// + + + +#include + + + + + +namespace duckdb { + +class ClientContext; + +//! The schema search path, in order by which entries are searched if no schema entry is provided +class CatalogSearchPath { +public: + DUCKDB_API explicit CatalogSearchPath(ClientContext &client_p); + DUCKDB_API CatalogSearchPath(const CatalogSearchPath &other) = delete; + + DUCKDB_API void Set(const string &new_value, bool is_set_schema); + DUCKDB_API const vector &Get(); + DUCKDB_API const vector &GetSetPaths() { + return set_paths; + } + DUCKDB_API const string &GetDefault(); + DUCKDB_API const string &GetOrDefault(const string &name); + +private: + static vector ParsePaths(const string &value); + + void SetPaths(vector new_paths); + +private: + ClientContext &context; + vector paths; + //! Only the paths that were explicitly set (minus the always included paths) + vector set_paths; +}; + +} // namespace duckdb + +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/catalog/catalog_entry/aggregate_function_catalog_entry.hpp +// +// +//===----------------------------------------------------------------------===// + + + + + + + + +namespace duckdb { + +//! An aggregate function in the catalog +class AggregateFunctionCatalogEntry : public StandardEntry { +public: + AggregateFunctionCatalogEntry(Catalog *catalog, SchemaCatalogEntry *schema, CreateAggregateFunctionInfo *info) + : StandardEntry(CatalogType::AGGREGATE_FUNCTION_ENTRY, schema, catalog, info->name), + functions(info->functions.functions) { + } + + //! 
The aggregate functions + vector functions; +}; +} // namespace duckdb + +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/catalog/catalog_entry/collate_catalog_entry.hpp +// +// +//===----------------------------------------------------------------------===// + + + + + + + +namespace duckdb { + +//! A collation catalog entry +class CollateCatalogEntry : public StandardEntry { +public: + CollateCatalogEntry(Catalog *catalog, SchemaCatalogEntry *schema, CreateCollationInfo *info) + : StandardEntry(CatalogType::COLLATION_ENTRY, schema, catalog, info->name), function(info->function), + combinable(info->combinable), not_required_for_equality(info->not_required_for_equality) { + } + + //! The collation function to push in case collation is required + ScalarFunction function; + //! Whether or not the collation can be combined with other collations. + bool combinable; + //! Whether or not the collation is required for equality comparisons or not. For many collations a binary + //! comparison for equality comparisons is correct, allowing us to skip the collation in these cases which greatly + //! speeds up processing. + bool not_required_for_equality; +}; +} // namespace duckdb + +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/catalog/catalog_entry/copy_function_catalog_entry.hpp +// +// +//===----------------------------------------------------------------------===// + + + + + + + +namespace duckdb { + +class Catalog; +struct CreateCopyFunctionInfo; + +//! A table function in the catalog +class CopyFunctionCatalogEntry : public StandardEntry { +public: + CopyFunctionCatalogEntry(Catalog *catalog, SchemaCatalogEntry *schema, CreateCopyFunctionInfo *info); + + //! The copy function + CopyFunction function; +}; +} // namespace duckdb + +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/catalog/catalog_entry/index_catalog_entry.hpp +// +// +//===----------------------------------------------------------------------===// + + + + + + +namespace duckdb { + +struct DataTableInfo; +class Index; + +//! An index catalog entry +class IndexCatalogEntry : public StandardEntry { +public: + //! Create a real TableCatalogEntry and initialize storage for it + IndexCatalogEntry(Catalog *catalog, SchemaCatalogEntry *schema, CreateIndexInfo *info); + ~IndexCatalogEntry() override; + + Index *index; + shared_ptr info; + string sql; + +public: + string ToSQL() override; +}; + +} // namespace duckdb + +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/catalog/catalog_entry/macro_catalog_entry.hpp +// +// +//===----------------------------------------------------------------------===// + + + + + + + + +namespace duckdb { + +//! A macro function in the catalog +class MacroCatalogEntry : public StandardEntry { +public: + MacroCatalogEntry(Catalog *catalog, SchemaCatalogEntry *schema, CreateMacroInfo *info); + + //! The macro function + unique_ptr function; + +public: + //! Serialize the meta information of the MacroCatalogEntry a serializer + virtual void Serialize(Serializer &serializer); + //! 
Deserializes to a CreateMacroInfo + static unique_ptr Deserialize(Deserializer &source); +}; +} // namespace duckdb + +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/catalog/catalog_entry/pragma_function_catalog_entry.hpp +// +// +//===----------------------------------------------------------------------===// + + + + + + +namespace duckdb { + +class Catalog; +struct CreatePragmaFunctionInfo; + +//! A table function in the catalog +class PragmaFunctionCatalogEntry : public StandardEntry { +public: + PragmaFunctionCatalogEntry(Catalog *catalog, SchemaCatalogEntry *schema, CreatePragmaFunctionInfo *info); + + //! The pragma functions + vector functions; +}; +} // namespace duckdb + + + + + + +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/catalog/catalog_entry/view_catalog_entry.hpp +// +// +//===----------------------------------------------------------------------===// + + + + + + + + +namespace duckdb { + +class ColumnStatistics; +class DataTable; +struct CreateViewInfo; + +//! A view catalog entry +class ViewCatalogEntry : public StandardEntry { +public: + //! Create a real TableCatalogEntry and initialize storage for it + ViewCatalogEntry(Catalog *catalog, SchemaCatalogEntry *schema, CreateViewInfo *info); + + //! The query of the view + unique_ptr query; + //! The SQL query (if any) + string sql; + //! The set of aliases associated with the view + vector aliases; + //! The returned types of the view + vector types; + +public: + unique_ptr AlterEntry(ClientContext &context, AlterInfo *info) override; + + //! Serialize the meta information of the ViewCatalogEntry a serializer + virtual void Serialize(Serializer &serializer); + //! Deserializes to a CreateTableInfo + static unique_ptr Deserialize(Deserializer &source); + + unique_ptr Copy(ClientContext &context) override; + + string ToSQL() override; + +private: + void Initialize(CreateViewInfo *info); +}; +} // namespace duckdb + + + +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/catalog/default/default_schemas.hpp +// +// +//===----------------------------------------------------------------------===// + + + + + +namespace duckdb { + +class DefaultSchemaGenerator : public DefaultGenerator { +public: + explicit DefaultSchemaGenerator(Catalog &catalog); + +public: + unique_ptr CreateDefaultEntry(ClientContext &context, const string &entry_name) override; + vector GetDefaultEntries() override; +}; + +} // namespace duckdb + +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/catalog/dependency_manager.hpp +// +// +//===----------------------------------------------------------------------===// + + + + + +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/catalog/dependency.hpp +// +// +//===----------------------------------------------------------------------===// + + + + + + +namespace duckdb { +class CatalogEntry; + +enum class DependencyType { + DEPENDENCY_REGULAR = 0, + DEPENDENCY_AUTOMATIC = 1, + DEPENDENCY_OWNS = 2, + DEPENDENCY_OWNED_BY = 3 +}; + +struct Dependency { + Dependency(CatalogEntry *entry, DependencyType dependency_type = DependencyType::DEPENDENCY_REGULAR) + : // NOLINT: Allow implicit conversion from `CatalogEntry` + entry(entry), dependency_type(dependency_type) { + } + + //! 
The catalog entry this depends on + CatalogEntry *entry; + //! The type of dependency + DependencyType dependency_type; +}; + +struct DependencyHashFunction { + uint64_t operator()(const Dependency &a) const { + std::hash hash_func; + return hash_func((void *)a.entry); + } +}; + +struct DependencyEquality { + bool operator()(const Dependency &a, const Dependency &b) const { + return a.entry == b.entry; + } +}; + +using dependency_set_t = unordered_set; + +} // namespace duckdb + + +#include + +namespace duckdb { +class Catalog; +class ClientContext; + +//! The DependencyManager is in charge of managing dependencies between catalog entries +class DependencyManager { + friend class CatalogSet; + +public: + explicit DependencyManager(Catalog &catalog); + + //! Erase the object from the DependencyManager; this should only happen when the object itself is destroyed + void EraseObject(CatalogEntry *object); + + //! Scans all dependencies, returning pairs of (object, dependent) + void Scan(const std::function &callback); + + void AddOwnership(ClientContext &context, CatalogEntry *owner, CatalogEntry *entry); + +private: + Catalog &catalog; + //! Map of objects that DEPEND on [object], i.e. [object] can only be deleted when all entries in the dependency map + //! are deleted. + unordered_map dependents_map; + //! Map of objects that the source object DEPENDS on, i.e. when any of the entries in the vector perform a CASCADE + //! drop then [object] is deleted as well + unordered_map> dependencies_map; + +private: + void AddObject(ClientContext &context, CatalogEntry *object, unordered_set &dependencies); + void DropObject(ClientContext &context, CatalogEntry *object, bool cascade); + void AlterObject(ClientContext &context, CatalogEntry *old_obj, CatalogEntry *new_obj); + void EraseObjectInternal(CatalogEntry *object); +}; +} // namespace duckdb + + + + + + + + + + + + + + + + + + + + + +namespace duckdb { + +string SimilarCatalogEntry::GetQualifiedName() const { + D_ASSERT(Found()); + + return schema->name + "." 
+ name; +} + +Catalog::Catalog(DatabaseInstance &db) + : db(db), schemas(make_unique(*this, make_unique(*this))), + dependency_manager(make_unique(*this)) { + catalog_version = 0; +} +Catalog::~Catalog() { +} + +Catalog &Catalog::GetCatalog(ClientContext &context) { + return context.db->GetCatalog(); +} + +CatalogEntry *Catalog::CreateTable(ClientContext &context, BoundCreateTableInfo *info) { + auto schema = GetSchema(context, info->base->schema); + return CreateTable(context, schema, info); +} + +CatalogEntry *Catalog::CreateTable(ClientContext &context, unique_ptr info) { + auto binder = Binder::CreateBinder(context); + auto bound_info = binder->BindCreateTableInfo(move(info)); + return CreateTable(context, bound_info.get()); +} + +CatalogEntry *Catalog::CreateTable(ClientContext &context, SchemaCatalogEntry *schema, BoundCreateTableInfo *info) { + return schema->CreateTable(context, info); +} + +CatalogEntry *Catalog::CreateView(ClientContext &context, CreateViewInfo *info) { + auto schema = GetSchema(context, info->schema); + return CreateView(context, schema, info); +} + +CatalogEntry *Catalog::CreateView(ClientContext &context, SchemaCatalogEntry *schema, CreateViewInfo *info) { + return schema->CreateView(context, info); +} + +CatalogEntry *Catalog::CreateSequence(ClientContext &context, CreateSequenceInfo *info) { + auto schema = GetSchema(context, info->schema); + return CreateSequence(context, schema, info); +} + +CatalogEntry *Catalog::CreateType(ClientContext &context, CreateTypeInfo *info) { + auto schema = GetSchema(context, info->schema); + return CreateType(context, schema, info); +} + +CatalogEntry *Catalog::CreateSequence(ClientContext &context, SchemaCatalogEntry *schema, CreateSequenceInfo *info) { + return schema->CreateSequence(context, info); +} + +CatalogEntry *Catalog::CreateType(ClientContext &context, SchemaCatalogEntry *schema, CreateTypeInfo *info) { + return schema->CreateType(context, info); +} + +CatalogEntry *Catalog::CreateTableFunction(ClientContext &context, CreateTableFunctionInfo *info) { + auto schema = GetSchema(context, info->schema); + return CreateTableFunction(context, schema, info); +} + +CatalogEntry *Catalog::CreateTableFunction(ClientContext &context, SchemaCatalogEntry *schema, + CreateTableFunctionInfo *info) { + return schema->CreateTableFunction(context, info); +} + +CatalogEntry *Catalog::CreateCopyFunction(ClientContext &context, CreateCopyFunctionInfo *info) { + auto schema = GetSchema(context, info->schema); + return CreateCopyFunction(context, schema, info); +} + +CatalogEntry *Catalog::CreateCopyFunction(ClientContext &context, SchemaCatalogEntry *schema, + CreateCopyFunctionInfo *info) { + return schema->CreateCopyFunction(context, info); +} + +CatalogEntry *Catalog::CreatePragmaFunction(ClientContext &context, CreatePragmaFunctionInfo *info) { + auto schema = GetSchema(context, info->schema); + return CreatePragmaFunction(context, schema, info); +} + +CatalogEntry *Catalog::CreatePragmaFunction(ClientContext &context, SchemaCatalogEntry *schema, + CreatePragmaFunctionInfo *info) { + return schema->CreatePragmaFunction(context, info); +} + +CatalogEntry *Catalog::CreateFunction(ClientContext &context, CreateFunctionInfo *info) { + auto schema = GetSchema(context, info->schema); + return CreateFunction(context, schema, info); +} + +CatalogEntry *Catalog::CreateFunction(ClientContext &context, SchemaCatalogEntry *schema, CreateFunctionInfo *info) { + return schema->CreateFunction(context, info); +} + +CatalogEntry 
*Catalog::CreateCollation(ClientContext &context, CreateCollationInfo *info) { + auto schema = GetSchema(context, info->schema); + return CreateCollation(context, schema, info); +} + +CatalogEntry *Catalog::CreateCollation(ClientContext &context, SchemaCatalogEntry *schema, CreateCollationInfo *info) { + return schema->CreateCollation(context, info); +} + +CatalogEntry *Catalog::CreateSchema(ClientContext &context, CreateSchemaInfo *info) { + D_ASSERT(!info->schema.empty()); + if (info->schema == TEMP_SCHEMA) { + throw CatalogException("Cannot create built-in schema \"%s\"", info->schema); + } + + unordered_set dependencies; + auto entry = make_unique(this, info->schema, info->internal); + auto result = entry.get(); + if (!schemas->CreateEntry(context, info->schema, move(entry), dependencies)) { + if (info->on_conflict == OnCreateConflict::ERROR_ON_CONFLICT) { + throw CatalogException("Schema with name %s already exists!", info->schema); + } else { + D_ASSERT(info->on_conflict == OnCreateConflict::IGNORE_ON_CONFLICT); + } + return nullptr; + } + return result; +} + +void Catalog::DropSchema(ClientContext &context, DropInfo *info) { + D_ASSERT(!info->name.empty()); + ModifyCatalog(); + if (!schemas->DropEntry(context, info->name, info->cascade)) { + if (!info->if_exists) { + throw CatalogException("Schema with name \"%s\" does not exist!", info->name); + } + } +} + +void Catalog::DropEntry(ClientContext &context, DropInfo *info) { + ModifyCatalog(); + if (info->type == CatalogType::SCHEMA_ENTRY) { + // DROP SCHEMA + DropSchema(context, info); + return; + } + + auto lookup = LookupEntry(context, info->type, info->schema, info->name, info->if_exists); + if (!lookup.Found()) { + return; + } + + lookup.schema->DropEntry(context, info); +} + +CatalogEntry *Catalog::AddFunction(ClientContext &context, CreateFunctionInfo *info) { + auto schema = GetSchema(context, info->schema); + return AddFunction(context, schema, info); +} + +CatalogEntry *Catalog::AddFunction(ClientContext &context, SchemaCatalogEntry *schema, CreateFunctionInfo *info) { + return schema->AddFunction(context, info); +} + +SchemaCatalogEntry *Catalog::GetSchema(ClientContext &context, const string &schema_name, bool if_exists, + QueryErrorContext error_context) { + D_ASSERT(!schema_name.empty()); + if (schema_name == TEMP_SCHEMA) { + return context.temporary_objects.get(); + } + auto entry = schemas->GetEntry(context, schema_name); + if (!entry && !if_exists) { + throw CatalogException(error_context.FormatError("Schema with name %s does not exist!", schema_name)); + } + return (SchemaCatalogEntry *)entry; +} + +void Catalog::ScanSchemas(ClientContext &context, std::function callback) { + // create all default schemas first + schemas->Scan(context, [&](CatalogEntry *entry) { callback(entry); }); +} + +SimilarCatalogEntry Catalog::SimilarEntryInSchemas(ClientContext &context, const string &entry_name, CatalogType type, + const vector &schemas) { + + vector sets; + std::transform(schemas.begin(), schemas.end(), std::back_inserter(sets), + [type](SchemaCatalogEntry *s) -> CatalogSet * { return &s->GetCatalogSet(type); }); + pair most_similar {"", (idx_t)-1}; + SchemaCatalogEntry *schema_of_most_similar = nullptr; + for (auto schema : schemas) { + auto entry = schema->GetCatalogSet(type).SimilarEntry(context, entry_name); + if (!entry.first.empty() && (most_similar.first.empty() || most_similar.second > entry.second)) { + most_similar = entry; + schema_of_most_similar = schema; + } + } + + return {most_similar.first, 
most_similar.second, schema_of_most_similar}; +} + +CatalogException Catalog::CreateMissingEntryException(ClientContext &context, const string &entry_name, + CatalogType type, const vector &schemas, + QueryErrorContext error_context) { + auto entry = SimilarEntryInSchemas(context, entry_name, type, schemas); + + vector unseen_schemas; + this->schemas->Scan([&schemas, &unseen_schemas](CatalogEntry *entry) { + auto schema_entry = (SchemaCatalogEntry *)entry; + if (std::find(schemas.begin(), schemas.end(), schema_entry) == schemas.end()) { + unseen_schemas.emplace_back(schema_entry); + } + }); + auto unseen_entry = SimilarEntryInSchemas(context, entry_name, type, unseen_schemas); + + string did_you_mean; + if (unseen_entry.Found() && unseen_entry.distance < entry.distance) { + did_you_mean = "\nDid you mean \"" + unseen_entry.GetQualifiedName() + "\"?"; + } else if (entry.Found()) { + did_you_mean = "\nDid you mean \"" + entry.name + "\"?"; + } + + return CatalogException(error_context.FormatError("%s with name %s does not exist!%s", CatalogTypeToString(type), + entry_name, did_you_mean)); +} + +CatalogEntryLookup Catalog::LookupEntry(ClientContext &context, CatalogType type, const string &schema_name, + const string &name, bool if_exists, QueryErrorContext error_context) { + if (!schema_name.empty()) { + auto schema = GetSchema(context, schema_name, if_exists, error_context); + if (!schema) { + D_ASSERT(if_exists); + return {nullptr, nullptr}; + } + + auto entry = schema->GetCatalogSet(type).GetEntry(context, name); + if (!entry && !if_exists) { + throw CreateMissingEntryException(context, name, type, {schema}, error_context); + } + + return {schema, entry}; + } + + const auto &paths = context.catalog_search_path->Get(); + for (const auto &path : paths) { + auto lookup = LookupEntry(context, type, path, name, true, error_context); + if (lookup.Found()) { + return lookup; + } + } + + if (!if_exists) { + vector schemas; + for (const auto &path : paths) { + auto schema = GetSchema(context, path, true); + if (schema) { + schemas.emplace_back(schema); + } + } + + throw CreateMissingEntryException(context, name, type, schemas, error_context); + } + + return {nullptr, nullptr}; +} + +CatalogEntry *Catalog::GetEntry(ClientContext &context, const string &schema, const string &name) { + vector entry_types {CatalogType::TABLE_ENTRY, CatalogType::SEQUENCE_ENTRY}; + + for (auto entry_type : entry_types) { + CatalogEntry *result = GetEntry(context, entry_type, schema, name, true); + if (result != nullptr) { + return result; + } + } + + throw CatalogException("CatalogElement \"%s.%s\" does not exist!", schema, name); +} + +CatalogEntry *Catalog::GetEntry(ClientContext &context, CatalogType type, const string &schema_name, const string &name, + bool if_exists, QueryErrorContext error_context) { + return LookupEntry(context, type, schema_name, name, if_exists, error_context).entry; +} + +template <> +TableCatalogEntry *Catalog::GetEntry(ClientContext &context, const string &schema_name, const string &name, + bool if_exists, QueryErrorContext error_context) { + auto entry = GetEntry(context, CatalogType::TABLE_ENTRY, schema_name, name, if_exists); + if (!entry) { + return nullptr; + } + if (entry->type != CatalogType::TABLE_ENTRY) { + throw CatalogException(error_context.FormatError("%s is not a table", name)); + } + return (TableCatalogEntry *)entry; +} + +template <> +SequenceCatalogEntry *Catalog::GetEntry(ClientContext &context, const string &schema_name, const string &name, + bool if_exists, 
QueryErrorContext error_context) { + return (SequenceCatalogEntry *)GetEntry(context, CatalogType::SEQUENCE_ENTRY, schema_name, name, if_exists, + error_context); +} + +template <> +TableFunctionCatalogEntry *Catalog::GetEntry(ClientContext &context, const string &schema_name, const string &name, + bool if_exists, QueryErrorContext error_context) { + return (TableFunctionCatalogEntry *)GetEntry(context, CatalogType::TABLE_FUNCTION_ENTRY, schema_name, name, + if_exists, error_context); +} + +template <> +CopyFunctionCatalogEntry *Catalog::GetEntry(ClientContext &context, const string &schema_name, const string &name, + bool if_exists, QueryErrorContext error_context) { + return (CopyFunctionCatalogEntry *)GetEntry(context, CatalogType::COPY_FUNCTION_ENTRY, schema_name, name, if_exists, + error_context); +} + +template <> +PragmaFunctionCatalogEntry *Catalog::GetEntry(ClientContext &context, const string &schema_name, const string &name, + bool if_exists, QueryErrorContext error_context) { + return (PragmaFunctionCatalogEntry *)GetEntry(context, CatalogType::PRAGMA_FUNCTION_ENTRY, schema_name, name, + if_exists, error_context); +} + +template <> +AggregateFunctionCatalogEntry *Catalog::GetEntry(ClientContext &context, const string &schema_name, const string &name, + bool if_exists, QueryErrorContext error_context) { + auto entry = GetEntry(context, CatalogType::AGGREGATE_FUNCTION_ENTRY, schema_name, name, if_exists, error_context); + if (entry->type != CatalogType::AGGREGATE_FUNCTION_ENTRY) { + throw CatalogException(error_context.FormatError("%s is not an aggregate function", name)); + } + return (AggregateFunctionCatalogEntry *)entry; +} + +template <> +CollateCatalogEntry *Catalog::GetEntry(ClientContext &context, const string &schema_name, const string &name, + bool if_exists, QueryErrorContext error_context) { + return (CollateCatalogEntry *)GetEntry(context, CatalogType::COLLATION_ENTRY, schema_name, name, if_exists, + error_context); +} + +void Catalog::Alter(ClientContext &context, AlterInfo *info) { + ModifyCatalog(); + auto lookup = LookupEntry(context, info->GetCatalogType(), info->schema, info->name); + D_ASSERT(lookup.Found()); // It must have thrown otherwise. 
+ return lookup.schema->Alter(context, info); +} + +idx_t Catalog::GetCatalogVersion() { + return catalog_version; +} + +idx_t Catalog::ModifyCatalog() { + return catalog_version++; +} + +} // namespace duckdb + + + +namespace duckdb { + +CopyFunctionCatalogEntry::CopyFunctionCatalogEntry(Catalog *catalog, SchemaCatalogEntry *schema, + CreateCopyFunctionInfo *info) + : StandardEntry(CatalogType::COPY_FUNCTION_ENTRY, schema, catalog, info->name), function(info->function) { +} + +} // namespace duckdb + + + +namespace duckdb { + +IndexCatalogEntry::IndexCatalogEntry(Catalog *catalog, SchemaCatalogEntry *schema, CreateIndexInfo *info) + : StandardEntry(CatalogType::INDEX_ENTRY, schema, catalog, info->index_name), index(nullptr), sql(info->sql) { +} + +IndexCatalogEntry::~IndexCatalogEntry() { + // remove the associated index from the info + if (!info || !index) { + return; + } + info->indexes.RemoveIndex(index); +} + +string IndexCatalogEntry::ToSQL() { + if (sql.empty()) { + throw InternalException("Cannot convert INDEX to SQL because it was not created with a SQL statement"); + } + return sql; +} + +} // namespace duckdb + + + + +namespace duckdb { + +MacroCatalogEntry::MacroCatalogEntry(Catalog *catalog, SchemaCatalogEntry *schema, CreateMacroInfo *info) + : StandardEntry(CatalogType::MACRO_ENTRY, schema, catalog, info->name), function(move(info->function)) { + this->temporary = info->temporary; + this->internal = info->internal; +} + +void MacroCatalogEntry::Serialize(Serializer &serializer) { + D_ASSERT(!internal); + serializer.WriteString(schema->name); + serializer.WriteString(name); + function->expression->Serialize(serializer); + serializer.Write((uint32_t)function->parameters.size()); + for (auto ¶m : function->parameters) { + param->Serialize(serializer); + } + serializer.Write((uint32_t)function->default_parameters.size()); + for (auto &kv : function->default_parameters) { + serializer.WriteString(kv.first); + kv.second->Serialize(serializer); + } +} + +unique_ptr MacroCatalogEntry::Deserialize(Deserializer &source) { + auto info = make_unique(); + info->schema = source.Read(); + info->name = source.Read(); + info->function = make_unique(ParsedExpression::Deserialize(source)); + auto param_count = source.Read(); + for (idx_t i = 0; i < param_count; i++) { + info->function->parameters.push_back(ParsedExpression::Deserialize(source)); + } + auto default_param_count = source.Read(); + for (idx_t i = 0; i < default_param_count; i++) { + auto name = source.Read(); + info->function->default_parameters[name] = ParsedExpression::Deserialize(source); + } + return info; +} + +} // namespace duckdb + + + +namespace duckdb { + +PragmaFunctionCatalogEntry::PragmaFunctionCatalogEntry(Catalog *catalog, SchemaCatalogEntry *schema, + CreatePragmaFunctionInfo *info) + : StandardEntry(CatalogType::PRAGMA_FUNCTION_ENTRY, schema, catalog, info->name), functions(move(info->functions)) { +} + +} // namespace duckdb + + + + + + + + + + + + + +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/catalog/catalog_entry/type_catalog_entry.hpp +// +// +//===----------------------------------------------------------------------===// + + + + + + + +namespace duckdb { +class Serializer; +class Deserializer; + +//! A type catalog entry +class TypeCatalogEntry : public StandardEntry { +public: + //! 
Create a TypeCatalogEntry and initialize storage for it + TypeCatalogEntry(Catalog *catalog, SchemaCatalogEntry *schema, CreateTypeInfo *info); + + unique_ptr user_type; + +public: + //! Serialize the meta information of the TypeCatalogEntry a serializer + virtual void Serialize(Serializer &serializer); + //! Deserializes to a TypeCatalogEntry + static unique_ptr Deserialize(Deserializer &source); + + string ToSQL() override; +}; +} // namespace duckdb + + +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/catalog/default/default_functions.hpp +// +// +//===----------------------------------------------------------------------===// + + + + + +namespace duckdb { +class SchemaCatalogEntry; + +class DefaultFunctionGenerator : public DefaultGenerator { +public: + DefaultFunctionGenerator(Catalog &catalog, SchemaCatalogEntry *schema); + + SchemaCatalogEntry *schema; + +public: + unique_ptr CreateDefaultEntry(ClientContext &context, const string &entry_name) override; + vector GetDefaultEntries() override; +}; + +} // namespace duckdb + +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/catalog/default/default_views.hpp +// +// +//===----------------------------------------------------------------------===// + + + + + + +namespace duckdb { +class SchemaCatalogEntry; + +class DefaultViewGenerator : public DefaultGenerator { +public: + DefaultViewGenerator(Catalog &catalog, SchemaCatalogEntry *schema); + + SchemaCatalogEntry *schema; + +public: + unique_ptr CreateDefaultEntry(ClientContext &context, const string &entry_name) override; + vector GetDefaultEntries() override; +}; + +} // namespace duckdb + + + + + + + + + + + + + + + + + + +#include +#include + +namespace duckdb { + +SchemaCatalogEntry::SchemaCatalogEntry(Catalog *catalog, string name_p, bool internal) + : CatalogEntry(CatalogType::SCHEMA_ENTRY, catalog, move(name_p)), + tables(*catalog, make_unique(*catalog, this)), indexes(*catalog), table_functions(*catalog), + copy_functions(*catalog), pragma_functions(*catalog), + functions(*catalog, make_unique(*catalog, this)), sequences(*catalog), + collations(*catalog), types(*catalog) { + this->internal = internal; +} + +CatalogEntry *SchemaCatalogEntry::AddEntry(ClientContext &context, unique_ptr entry, + OnCreateConflict on_conflict, unordered_set dependencies) { + auto entry_name = entry->name; + auto entry_type = entry->type; + auto result = entry.get(); + + // first find the set for this entry + auto &set = GetCatalogSet(entry_type); + + if (name != TEMP_SCHEMA) { + dependencies.insert(this); + } else { + entry->temporary = true; + } + if (on_conflict == OnCreateConflict::REPLACE_ON_CONFLICT) { + // CREATE OR REPLACE: first try to drop the entry + auto old_entry = set.GetEntry(context, entry_name); + if (old_entry) { + if (old_entry->type != entry_type) { + throw CatalogException("Existing object %s is of type %s, trying to replace with type %s", entry_name, + CatalogTypeToString(old_entry->type), CatalogTypeToString(entry_type)); + } + (void)set.DropEntry(context, entry_name, false); + } + } + // now try to add the entry + if (!set.CreateEntry(context, entry_name, move(entry), dependencies)) { + // entry already exists! 
+ if (on_conflict == OnCreateConflict::ERROR_ON_CONFLICT) { + throw CatalogException("%s with name \"%s\" already exists!", CatalogTypeToString(entry_type), entry_name); + } else { + return nullptr; + } + } + return result; +} + +CatalogEntry *SchemaCatalogEntry::AddEntry(ClientContext &context, unique_ptr entry, + OnCreateConflict on_conflict) { + unordered_set dependencies; + return AddEntry(context, move(entry), on_conflict, dependencies); +} + +CatalogEntry *SchemaCatalogEntry::CreateSequence(ClientContext &context, CreateSequenceInfo *info) { + auto sequence = make_unique(catalog, this, info); + return AddEntry(context, move(sequence), info->on_conflict); +} + +CatalogEntry *SchemaCatalogEntry::CreateType(ClientContext &context, CreateTypeInfo *info) { + auto sequence = make_unique(catalog, this, info); + return AddEntry(context, move(sequence), info->on_conflict); +} + +CatalogEntry *SchemaCatalogEntry::CreateTable(ClientContext &context, BoundCreateTableInfo *info) { + auto table = make_unique(catalog, this, info); + table->storage->info->cardinality = table->storage->GetTotalRows(); + return AddEntry(context, move(table), info->Base().on_conflict, info->dependencies); +} + +CatalogEntry *SchemaCatalogEntry::CreateView(ClientContext &context, CreateViewInfo *info) { + auto view = make_unique(catalog, this, info); + return AddEntry(context, move(view), info->on_conflict); +} + +CatalogEntry *SchemaCatalogEntry::CreateIndex(ClientContext &context, CreateIndexInfo *info, TableCatalogEntry *table) { + unordered_set dependencies; + dependencies.insert(table); + auto index = make_unique(catalog, this, info); + return AddEntry(context, move(index), info->on_conflict, dependencies); +} + +CatalogEntry *SchemaCatalogEntry::CreateCollation(ClientContext &context, CreateCollationInfo *info) { + auto collation = make_unique(catalog, this, info); + return AddEntry(context, move(collation), info->on_conflict); +} + +CatalogEntry *SchemaCatalogEntry::CreateTableFunction(ClientContext &context, CreateTableFunctionInfo *info) { + auto table_function = make_unique(catalog, this, info); + return AddEntry(context, move(table_function), info->on_conflict); +} + +CatalogEntry *SchemaCatalogEntry::CreateCopyFunction(ClientContext &context, CreateCopyFunctionInfo *info) { + auto copy_function = make_unique(catalog, this, info); + return AddEntry(context, move(copy_function), info->on_conflict); +} + +CatalogEntry *SchemaCatalogEntry::CreatePragmaFunction(ClientContext &context, CreatePragmaFunctionInfo *info) { + auto pragma_function = make_unique(catalog, this, info); + return AddEntry(context, move(pragma_function), info->on_conflict); +} + +CatalogEntry *SchemaCatalogEntry::CreateFunction(ClientContext &context, CreateFunctionInfo *info) { + unique_ptr function; + switch (info->type) { + case CatalogType::SCALAR_FUNCTION_ENTRY: + function = make_unique_base(catalog, this, + (CreateScalarFunctionInfo *)info); + break; + case CatalogType::MACRO_ENTRY: + // create a macro function + function = make_unique_base(catalog, this, (CreateMacroInfo *)info); + break; + case CatalogType::AGGREGATE_FUNCTION_ENTRY: + D_ASSERT(info->type == CatalogType::AGGREGATE_FUNCTION_ENTRY); + // create an aggregate function + function = make_unique_base(catalog, this, + (CreateAggregateFunctionInfo *)info); + break; + default: + throw InternalException("Unknown function type \"%s\"", CatalogTypeToString(info->type)); + } + return AddEntry(context, move(function), info->on_conflict); +} + +CatalogEntry 
*SchemaCatalogEntry::AddFunction(ClientContext &context, CreateFunctionInfo *info) { + auto entry = GetCatalogSet(info->type).GetEntry(context, info->name); + if (!entry) { + return CreateFunction(context, info); + } + + info->on_conflict = OnCreateConflict::REPLACE_ON_CONFLICT; + switch (info->type) { + case CatalogType::SCALAR_FUNCTION_ENTRY: { + auto scalar_info = (CreateScalarFunctionInfo *)info; + auto &scalars = *(ScalarFunctionCatalogEntry *)entry; + for (const auto &scalar : scalars.functions) { + scalar_info->functions.emplace_back(scalar); + } + break; + } + case CatalogType::AGGREGATE_FUNCTION_ENTRY: { + auto agg_info = (CreateAggregateFunctionInfo *)info; + auto &aggs = *(AggregateFunctionCatalogEntry *)entry; + for (const auto &agg : aggs.functions) { + agg_info->functions.AddFunction(agg); + } + break; + } + default: + // Macros can only be replaced because there is only one of each name. + throw InternalException("Unsupported function type \"%s\" for adding", CatalogTypeToString(info->type)); + } + return CreateFunction(context, info); +} + +void SchemaCatalogEntry::DropEntry(ClientContext &context, DropInfo *info) { + auto &set = GetCatalogSet(info->type); + + // first find the entry + auto existing_entry = set.GetEntry(context, info->name); + if (!existing_entry) { + if (!info->if_exists) { + throw CatalogException("%s with name \"%s\" does not exist!", CatalogTypeToString(info->type), info->name); + } + return; + } + if (existing_entry->type != info->type) { + throw CatalogException("Existing object %s is of type %s, trying to replace with type %s", info->name, + CatalogTypeToString(existing_entry->type), CatalogTypeToString(info->type)); + } + if (!set.DropEntry(context, info->name, info->cascade)) { + throw InternalException("Could not drop element because of an internal error"); + } +} + +void SchemaCatalogEntry::Alter(ClientContext &context, AlterInfo *info) { + CatalogType type = info->GetCatalogType(); + auto &set = GetCatalogSet(type); + if (info->type == AlterType::CHANGE_OWNERSHIP) { + if (!set.AlterOwnership(context, (ChangeOwnershipInfo *)info)) { + throw CatalogException("Couldn't change ownership!"); + } + } else { + string name = info->name; + if (!set.AlterEntry(context, name, info)) { + throw CatalogException("Entry with name \"%s\" does not exist!", name); + } + } +} + +void SchemaCatalogEntry::Scan(ClientContext &context, CatalogType type, + const std::function &callback) { + auto &set = GetCatalogSet(type); + set.Scan(context, callback); +} + +void SchemaCatalogEntry::Scan(CatalogType type, const std::function &callback) { + auto &set = GetCatalogSet(type); + set.Scan(callback); +} + +void SchemaCatalogEntry::Serialize(Serializer &serializer) { + serializer.WriteString(name); +} + +unique_ptr SchemaCatalogEntry::Deserialize(Deserializer &source) { + auto info = make_unique(); + info->schema = source.Read(); + return info; +} + +string SchemaCatalogEntry::ToSQL() { + std::stringstream ss; + ss << "CREATE SCHEMA " << name << ";"; + return ss.str(); +} + +CatalogSet &SchemaCatalogEntry::GetCatalogSet(CatalogType type) { + switch (type) { + case CatalogType::VIEW_ENTRY: + case CatalogType::TABLE_ENTRY: + return tables; + case CatalogType::INDEX_ENTRY: + return indexes; + case CatalogType::TABLE_FUNCTION_ENTRY: + return table_functions; + case CatalogType::COPY_FUNCTION_ENTRY: + return copy_functions; + case CatalogType::PRAGMA_FUNCTION_ENTRY: + return pragma_functions; + case CatalogType::AGGREGATE_FUNCTION_ENTRY: + case CatalogType::SCALAR_FUNCTION_ENTRY: 
+ case CatalogType::MACRO_ENTRY: + return functions; + case CatalogType::SEQUENCE_ENTRY: + return sequences; + case CatalogType::COLLATION_ENTRY: + return collations; + case CatalogType::TYPE_ENTRY: + return types; + default: + throw InternalException("Unsupported catalog type in schema"); + } +} + +} // namespace duckdb + + + + + + + + +#include +#include + +namespace duckdb { + +SequenceCatalogEntry::SequenceCatalogEntry(Catalog *catalog, SchemaCatalogEntry *schema, CreateSequenceInfo *info) + : StandardEntry(CatalogType::SEQUENCE_ENTRY, schema, catalog, info->name), usage_count(info->usage_count), + counter(info->start_value), increment(info->increment), start_value(info->start_value), + min_value(info->min_value), max_value(info->max_value), cycle(info->cycle) { + this->temporary = info->temporary; +} + +void SequenceCatalogEntry::Serialize(Serializer &serializer) { + serializer.WriteString(schema->name); + serializer.WriteString(name); + // serializer.Write(counter); + serializer.Write(usage_count); + serializer.Write(increment); + serializer.Write(min_value); + serializer.Write(max_value); + serializer.Write(counter); + serializer.Write(cycle); +} + +unique_ptr SequenceCatalogEntry::Deserialize(Deserializer &source) { + auto info = make_unique(); + info->schema = source.Read(); + info->name = source.Read(); + // info->counter = source.Read(); + info->usage_count = source.Read(); + info->increment = source.Read(); + info->min_value = source.Read(); + info->max_value = source.Read(); + info->start_value = source.Read(); + info->cycle = source.Read(); + return info; +} + +string SequenceCatalogEntry::ToSQL() { + std::stringstream ss; + ss << "CREATE SEQUENCE "; + ss << name; + ss << " INCREMENT BY " << increment; + ss << " MINVALUE " << min_value; + ss << " MAXVALUE " << max_value; + ss << " START " << counter; + ss << " " << (cycle ? "CYCLE" : "NO CYCLE") << ";"; + return ss.str(); +} +} // namespace duckdb + + + + + + + + +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/parser/constraints/check_constraint.hpp +// +// +//===----------------------------------------------------------------------===// + + + + + + + +namespace duckdb { + +//! The CheckConstraint contains an expression that must evaluate to TRUE for +//! every row in a table +class CheckConstraint : public Constraint { +public: + DUCKDB_API explicit CheckConstraint(unique_ptr expression); + + unique_ptr expression; + +public: + DUCKDB_API string ToString() const override; + + DUCKDB_API unique_ptr Copy() override; + + //! Serialize to a stand-alone binary blob + DUCKDB_API void Serialize(Serializer &serializer) override; + //! Deserializes a CheckConstraint + DUCKDB_API static unique_ptr Deserialize(Deserializer &source); +}; + +} // namespace duckdb + + +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/parser/constraints/unique_constraint.hpp +// +// +//===----------------------------------------------------------------------===// + + + + + + +namespace duckdb { + +class UniqueConstraint : public Constraint { +public: + DUCKDB_API UniqueConstraint(uint64_t index, bool is_primary_key); + DUCKDB_API UniqueConstraint(vector columns, bool is_primary_key); + + //! The index of the column for which this constraint holds. Only used when the constraint relates to a single + //! column, equal to DConstants::INVALID_INDEX if not used + uint64_t index; + //! The set of columns for which this constraint holds by name. 
Only used when the index field is not used. + vector columns; + //! Whether or not this is a PRIMARY KEY constraint, or a UNIQUE constraint. + bool is_primary_key; + +public: + DUCKDB_API string ToString() const override; + + DUCKDB_API unique_ptr Copy() override; + + //! Serialize to a stand-alone binary blob + DUCKDB_API void Serialize(Serializer &serializer) override; + //! Deserializes a ParsedConstraint + DUCKDB_API static unique_ptr Deserialize(Deserializer &source); +}; + +} // namespace duckdb + + + +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/planner/constraints/bound_not_null_constraint.hpp +// +// +//===----------------------------------------------------------------------===// + + + + + +namespace duckdb { + +class BoundNotNullConstraint : public BoundConstraint { +public: + explicit BoundNotNullConstraint(column_t index) : BoundConstraint(ConstraintType::NOT_NULL), index(index) { + } + + //! Column index this constraint pertains to + column_t index; +}; + +} // namespace duckdb + +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/planner/constraints/bound_unique_constraint.hpp +// +// +//===----------------------------------------------------------------------===// + + + + + + +namespace duckdb { + +class BoundUniqueConstraint : public BoundConstraint { +public: + BoundUniqueConstraint(vector keys, unordered_set key_set, bool is_primary_key) + : BoundConstraint(ConstraintType::UNIQUE), keys(move(keys)), key_set(move(key_set)), + is_primary_key(is_primary_key) { +#ifdef DEBUG + D_ASSERT(keys.size() == key_set.size()); + for (auto &key : keys) { + D_ASSERT(key_set.find(key) != key_set.end()); + } +#endif + } + + //! The keys that define the unique constraint + vector keys; + //! The same keys but stored as an unordered set + unordered_set key_set; + //! Whether or not the unique constraint is a primary key + bool is_primary_key; +}; + +} // namespace duckdb + +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/planner/constraints/bound_check_constraint.hpp +// +// +//===----------------------------------------------------------------------===// + + + + + + + +namespace duckdb { + +//! The CheckConstraint contains an expression that must evaluate to TRUE for +//! every row in a table +class BoundCheckConstraint : public BoundConstraint { +public: + BoundCheckConstraint() : BoundConstraint(ConstraintType::CHECK) { + } + + //! The expression + unique_ptr expression; + //! 
The columns used by the CHECK constraint + unordered_set bound_columns; +}; + +} // namespace duckdb + + + +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/storage/storage_manager.hpp +// +// +//===----------------------------------------------------------------------===// + + + + + + +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/storage/write_ahead_log.hpp +// +// +//===----------------------------------------------------------------------===// + + + + + +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/common/enums/wal_type.hpp +// +// +//===----------------------------------------------------------------------===// + + + + + +namespace duckdb { + +enum class WALType : uint8_t { + INVALID = 0, + // ----------------------------- + // Catalog + // ----------------------------- + CREATE_TABLE = 1, + DROP_TABLE = 2, + + CREATE_SCHEMA = 3, + DROP_SCHEMA = 4, + + CREATE_VIEW = 5, + DROP_VIEW = 6, + + CREATE_SEQUENCE = 8, + DROP_SEQUENCE = 9, + SEQUENCE_VALUE = 10, + + CREATE_MACRO = 11, + DROP_MACRO = 12, + + CREATE_TYPE = 13, + DROP_TYPE = 14, + + ALTER_INFO = 20, + // ----------------------------- + // Data + // ----------------------------- + USE_TABLE = 25, + INSERT_TUPLE = 26, + DELETE_TUPLE = 27, + UPDATE_TUPLE = 28, + // ----------------------------- + // Flush + // ----------------------------- + CHECKPOINT = 99, + WAL_FLUSH = 100 +}; +} + + + + + +namespace duckdb { + +struct AlterInfo; + +class BufferedSerializer; +class Catalog; +class DatabaseInstance; +class SchemaCatalogEntry; +class SequenceCatalogEntry; +class MacroCatalogEntry; +class ViewCatalogEntry; +class TypeCatalogEntry; +class TableCatalogEntry; +class Transaction; +class TransactionManager; + +//! The WriteAheadLog (WAL) is a log that is used to provide durability. Prior +//! to committing a transaction it writes the changes the transaction made to +//! the database to the log, which can then be replayed upon startup in case the +//! server crashes or is shut down. +class WriteAheadLog { +public: + explicit WriteAheadLog(DatabaseInstance &database); + + //! Whether or not the WAL has been initialized + bool initialized; + //! Skip writing to the WAL + bool skip_writing; + +public: + //! Replay the WAL + static bool Replay(DatabaseInstance &database, string &path); + + //! Initialize the WAL in the specified directory + void Initialize(string &path); + //! Returns the current size of the WAL in bytes + int64_t GetWALSize(); + //! Gets the total bytes written to the WAL since startup + idx_t GetTotalWritten(); + + void WriteCreateTable(TableCatalogEntry *entry); + void WriteDropTable(TableCatalogEntry *entry); + + void WriteCreateSchema(SchemaCatalogEntry *entry); + void WriteDropSchema(SchemaCatalogEntry *entry); + + void WriteCreateView(ViewCatalogEntry *entry); + void WriteDropView(ViewCatalogEntry *entry); + + void WriteCreateSequence(SequenceCatalogEntry *entry); + void WriteDropSequence(SequenceCatalogEntry *entry); + void WriteSequenceValue(SequenceCatalogEntry *entry, SequenceValue val); + + void WriteCreateMacro(MacroCatalogEntry *entry); + void WriteDropMacro(MacroCatalogEntry *entry); + + void WriteCreateType(TypeCatalogEntry *entry); + void WriteDropType(TypeCatalogEntry *entry); + //! 
Sets the table used for subsequent insert/delete/update commands + void WriteSetTable(string &schema, string &table); + + void WriteAlter(AlterInfo &info); + + void WriteInsert(DataChunk &chunk); + void WriteDelete(DataChunk &chunk); + //! Write a single (sub-) column update to the WAL. Chunk must be a pair of (COL, ROW_ID). + //! The column_path vector is a *path* towards a column within the table + //! i.e. if we have a table with a single column S STRUCT(A INT, B INT) + //! and we update the validity mask of "S.B" + //! the column path is: + //! 0 (first column of table) + //! -> 1 (second subcolumn of struct) + //! -> 0 (first subcolumn of INT) + void WriteUpdate(DataChunk &chunk, const vector &column_path); + + //! Truncate the WAL to a previous size, and clear anything currently set in the writer + void Truncate(int64_t size); + //! Delete the WAL file on disk. The WAL should not be used after this point. + void Delete(); + void Flush(); + + void WriteCheckpoint(block_id_t meta_block); + +private: + DatabaseInstance &database; + unique_ptr writer; + string wal_path; +}; + +} // namespace duckdb + + +namespace duckdb { +class BlockManager; +class Catalog; +class DatabaseInstance; +class TransactionManager; +class TableCatalogEntry; + +//! StorageManager is responsible for managing the physical storage of the +//! database on disk +class StorageManager { +public: + StorageManager(DatabaseInstance &db, string path, bool read_only); + ~StorageManager(); + + //! The BlockManager to read/store meta information and data in blocks + unique_ptr block_manager; + //! The BufferManager of the database + unique_ptr buffer_manager; + //! The database this storagemanager belongs to + DatabaseInstance &db; + +public: + static StorageManager &GetStorageManager(ClientContext &context); + static StorageManager &GetStorageManager(DatabaseInstance &db); + + //! Initialize a database or load an existing database from the given path + void Initialize(); + //! Get the WAL of the StorageManager, returns nullptr if in-memory + WriteAheadLog *GetWriteAheadLog() { + return wal.initialized ? &wal : nullptr; + } + + DatabaseInstance &GetDatabase() { + return db; + } + + void CreateCheckpoint(bool delete_wal = false, bool force_checkpoint = false); + + string GetDBPath() { + return path; + } + bool InMemory(); + +private: + //! Load the database from a directory + void LoadDatabase(); + + //! The path of the database + string path; + //! The WriteAheadLog of the storage manager + WriteAheadLog wal; + + //! 
Whether or not the database is opened in read-only mode + bool read_only; +}; + +} // namespace duckdb + + + +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/execution/index/art/art.hpp +// +// +//===----------------------------------------------------------------------===// + + + + + + + + + + +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/execution/index/art/art_key.hpp +// +// +//===----------------------------------------------------------------------===// + + + + + +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/common/bit_operations.hpp +// +// +//===----------------------------------------------------------------------===// + + + + + + + + +namespace duckdb { + +#define BSWAP16(x) ((uint16_t)((((uint16_t)(x)&0xff00) >> 8) | (((uint16_t)(x)&0x00ff) << 8))) + +#define BSWAP32(x) \ + ((uint32_t)((((uint32_t)(x)&0xff000000) >> 24) | (((uint32_t)(x)&0x00ff0000) >> 8) | \ + (((uint32_t)(x)&0x0000ff00) << 8) | (((uint32_t)(x)&0x000000ff) << 24))) + +#define BSWAP64(x) \ + ((uint64_t)((((uint64_t)(x)&0xff00000000000000ull) >> 56) | (((uint64_t)(x)&0x00ff000000000000ull) >> 40) | \ + (((uint64_t)(x)&0x0000ff0000000000ull) >> 24) | (((uint64_t)(x)&0x000000ff00000000ull) >> 8) | \ + (((uint64_t)(x)&0x00000000ff000000ull) << 8) | (((uint64_t)(x)&0x0000000000ff0000ull) << 24) | \ + (((uint64_t)(x)&0x000000000000ff00ull) << 40) | (((uint64_t)(x)&0x00000000000000ffull) << 56))) + +bool IsLittleEndian(); +uint8_t FlipSign(uint8_t key_byte); +uint32_t EncodeFloat(float x); +uint64_t EncodeDouble(double x); + +template +void EncodeData(data_ptr_t dataptr, T value, bool is_little_endian) { + throw NotImplementedException("Cannot create data from this type"); +} + +template <> +void EncodeData(data_ptr_t dataptr, bool value, bool is_little_endian); +template <> +void EncodeData(data_ptr_t dataptr, int8_t value, bool is_little_endian); +template <> +void EncodeData(data_ptr_t dataptr, int16_t value, bool is_little_endian); +template <> +void EncodeData(data_ptr_t dataptr, int32_t value, bool is_little_endian); +template <> +void EncodeData(data_ptr_t dataptr, int64_t value, bool is_little_endian); +template <> +void EncodeData(data_ptr_t dataptr, uint8_t value, bool is_little_endian); +template <> +void EncodeData(data_ptr_t dataptr, uint16_t value, bool is_little_endian); +template <> +void EncodeData(data_ptr_t dataptr, uint32_t value, bool is_little_endian); +template <> +void EncodeData(data_ptr_t dataptr, uint64_t value, bool is_little_endian); +template <> +void EncodeData(data_ptr_t dataptr, hugeint_t value, bool is_little_endian); +template <> +void EncodeData(data_ptr_t dataptr, double value, bool is_little_endian); +template <> +void EncodeData(data_ptr_t dataptr, float value, bool is_little_endian); +template <> +void EncodeData(data_ptr_t dataptr, interval_t value, bool is_little_endian); + +void EncodeStringDataPrefix(data_ptr_t dataptr, string_t value, idx_t prefix_len); + +} // namespace duckdb + + + +namespace duckdb { + +class Key { +public: + Key(unique_ptr data, idx_t len); + + idx_t len; + unique_ptr data; + +public: + template + static unique_ptr CreateKey(T element, bool is_little_endian) { + auto data = Key::CreateData(element, is_little_endian); + return make_unique(move(data), sizeof(element)); + } + +public: + data_t &operator[](std::size_t i) { + return data[i]; + } + const data_t 
&operator[](std::size_t i) const { + return data[i]; + } + bool operator>(const Key &k) const; + bool operator<(const Key &k) const; + bool operator>=(const Key &k) const; + bool operator==(const Key &k) const; + + string ToString(bool is_little_endian, PhysicalType type); + +private: + template + static unique_ptr CreateData(T value, bool is_little_endian) { + auto data = unique_ptr(new data_t[sizeof(value)]); + EncodeData(data.get(), value, is_little_endian); + return data; + } +}; + +template <> +unique_ptr Key::CreateKey(string_t value, bool is_little_endian); +template <> +unique_ptr Key::CreateKey(const char *value, bool is_little_endian); + +} // namespace duckdb + +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/execution/index/art/leaf.hpp +// +// +//===----------------------------------------------------------------------===// + + + +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/execution/index/art/node.hpp +// +// +//===----------------------------------------------------------------------===// + + + + + + +namespace duckdb { +enum class NodeType : uint8_t { N4 = 0, N16 = 1, N48 = 2, N256 = 3, NLeaf = 4 }; + +class ART; + +class Node { +public: + static const uint8_t EMPTY_MARKER = 48; + +public: + Node(ART &art, NodeType type, size_t compressed_prefix_size); + virtual ~Node() { + } + + //! length of the compressed path (prefix) + uint32_t prefix_length; + //! number of non-null children + uint16_t count; + //! node type + NodeType type; + //! compressed path (prefix) + unique_ptr prefix; + +public: + //! Get the position of a child corresponding exactly to the specific byte, returns DConstants::INVALID_INDEX if not + //! exists + virtual idx_t GetChildPos(uint8_t k) { + return DConstants::INVALID_INDEX; + } + //! Get the position of the first child that is greater or equal to the specific byte, or DConstants::INVALID_INDEX + //! if there are no children matching the criteria + virtual idx_t GetChildGreaterEqual(uint8_t k, bool &equal) { + throw InternalException("Unimplemented GetChildGreaterEqual for ARTNode"); + } + //! Get the position of the biggest element in node + virtual idx_t GetMin(); + //! Get the next position in the node, or DConstants::INVALID_INDEX if there is no next position. if pos == + //! DConstants::INVALID_INDEX, then the first valid position in the node will be returned. + virtual idx_t GetNextPos(idx_t pos) { + return DConstants::INVALID_INDEX; + } + //! Get the child at the specified position in the node. pos should be between [0, count). Throws an assertion if + //! the element is not found. + virtual unique_ptr *GetChild(idx_t pos); + + //! Compare the key with the prefix of the node, return the number matching bytes + static uint32_t PrefixMismatch(ART &art, Node *node, Key &key, uint64_t depth); + //! Insert leaf into inner node + static void InsertLeaf(ART &art, unique_ptr &node, uint8_t key, unique_ptr &new_node); + //! Erase entry from node + static void Erase(ART &art, unique_ptr &node, idx_t pos); + +protected: + //! 
Copies the prefix from the source to the destination node + static void CopyPrefix(ART &art, Node *src, Node *dst); +}; + +} // namespace duckdb + + +namespace duckdb { + +class Leaf : public Node { +public: + Leaf(ART &art, unique_ptr value, row_t row_id); + + unique_ptr value; + idx_t capacity; + idx_t num_elements; + + row_t GetRowId(idx_t index) { + return row_ids[index]; + } + +public: + void Insert(row_t row_id); + void Remove(row_t row_id); + +private: + unique_ptr row_ids; +}; + +} // namespace duckdb + + +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/execution/index/art/node4.hpp +// +// +//===----------------------------------------------------------------------===// + + + + +namespace duckdb { + +class Node4 : public Node { +public: + Node4(ART &art, size_t compression_length); + + uint8_t key[4]; + unique_ptr child[4]; + +public: + //! Get position of a byte, returns -1 if not exists + idx_t GetChildPos(uint8_t k) override; + //! Get the position of the first child that is greater or equal to the specific byte, or DConstants::INVALID_INDEX + //! if there are no children matching the criteria + idx_t GetChildGreaterEqual(uint8_t k, bool &equal) override; + //! Get the next position in the node, or DConstants::INVALID_INDEX if there is no next position + idx_t GetNextPos(idx_t pos) override; + //! Get Node4 Child + unique_ptr *GetChild(idx_t pos) override; + + idx_t GetMin() override; + + //! Insert Leaf to the Node4 + static void Insert(ART &art, unique_ptr &node, uint8_t key_byte, unique_ptr &child); + //! Remove Leaf from Node4 + static void Erase(ART &art, unique_ptr &node, int pos); +}; +} // namespace duckdb + +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/execution/index/art/node16.hpp +// +// +//===----------------------------------------------------------------------===// + + + + +namespace duckdb { + +class Node16 : public Node { +public: + Node16(ART &art, size_t compression_lengthh); + + uint8_t key[16]; + unique_ptr child[16]; + +public: + //! Get position of a byte, returns -1 if not exists + idx_t GetChildPos(uint8_t k) override; + //! Get the position of the first child that is greater or equal to the specific byte, or DConstants::INVALID_INDEX + //! if there are no children matching the criteria + idx_t GetChildGreaterEqual(uint8_t k, bool &equal) override; + //! Get the next position in the node, or DConstants::INVALID_INDEX if there is no next position + idx_t GetNextPos(idx_t pos) override; + //! Get Node16 Child + unique_ptr *GetChild(idx_t pos) override; + + idx_t GetMin() override; + + //! Insert node into Node16 + static void Insert(ART &art, unique_ptr &node, uint8_t key_byte, unique_ptr &child); + //! Shrink to node 4 + static void Erase(ART &art, unique_ptr &node, int pos); +}; +} // namespace duckdb + +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/execution/index/art/node48.hpp +// +// +//===----------------------------------------------------------------------===// + + + + +namespace duckdb { + +class Node48 : public Node { +public: + Node48(ART &art, size_t compression_length); + + uint8_t child_index[256]; + unique_ptr child[48]; + +public: + //! Get position of a byte, returns -1 if not exists + idx_t GetChildPos(uint8_t k) override; + //! Get the position of the first child that is greater or equal to the specific byte, or DConstants::INVALID_INDEX + //! 
if there are no children matching the criteria + idx_t GetChildGreaterEqual(uint8_t k, bool &equal) override; + //! Get the next position in the node, or DConstants::INVALID_INDEX if there is no next position + idx_t GetNextPos(idx_t pos) override; + //! Get Node48 Child + unique_ptr *GetChild(idx_t pos) override; + + idx_t GetMin() override; + + //! Insert node in Node48 + static void Insert(ART &art, unique_ptr &node, uint8_t key_byte, unique_ptr &child); + + //! Shrink to node 16 + static void Erase(ART &art, unique_ptr &node, int pos); +}; +} // namespace duckdb + +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/execution/index/art/node256.hpp +// +// +//===----------------------------------------------------------------------===// + + + + +namespace duckdb { + +class Node256 : public Node { +public: + Node256(ART &art, size_t compression_length); + + unique_ptr child[256]; + +public: + //! Get position of a specific byte, returns DConstants::INVALID_INDEX if not exists + idx_t GetChildPos(uint8_t k) override; + //! Get the position of the first child that is greater or equal to the specific byte, or DConstants::INVALID_INDEX + //! if there are no children matching the criteria + idx_t GetChildGreaterEqual(uint8_t k, bool &equal) override; + //! Get the next position in the node, or DConstants::INVALID_INDEX if there is no next position + idx_t GetNextPos(idx_t pos) override; + //! Get Node256 Child + unique_ptr *GetChild(idx_t pos) override; + + idx_t GetMin() override; + + //! Insert node From Node256 + static void Insert(ART &art, unique_ptr &node, uint8_t key_byte, unique_ptr &child); + + //! Shrink to node 48 + static void Erase(ART &art, unique_ptr &node, int pos); +}; +} // namespace duckdb + + +namespace duckdb { +struct IteratorEntry { + IteratorEntry() { + } + IteratorEntry(Node *node, idx_t pos) : node(node), pos(pos) { + } + + Node *node = nullptr; + idx_t pos = 0; +}; + +struct Iterator { + //! The current Leaf Node, valid if depth>0 + Leaf *node = nullptr; + //! The current depth + int32_t depth = 0; + //! Stack, the size is determined at runtime + vector stack; + + bool start = false; + + void SetEntry(idx_t depth, IteratorEntry entry); +}; + +struct ARTIndexScanState : public IndexScanState { + ARTIndexScanState() : checked(false), result_index(0) { + } + + Value values[2]; + ExpressionType expressions[2]; + bool checked; + vector result_ids; + Iterator iterator; + //! Stores the current leaf + Leaf *cur_leaf = nullptr; + //! Offset to leaf + idx_t result_index = 0; +}; + +class ART : public Index { +public: + ART(const vector &column_ids, const vector> &unbound_expressions, + bool is_unique = false, bool is_primary = false); + ~ART() override; + + //! Root of the tree + unique_ptr tree; + //! True if machine is little endian + bool is_little_endian; + +public: + //! Initialize a scan on the index with the given expression and column ids + //! to fetch from the base table for a single predicate + unique_ptr InitializeScanSinglePredicate(Transaction &transaction, Value value, + ExpressionType expressionType) override; + + //! Initialize a scan on the index with the given expression and column ids + //! to fetch from the base table for two predicates + unique_ptr InitializeScanTwoPredicates(Transaction &transaction, Value low_value, + ExpressionType low_expression_type, Value high_value, + ExpressionType high_expression_type) override; + + //! 
Perform a lookup on the index + bool Scan(Transaction &transaction, DataTable &table, IndexScanState &state, idx_t max_count, + vector &result_ids) override; + //! Append entries to the index + bool Append(IndexLock &lock, DataChunk &entries, Vector &row_identifiers) override; + //! Verify that data can be appended to the index + void VerifyAppend(DataChunk &chunk) override; + //! Delete entries in the index + void Delete(IndexLock &lock, DataChunk &entries, Vector &row_identifiers) override; + //! Insert data into the index. + bool Insert(IndexLock &lock, DataChunk &data, Vector &row_ids) override; + + bool SearchEqual(ARTIndexScanState *state, idx_t max_count, vector &result_ids); + //! Search Equal used for Joins that do not need to fetch data + void SearchEqualJoinNoFetch(Value &equal_value, idx_t &result_size); + +private: + DataChunk expression_result; + +private: + //! Insert a row id into a leaf node + bool InsertToLeaf(Leaf &leaf, row_t row_id); + //! Insert the leaf value into the tree + bool Insert(unique_ptr &node, unique_ptr key, unsigned depth, row_t row_id); + + //! Erase element from leaf (if leaf has more than one value) or eliminate the leaf itself + void Erase(unique_ptr &node, Key &key, unsigned depth, row_t row_id); + + //! Check if the key of the leaf is equal to the searched key + bool LeafMatches(Node *node, Key &key, unsigned depth); + + //! Find the node with a matching key, optimistic version + Node *Lookup(unique_ptr &node, Key &key, unsigned depth); + + //! Find the first node that is bigger (or equal to) a specific key + bool Bound(unique_ptr &node, Key &key, Iterator &iterator, bool inclusive); + + //! Gets next node for range queries + bool IteratorNext(Iterator &iter); + + bool SearchGreater(ARTIndexScanState *state, bool inclusive, idx_t max_count, vector &result_ids); + bool SearchLess(ARTIndexScanState *state, bool inclusive, idx_t max_count, vector &result_ids); + bool SearchCloseRange(ARTIndexScanState *state, bool left_inclusive, bool right_inclusive, idx_t max_count, + vector &result_ids); + +private: + template + bool IteratorScan(ARTIndexScanState *state, Iterator *it, Key *upper_bound, idx_t max_count, + vector &result_ids); + + void GenerateKeys(DataChunk &input, vector> &keys); +}; + +} // namespace duckdb + + + +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/parser/parsed_expression_iterator.hpp +// +// +//===----------------------------------------------------------------------===// + + + + + + +#include + +namespace duckdb { + +class ParsedExpressionIterator { +public: + static void EnumerateChildren(const ParsedExpression &expression, + const std::function &callback); + static void EnumerateChildren(ParsedExpression &expr, const std::function &callback); + static void EnumerateChildren(ParsedExpression &expr, + const std::function &child)> &callback); + + static void EnumerateTableRefChildren(TableRef &ref, + const std::function &child)> &callback); + static void EnumerateQueryNodeChildren(QueryNode &node, + const std::function &child)> &callback); +}; + +} // namespace duckdb + +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/planner/expression_binder/alter_binder.hpp +// +// +//===----------------------------------------------------------------------===// + + + + + + +namespace duckdb { +class TableCatalogEntry; + +//! 
The ALTER binder is responsible for binding an expression within alter statements +class AlterBinder : public ExpressionBinder { +public: + AlterBinder(Binder &binder, ClientContext &context, TableCatalogEntry &table, vector &bound_columns, + LogicalType target_type); + + TableCatalogEntry &table; + vector &bound_columns; + +protected: + BindResult BindExpression(unique_ptr *expr_ptr, idx_t depth, + bool root_expression = false) override; + + BindResult BindColumn(ColumnRefExpression &expr); + + string UnsupportedAggregateMessage() override; +}; + +} // namespace duckdb + +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/parser/keyword_helper.hpp +// +// +//===----------------------------------------------------------------------===// + + + + + +namespace duckdb { + +class KeywordHelper { +public: + //! Returns true if the given text matches a keyword of the parser + static bool IsKeyword(const string &text); + + //! Returns true if the given string needs to be quoted when written as an identifier + static bool RequiresQuotes(const string &text); + + //! Writes a string that is optionally quoted + escaped so it can be used as an identifier + static string WriteOptionallyQuoted(const string &text); +}; + +} // namespace duckdb + + +#include +#include + +namespace duckdb { + +idx_t TableCatalogEntry::GetColumnIndex(string &column_name, bool if_exists) { + auto entry = name_map.find(column_name); + if (entry == name_map.end()) { + // entry not found: try lower-casing the name + entry = name_map.find(StringUtil::Lower(column_name)); + if (entry == name_map.end()) { + if (if_exists) { + return DConstants::INVALID_INDEX; + } + throw BinderException("Table \"%s\" does not have a column with name \"%s\"", name, column_name); + } + } + column_name = columns[entry->second].name; + return idx_t(entry->second); +} + +TableCatalogEntry::TableCatalogEntry(Catalog *catalog, SchemaCatalogEntry *schema, BoundCreateTableInfo *info, + std::shared_ptr inherited_storage) + : StandardEntry(CatalogType::TABLE_ENTRY, schema, catalog, info->Base().table), storage(move(inherited_storage)), + columns(move(info->Base().columns)), constraints(move(info->Base().constraints)), + bound_constraints(move(info->bound_constraints)) { + this->temporary = info->Base().temporary; + // add lower case aliases + for (idx_t i = 0; i < columns.size(); i++) { + D_ASSERT(name_map.find(columns[i].name) == name_map.end()); + name_map[columns[i].name] = i; + } + // add the "rowid" alias, if there is no rowid column specified in the table + if (name_map.find("rowid") == name_map.end()) { + name_map["rowid"] = COLUMN_IDENTIFIER_ROW_ID; + } + if (!storage) { + // create the physical storage + vector colum_def_copy; + for (auto &col_def : columns) { + colum_def_copy.push_back(col_def.Copy()); + } + storage = make_shared(catalog->db, schema->name, name, move(colum_def_copy), move(info->data)); + + // create the unique indexes for the UNIQUE and PRIMARY KEY constraints + for (idx_t i = 0; i < bound_constraints.size(); i++) { + auto &constraint = bound_constraints[i]; + if (constraint->type == ConstraintType::UNIQUE) { + // unique constraint: create a unique index + auto &unique = (BoundUniqueConstraint &)*constraint; + // fetch types and create expressions for the index from the columns + vector column_ids; + vector> unbound_expressions; + vector> bound_expressions; + idx_t key_nr = 0; + for (auto &key : unique.keys) { + D_ASSERT(key < columns.size()); + + 
unbound_expressions.push_back(make_unique( + columns[key].name, columns[key].type, ColumnBinding(0, column_ids.size()))); + + bound_expressions.push_back(make_unique(columns[key].type, key_nr++)); + column_ids.push_back(key); + } + // create an adaptive radix tree around the expressions + auto art = make_unique(column_ids, move(unbound_expressions), true, unique.is_primary_key); + storage->AddIndex(move(art), bound_expressions); + } + } + } +} + +bool TableCatalogEntry::ColumnExists(const string &name) { + return name_map.find(name) != name_map.end(); +} + +unique_ptr TableCatalogEntry::AlterEntry(ClientContext &context, AlterInfo *info) { + D_ASSERT(!internal); + if (info->type != AlterType::ALTER_TABLE) { + throw CatalogException("Can only modify table with ALTER TABLE statement"); + } + auto table_info = (AlterTableInfo *)info; + switch (table_info->alter_table_type) { + case AlterTableType::RENAME_COLUMN: { + auto rename_info = (RenameColumnInfo *)table_info; + return RenameColumn(context, *rename_info); + } + case AlterTableType::RENAME_TABLE: { + auto rename_info = (RenameTableInfo *)table_info; + auto copied_table = Copy(context); + copied_table->name = rename_info->new_table_name; + return copied_table; + } + case AlterTableType::ADD_COLUMN: { + auto add_info = (AddColumnInfo *)table_info; + return AddColumn(context, *add_info); + } + case AlterTableType::REMOVE_COLUMN: { + auto remove_info = (RemoveColumnInfo *)table_info; + return RemoveColumn(context, *remove_info); + } + case AlterTableType::SET_DEFAULT: { + auto set_default_info = (SetDefaultInfo *)table_info; + return SetDefault(context, *set_default_info); + } + case AlterTableType::ALTER_COLUMN_TYPE: { + auto change_type_info = (ChangeColumnTypeInfo *)table_info; + return ChangeColumnType(context, *change_type_info); + } + default: + throw InternalException("Unrecognized alter table type!"); + } +} + +static void RenameExpression(ParsedExpression &expr, RenameColumnInfo &info) { + if (expr.type == ExpressionType::COLUMN_REF) { + auto &colref = (ColumnRefExpression &)expr; + if (colref.column_names[0] == info.old_name) { + colref.column_names[0] = info.new_name; + } + } + ParsedExpressionIterator::EnumerateChildren( + expr, [&](const ParsedExpression &child) { RenameExpression((ParsedExpression &)child, info); }); +} + +unique_ptr TableCatalogEntry::RenameColumn(ClientContext &context, RenameColumnInfo &info) { + auto create_info = make_unique(schema->name, name); + create_info->temporary = temporary; + idx_t rename_idx = GetColumnIndex(info.old_name); + for (idx_t i = 0; i < columns.size(); i++) { + ColumnDefinition copy = columns[i].Copy(); + + create_info->columns.push_back(move(copy)); + if (rename_idx == i) { + create_info->columns[i].name = info.new_name; + } + } + for (idx_t c_idx = 0; c_idx < constraints.size(); c_idx++) { + auto copy = constraints[c_idx]->Copy(); + switch (copy->type) { + case ConstraintType::NOT_NULL: + // NOT NULL constraint: no adjustments necessary + break; + case ConstraintType::CHECK: { + // CHECK constraint: need to rename column references that refer to the renamed column + auto &check = (CheckConstraint &)*copy; + RenameExpression(*check.expression, info); + break; + } + case ConstraintType::UNIQUE: { + // UNIQUE constraint: possibly need to rename columns + auto &unique = (UniqueConstraint &)*copy; + for (idx_t i = 0; i < unique.columns.size(); i++) { + if (unique.columns[i] == info.old_name) { + unique.columns[i] = info.new_name; + } + } + break; + } + default: + throw 
InternalException("Unsupported constraint for entry!"); + } + create_info->constraints.push_back(move(copy)); + } + auto binder = Binder::CreateBinder(context); + auto bound_create_info = binder->BindCreateTableInfo(move(create_info)); + return make_unique(catalog, schema, (BoundCreateTableInfo *)bound_create_info.get(), storage); +} + +unique_ptr TableCatalogEntry::AddColumn(ClientContext &context, AddColumnInfo &info) { + auto create_info = make_unique(schema->name, name); + create_info->temporary = temporary; + for (idx_t i = 0; i < columns.size(); i++) { + create_info->columns.push_back(columns[i].Copy()); + } + Binder::BindLogicalType(context, info.new_column.type, schema->name); + info.new_column.oid = columns.size(); + create_info->columns.push_back(info.new_column.Copy()); + + auto binder = Binder::CreateBinder(context); + auto bound_create_info = binder->BindCreateTableInfo(move(create_info)); + auto new_storage = + make_shared(context, *storage, info.new_column, bound_create_info->bound_defaults.back().get()); + return make_unique(catalog, schema, (BoundCreateTableInfo *)bound_create_info.get(), + new_storage); +} + +unique_ptr TableCatalogEntry::RemoveColumn(ClientContext &context, RemoveColumnInfo &info) { + auto create_info = make_unique(schema->name, name); + create_info->temporary = temporary; + idx_t removed_index = GetColumnIndex(info.removed_column, info.if_exists); + if (removed_index == DConstants::INVALID_INDEX) { + return nullptr; + } + for (idx_t i = 0; i < columns.size(); i++) { + if (removed_index != i) { + create_info->columns.push_back(columns[i].Copy()); + } + } + if (create_info->columns.empty()) { + throw CatalogException("Cannot drop column: table only has one column remaining!"); + } + // handle constraints for the new table + D_ASSERT(constraints.size() == bound_constraints.size()); + for (idx_t constr_idx = 0; constr_idx < constraints.size(); constr_idx++) { + auto &constraint = constraints[constr_idx]; + auto &bound_constraint = bound_constraints[constr_idx]; + switch (bound_constraint->type) { + case ConstraintType::NOT_NULL: { + auto ¬_null_constraint = (BoundNotNullConstraint &)*bound_constraint; + if (not_null_constraint.index != removed_index) { + // the constraint is not about this column: we need to copy it + // we might need to shift the index back by one though, to account for the removed column + idx_t new_index = not_null_constraint.index; + if (not_null_constraint.index > removed_index) { + new_index -= 1; + } + create_info->constraints.push_back(make_unique(new_index)); + } + break; + } + case ConstraintType::CHECK: { + // CHECK constraint + auto &bound_check = (BoundCheckConstraint &)*bound_constraint; + // check if the removed column is part of the check constraint + if (bound_check.bound_columns.find(removed_index) != bound_check.bound_columns.end()) { + if (bound_check.bound_columns.size() > 1) { + // CHECK constraint that concerns mult + throw CatalogException( + "Cannot drop column \"%s\" because there is a CHECK constraint that depends on it", + info.removed_column); + } else { + // CHECK constraint that ONLY concerns this column, strip the constraint + } + } else { + // check constraint does not concern the removed column: simply re-add it + create_info->constraints.push_back(constraint->Copy()); + } + break; + } + case ConstraintType::UNIQUE: { + auto copy = constraint->Copy(); + auto &unique = (UniqueConstraint &)*copy; + if (unique.index != DConstants::INVALID_INDEX) { + if (unique.index == removed_index) { + throw 
CatalogException( + "Cannot drop column \"%s\" because there is a UNIQUE constraint that depends on it", + info.removed_column); + } else if (unique.index > removed_index) { + unique.index--; + } + } + create_info->constraints.push_back(move(copy)); + break; + } + default: + throw InternalException("Unsupported constraint for entry!"); + } + } + + auto binder = Binder::CreateBinder(context); + auto bound_create_info = binder->BindCreateTableInfo(move(create_info)); + auto new_storage = make_shared(context, *storage, removed_index); + return make_unique(catalog, schema, (BoundCreateTableInfo *)bound_create_info.get(), + new_storage); +} + +unique_ptr TableCatalogEntry::SetDefault(ClientContext &context, SetDefaultInfo &info) { + auto create_info = make_unique(schema->name, name); + idx_t default_idx = GetColumnIndex(info.column_name); + for (idx_t i = 0; i < columns.size(); i++) { + auto copy = columns[i].Copy(); + if (default_idx == i) { + // set the default value of this column + copy.default_value = info.expression ? info.expression->Copy() : nullptr; + } + create_info->columns.push_back(move(copy)); + } + + for (idx_t i = 0; i < constraints.size(); i++) { + auto constraint = constraints[i]->Copy(); + create_info->constraints.push_back(move(constraint)); + } + + auto binder = Binder::CreateBinder(context); + auto bound_create_info = binder->BindCreateTableInfo(move(create_info)); + return make_unique(catalog, schema, (BoundCreateTableInfo *)bound_create_info.get(), storage); +} + +unique_ptr TableCatalogEntry::ChangeColumnType(ClientContext &context, ChangeColumnTypeInfo &info) { + auto create_info = make_unique(schema->name, name); + idx_t change_idx = GetColumnIndex(info.column_name); + for (idx_t i = 0; i < columns.size(); i++) { + auto copy = columns[i].Copy(); + if (change_idx == i) { + // set the default value of this column + copy.type = info.target_type; + } + create_info->columns.push_back(move(copy)); + } + + for (idx_t i = 0; i < constraints.size(); i++) { + auto constraint = constraints[i]->Copy(); + switch (constraint->type) { + case ConstraintType::CHECK: { + auto &bound_check = (BoundCheckConstraint &)*bound_constraints[i]; + if (bound_check.bound_columns.find(change_idx) != bound_check.bound_columns.end()) { + throw BinderException("Cannot change the type of a column that has a CHECK constraint specified"); + } + break; + } + case ConstraintType::NOT_NULL: + break; + case ConstraintType::UNIQUE: { + auto &bound_unique = (BoundUniqueConstraint &)*bound_constraints[i]; + if (bound_unique.key_set.find(change_idx) != bound_unique.key_set.end()) { + throw BinderException( + "Cannot change the type of a column that has a UNIQUE or PRIMARY KEY constraint specified"); + } + break; + } + default: + throw InternalException("Unsupported constraint for entry!"); + } + create_info->constraints.push_back(move(constraint)); + } + + auto binder = Binder::CreateBinder(context); + // bind the specified expression + vector bound_columns; + AlterBinder expr_binder(*binder, context, *this, bound_columns, info.target_type); + auto expression = info.expression->Copy(); + auto bound_expression = expr_binder.Bind(expression); + auto bound_create_info = binder->BindCreateTableInfo(move(create_info)); + if (bound_columns.empty()) { + bound_columns.push_back(COLUMN_IDENTIFIER_ROW_ID); + } + + auto new_storage = + make_shared(context, *storage, change_idx, info.target_type, move(bound_columns), *bound_expression); + return make_unique(catalog, schema, (BoundCreateTableInfo *)bound_create_info.get(), 
+ new_storage); +} + +ColumnDefinition &TableCatalogEntry::GetColumn(const string &name) { + auto entry = name_map.find(name); + if (entry == name_map.end() || entry->second == COLUMN_IDENTIFIER_ROW_ID) { + throw CatalogException("Column with name %s does not exist!", name); + } + return columns[entry->second]; +} + +vector TableCatalogEntry::GetTypes() { + vector types; + for (auto &it : columns) { + types.push_back(it.type); + } + return types; +} + +void TableCatalogEntry::Serialize(Serializer &serializer) { + D_ASSERT(!internal); + serializer.WriteString(schema->name); + serializer.WriteString(name); + D_ASSERT(columns.size() <= NumericLimits::Maximum()); + serializer.Write((uint32_t)columns.size()); + for (auto &column : columns) { + column.Serialize(serializer); + } + D_ASSERT(constraints.size() <= NumericLimits::Maximum()); + serializer.Write((uint32_t)constraints.size()); + for (auto &constraint : constraints) { + constraint->Serialize(serializer); + } +} + +string TableCatalogEntry::ToSQL() { + std::stringstream ss; + + ss << "CREATE TABLE "; + + if (schema->name != DEFAULT_SCHEMA) { + ss << KeywordHelper::WriteOptionallyQuoted(schema->name) << "."; + } + + ss << KeywordHelper::WriteOptionallyQuoted(name) << "("; + + // find all columns that have NOT NULL specified, but are NOT primary key columns + unordered_set not_null_columns; + unordered_set unique_columns; + unordered_set pk_columns; + unordered_set multi_key_pks; + vector extra_constraints; + for (auto &constraint : constraints) { + if (constraint->type == ConstraintType::NOT_NULL) { + auto ¬_null = (NotNullConstraint &)*constraint; + not_null_columns.insert(not_null.index); + } else if (constraint->type == ConstraintType::UNIQUE) { + auto &pk = (UniqueConstraint &)*constraint; + vector constraint_columns = pk.columns; + if (pk.index != DConstants::INVALID_INDEX) { + // no columns specified: single column constraint + if (pk.is_primary_key) { + pk_columns.insert(pk.index); + } else { + unique_columns.insert(pk.index); + } + } else { + // multi-column constraint, this constraint needs to go at the end after all columns + if (pk.is_primary_key) { + // multi key pk column: insert set of columns into multi_key_pks + for (auto &col : pk.columns) { + multi_key_pks.insert(col); + } + } + extra_constraints.push_back(constraint->ToString()); + } + } else { + extra_constraints.push_back(constraint->ToString()); + } + } + + for (idx_t i = 0; i < columns.size(); i++) { + if (i > 0) { + ss << ", "; + } + auto &column = columns[i]; + ss << KeywordHelper::WriteOptionallyQuoted(column.name) << " " << column.type.ToString(); + bool not_null = not_null_columns.find(column.oid) != not_null_columns.end(); + bool is_single_key_pk = pk_columns.find(column.oid) != pk_columns.end(); + bool is_multi_key_pk = multi_key_pks.find(column.name) != multi_key_pks.end(); + bool is_unique = unique_columns.find(column.oid) != unique_columns.end(); + if (not_null && !is_single_key_pk && !is_multi_key_pk) { + // NOT NULL but not a primary key column + ss << " NOT NULL"; + } + if (is_single_key_pk) { + // single column pk: insert constraint here + ss << " PRIMARY KEY"; + } + if (is_unique) { + // single column unique: insert constraint here + ss << " UNIQUE"; + } + if (column.default_value) { + ss << " DEFAULT(" << column.default_value->ToString() << ")"; + } + } + // print any extra constraints that still need to be printed + for (auto &extra_constraint : extra_constraints) { + ss << ", "; + ss << extra_constraint; + } + + ss << ");"; + return ss.str(); +} + 
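+// Illustration (hypothetical table, not part of the upstream sources): ToSQL() above rebuilds a
+// CREATE TABLE statement from the catalog entry. For a table defined as
+//   CREATE TABLE tbl(i INTEGER PRIMARY KEY, j INTEGER UNIQUE, s VARCHAR DEFAULT('x'));
+// the single-column PRIMARY KEY / UNIQUE / NOT NULL constraints are emitted inline next to their
+// columns, while multi-column constraints (e.g. PRIMARY KEY(i, j)) are collected in
+// extra_constraints and appended after the column list.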
+unique_ptr TableCatalogEntry::Deserialize(Deserializer &source) { + auto info = make_unique(); + + info->schema = source.Read(); + info->table = source.Read(); + auto column_count = source.Read(); + + for (uint32_t i = 0; i < column_count; i++) { + auto column = ColumnDefinition::Deserialize(source); + info->columns.push_back(move(column)); + } + auto constraint_count = source.Read(); + + for (uint32_t i = 0; i < constraint_count; i++) { + auto constraint = Constraint::Deserialize(source); + info->constraints.push_back(move(constraint)); + } + return info; +} + +unique_ptr TableCatalogEntry::Copy(ClientContext &context) { + auto create_info = make_unique(schema->name, name); + for (idx_t i = 0; i < columns.size(); i++) { + create_info->columns.push_back(columns[i].Copy()); + } + + for (idx_t i = 0; i < constraints.size(); i++) { + auto constraint = constraints[i]->Copy(); + create_info->constraints.push_back(move(constraint)); + } + + auto binder = Binder::CreateBinder(context); + auto bound_create_info = binder->BindCreateTableInfo(move(create_info)); + return make_unique(catalog, schema, (BoundCreateTableInfo *)bound_create_info.get(), storage); +} + +void TableCatalogEntry::SetAsRoot() { + storage->SetAsRoot(); +} + +void TableCatalogEntry::CommitAlter(AlterInfo &info) { + D_ASSERT(info.type == AlterType::ALTER_TABLE); + auto &alter_table = (AlterTableInfo &)info; + string column_name; + switch (alter_table.alter_table_type) { + case AlterTableType::REMOVE_COLUMN: { + auto &remove_info = (RemoveColumnInfo &)alter_table; + column_name = remove_info.removed_column; + break; + } + case AlterTableType::ALTER_COLUMN_TYPE: { + auto &change_info = (ChangeColumnTypeInfo &)alter_table; + column_name = change_info.column_name; + break; + } + default: + break; + } + if (column_name.empty()) { + return; + } + idx_t removed_index = DConstants::INVALID_INDEX; + for (idx_t i = 0; i < columns.size(); i++) { + if (columns[i].name == column_name) { + removed_index = i; + break; + } + } + D_ASSERT(removed_index != DConstants::INVALID_INDEX); + storage->CommitDropColumn(removed_index); +} + +void TableCatalogEntry::CommitDrop() { + storage->CommitDropTable(); +} + +} // namespace duckdb + + + +namespace duckdb { + +TableFunctionCatalogEntry::TableFunctionCatalogEntry(Catalog *catalog, SchemaCatalogEntry *schema, + CreateTableFunctionInfo *info) + : StandardEntry(CatalogType::TABLE_FUNCTION_ENTRY, schema, catalog, info->name), functions(move(info->functions)) { + D_ASSERT(this->functions.size() > 0); +} + +} // namespace duckdb + + + + + + + + +#include +#include + +namespace duckdb { + +TypeCatalogEntry::TypeCatalogEntry(Catalog *catalog, SchemaCatalogEntry *schema, CreateTypeInfo *info) + : StandardEntry(CatalogType::TYPE_ENTRY, schema, catalog, info->name) { + user_type = make_unique(*info->type); +} + +void TypeCatalogEntry::Serialize(Serializer &serializer) { + serializer.WriteString(schema->name); + serializer.WriteString(name); + user_type->Serialize(serializer); +} + +unique_ptr TypeCatalogEntry::Deserialize(Deserializer &source) { + auto info = make_unique(); + info->schema = source.Read(); + info->name = source.Read(); + info->type = make_unique(LogicalType::Deserialize(source)); + return info; +} + +string TypeCatalogEntry::ToSQL() { + std::stringstream ss; + switch (user_type->id()) { + case (LogicalTypeId::ENUM): { + Vector values_insert_order(EnumType::GetValuesInsertOrder(*user_type)); + idx_t size = EnumType::GetSize(*user_type); + ss << "CREATE TYPE "; + ss << name; + ss << " AS ENUM ( "; 
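+		// emit the enum's values in their insertion order, quoted and comma-separated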
+ + for (idx_t i = 0; i < size; i++) { + ss << "'" << values_insert_order.GetValue(i).ToString() << "'"; + if (i != size - 1) { + ss << ", "; + } + } + ss << ");"; + break; + } + default: + throw InternalException("Logical Type can't be used as a User Defined Type"); + } + + return ss.str(); +} + +} // namespace duckdb + + + + + + + + + +#include + +namespace duckdb { + +void ViewCatalogEntry::Initialize(CreateViewInfo *info) { + query = move(info->query); + this->aliases = info->aliases; + this->types = info->types; + this->temporary = info->temporary; + this->sql = info->sql; + this->internal = info->internal; +} + +ViewCatalogEntry::ViewCatalogEntry(Catalog *catalog, SchemaCatalogEntry *schema, CreateViewInfo *info) + : StandardEntry(CatalogType::VIEW_ENTRY, schema, catalog, info->view_name) { + Initialize(info); +} + +unique_ptr ViewCatalogEntry::AlterEntry(ClientContext &context, AlterInfo *info) { + D_ASSERT(!internal); + if (info->type != AlterType::ALTER_VIEW) { + throw CatalogException("Can only modify view with ALTER VIEW statement"); + } + auto view_info = (AlterViewInfo *)info; + switch (view_info->alter_view_type) { + case AlterViewType::RENAME_VIEW: { + auto rename_info = (RenameViewInfo *)view_info; + auto copied_view = Copy(context); + copied_view->name = rename_info->new_view_name; + return copied_view; + } + default: + throw InternalException("Unrecognized alter view type!"); + } +} + +void ViewCatalogEntry::Serialize(Serializer &serializer) { + D_ASSERT(!internal); + serializer.WriteString(schema->name); + serializer.WriteString(name); + serializer.WriteString(sql); + query->Serialize(serializer); + D_ASSERT(aliases.size() <= NumericLimits::Maximum()); + serializer.Write((uint32_t)aliases.size()); + for (auto &alias : aliases) { + serializer.WriteString(alias); + } + serializer.Write((uint32_t)types.size()); + for (auto &sql_type : types) { + sql_type.Serialize(serializer); + } +} + +unique_ptr ViewCatalogEntry::Deserialize(Deserializer &source) { + auto info = make_unique(); + info->schema = source.Read(); + info->view_name = source.Read(); + info->sql = source.Read(); + info->query = SelectStatement::Deserialize(source); + auto alias_count = source.Read(); + for (uint32_t i = 0; i < alias_count; i++) { + info->aliases.push_back(source.Read()); + } + auto type_count = source.Read(); + for (uint32_t i = 0; i < type_count; i++) { + info->types.push_back(LogicalType::Deserialize(source)); + } + return info; +} + +string ViewCatalogEntry::ToSQL() { + if (sql.empty()) { + //! 
Return empty sql with view name so pragma view_tables don't complain + return sql; + } + return sql + "\n;"; +} + +unique_ptr ViewCatalogEntry::Copy(ClientContext &context) { + D_ASSERT(!internal); + auto create_info = make_unique(schema->name, name); + create_info->query = unique_ptr_cast(query->Copy()); + for (idx_t i = 0; i < aliases.size(); i++) { + create_info->aliases.push_back(aliases[i]); + } + for (idx_t i = 0; i < types.size(); i++) { + create_info->types.push_back(types[i]); + } + create_info->temporary = temporary; + create_info->sql = sql; + + return make_unique(catalog, schema, create_info.get()); +} + +} // namespace duckdb + + + + +namespace duckdb { + +CatalogEntry::CatalogEntry(CatalogType type, Catalog *catalog_p, string name_p) + : oid(catalog_p->ModifyCatalog()), type(type), catalog(catalog_p), set(nullptr), name(move(name_p)), deleted(false), + temporary(false), internal(false), parent(nullptr) { +} + +CatalogEntry::~CatalogEntry() { +} + +void CatalogEntry::SetAsRoot() { +} + +// LCOV_EXCL_START +unique_ptr CatalogEntry::AlterEntry(ClientContext &context, AlterInfo *info) { + throw InternalException("Unsupported alter type for catalog entry!"); +} + +unique_ptr CatalogEntry::Copy(ClientContext &context) { + throw InternalException("Unsupported copy type for catalog entry!"); +} + +string CatalogEntry::ToSQL() { + throw InternalException("Unsupported catalog type for ToSQL()"); +} +// LCOV_EXCL_STOP + +} // namespace duckdb + + + + + + + + +namespace duckdb { + +CatalogSearchPath::CatalogSearchPath(ClientContext &context_p) : context(context_p) { + SetPaths(ParsePaths("")); +} + +void CatalogSearchPath::Set(const string &new_value, bool is_set_schema) { + auto new_paths = ParsePaths(new_value); + if (is_set_schema && new_paths.size() != 1) { + throw CatalogException("SET schema can set only 1 schema. This has %d", new_paths.size()); + } + auto &catalog = Catalog::GetCatalog(context); + for (const auto &path : new_paths) { + if (!catalog.GetSchema(context, StringUtil::Lower(path), true)) { + throw CatalogException("SET %s: No schema named %s found.", is_set_schema ? "schema" : "search_path", path); + } + } + this->set_paths = move(new_paths); + SetPaths(set_paths); +} + +const vector &CatalogSearchPath::Get() { + return paths; +} + +const string &CatalogSearchPath::GetOrDefault(const string &name) { + return name == INVALID_SCHEMA ? GetDefault() : name; // NOLINT +} + +const string &CatalogSearchPath::GetDefault() { + const auto &paths = Get(); + D_ASSERT(paths.size() >= 2); + D_ASSERT(paths[0] == TEMP_SCHEMA); + return paths[1]; +} + +void CatalogSearchPath::SetPaths(vector new_paths) { + paths.clear(); + paths.reserve(new_paths.size() + 3); + paths.emplace_back(TEMP_SCHEMA); + for (auto &path : new_paths) { + paths.push_back(move(path)); + } + paths.emplace_back(DEFAULT_SCHEMA); + paths.emplace_back("pg_catalog"); +} + +vector CatalogSearchPath::ParsePaths(const string &value) { + return StringUtil::SplitWithQuote(StringUtil::Lower(value)); +} + +} // namespace duckdb + + + + +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/transaction/transaction_manager.hpp +// +// +//===----------------------------------------------------------------------===// + + + + + + + + + + +namespace duckdb { + +class ClientContext; +class Catalog; +struct ClientLockWrapper; +class DatabaseInstance; +class Transaction; + +struct StoredCatalogSet { + //! Stored catalog set + unique_ptr stored_set; + //! 
The highest active query number when the catalog set was stored; used for cleaning up + transaction_t highest_active_query; +}; + +//! The Transaction Manager is responsible for creating and managing +//! transactions +class TransactionManager { + friend struct CheckpointLock; + +public: + explicit TransactionManager(DatabaseInstance &db); + ~TransactionManager(); + + //! Start a new transaction + Transaction *StartTransaction(ClientContext &context); + //! Commit the given transaction + string CommitTransaction(ClientContext &context, Transaction *transaction); + //! Rollback the given transaction + void RollbackTransaction(Transaction *transaction); + + transaction_t GetQueryNumber() { + return current_query_number++; + } + transaction_t LowestActiveId() { + return lowest_active_id; + } + transaction_t LowestActiveStart() { + return lowest_active_start; + } + + void Checkpoint(ClientContext &context, bool force = false); + + static TransactionManager &Get(ClientContext &context); + static TransactionManager &Get(DatabaseInstance &db); + +private: + bool CanCheckpoint(Transaction *current = nullptr); + //! Remove the given transaction from the list of active transactions + void RemoveTransaction(Transaction *transaction) noexcept; + void LockClients(vector &client_locks, ClientContext &context); + + //! The database instance + DatabaseInstance &db; + //! The current query number + atomic current_query_number; + //! The current start timestamp used by transactions + transaction_t current_start_timestamp; + //! The current transaction ID used by transactions + transaction_t current_transaction_id; + //! The lowest active transaction id + atomic lowest_active_id; + //! The lowest active transaction timestamp + atomic lowest_active_start; + //! Set of currently running transactions + vector> active_transactions; + //! Set of recently committed transactions + vector> recently_committed_transactions; + //! Transactions awaiting GC + vector> old_transactions; + //! Catalog sets + vector old_catalog_sets; + //! The lock used for transaction operations + mutex transaction_lock; + + bool thread_is_checkpointing; +}; + +} // namespace duckdb + + + + + + + + + +namespace duckdb { + +//! Class responsible to keep track of state when removing entries from the catalog. +//! When deleting, many types of errors can be thrown, since we want to avoid try/catch blocks +//! this class makes sure that whatever elements were modified are returned to a correct state +//! when exceptions are thrown. +//! The idea here is to use RAII (Resource acquisition is initialization) to mimic a try/catch/finally block. +//! If any exception is raised when this object exists, then its destructor will be called +//! and the entry will return to its previous state during deconstruction. +class EntryDropper { +public: + //! Both constructor and destructor are privates because they should only be called by DropEntryDependencies + explicit EntryDropper(CatalogSet &catalog_set, idx_t entry_index) + : catalog_set(catalog_set), entry_index(entry_index) { + old_deleted = catalog_set.entries[entry_index].get()->deleted; + } + + ~EntryDropper() { + catalog_set.entries[entry_index].get()->deleted = old_deleted; + } + +private: + //! The current catalog_set + CatalogSet &catalog_set; + //! Keeps track of the state of the entry before starting the delete + bool old_deleted; + //! 
Index of entry to be deleted + idx_t entry_index; +}; + +CatalogSet::CatalogSet(Catalog &catalog, unique_ptr defaults) + : catalog(catalog), defaults(move(defaults)) { +} + +bool CatalogSet::CreateEntry(ClientContext &context, const string &name, unique_ptr value, + unordered_set &dependencies) { + auto &transaction = Transaction::GetTransaction(context); + // lock the catalog for writing + lock_guard write_lock(catalog.write_lock); + // lock this catalog set to disallow reading + lock_guard read_lock(catalog_lock); + + // first check if the entry exists in the unordered set + idx_t entry_index; + auto mapping_value = GetMapping(context, name); + if (mapping_value == nullptr || mapping_value->deleted) { + // if it does not: entry has never been created + + // first create a dummy deleted entry for this entry + // so transactions started before the commit of this transaction don't + // see it yet + entry_index = current_entry++; + auto dummy_node = make_unique(CatalogType::INVALID, value->catalog, name); + dummy_node->timestamp = 0; + dummy_node->deleted = true; + dummy_node->set = this; + + entries[entry_index] = move(dummy_node); + PutMapping(context, name, entry_index); + } else { + entry_index = mapping_value->index; + auto ¤t = *entries[entry_index]; + // if it does, we have to check version numbers + if (HasConflict(context, current.timestamp)) { + // current version has been written to by a currently active + // transaction + throw TransactionException("Catalog write-write conflict on create with \"%s\"", current.name); + } + // there is a current version that has been committed + // if it has not been deleted there is a conflict + if (!current.deleted) { + return false; + } + } + // create a new entry and replace the currently stored one + // set the timestamp to the timestamp of the current transaction + // and point it at the dummy node + value->timestamp = transaction.transaction_id; + value->set = this; + + // now add the dependency set of this object to the dependency manager + catalog.dependency_manager->AddObject(context, value.get(), dependencies); + + value->child = move(entries[entry_index]); + value->child->parent = value.get(); + // push the old entry in the undo buffer for this transaction + transaction.PushCatalogEntry(value->child.get()); + entries[entry_index] = move(value); + return true; +} + +bool CatalogSet::GetEntryInternal(ClientContext &context, idx_t entry_index, CatalogEntry *&catalog_entry) { + catalog_entry = entries[entry_index].get(); + // if it does: we have to retrieve the entry and to check version numbers + if (HasConflict(context, catalog_entry->timestamp)) { + // current version has been written to by a currently active + // transaction + throw TransactionException("Catalog write-write conflict on alter with \"%s\"", catalog_entry->name); + } + // there is a current version that has been committed by this transaction + if (catalog_entry->deleted) { + // if the entry was already deleted, it now does not exist anymore + // so we return that we could not find it + return false; + } + return true; +} + +bool CatalogSet::GetEntryInternal(ClientContext &context, const string &name, idx_t &entry_index, + CatalogEntry *&catalog_entry) { + auto mapping_value = GetMapping(context, name); + if (mapping_value == nullptr || mapping_value->deleted) { + // the entry does not exist, check if we can create a default entry + return false; + } + entry_index = mapping_value->index; + return GetEntryInternal(context, entry_index, catalog_entry); +} + +bool 
CatalogSet::AlterOwnership(ClientContext &context, ChangeOwnershipInfo *info) { + idx_t entry_index; + CatalogEntry *entry; + if (!GetEntryInternal(context, info->name, entry_index, entry)) { + return false; + } + + auto owner_entry = catalog.GetEntry(context, info->owner_schema, info->owner_name); + if (!owner_entry) { + return false; + } + + catalog.dependency_manager->AddOwnership(context, owner_entry, entry); + + return true; +} + +bool CatalogSet::AlterEntry(ClientContext &context, const string &name, AlterInfo *alter_info) { + auto &transaction = Transaction::GetTransaction(context); + // lock the catalog for writing + lock_guard write_lock(catalog.write_lock); + + // first check if the entry exists in the unordered set + idx_t entry_index; + CatalogEntry *entry; + if (!GetEntryInternal(context, name, entry_index, entry)) { + return false; + } + if (entry->internal) { + throw CatalogException("Cannot alter entry \"%s\" because it is an internal system entry", entry->name); + } + + // lock this catalog set to disallow reading + lock_guard read_lock(catalog_lock); + + // create a new entry and replace the currently stored one + // set the timestamp to the timestamp of the current transaction + // and point it to the updated table node + string original_name = entry->name; + auto value = entry->AlterEntry(context, alter_info); + if (!value) { + // alter failed, but did not result in an error + return true; + } + + if (value->name != original_name) { + auto mapping_value = GetMapping(context, value->name); + if (mapping_value && !mapping_value->deleted) { + auto entry = GetEntryForTransaction(context, entries[mapping_value->index].get()); + if (!entry->deleted) { + string rename_err_msg = + "Could not rename \"%s\" to \"%s\": another entry with this name already exists!"; + throw CatalogException(rename_err_msg, original_name, value->name); + } + } + PutMapping(context, value->name, entry_index); + DeleteMapping(context, original_name); + } + //! Check the dependency manager to verify that there are no conflicting dependencies with this alter + catalog.dependency_manager->AlterObject(context, entry, value.get()); + + value->timestamp = transaction.transaction_id; + value->child = move(entries[entry_index]); + value->child->parent = value.get(); + value->set = this; + + // serialize the AlterInfo into a temporary buffer + BufferedSerializer serializer; + alter_info->Serialize(serializer); + BinaryData serialized_alter = serializer.GetData(); + + // push the old entry in the undo buffer for this transaction + transaction.PushCatalogEntry(value->child.get(), serialized_alter.data.get(), serialized_alter.size); + entries[entry_index] = move(value); + + return true; +} + +void CatalogSet::DropEntryDependencies(ClientContext &context, idx_t entry_index, CatalogEntry &entry, bool cascade) { + + // Stores the deleted value of the entry before starting the process + EntryDropper dropper(*this, entry_index); + + // To correctly delete the object and its dependencies, it temporarily is set to deleted. 
+ entries[entry_index].get()->deleted = true; + + // check any dependencies of this object + entry.catalog->dependency_manager->DropObject(context, &entry, cascade); + + // dropper destructor is called here + // the destructor makes sure to return the value to the previous state + // dropper.~EntryDropper() +} + +void CatalogSet::DropEntryInternal(ClientContext &context, idx_t entry_index, CatalogEntry &entry, bool cascade) { + auto &transaction = Transaction::GetTransaction(context); + + DropEntryDependencies(context, entry_index, entry, cascade); + + // create a new entry and replace the currently stored one + // set the timestamp to the timestamp of the current transaction + // and point it at the dummy node + auto value = make_unique(CatalogType::DELETED_ENTRY, entry.catalog, entry.name); + value->timestamp = transaction.transaction_id; + value->child = move(entries[entry_index]); + value->child->parent = value.get(); + value->set = this; + value->deleted = true; + + // push the old entry in the undo buffer for this transaction + transaction.PushCatalogEntry(value->child.get()); + + entries[entry_index] = move(value); +} + +bool CatalogSet::DropEntry(ClientContext &context, const string &name, bool cascade) { + // lock the catalog for writing + lock_guard write_lock(catalog.write_lock); + // we can only delete an entry that exists + idx_t entry_index; + CatalogEntry *entry; + if (!GetEntryInternal(context, name, entry_index, entry)) { + return false; + } + if (entry->internal) { + throw CatalogException("Cannot drop entry \"%s\" because it is an internal system entry", entry->name); + } + + DropEntryInternal(context, entry_index, *entry, cascade); + return true; +} + +void CatalogSet::CleanupEntry(CatalogEntry *catalog_entry) { + // destroy the backed up entry: it is no longer required + D_ASSERT(catalog_entry->parent); + if (catalog_entry->parent->type != CatalogType::UPDATED_ENTRY) { + lock_guard lock(catalog_lock); + if (!catalog_entry->deleted) { + // delete the entry from the dependency manager, if it is not deleted yet + catalog_entry->catalog->dependency_manager->EraseObject(catalog_entry); + } + catalog_entry->parent->child = move(catalog_entry->child); + } +} + +bool CatalogSet::HasConflict(ClientContext &context, transaction_t timestamp) { + auto &transaction = Transaction::GetTransaction(context); + return (timestamp >= TRANSACTION_ID_START && timestamp != transaction.transaction_id) || + (timestamp < TRANSACTION_ID_START && timestamp > transaction.start_time); +} + +MappingValue *CatalogSet::GetMapping(ClientContext &context, const string &name, bool get_latest) { + MappingValue *mapping_value; + auto entry = mapping.find(name); + if (entry != mapping.end()) { + mapping_value = entry->second.get(); + } else { + return nullptr; + } + if (get_latest) { + return mapping_value; + } + while (mapping_value->child) { + if (UseTimestamp(context, mapping_value->timestamp)) { + break; + } + mapping_value = mapping_value->child.get(); + D_ASSERT(mapping_value); + } + return mapping_value; +} + +void CatalogSet::PutMapping(ClientContext &context, const string &name, idx_t entry_index) { + auto entry = mapping.find(name); + auto new_value = make_unique(entry_index); + new_value->timestamp = Transaction::GetTransaction(context).transaction_id; + if (entry != mapping.end()) { + if (HasConflict(context, entry->second->timestamp)) { + throw TransactionException("Catalog write-write conflict on name \"%s\"", name); + } + new_value->child = move(entry->second); + new_value->child->parent = 
new_value.get(); + } + mapping[name] = move(new_value); +} + +void CatalogSet::DeleteMapping(ClientContext &context, const string &name) { + auto entry = mapping.find(name); + D_ASSERT(entry != mapping.end()); + auto delete_marker = make_unique(entry->second->index); + delete_marker->deleted = true; + delete_marker->timestamp = Transaction::GetTransaction(context).transaction_id; + delete_marker->child = move(entry->second); + delete_marker->child->parent = delete_marker.get(); + mapping[name] = move(delete_marker); +} + +bool CatalogSet::UseTimestamp(ClientContext &context, transaction_t timestamp) { + auto &transaction = Transaction::GetTransaction(context); + if (timestamp == transaction.transaction_id) { + // we created this version + return true; + } + if (timestamp < transaction.start_time) { + // this version was commited before we started the transaction + return true; + } + return false; +} + +CatalogEntry *CatalogSet::GetEntryForTransaction(ClientContext &context, CatalogEntry *current) { + while (current->child) { + if (UseTimestamp(context, current->timestamp)) { + break; + } + current = current->child.get(); + D_ASSERT(current); + } + return current; +} + +CatalogEntry *CatalogSet::GetCommittedEntry(CatalogEntry *current) { + while (current->child) { + if (current->timestamp < TRANSACTION_ID_START) { + // this entry is committed: use it + break; + } + current = current->child.get(); + D_ASSERT(current); + } + return current; +} + +pair CatalogSet::SimilarEntry(ClientContext &context, const string &name) { + lock_guard lock(catalog_lock); + + string result; + idx_t current_score = (idx_t)-1; + for (auto &kv : mapping) { + auto mapping_value = GetMapping(context, kv.first); + if (mapping_value && !mapping_value->deleted) { + auto ldist = StringUtil::LevenshteinDistance(kv.first, name); + if (ldist < current_score) { + current_score = ldist; + result = kv.first; + } + } + } + return {result, current_score}; +} + +CatalogEntry *CatalogSet::CreateEntryInternal(ClientContext &context, unique_ptr entry) { + if (mapping.find(entry->name) != mapping.end()) { + return nullptr; + } + auto &name = entry->name; + auto entry_index = current_entry++; + auto catalog_entry = entry.get(); + + entry->timestamp = 0; + + PutMapping(context, name, entry_index); + mapping[name]->timestamp = 0; + entries[entry_index] = move(entry); + return catalog_entry; +} + +CatalogEntry *CatalogSet::GetEntry(ClientContext &context, const string &name) { + unique_lock lock(catalog_lock); + auto mapping_value = GetMapping(context, name); + if (mapping_value != nullptr && !mapping_value->deleted) { + // we found an entry for this name + // check the version numbers + + auto catalog_entry = entries[mapping_value->index].get(); + CatalogEntry *current = GetEntryForTransaction(context, catalog_entry); + if (current->deleted || (current->name != name && !UseTimestamp(context, mapping_value->timestamp))) { + return nullptr; + } + return current; + } + // no entry found with this name, check for defaults + if (!defaults || defaults->created_all_entries) { + // no defaults either: return null + return nullptr; + } + // this catalog set has a default map defined + // check if there is a default entry that we can create with this name + lock.unlock(); + auto entry = defaults->CreateDefaultEntry(context, name); + + lock.lock(); + if (!entry) { + // no default entry + return nullptr; + } + // there is a default entry! 
create it + auto result = CreateEntryInternal(context, move(entry)); + if (result) { + return result; + } + // we found a default entry, but failed + // this means somebody else created the entry first + // just retry? + lock.unlock(); + return GetEntry(context, name); +} + +void CatalogSet::UpdateTimestamp(CatalogEntry *entry, transaction_t timestamp) { + entry->timestamp = timestamp; + mapping[entry->name]->timestamp = timestamp; +} + +void CatalogSet::AdjustEnumDependency(CatalogEntry *entry, ColumnDefinition &column, bool remove) { + CatalogEntry *enum_type_catalog = (CatalogEntry *)EnumType::GetCatalog(column.type); + if (enum_type_catalog) { + if (remove) { + catalog.dependency_manager->dependents_map[enum_type_catalog].erase(entry->parent); + catalog.dependency_manager->dependencies_map[entry->parent].erase(enum_type_catalog); + } else { + catalog.dependency_manager->dependents_map[enum_type_catalog].insert(entry); + catalog.dependency_manager->dependencies_map[entry].insert(enum_type_catalog); + } + } +} + +void CatalogSet::AdjustDependency(CatalogEntry *entry, TableCatalogEntry *table, ColumnDefinition &column, + bool remove) { + bool found = false; + if (column.type.id() == LogicalTypeId::ENUM) { + for (auto &old_column : table->columns) { + if (old_column.name == column.name && old_column.type.id() != LogicalTypeId::ENUM) { + AdjustEnumDependency(entry, column, remove); + found = true; + } + } + if (!found) { + AdjustEnumDependency(entry, column, remove); + } + } +} + +void CatalogSet::AdjustTableDependencies(CatalogEntry *entry) { + if (entry->type == CatalogType::TABLE_ENTRY && entry->parent->type == CatalogType::TABLE_ENTRY) { + // If it's a table entry we have to check for possibly removing or adding user type dependencies + auto old_table = (TableCatalogEntry *)entry->parent; + auto new_table = (TableCatalogEntry *)entry; + + for (auto &new_column : new_table->columns) { + AdjustDependency(entry, old_table, new_column, false); + } + for (auto &old_column : old_table->columns) { + AdjustDependency(entry, new_table, old_column, true); + } + } +} + +void CatalogSet::Undo(CatalogEntry *entry) { + lock_guard write_lock(catalog.write_lock); + + lock_guard lock(catalog_lock); + + // entry has to be restored + // and entry->parent has to be removed ("rolled back") + + // i.e. 
we have to place (entry) as (entry->parent) again + auto &to_be_removed_node = entry->parent; + + AdjustTableDependencies(entry); + + if (!to_be_removed_node->deleted) { + // delete the entry from the dependency manager as well + catalog.dependency_manager->EraseObject(to_be_removed_node); + } + if (entry->name != to_be_removed_node->name) { + // rename: clean up the new name when the rename is rolled back + auto removed_entry = mapping.find(to_be_removed_node->name); + if (removed_entry->second->child) { + removed_entry->second->child->parent = nullptr; + mapping[to_be_removed_node->name] = move(removed_entry->second->child); + } else { + mapping.erase(removed_entry); + } + } + if (to_be_removed_node->parent) { + // if the to be removed node has a parent, set the child pointer to the + // to be restored node + to_be_removed_node->parent->child = move(to_be_removed_node->child); + entry->parent = to_be_removed_node->parent; + } else { + // otherwise we need to update the base entry tables + auto &name = entry->name; + to_be_removed_node->child->SetAsRoot(); + entries[mapping[name]->index] = move(to_be_removed_node->child); + entry->parent = nullptr; + } + + // restore the name if it was deleted + auto restored_entry = mapping.find(entry->name); + if (restored_entry->second->deleted || entry->type == CatalogType::INVALID) { + if (restored_entry->second->child) { + restored_entry->second->child->parent = nullptr; + mapping[entry->name] = move(restored_entry->second->child); + } else { + mapping.erase(restored_entry); + } + } + // we mark the catalog as being modified, since this action can lead to e.g. tables being dropped + entry->catalog->ModifyCatalog(); +} + +void CatalogSet::Scan(ClientContext &context, const std::function &callback) { + // lock the catalog set + unique_lock lock(catalog_lock); + if (defaults && !defaults->created_all_entries) { + // this catalog set has a default set defined: + auto default_entries = defaults->GetDefaultEntries(); + for (auto &default_entry : default_entries) { + auto map_entry = mapping.find(default_entry); + if (map_entry == mapping.end()) { + // we unlock during the CreateEntry, since it might reference other catalog sets... 
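+ // (such a lookup can even land back in this same set, e.g. the default view sqlite_schema
+ // is defined on top of sqlite_master, and catalog_lock is not re-entrant, so it must not be
+ // held across the call; after re-locking, CreateEntryInternal installs the new entry)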
+ // specifically for views this can happen since the view will be bound + lock.unlock(); + auto entry = defaults->CreateDefaultEntry(context, default_entry); + + lock.lock(); + CreateEntryInternal(context, move(entry)); + } + } + defaults->created_all_entries = true; + } + for (auto &kv : entries) { + auto entry = kv.second.get(); + entry = GetEntryForTransaction(context, entry); + if (!entry->deleted) { + callback(entry); + } + } +} + +void CatalogSet::Scan(const std::function &callback) { + // lock the catalog set + lock_guard lock(catalog_lock); + for (auto &kv : entries) { + auto entry = kv.second.get(); + entry = GetCommittedEntry(entry); + if (!entry->deleted) { + callback(entry); + } + } +} +} // namespace duckdb + + + + + + +namespace duckdb { + +struct DefaultMacro { + const char *schema; + const char *name; + const char *parameters[8]; + const char *macro; +}; + +static DefaultMacro internal_macros[] = { + {DEFAULT_SCHEMA, "current_user", {nullptr}, "'duckdb'"}, // user name of current execution context + {DEFAULT_SCHEMA, "current_catalog", {nullptr}, "'duckdb'"}, // name of current database (called "catalog" in the SQL standard) + {DEFAULT_SCHEMA, "current_database", {nullptr}, "'duckdb'"}, // name of current database + {DEFAULT_SCHEMA, "user", {nullptr}, "current_user"}, // equivalent to current_user + {DEFAULT_SCHEMA, "session_user", {nullptr}, "'duckdb'"}, // session user name + {"pg_catalog", "inet_client_addr", {nullptr}, "NULL"}, // address of the remote connection + {"pg_catalog", "inet_client_port", {nullptr}, "NULL"}, // port of the remote connection + {"pg_catalog", "inet_server_addr", {nullptr}, "NULL"}, // address of the local connection + {"pg_catalog", "inet_server_port", {nullptr}, "NULL"}, // port of the local connection + {"pg_catalog", "pg_my_temp_schema", {nullptr}, "0"}, // OID of session's temporary schema, or 0 if none + {"pg_catalog", "pg_is_other_temp_schema", {"schema_id", nullptr}, "false"}, // is schema another session's temporary schema? 
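+ // Each entry is (schema, macro name, nullptr-terminated parameter list, macro body as SQL).
+ // Nothing here is registered eagerly: DefaultFunctionGenerator further below parses an entry
+ // into a macro the first time its name is looked up (or when the schema is scanned), so e.g.
+ // SELECT pg_typeof(42) resolves through the "pg_typeof" entry and is effectively evaluated
+ // as lower(typeof(42)).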
+ + {"pg_catalog", "pg_conf_load_time", {nullptr}, "current_timestamp"}, // configuration load time + {"pg_catalog", "pg_postmaster_start_time", {nullptr}, "current_timestamp"}, // server start time + + {"pg_catalog", "pg_typeof", {"expression", nullptr}, "lower(typeof(expression))"}, // get the data type of any value + + // privilege functions + // {"has_any_column_privilege", {"user", "table", "privilege", nullptr}, "true"}, //boolean //does user have privilege for any column of table + {"pg_catalog", "has_any_column_privilege", {"table", "privilege", nullptr}, "true"}, //boolean //does current user have privilege for any column of table + // {"has_column_privilege", {"user", "table", "column", "privilege", nullptr}, "true"}, //boolean //does user have privilege for column + {"pg_catalog", "has_column_privilege", {"table", "column", "privilege", nullptr}, "true"}, //boolean //does current user have privilege for column + // {"has_database_privilege", {"user", "database", "privilege", nullptr}, "true"}, //boolean //does user have privilege for database + {"pg_catalog", "has_database_privilege", {"database", "privilege", nullptr}, "true"}, //boolean //does current user have privilege for database + // {"has_foreign_data_wrapper_privilege", {"user", "fdw", "privilege", nullptr}, "true"}, //boolean //does user have privilege for foreign-data wrapper + {"pg_catalog", "has_foreign_data_wrapper_privilege", {"fdw", "privilege", nullptr}, "true"}, //boolean //does current user have privilege for foreign-data wrapper + // {"has_function_privilege", {"user", "function", "privilege", nullptr}, "true"}, //boolean //does user have privilege for function + {"pg_catalog", "has_function_privilege", {"function", "privilege", nullptr}, "true"}, //boolean //does current user have privilege for function + // {"has_language_privilege", {"user", "language", "privilege", nullptr}, "true"}, //boolean //does user have privilege for language + {"pg_catalog", "has_language_privilege", {"language", "privilege", nullptr}, "true"}, //boolean //does current user have privilege for language + // {"has_schema_privilege", {"user", "schema, privilege", nullptr}, "true"}, //boolean //does user have privilege for schema + {"pg_catalog", "has_schema_privilege", {"schema", "privilege", nullptr}, "true"}, //boolean //does current user have privilege for schema + // {"has_sequence_privilege", {"user", "sequence", "privilege", nullptr}, "true"}, //boolean //does user have privilege for sequence + {"pg_catalog", "has_sequence_privilege", {"sequence", "privilege", nullptr}, "true"}, //boolean //does current user have privilege for sequence + // {"has_server_privilege", {"user", "server", "privilege", nullptr}, "true"}, //boolean //does user have privilege for foreign server + {"pg_catalog", "has_server_privilege", {"server", "privilege", nullptr}, "true"}, //boolean //does current user have privilege for foreign server + // {"has_table_privilege", {"user", "table", "privilege", nullptr}, "true"}, //boolean //does user have privilege for table + {"pg_catalog", "has_table_privilege", {"table", "privilege", nullptr}, "true"}, //boolean //does current user have privilege for table + // {"has_tablespace_privilege", {"user", "tablespace", "privilege", nullptr}, "true"}, //boolean //does user have privilege for tablespace + {"pg_catalog", "has_tablespace_privilege", {"tablespace", "privilege", nullptr}, "true"}, //boolean //does current user have privilege for tablespace + + // various postgres system functions + {"pg_catalog", 
"pg_get_viewdef", {"oid", nullptr}, "(select sql from duckdb_views() v where v.view_oid=oid)"}, + {"pg_catalog", "pg_get_constraintdef", {"constraint_oid", "pretty_bool", nullptr}, "(select constraint_text from duckdb_constraints() d_constraint where d_constraint.table_oid=constraint_oid/1000000 and d_constraint.constraint_index=constraint_oid%1000000)"}, + {"pg_catalog", "pg_get_expr", {"pg_node_tree", "relation_oid", nullptr}, "pg_node_tree"}, + {"pg_catalog", "format_pg_type", {"type_name", nullptr}, "case when type_name='FLOAT' then 'real' when type_name='DOUBLE' then 'double precision' when type_name='DECIMAL' then 'numeric' when type_name='VARCHAR' then 'character varying' when type_name='BLOB' then 'bytea' when type_name='TIMESTAMP' then 'timestamp without time zone' when type_name='TIME' then 'time without time zone' else lower(type_name) end"}, + {"pg_catalog", "format_type", {"type_oid", "typemod", nullptr}, "(select format_pg_type(type_name) from duckdb_types() t where t.type_oid=type_oid) || case when typemod>0 then concat('(', typemod/1000, ',', typemod%1000, ')') else '' end"}, + + {"pg_catalog", "pg_has_role", {"user", "role", "privilege", nullptr}, "true"}, //boolean //does user have privilege for role + {"pg_catalog", "pg_has_role", {"role", "privilege", nullptr}, "true"}, //boolean //does current user have privilege for role + + {"pg_catalog", "col_description", {"table_oid", "column_number", nullptr}, "NULL"}, // get comment for a table column + {"pg_catalog", "obj_description", {"object_oid", "catalog_name", nullptr}, "NULL"}, // get comment for a database object + {"pg_catalog", "shobj_description", {"object_oid", "catalog_name", nullptr}, "NULL"}, // get comment for a shared database object + + // visibility functions + {"pg_catalog", "pg_collation_is_visible", {"collation_oid", nullptr}, "true"}, + {"pg_catalog", "pg_conversion_is_visible", {"conversion_oid", nullptr}, "true"}, + {"pg_catalog", "pg_function_is_visible", {"function_oid", nullptr}, "true"}, + {"pg_catalog", "pg_opclass_is_visible", {"opclass_oid", nullptr}, "true"}, + {"pg_catalog", "pg_operator_is_visible", {"operator_oid", nullptr}, "true"}, + {"pg_catalog", "pg_opfamily_is_visible", {"opclass_oid", nullptr}, "true"}, + {"pg_catalog", "pg_table_is_visible", {"table_oid", nullptr}, "true"}, + {"pg_catalog", "pg_ts_config_is_visible", {"config_oid", nullptr}, "true"}, + {"pg_catalog", "pg_ts_dict_is_visible", {"dict_oid", nullptr}, "true"}, + {"pg_catalog", "pg_ts_parser_is_visible", {"parser_oid", nullptr}, "true"}, + {"pg_catalog", "pg_ts_template_is_visible", {"template_oid", nullptr}, "true"}, + {"pg_catalog", "pg_type_is_visible", {"type_oid", nullptr}, "true"}, + + {DEFAULT_SCHEMA, "round_even", {"x", "n", nullptr}, "CASE ((abs(x) * power(10, n+1)) % 10) WHEN 5 THEN round(x/2, n) * 2 ELSE round(x, n) END"}, + {DEFAULT_SCHEMA, "roundbankers", {"x", "n", nullptr}, "round_even(x, n)"}, + {DEFAULT_SCHEMA, "nullif", {"a", "b", nullptr}, "CASE WHEN a=b THEN NULL ELSE a END"}, + {DEFAULT_SCHEMA, "list_append", {"l", "e", nullptr}, "list_concat(l, list_value(e))"}, + {DEFAULT_SCHEMA, "array_append", {"arr", "el", nullptr}, "list_append(arr, el)"}, + {DEFAULT_SCHEMA, "list_prepend", {"e", "l", nullptr}, "list_concat(list_value(e), l)"}, + {DEFAULT_SCHEMA, "array_prepend", {"el", "arr", nullptr}, "list_prepend(el, arr)"}, + {DEFAULT_SCHEMA, "generate_subscripts", {"arr", "dim", nullptr}, "unnest(generate_series(1, array_length(arr, dim)))"}, + {nullptr, nullptr, {nullptr}, nullptr}}; + +static unique_ptr 
GetDefaultFunction(const string &schema, const string &name) { + for (idx_t index = 0; internal_macros[index].name != nullptr; index++) { + if (internal_macros[index].schema == schema && internal_macros[index].name == name) { + // parse the expression + auto expressions = Parser::ParseExpressionList(internal_macros[index].macro); + D_ASSERT(expressions.size() == 1); + + auto result = make_unique(move(expressions[0])); + for (idx_t param_idx = 0; internal_macros[index].parameters[param_idx] != nullptr; param_idx++) { + result->parameters.push_back( + make_unique(internal_macros[index].parameters[param_idx])); + } + + auto bind_info = make_unique(); + bind_info->schema = schema; + bind_info->name = internal_macros[index].name; + bind_info->temporary = true; + bind_info->internal = true; + bind_info->function = move(result); + return move(bind_info); + } + } + return nullptr; +} + +DefaultFunctionGenerator::DefaultFunctionGenerator(Catalog &catalog, SchemaCatalogEntry *schema) + : DefaultGenerator(catalog), schema(schema) { +} + +unique_ptr DefaultFunctionGenerator::CreateDefaultEntry(ClientContext &context, + const string &entry_name) { + auto info = GetDefaultFunction(schema->name, entry_name); + if (info) { + return make_unique_base(&catalog, schema, (CreateMacroInfo *)info.get()); + } + return nullptr; +} + +vector DefaultFunctionGenerator::GetDefaultEntries() { + vector result; + for (idx_t index = 0; internal_macros[index].name != nullptr; index++) { + if (internal_macros[index].schema == schema->name) { + result.emplace_back(internal_macros[index].name); + } + } + return result; +} + +} // namespace duckdb + + + +namespace duckdb { + +struct DefaultSchema { + const char *name; +}; + +static DefaultSchema internal_schemas[] = {{"information_schema"}, {"pg_catalog"}, {nullptr}}; + +static bool GetDefaultSchema(const string &schema) { + for (idx_t index = 0; internal_schemas[index].name != nullptr; index++) { + if (internal_schemas[index].name == schema) { + return true; + } + } + return false; +} + +DefaultSchemaGenerator::DefaultSchemaGenerator(Catalog &catalog) : DefaultGenerator(catalog) { +} + +unique_ptr DefaultSchemaGenerator::CreateDefaultEntry(ClientContext &context, const string &entry_name) { + if (GetDefaultSchema(entry_name)) { + return make_unique_base(&catalog, entry_name, true); + } + return nullptr; +} + +vector DefaultSchemaGenerator::GetDefaultEntries() { + vector result; + for (idx_t index = 0; internal_schemas[index].name != nullptr; index++) { + result.emplace_back(internal_schemas[index].name); + } + return result; +} + +} // namespace duckdb + + + + + + + +namespace duckdb { + +struct DefaultView { + const char *schema; + const char *name; + const char *sql; +}; + +static DefaultView internal_views[] = { + {DEFAULT_SCHEMA, "pragma_database_list", "SELECT * FROM pragma_database_list()"}, + {DEFAULT_SCHEMA, "sqlite_master", "select 'table' \"type\", table_name \"name\", table_name \"tbl_name\", 0 rootpage, sql from duckdb_tables union all select 'view' \"type\", view_name \"name\", view_name \"tbl_name\", 0 rootpage, sql from duckdb_views union all select 'index' \"type\", index_name \"name\", table_name \"tbl_name\", 0 rootpage, sql from duckdb_indexes;"}, + {DEFAULT_SCHEMA, "sqlite_schema", "SELECT * FROM sqlite_master"}, + {DEFAULT_SCHEMA, "sqlite_temp_master", "SELECT * FROM sqlite_master"}, + {DEFAULT_SCHEMA, "sqlite_temp_schema", "SELECT * FROM sqlite_master"}, + {DEFAULT_SCHEMA, "duckdb_constraints", "SELECT * FROM duckdb_constraints()"}, + {DEFAULT_SCHEMA, 
"duckdb_columns", "SELECT * FROM duckdb_columns() WHERE NOT internal"}, + {DEFAULT_SCHEMA, "duckdb_indexes", "SELECT * FROM duckdb_indexes()"}, + {DEFAULT_SCHEMA, "duckdb_schemas", "SELECT * FROM duckdb_schemas() WHERE NOT internal"}, + {DEFAULT_SCHEMA, "duckdb_tables", "SELECT * FROM duckdb_tables() WHERE NOT internal"}, + {DEFAULT_SCHEMA, "duckdb_types", "SELECT * FROM duckdb_types()"}, + {DEFAULT_SCHEMA, "duckdb_views", "SELECT * FROM duckdb_views() WHERE NOT internal"}, + {"pg_catalog", "pg_am", "SELECT 0 oid, 'art' amname, NULL amhandler, 'i' amtype"}, + {"pg_catalog", "pg_attribute", "SELECT table_oid attrelid, column_name attname, data_type_id atttypid, 0 attstattarget, NULL attlen, column_index attnum, 0 attndims, -1 attcacheoff, case when data_type ilike '%decimal%' then numeric_precision*1000+numeric_scale else -1 end atttypmod, false attbyval, NULL attstorage, NULL attalign, NOT is_nullable attnotnull, column_default IS NOT NULL atthasdef, false atthasmissing, '' attidentity, '' attgenerated, false attisdropped, true attislocal, 0 attinhcount, 0 attcollation, NULL attcompression, NULL attacl, NULL attoptions, NULL attfdwoptions, NULL attmissingval FROM duckdb_columns()"}, + {"pg_catalog", "pg_attrdef", "SELECT column_index oid, table_oid adrelid, column_index adnum, column_default adbin from duckdb_columns() where column_default is not null;"}, + {"pg_catalog", "pg_class", "SELECT table_oid oid, table_name relname, schema_oid relnamespace, 0 reltype, 0 reloftype, 0 relowner, 0 relam, 0 relfilenode, 0 reltablespace, 0 relpages, estimated_size::real reltuples, 0 relallvisible, 0 reltoastrelid, 0 reltoastidxid, index_count > 0 relhasindex, false relisshared, case when temporary then 't' else 'p' end relpersistence, 'r' relkind, column_count relnatts, check_constraint_count relchecks, false relhasoids, has_primary_key relhaspkey, false relhasrules, false relhastriggers, false relhassubclass, false relrowsecurity, true relispopulated, NULL relreplident, false relispartition, 0 relrewrite, 0 relfrozenxid, NULL relminmxid, NULL relacl, NULL reloptions, NULL relpartbound FROM duckdb_tables() UNION ALL SELECT view_oid oid, view_name relname, schema_oid relnamespace, 0 reltype, 0 reloftype, 0 relowner, 0 relam, 0 relfilenode, 0 reltablespace, 0 relpages, 0 reltuples, 0 relallvisible, 0 reltoastrelid, 0 reltoastidxid, false relhasindex, false relisshared, case when temporary then 't' else 'p' end relpersistence, 'v' relkind, column_count relnatts, 0 relchecks, false relhasoids, false relhaspkey, false relhasrules, false relhastriggers, false relhassubclass, false relrowsecurity, true relispopulated, NULL relreplident, false relispartition, 0 relrewrite, 0 relfrozenxid, NULL relminmxid, NULL relacl, NULL reloptions, NULL relpartbound FROM duckdb_views() UNION ALL SELECT sequence_oid oid, sequence_name relname, schema_oid relnamespace, 0 reltype, 0 reloftype, 0 relowner, 0 relam, 0 relfilenode, 0 reltablespace, 0 relpages, 0 reltuples, 0 relallvisible, 0 reltoastrelid, 0 reltoastidxid, false relhasindex, false relisshared, case when temporary then 't' else 'p' end relpersistence, 'S' relkind, 0 relnatts, 0 relchecks, false relhasoids, false relhaspkey, false relhasrules, false relhastriggers, false relhassubclass, false relrowsecurity, true relispopulated, NULL relreplident, false relispartition, 0 relrewrite, 0 relfrozenxid, NULL relminmxid, NULL relacl, NULL reloptions, NULL relpartbound FROM duckdb_sequences() UNION ALL SELECT index_oid oid, index_name relname, schema_oid relnamespace, 0 
reltype, 0 reloftype, 0 relowner, 0 relam, 0 relfilenode, 0 reltablespace, 0 relpages, 0 reltuples, 0 relallvisible, 0 reltoastrelid, 0 reltoastidxid, false relhasindex, false relisshared, 't' relpersistence, 'i' relkind, NULL relnatts, 0 relchecks, false relhasoids, false relhaspkey, false relhasrules, false relhastriggers, false relhassubclass, false relrowsecurity, true relispopulated, NULL relreplident, false relispartition, 0 relrewrite, 0 relfrozenxid, NULL relminmxid, NULL relacl, NULL reloptions, NULL relpartbound FROM duckdb_indexes()"}, + {"pg_catalog", "pg_constraint", "SELECT table_oid*1000000+constraint_index oid, constraint_text conname, schema_oid connamespace, CASE WHEN constraint_type='CHECK' then 'c' WHEN constraint_type='UNIQUE' then 'u' WHEN constraint_type='PRIMARY KEY' THEN 'p' ELSE 'x' END contype, false condeferrable, false condeferred, true convalidated, table_oid conrelid, 0 contypid, 0 conindid, 0 conparentid, 0 confrelid, NULL confupdtype, NULL confdeltype, NULL confmatchtype, true conislocal, 0 coninhcount, false connoinherit, constraint_column_indexes conkey, NULL confkey, NULL conpfeqop, NULL conppeqop, NULL conffeqop, NULL conexclop, expression conbin FROM duckdb_constraints()"}, + {"pg_catalog", "pg_depend", "SELECT * FROM duckdb_dependencies()"}, + {"pg_catalog", "pg_description", "SELECT NULL objoid, NULL classoid, NULL objsubid, NULL description WHERE 1=0"}, + {"pg_catalog", "pg_enum", "SELECT NULL oid, NULL enumtypid, NULL enumsortorder, NULL enumlabel WHERE 1=0"}, + {"pg_catalog", "pg_index", "SELECT index_oid indexrelid, table_oid indrelid, 0 indnatts, 0 indnkeyatts, is_unique indisunique, is_primary indisprimary, false indisexclusion, true indimmediate, false indisclustered, true indisvalid, false indcheckxmin, true indisready, true indislive, false indisreplident, NULL::INT[] indkey, NULL::OID[] indcollation, NULL::OID[] indclass, NULL::INT[] indoption, expressions indexprs, NULL indpred FROM duckdb_indexes()"}, + {"pg_catalog", "pg_indexes", "SELECT schema_name schemaname, table_name tablename, index_name indexname, NULL \"tablespace\", sql indexdef FROM duckdb_indexes()"}, + {"pg_catalog", "pg_namespace", "SELECT oid, schema_name nspname, 0 nspowner, NULL nspacl FROM duckdb_schemas()"}, + {"pg_catalog", "pg_sequence", "SELECT sequence_oid seqrelid, 0 seqtypid, start_value seqstart, increment_by seqincrement, max_value seqmax, min_value seqmin, 0 seqcache, cycle seqcycle FROM duckdb_sequences()"}, + {"pg_catalog", "pg_sequences", "SELECT schema_name schemaname, sequence_name sequencename, 'duckdb' sequenceowner, 0 data_type, start_value, min_value, max_value, increment_by, cycle, 0 cache_size, last_value FROM duckdb_sequences()"}, + {"pg_catalog", "pg_tables", "SELECT schema_name schemaname, table_name tablename, 'duckdb' tableowner, NULL \"tablespace\", index_count > 0 hasindexes, false hasrules, false hastriggers FROM duckdb_tables()"}, + {"pg_catalog", "pg_tablespace", "SELECT 0 oid, 'pg_default' spcname, 0 spcowner, NULL spcacl, NULL spcoptions"}, + {"pg_catalog", "pg_type", "SELECT type_oid oid, format_pg_type(type_name) typname, schema_oid typnamespace, 0 typowner, type_size typlen, false typbyval, 'b' typtype, CASE WHEN type_category='NUMERIC' THEN 'N' WHEN type_category='STRING' THEN 'S' WHEN type_category='DATETIME' THEN 'D' WHEN type_category='BOOLEAN' THEN 'B' WHEN type_category='COMPOSITE' THEN 'C' WHEN type_category='USER' THEN 'U' ELSE 'X' END typcategory, false typispreferred, true typisdefined, NULL typdelim, NULL typrelid, NULL 
typsubscript, NULL typelem, NULL typarray, NULL typinput, NULL typoutput, NULL typreceive, NULL typsend, NULL typmodin, NULL typmodout, NULL typanalyze, 'd' typalign, 'p' typstorage, NULL typnotnull, NULL typbasetype, NULL typtypmod, NULL typndims, NULL typcollation, NULL typdefaultbin, NULL typdefault, NULL typacl FROM duckdb_types();"}, + {"pg_catalog", "pg_views", "SELECT schema_name schemaname, view_name viewname, 'duckdb' viewowner, sql definition FROM duckdb_views()"}, + {"information_schema", "columns", "SELECT NULL table_catalog, schema_name table_schema, table_name, column_name, column_index ordinal_position, column_default, CASE WHEN is_nullable THEN 'YES' ELSE 'NO' END is_nullable, data_type, character_maximum_length, NULL character_octet_length, numeric_precision, numeric_precision_radix, numeric_scale, NULL datetime_precision, NULL interval_type, NULL interval_precision, NULL character_set_catalog, NULL character_set_schema, NULL character_set_name, NULL collation_catalog, NULL collation_schema, NULL collation_name, NULL domain_catalog, NULL domain_schema, NULL domain_name, NULL udt_catalog, NULL udt_schema, NULL udt_name, NULL scope_catalog, NULL scope_schema, NULL scope_name, NULL maximum_cardinality, NULL dtd_identifier, NULL is_self_referencing, NULL is_identity, NULL identity_generation, NULL identity_start, NULL identity_increment, NULL identity_maximum, NULL identity_minimum, NULL identity_cycle, NULL is_generated, NULL generation_expression, NULL is_updatable FROM duckdb_columns;"}, + {"information_schema", "schemata", "SELECT NULL catalog_name, schema_name, 'duckdb' schema_owner, NULL default_character_set_catalog, NULL default_character_set_schema, NULL default_character_set_name, sql sql_path FROM duckdb_schemas()"}, + {"information_schema", "tables", "SELECT NULL table_catalog, schema_name table_schema, table_name, CASE WHEN temporary THEN 'LOCAL TEMPORARY' ELSE 'BASE TABLE' END table_type, NULL self_referencing_column_name, NULL reference_generation, NULL user_defined_type_catalog, NULL user_defined_type_schema, NULL user_defined_type_name, 'YES' is_insertable_into, 'NO' is_typed, CASE WHEN temporary THEN 'PRESERVE' ELSE NULL END commit_action FROM duckdb_tables() UNION ALL SELECT NULL table_catalog, schema_name table_schema, view_name table_name, 'VIEW' table_type, NULL self_referencing_column_name, NULL reference_generation, NULL user_defined_type_catalog, NULL user_defined_type_schema, NULL user_defined_type_name, 'NO' is_insertable_into, 'NO' is_typed, NULL commit_action FROM duckdb_views;"}, + {nullptr, nullptr, nullptr}}; + +static unique_ptr GetDefaultView(const string &schema, const string &name) { + for (idx_t index = 0; internal_views[index].name != nullptr; index++) { + if (internal_views[index].schema == schema && internal_views[index].name == name) { + auto result = make_unique(); + result->schema = schema; + result->sql = internal_views[index].sql; + + Parser parser; + parser.ParseQuery(internal_views[index].sql); + D_ASSERT(parser.statements.size() == 1 && parser.statements[0]->type == StatementType::SELECT_STATEMENT); + result->query = unique_ptr_cast(move(parser.statements[0])); + result->temporary = true; + result->internal = true; + result->view_name = name; + return result; + } + } + return nullptr; +} + +DefaultViewGenerator::DefaultViewGenerator(Catalog &catalog, SchemaCatalogEntry *schema) + : DefaultGenerator(catalog), schema(schema) { +} + +unique_ptr DefaultViewGenerator::CreateDefaultEntry(ClientContext &context, const string 
&entry_name) { + auto info = GetDefaultView(schema->name, entry_name); + if (info) { + auto binder = Binder::CreateBinder(context); + binder->BindCreateViewInfo(*info); + + return make_unique_base(&catalog, schema, info.get()); + } + return nullptr; +} + +vector DefaultViewGenerator::GetDefaultEntries() { + vector result; + for (idx_t index = 0; internal_views[index].name != nullptr; index++) { + if (internal_views[index].schema == schema->name) { + result.emplace_back(internal_views[index].name); + } + } + return result; + +} + +} // namespace duckdb + + + + + + + + + +namespace duckdb { + +DependencyManager::DependencyManager(Catalog &catalog) : catalog(catalog) { +} + +void DependencyManager::AddObject(ClientContext &context, CatalogEntry *object, + unordered_set &dependencies) { + // check for each object in the sources if they were not deleted yet + for (auto &dependency : dependencies) { + idx_t entry_index; + CatalogEntry *catalog_entry; + if (!dependency->set->GetEntryInternal(context, dependency->name, entry_index, catalog_entry)) { + throw InternalException("Dependency has already been deleted?"); + } + } + // indexes do not require CASCADE to be dropped, they are simply always dropped along with the table + auto dependency_type = object->type == CatalogType::INDEX_ENTRY ? DependencyType::DEPENDENCY_AUTOMATIC + : DependencyType::DEPENDENCY_REGULAR; + // add the object to the dependents_map of each object that it depends on + for (auto &dependency : dependencies) { + dependents_map[dependency].insert(Dependency(object, dependency_type)); + } + // create the dependents map for this object: it starts out empty + dependents_map[object] = dependency_set_t(); + dependencies_map[object] = dependencies; +} + +void DependencyManager::DropObject(ClientContext &context, CatalogEntry *object, bool cascade) { + D_ASSERT(dependents_map.find(object) != dependents_map.end()); + + // first check the objects that depend on this object + auto &dependent_objects = dependents_map[object]; + for (auto &dep : dependent_objects) { + // look up the entry in the catalog set + auto &catalog_set = *dep.entry->set; + auto mapping_value = catalog_set.GetMapping(context, dep.entry->name, true /* get_latest */); + if (mapping_value == nullptr) { + continue; + } + idx_t entry_index = mapping_value->index; + CatalogEntry *dependency_entry; + + if (!catalog_set.GetEntryInternal(context, entry_index, dependency_entry)) { + // the dependent object was already deleted, no conflict + continue; + } + // conflict: attempting to delete this object but the dependent object still exists + if (cascade || dep.dependency_type == DependencyType::DEPENDENCY_AUTOMATIC || + dep.dependency_type == DependencyType::DEPENDENCY_OWNS) { + // cascade: drop the dependent object + catalog_set.DropEntryInternal(context, entry_index, *dependency_entry, cascade); + } else { + // no cascade and there are objects that depend on this object: throw error + throw CatalogException("Cannot drop entry \"%s\" because there are entries that " + "depend on it. 
Use DROP...CASCADE to drop all dependents.", + object->name); + } + } +} + +void DependencyManager::AlterObject(ClientContext &context, CatalogEntry *old_obj, CatalogEntry *new_obj) { + D_ASSERT(dependents_map.find(old_obj) != dependents_map.end()); + D_ASSERT(dependencies_map.find(old_obj) != dependencies_map.end()); + + // first check the objects that depend on this object + vector owned_objects_to_add; + auto &dependent_objects = dependents_map[old_obj]; + for (auto &dep : dependent_objects) { + // look up the entry in the catalog set + auto &catalog_set = *dep.entry->set; + idx_t entry_index; + CatalogEntry *dependency_entry; + if (!catalog_set.GetEntryInternal(context, dep.entry->name, entry_index, dependency_entry)) { + // the dependent object was already deleted, no conflict + continue; + } + if (dep.dependency_type == DependencyType::DEPENDENCY_OWNS) { + // the dependent object is owned by the current object + owned_objects_to_add.push_back(dep.entry); + continue; + } + // conflict: attempting to alter this object but the dependent object still exists + // no cascade and there are objects that depend on this object: throw error + throw CatalogException("Cannot alter entry \"%s\" because there are entries that " + "depend on it.", + old_obj->name); + } + // add the new object to the dependents_map of each object that it depends on + auto &old_dependencies = dependencies_map[old_obj]; + vector to_delete; + for (auto &dependency : old_dependencies) { + if (dependency->type == CatalogType::TYPE_ENTRY) { + auto user_type = (TypeCatalogEntry *)dependency; + auto table = (TableCatalogEntry *)new_obj; + bool deleted_dependency = true; + for (auto &column : table->columns) { + if (column.type == *user_type->user_type) { + deleted_dependency = false; + break; + } + } + if (deleted_dependency) { + to_delete.push_back(dependency); + continue; + } + } + dependents_map[dependency].insert(new_obj); + } + for (auto &dependency : to_delete) { + old_dependencies.erase(dependency); + dependents_map[dependency].erase(old_obj); + } + + // We might have to add a type dependency + vector to_add; + if (new_obj->type == CatalogType::TABLE_ENTRY) { + auto table = (TableCatalogEntry *)new_obj; + for (auto &column : table->columns) { + if (column.type.id() == LogicalTypeId::ENUM) { + auto enum_type_catalog = EnumType::GetCatalog(column.type); + if (enum_type_catalog) { + to_add.push_back(enum_type_catalog); + } + } + } + } + // add the new object to the dependency manager + dependents_map[new_obj] = dependency_set_t(); + dependencies_map[new_obj] = old_dependencies; + + for (auto &dependency : to_add) { + dependencies_map[new_obj].insert(dependency); + dependents_map[dependency].insert(new_obj); + } + + for (auto &dependency : owned_objects_to_add) { + dependents_map[new_obj].insert(Dependency(dependency, DependencyType::DEPENDENCY_OWNS)); + dependents_map[dependency].insert(Dependency(new_obj, DependencyType::DEPENDENCY_OWNED_BY)); + dependencies_map[new_obj].insert(dependency); + } +} + +void DependencyManager::EraseObject(CatalogEntry *object) { + // obtain the writing lock + EraseObjectInternal(object); +} + +void DependencyManager::EraseObjectInternal(CatalogEntry *object) { + if (dependents_map.find(object) == dependents_map.end()) { + // dependencies already removed + return; + } + D_ASSERT(dependents_map.find(object) != dependents_map.end()); + D_ASSERT(dependencies_map.find(object) != dependencies_map.end()); + // now for each of the dependencies, erase the entries from the dependents_map + for (auto 
&dependency : dependencies_map[object]) { + auto entry = dependents_map.find(dependency); + if (entry != dependents_map.end()) { + D_ASSERT(entry->second.find(object) != entry->second.end()); + entry->second.erase(object); + } + } + // erase the dependents and dependencies for this object + dependents_map.erase(object); + dependencies_map.erase(object); +} + +void DependencyManager::Scan(const std::function &callback) { + lock_guard write_lock(catalog.write_lock); + for (auto &entry : dependents_map) { + for (auto &dependent : entry.second) { + callback(entry.first, dependent.entry, dependent.dependency_type); + } + } +} + +void DependencyManager::AddOwnership(ClientContext &context, CatalogEntry *owner, CatalogEntry *entry) { + // lock the catalog for writing + lock_guard write_lock(catalog.write_lock); + + // If the owner is already owned by something else, throw an error + for (auto &dep : dependents_map[owner]) { + if (dep.dependency_type == DependencyType::DEPENDENCY_OWNED_BY) { + throw CatalogException(owner->name + " already owned by " + dep.entry->name); + } + } + + // If the entry is already owned, throw an error + for (auto &dep : dependents_map[entry]) { + // if the entry is already owned, throw error + if (dep.entry != owner) { + throw CatalogException(entry->name + " already depends on " + dep.entry->name); + } + // if the entry owns the owner, throw error + if (dep.entry == owner && dep.dependency_type == DependencyType::DEPENDENCY_OWNS) { + throw CatalogException(entry->name + " already owns " + owner->name + + ". Cannot have circular dependencies"); + } + } + + // Emplace guarantees that the same object cannot be inserted twice in the unordered_set + // In the case AddOwnership is called twice, because of emplace, the object will not be repeated in the set. 
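+ // The emplaces below record the relationship in both directions: besides the OWNS edge on the
+ // owner, the entry gets an OWNED_BY edge back (so it cannot acquire a second owner and cannot
+ // be dropped without CASCADE while the owner exists), and dependencies_map keeps the reverse
+ // link so EraseObject can later clean up both sides.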
+ // We use an automatic dependency because if the Owner gets deleted, then the owned objects are also deleted + dependents_map[owner].emplace(Dependency(entry, DependencyType::DEPENDENCY_OWNS)); + dependents_map[entry].emplace(Dependency(owner, DependencyType::DEPENDENCY_OWNED_BY)); + dependencies_map[owner].emplace(entry); +} + +} // namespace duckdb + + +namespace duckdb { + +AllocatedData::AllocatedData(Allocator &allocator, data_ptr_t pointer, idx_t allocated_size) + : allocator(allocator), pointer(pointer), allocated_size(allocated_size) { +} +AllocatedData::~AllocatedData() { + Reset(); +} + +void AllocatedData::Reset() { + if (!pointer) { + return; + } + allocator.FreeData(pointer, allocated_size); + pointer = nullptr; +} + +Allocator::Allocator() + : allocate_function(Allocator::DefaultAllocate), free_function(Allocator::DefaultFree), + reallocate_function(Allocator::DefaultReallocate) { +} + +Allocator::Allocator(allocate_function_ptr_t allocate_function_p, free_function_ptr_t free_function_p, + reallocate_function_ptr_t reallocate_function_p, unique_ptr private_data) + : allocate_function(allocate_function_p), free_function(free_function_p), + reallocate_function(reallocate_function_p), private_data(move(private_data)) { +} + +data_ptr_t Allocator::AllocateData(idx_t size) { + return allocate_function(private_data.get(), size); +} + +void Allocator::FreeData(data_ptr_t pointer, idx_t size) { + if (!pointer) { + return; + } + return free_function(private_data.get(), pointer, size); +} + +data_ptr_t Allocator::ReallocateData(data_ptr_t pointer, idx_t size) { + if (!pointer) { + return pointer; + } + return reallocate_function(private_data.get(), pointer, size); +} + +} // namespace duckdb + + + + + + +namespace duckdb { + +ArrowSchemaWrapper::~ArrowSchemaWrapper() { + if (arrow_schema.release) { + for (int64_t child_idx = 0; child_idx < arrow_schema.n_children; child_idx++) { + auto &child = *arrow_schema.children[child_idx]; + if (child.release) { + child.release(&child); + } + } + arrow_schema.release(&arrow_schema); + arrow_schema.release = nullptr; + } +} + +ArrowArrayWrapper::~ArrowArrayWrapper() { + if (arrow_array.release) { + for (int64_t child_idx = 0; child_idx < arrow_array.n_children; child_idx++) { + auto &child = *arrow_array.children[child_idx]; + if (child.release) { + child.release(&child); + } + } + arrow_array.release(&arrow_array); + arrow_array.release = nullptr; + } +} + +ArrowArrayStreamWrapper::~ArrowArrayStreamWrapper() { + if (arrow_array_stream.release) { + arrow_array_stream.release(&arrow_array_stream); + arrow_array_stream.release = nullptr; + } +} + +void ArrowArrayStreamWrapper::GetSchema(ArrowSchemaWrapper &schema) { + D_ASSERT(arrow_array_stream.get_schema); + // LCOV_EXCL_START + if (arrow_array_stream.get_schema(&arrow_array_stream, &schema.arrow_schema)) { + throw InvalidInputException("arrow_scan: get_schema failed(): %s", string(GetError())); + } + if (!schema.arrow_schema.release) { + throw InvalidInputException("arrow_scan: released schema passed"); + } + if (schema.arrow_schema.n_children < 1) { + throw InvalidInputException("arrow_scan: empty schema passed"); + } + // LCOV_EXCL_STOP +} + +unique_ptr ArrowArrayStreamWrapper::GetNextChunk() { + auto current_chunk = make_unique(); + if (arrow_array_stream.get_next(&arrow_array_stream, &current_chunk->arrow_array)) { // LCOV_EXCL_START + throw InvalidInputException("arrow_scan: get_next failed(): %s", string(GetError())); + } // LCOV_EXCL_STOP + + return current_chunk; +} + +const char
*ArrowArrayStreamWrapper::GetError() { // LCOV_EXCL_START + return arrow_array_stream.get_last_error(&arrow_array_stream); +} // LCOV_EXCL_STOP + +int ResultArrowArrayStreamWrapper::MyStreamGetSchema(struct ArrowArrayStream *stream, struct ArrowSchema *out) { + if (!stream->release) { + return -1; + } + auto my_stream = (ResultArrowArrayStreamWrapper *)stream->private_data; + auto &result = *my_stream->result; + if (!result.success) { + my_stream->last_error = "Query Failed"; + return -1; + } + if (result.type == QueryResultType::STREAM_RESULT) { + auto &stream_result = (StreamQueryResult &)result; + if (!stream_result.IsOpen()) { + my_stream->last_error = "Query Stream is closed"; + return -1; + } + } + result.ToArrowSchema(out); + return 0; +} + +int ResultArrowArrayStreamWrapper::MyStreamGetNext(struct ArrowArrayStream *stream, struct ArrowArray *out) { + if (!stream->release) { + return -1; + } + auto my_stream = (ResultArrowArrayStreamWrapper *)stream->private_data; + auto &result = *my_stream->result; + if (!result.success) { + my_stream->last_error = "Query Failed"; + return -1; + } + if (result.type == QueryResultType::STREAM_RESULT) { + auto &stream_result = (StreamQueryResult &)result; + if (!stream_result.IsOpen()) { + my_stream->last_error = "Query Stream is closed"; + return -1; + } + } + unique_ptr chunk_result = result.Fetch(); + if (!chunk_result) { + // Nothing to output + out->release = nullptr; + return 0; + } + for (idx_t i = 1; i < my_stream->vectors_per_chunk; i++) { + auto new_chunk = result.Fetch(); + if (!new_chunk) { + break; + } else { + chunk_result->Append(*new_chunk, true); + } + } + chunk_result->ToArrowArray(out); + return 0; +} + +void ResultArrowArrayStreamWrapper::MyStreamRelease(struct ArrowArrayStream *stream) { + if (!stream->release) { + return; + } + stream->release = nullptr; + delete (ResultArrowArrayStreamWrapper *)stream->private_data; +} + +const char *ResultArrowArrayStreamWrapper::MyStreamGetLastError(struct ArrowArrayStream *stream) { + if (!stream->release) { + return "stream was released"; + } + D_ASSERT(stream->private_data); + auto my_stream = (ResultArrowArrayStreamWrapper *)stream->private_data; + return my_stream->last_error.c_str(); +} +ResultArrowArrayStreamWrapper::ResultArrowArrayStreamWrapper(unique_ptr result_p, idx_t approx_batch_size) + : result(move(result_p)) { + //! We first initialize the private data of the stream + stream.private_data = this; + //! Ceil Approx_Batch_Size/STANDARD_VECTOR_SIZE + if (approx_batch_size == 0) { + throw std::runtime_error("Approximate Batch Size of Record Batch MUST be higher than 0"); + } + vectors_per_chunk = (approx_batch_size + STANDARD_VECTOR_SIZE - 1) / STANDARD_VECTOR_SIZE; + //! 
We initialize the stream functions + stream.get_schema = ResultArrowArrayStreamWrapper::MyStreamGetSchema; + stream.get_next = ResultArrowArrayStreamWrapper::MyStreamGetNext; + stream.release = ResultArrowArrayStreamWrapper::MyStreamRelease; + stream.get_last_error = ResultArrowArrayStreamWrapper::MyStreamGetLastError; +} + +} // namespace duckdb + + +namespace duckdb { + +void DuckDBAssertInternal(bool condition, const char *condition_name, const char *file, int linenr) { + if (condition) { + return; + } + throw InternalException("Assertion triggered in file \"%s\" on line %d: %s", file, linenr, condition_name); +} + +} // namespace duckdb +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/common/checksum.hpp +// +// +//===----------------------------------------------------------------------===// + + + + + +namespace duckdb { + +//! Compute a checksum over a buffer of size size +uint64_t Checksum(uint8_t *buffer, size_t size); + +} // namespace duckdb + + + +namespace duckdb { + +uint64_t Checksum(uint8_t *buffer, size_t size) { + uint64_t result = 5381; + uint64_t *ptr = (uint64_t *)buffer; + size_t i; + // for efficiency, we first hash uint64_t values + for (i = 0; i < size / 8; i++) { + result ^= Hash(ptr[i]); + } + if (size - i * 8 > 0) { + // the remaining 0-7 bytes we hash using a string hash + result ^= Hash(buffer + i * 8, size - i * 8); + } + return result; +} + +} // namespace duckdb + + +namespace duckdb { + +StreamWrapper::~StreamWrapper() { +} + +CompressedFile::CompressedFile(CompressedFileSystem &fs, unique_ptr child_handle_p, const string &path) + : FileHandle(fs, path), compressed_fs(fs), child_handle(move(child_handle_p)) { +} + +CompressedFile::~CompressedFile() { + Close(); +} + +void CompressedFile::Initialize(bool write) { + Close(); + + this->write = write; + stream_data.in_buf_size = compressed_fs.InBufferSize(); + stream_data.out_buf_size = compressed_fs.OutBufferSize(); + stream_data.in_buff = unique_ptr(new data_t[stream_data.in_buf_size]); + stream_data.in_buff_start = stream_data.in_buff.get(); + stream_data.in_buff_end = stream_data.in_buff.get(); + stream_data.out_buff = unique_ptr(new data_t[stream_data.out_buf_size]); + stream_data.out_buff_start = stream_data.out_buff.get(); + stream_data.out_buff_end = stream_data.out_buff.get(); + + stream_wrapper = compressed_fs.CreateStream(); + stream_wrapper->Initialize(*this, write); +} + +int64_t CompressedFile::ReadData(void *buffer, int64_t remaining) { + idx_t total_read = 0; + while (true) { + // first check if there are input bytes available in the output buffers + if (stream_data.out_buff_start != stream_data.out_buff_end) { + // there is! copy it into the output buffer + idx_t available = MinValue(remaining, stream_data.out_buff_end - stream_data.out_buff_start); + memcpy(data_ptr_t(buffer) + total_read, stream_data.out_buff_start, available); + + // increment the total read variables as required + stream_data.out_buff_start += available; + total_read += available; + remaining -= available; + if (remaining == 0) { + // done! 
read enough + return total_read; + } + } + if (!stream_wrapper) { + return total_read; + } + + // ran out of buffer: read more data from the child stream + stream_data.out_buff_start = stream_data.out_buff.get(); + stream_data.out_buff_end = stream_data.out_buff.get(); + D_ASSERT(stream_data.in_buff_start <= stream_data.in_buff_end); + D_ASSERT(stream_data.in_buff_end <= stream_data.in_buff_start + stream_data.in_buf_size); + + // read more input if none available + if (stream_data.in_buff_start == stream_data.in_buff_end) { + // empty input buffer: refill from the start + stream_data.in_buff_start = stream_data.in_buff.get(); + stream_data.in_buff_end = stream_data.in_buff_start; + auto sz = child_handle->Read(stream_data.in_buff.get(), stream_data.in_buf_size); + if (sz <= 0) { + stream_wrapper.reset(); + break; + } + stream_data.in_buff_end = stream_data.in_buff_start + sz; + } + + auto finished = stream_wrapper->Read(stream_data); + if (finished) { + stream_wrapper.reset(); + } + } + return total_read; +} + +int64_t CompressedFile::WriteData(data_ptr_t buffer, int64_t nr_bytes) { + stream_wrapper->Write(*this, stream_data, buffer, nr_bytes); + return nr_bytes; +} + +void CompressedFile::Close() { + if (stream_wrapper) { + stream_wrapper->Close(); + stream_wrapper.reset(); + } + stream_data.in_buff.reset(); + stream_data.out_buff.reset(); + stream_data.out_buff_start = nullptr; + stream_data.out_buff_end = nullptr; + stream_data.in_buff_start = nullptr; + stream_data.in_buff_end = nullptr; + stream_data.in_buf_size = 0; + stream_data.out_buf_size = 0; +} + +int64_t CompressedFileSystem::Read(FileHandle &handle, void *buffer, int64_t nr_bytes) { + auto &compressed_file = (CompressedFile &)handle; + return compressed_file.ReadData(buffer, nr_bytes); +} + +int64_t CompressedFileSystem::Write(FileHandle &handle, void *buffer, int64_t nr_bytes) { + auto &compressed_file = (CompressedFile &)handle; + return compressed_file.WriteData((data_ptr_t)buffer, nr_bytes); +} + +void CompressedFileSystem::Reset(FileHandle &handle) { + auto &compressed_file = (CompressedFile &)handle; + compressed_file.child_handle->Reset(); + compressed_file.Initialize(compressed_file.write); +} + +int64_t CompressedFileSystem::GetFileSize(FileHandle &handle) { + auto &compressed_file = (CompressedFile &)handle; + return compressed_file.child_handle->GetFileSize(); +} + +bool CompressedFileSystem::OnDiskFile(FileHandle &handle) { + auto &compressed_file = (CompressedFile &)handle; + return compressed_file.child_handle->OnDiskFile(); +} + +bool CompressedFileSystem::CanSeek() { + return false; +} + +} // namespace duckdb + + + + +namespace duckdb { + +constexpr const idx_t DConstants::INVALID_INDEX; +const row_t MAX_ROW_ID = 4611686018427388000ULL; // 2^62 +const column_t COLUMN_IDENTIFIER_ROW_ID = (column_t)-1; +const sel_t ZERO_VECTOR[STANDARD_VECTOR_SIZE] = {0}; +const double PI = 3.141592653589793; + +const transaction_t TRANSACTION_ID_START = 4611686018427388000ULL; // 2^62 +const transaction_t MAX_TRANSACTION_ID = NumericLimits::Maximum(); // 2^63 +const transaction_t NOT_DELETED_ID = NumericLimits::Maximum() - 1; // 2^64 - 1 +const transaction_t MAXIMUM_QUERY_ID = NumericLimits::Maximum(); // 2^64 + +uint64_t NextPowerOfTwo(uint64_t v) { + v--; + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + v |= v >> 32; + v++; + return v; +} + +} // namespace duckdb +/* +** This code taken from the SQLite test library. Originally found on +** the internet. 
The original header comment follows this comment. +** The code is largerly unchanged, but there have been some modifications. +*/ +/* + * This code implements the MD5 message-digest algorithm. + * The algorithm is due to Ron Rivest. This code was + * written by Colin Plumb in 1993, no copyright is claimed. + * This code is in the public domain; do with it what you wish. + * + * Equivalent code is available from RSA Data Security, Inc. + * This code has been tested against that, and is equivalent, + * except that you don't need to include two pages of legalese + * with every copy. + * + * To compute the message digest of a chunk of bytes, declare an + * MD5Context structure, pass it to MD5Init, call MD5Update as + * needed on buffers full of bytes, and then call MD5Final, which + * will fill a supplied 16-byte array with the digest. + */ +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/common/crypto/md5.hpp +// +// +//===----------------------------------------------------------------------===// + + + + + + +namespace duckdb { + +class MD5Context { +public: + static constexpr idx_t MD5_HASH_LENGTH_BINARY = 16; + static constexpr idx_t MD5_HASH_LENGTH_TEXT = 32; + +public: + MD5Context(); + + void Add(const_data_ptr_t data, idx_t len) { + MD5Update(data, len); + } + void Add(const char *data); + void Add(string_t string) { + MD5Update((const_data_ptr_t)string.GetDataUnsafe(), string.GetSize()); + } + void Add(const string &data) { + MD5Update((const_data_ptr_t)data.c_str(), data.size()); + } + + //! Write the 16-byte (binary) digest to the specified location + void Finish(data_ptr_t out_digest); + //! Write the 32-character digest (in hexadecimal format) to the specified location + void FinishHex(char *out_digest); + //! Returns the 32-character digest (in hexadecimal format) as a string + string FinishHex(); + +private: + void MD5Update(const_data_ptr_t data, idx_t len); + static void DigestToBase16(const_data_ptr_t digest, char *zBuf); + + uint32_t buf[4]; + uint32_t bits[2]; + unsigned char in[64]; +}; + +} // namespace duckdb + + +namespace duckdb { + +/* + * Note: this code is harmless on little-endian machines. + */ +static void ByteReverse(unsigned char *buf, unsigned longs) { + uint32_t t; + do { + t = (uint32_t)((unsigned)buf[3] << 8 | buf[2]) << 16 | ((unsigned)buf[1] << 8 | buf[0]); + *(uint32_t *)buf = t; + buf += 4; + } while (--longs); +} +/* The four core functions - F1 is optimized somewhat */ + +/* #define F1(x, y, z) (x & y | ~x & z) */ +#define F1(x, y, z) ((z) ^ ((x) & ((y) ^ (z)))) +#define F2(x, y, z) F1(z, x, y) +#define F3(x, y, z) ((x) ^ (y) ^ (z)) +#define F4(x, y, z) ((y) ^ ((x) | ~(z))) + +/* This is the central step in the MD5 algorithm. */ +#define MD5STEP(f, w, x, y, z, data, s) ((w) += f(x, y, z) + (data), (w) = (w) << (s) | (w) >> (32 - (s)), (w) += (x)) + +/* + * The core of the MD5 algorithm, this alters an existing MD5 hash to + * reflect the addition of 16 longwords of new data. MD5Update blocks + * the data and converts bytes into longwords for this routine. 
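+ * Each call consumes one 64-byte block as sixteen 32-bit little-endian words and applies
+ * the four rounds F1..F4 defined above, sixteen MD5STEP operations per round, before adding
+ * the result back into the running state in buf[0..3].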
+ */ +static void MD5Transform(uint32_t buf[4], const uint32_t in[16]) { + uint32_t a, b, c, d; + + a = buf[0]; + b = buf[1]; + c = buf[2]; + d = buf[3]; + + MD5STEP(F1, a, b, c, d, in[0] + 0xd76aa478, 7); + MD5STEP(F1, d, a, b, c, in[1] + 0xe8c7b756, 12); + MD5STEP(F1, c, d, a, b, in[2] + 0x242070db, 17); + MD5STEP(F1, b, c, d, a, in[3] + 0xc1bdceee, 22); + MD5STEP(F1, a, b, c, d, in[4] + 0xf57c0faf, 7); + MD5STEP(F1, d, a, b, c, in[5] + 0x4787c62a, 12); + MD5STEP(F1, c, d, a, b, in[6] + 0xa8304613, 17); + MD5STEP(F1, b, c, d, a, in[7] + 0xfd469501, 22); + MD5STEP(F1, a, b, c, d, in[8] + 0x698098d8, 7); + MD5STEP(F1, d, a, b, c, in[9] + 0x8b44f7af, 12); + MD5STEP(F1, c, d, a, b, in[10] + 0xffff5bb1, 17); + MD5STEP(F1, b, c, d, a, in[11] + 0x895cd7be, 22); + MD5STEP(F1, a, b, c, d, in[12] + 0x6b901122, 7); + MD5STEP(F1, d, a, b, c, in[13] + 0xfd987193, 12); + MD5STEP(F1, c, d, a, b, in[14] + 0xa679438e, 17); + MD5STEP(F1, b, c, d, a, in[15] + 0x49b40821, 22); + + MD5STEP(F2, a, b, c, d, in[1] + 0xf61e2562, 5); + MD5STEP(F2, d, a, b, c, in[6] + 0xc040b340, 9); + MD5STEP(F2, c, d, a, b, in[11] + 0x265e5a51, 14); + MD5STEP(F2, b, c, d, a, in[0] + 0xe9b6c7aa, 20); + MD5STEP(F2, a, b, c, d, in[5] + 0xd62f105d, 5); + MD5STEP(F2, d, a, b, c, in[10] + 0x02441453, 9); + MD5STEP(F2, c, d, a, b, in[15] + 0xd8a1e681, 14); + MD5STEP(F2, b, c, d, a, in[4] + 0xe7d3fbc8, 20); + MD5STEP(F2, a, b, c, d, in[9] + 0x21e1cde6, 5); + MD5STEP(F2, d, a, b, c, in[14] + 0xc33707d6, 9); + MD5STEP(F2, c, d, a, b, in[3] + 0xf4d50d87, 14); + MD5STEP(F2, b, c, d, a, in[8] + 0x455a14ed, 20); + MD5STEP(F2, a, b, c, d, in[13] + 0xa9e3e905, 5); + MD5STEP(F2, d, a, b, c, in[2] + 0xfcefa3f8, 9); + MD5STEP(F2, c, d, a, b, in[7] + 0x676f02d9, 14); + MD5STEP(F2, b, c, d, a, in[12] + 0x8d2a4c8a, 20); + + MD5STEP(F3, a, b, c, d, in[5] + 0xfffa3942, 4); + MD5STEP(F3, d, a, b, c, in[8] + 0x8771f681, 11); + MD5STEP(F3, c, d, a, b, in[11] + 0x6d9d6122, 16); + MD5STEP(F3, b, c, d, a, in[14] + 0xfde5380c, 23); + MD5STEP(F3, a, b, c, d, in[1] + 0xa4beea44, 4); + MD5STEP(F3, d, a, b, c, in[4] + 0x4bdecfa9, 11); + MD5STEP(F3, c, d, a, b, in[7] + 0xf6bb4b60, 16); + MD5STEP(F3, b, c, d, a, in[10] + 0xbebfbc70, 23); + MD5STEP(F3, a, b, c, d, in[13] + 0x289b7ec6, 4); + MD5STEP(F3, d, a, b, c, in[0] + 0xeaa127fa, 11); + MD5STEP(F3, c, d, a, b, in[3] + 0xd4ef3085, 16); + MD5STEP(F3, b, c, d, a, in[6] + 0x04881d05, 23); + MD5STEP(F3, a, b, c, d, in[9] + 0xd9d4d039, 4); + MD5STEP(F3, d, a, b, c, in[12] + 0xe6db99e5, 11); + MD5STEP(F3, c, d, a, b, in[15] + 0x1fa27cf8, 16); + MD5STEP(F3, b, c, d, a, in[2] + 0xc4ac5665, 23); + + MD5STEP(F4, a, b, c, d, in[0] + 0xf4292244, 6); + MD5STEP(F4, d, a, b, c, in[7] + 0x432aff97, 10); + MD5STEP(F4, c, d, a, b, in[14] + 0xab9423a7, 15); + MD5STEP(F4, b, c, d, a, in[5] + 0xfc93a039, 21); + MD5STEP(F4, a, b, c, d, in[12] + 0x655b59c3, 6); + MD5STEP(F4, d, a, b, c, in[3] + 0x8f0ccc92, 10); + MD5STEP(F4, c, d, a, b, in[10] + 0xffeff47d, 15); + MD5STEP(F4, b, c, d, a, in[1] + 0x85845dd1, 21); + MD5STEP(F4, a, b, c, d, in[8] + 0x6fa87e4f, 6); + MD5STEP(F4, d, a, b, c, in[15] + 0xfe2ce6e0, 10); + MD5STEP(F4, c, d, a, b, in[6] + 0xa3014314, 15); + MD5STEP(F4, b, c, d, a, in[13] + 0x4e0811a1, 21); + MD5STEP(F4, a, b, c, d, in[4] + 0xf7537e82, 6); + MD5STEP(F4, d, a, b, c, in[11] + 0xbd3af235, 10); + MD5STEP(F4, c, d, a, b, in[2] + 0x2ad7d2bb, 15); + MD5STEP(F4, b, c, d, a, in[9] + 0xeb86d391, 21); + + buf[0] += a; + buf[1] += b; + buf[2] += c; + buf[3] += d; +} + +/* + * Start MD5 accumulation. 
Set bit count to 0 and buffer to mysterious + * initialization constants. + */ +MD5Context::MD5Context() { + buf[0] = 0x67452301; + buf[1] = 0xefcdab89; + buf[2] = 0x98badcfe; + buf[3] = 0x10325476; + bits[0] = 0; + bits[1] = 0; +} + +/* + * Update context to reflect the concatenation of another buffer full + * of bytes. + */ +void MD5Context::MD5Update(const_data_ptr_t input, idx_t len) { + uint32_t t; + + /* Update bitcount */ + + t = bits[0]; + if ((bits[0] = t + ((uint32_t)len << 3)) < t) { + bits[1]++; /* Carry from low to high */ + } + bits[1] += len >> 29; + + t = (t >> 3) & 0x3f; /* Bytes already in shsInfo->data */ + + /* Handle any leading odd-sized chunks */ + + if (t) { + unsigned char *p = (unsigned char *)in + t; + + t = 64 - t; + if (len < t) { + memcpy(p, input, len); + return; + } + memcpy(p, input, t); + ByteReverse(in, 16); + MD5Transform(buf, (uint32_t *)in); + input += t; + len -= t; + } + + /* Process data in 64-byte chunks */ + + while (len >= 64) { + memcpy(in, input, 64); + ByteReverse(in, 16); + MD5Transform(buf, (uint32_t *)in); + input += 64; + len -= 64; + } + + /* Handle any remaining bytes of data. */ + memcpy(in, input, len); +} + +/* + * Final wrapup - pad to 64-byte boundary with the bit pattern + * 1 0* (64-bit count of bits processed, MSB-first) + */ +void MD5Context::Finish(data_ptr_t out_digest) { + unsigned count; + unsigned char *p; + + /* Compute number of bytes mod 64 */ + count = (bits[0] >> 3) & 0x3F; + + /* Set the first char of padding to 0x80. This is safe since there is + always at least one byte free */ + p = in + count; + *p++ = 0x80; + + /* Bytes of padding needed to make 64 bytes */ + count = 64 - 1 - count; + + /* Pad out to 56 mod 64 */ + if (count < 8) { + /* Two lots of padding: Pad the first block to 64 bytes */ + memset(p, 0, count); + ByteReverse(in, 16); + MD5Transform(buf, (uint32_t *)in); + + /* Now fill the next block with 56 bytes */ + memset(in, 0, 56); + } else { + /* Pad block to 56 bytes */ + memset(p, 0, count - 8); + } + ByteReverse(in, 14); + + /* Append length in bits and transform */ + ((uint32_t *)in)[14] = bits[0]; + ((uint32_t *)in)[15] = bits[1]; + + MD5Transform(buf, (uint32_t *)in); + ByteReverse((unsigned char *)buf, 4); + memcpy(out_digest, buf, 16); +} + +void MD5Context::DigestToBase16(const_data_ptr_t digest, char *zbuf) { + static char const HEX_CODES[] = "0123456789abcdef"; + int i, j; + + for (j = i = 0; i < 16; i++) { + int a = digest[i]; + zbuf[j++] = HEX_CODES[(a >> 4) & 0xf]; + zbuf[j++] = HEX_CODES[a & 0xf]; + } +} + +void MD5Context::FinishHex(char *out_digest) { + data_t digest[MD5_HASH_LENGTH_BINARY]; + Finish(digest); + DigestToBase16(digest, out_digest); +} + +string MD5Context::FinishHex() { + char digest[MD5_HASH_LENGTH_TEXT]; + FinishHex(digest); + return string(digest, MD5_HASH_LENGTH_TEXT); +} + +void MD5Context::Add(const char *data) { + MD5Update((const_data_ptr_t)data, strlen(data)); +} + +} // namespace duckdb +// This file is licensed under Apache License 2.0 +// Source code taken from https://github.com/google/benchmark +// It is highly modified + + + + +namespace duckdb { + +inline uint64_t ChronoNow() { + return std::chrono::duration_cast( + std::chrono::time_point_cast(std::chrono::high_resolution_clock::now()) + .time_since_epoch()) + .count(); +} + +inline uint64_t Now() { +#if defined(RDTSC) +#if defined(__i386__) + uint64_t ret; + __asm__ volatile("rdtsc" : "=A"(ret)); + return ret; +#elif defined(__x86_64__) || defined(__amd64__) + uint64_t low, high; + __asm__ 
volatile("rdtsc" : "=a"(low), "=d"(high)); + return (high << 32) | low; +#elif defined(__powerpc__) || defined(__ppc__) + uint64_t tbl, tbu0, tbu1; + asm("mftbu %0" : "=r"(tbu0)); + asm("mftb %0" : "=r"(tbl)); + asm("mftbu %0" : "=r"(tbu1)); + tbl &= -static_cast(tbu0 == tbu1); + return (tbu1 << 32) | tbl; +#elif defined(__sparc__) + uint64_t tick; + asm(".byte 0x83, 0x41, 0x00, 0x00"); + asm("mov %%g1, %0" : "=r"(tick)); + return tick; +#elif defined(__ia64__) + uint64_t itc; + asm("mov %0 = ar.itc" : "=r"(itc)); + return itc; +#elif defined(COMPILER_MSVC) && defined(_M_IX86) + _asm rdtsc +#elif defined(COMPILER_MSVC) + return __rdtsc(); +#elif defined(__aarch64__) + uint64_t virtual_timer_value; + asm volatile("mrs %0, cntvct_el0" : "=r"(virtual_timer_value)); + return virtual_timer_value; +#elif defined(__ARM_ARCH) +#if (__ARM_ARCH >= 6) + uint32_t pmccntr; + uint32_t pmuseren; + uint32_t pmcntenset; + asm volatile("mrc p15, 0, %0, c9, c14, 0" : "=r"(pmuseren)); + if (pmuseren & 1) { // Allows reading perfmon counters for user mode code. + asm volatile("mrc p15, 0, %0, c9, c12, 1" : "=r"(pmcntenset)); + if (pmcntenset & 0x80000000ul) { // Is it counting? + asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r"(pmccntr)); + return static_cast(pmccntr) * 64; // Should optimize to << 6 + } + } +#endif + return ChronoNow(); +#else + return ChronoNow(); +#endif +#else + return ChronoNow(); +#endif // defined(RDTSC) +} +uint64_t CycleCounter::Tick() const { + return Now(); +} +} // namespace duckdb + + + +namespace duckdb { + +// LCOV_EXCL_START +string CatalogTypeToString(CatalogType type) { + switch (type) { + case CatalogType::COLLATION_ENTRY: + return "Collation"; + case CatalogType::TYPE_ENTRY: + return "Type"; + case CatalogType::TABLE_ENTRY: + return "Table"; + case CatalogType::SCHEMA_ENTRY: + return "Schema"; + case CatalogType::TABLE_FUNCTION_ENTRY: + return "Table Function"; + case CatalogType::SCALAR_FUNCTION_ENTRY: + return "Scalar Function"; + case CatalogType::AGGREGATE_FUNCTION_ENTRY: + return "Aggregate Function"; + case CatalogType::COPY_FUNCTION_ENTRY: + return "Copy Function"; + case CatalogType::PRAGMA_FUNCTION_ENTRY: + return "Pragma Function"; + case CatalogType::MACRO_ENTRY: + return "Macro Function"; + case CatalogType::VIEW_ENTRY: + return "View"; + case CatalogType::INDEX_ENTRY: + return "Index"; + case CatalogType::PREPARED_STATEMENT: + return "Prepared Statement"; + case CatalogType::SEQUENCE_ENTRY: + return "Sequence"; + case CatalogType::INVALID: + case CatalogType::DELETED_ENTRY: + case CatalogType::UPDATED_ENTRY: + break; + } + return "INVALID"; +} +// LCOV_EXCL_STOP + +} // namespace duckdb + + + + +namespace duckdb { + +// LCOV_EXCL_START +CompressionType CompressionTypeFromString(const string &str) { + auto compression = StringUtil::Lower(str); + if (compression == "uncompressed") { + return CompressionType::COMPRESSION_UNCOMPRESSED; + } else if (compression == "rle") { + return CompressionType::COMPRESSION_RLE; + } else if (compression == "dictionary") { + return CompressionType::COMPRESSION_DICTIONARY; + } else if (compression == "pfor") { + return CompressionType::COMPRESSION_PFOR_DELTA; + } else if (compression == "bitpacking") { + return CompressionType::COMPRESSION_BITPACKING; + } else if (compression == "fsst") { + return CompressionType::COMPRESSION_FSST; + } else { + return CompressionType::COMPRESSION_AUTO; + } +} + +string CompressionTypeToString(CompressionType type) { + switch (type) { + case CompressionType::COMPRESSION_UNCOMPRESSED: + return 
"Uncompressed"; + case CompressionType::COMPRESSION_CONSTANT: + return "Constant"; + case CompressionType::COMPRESSION_RLE: + return "RLE"; + case CompressionType::COMPRESSION_DICTIONARY: + return "Dictionary"; + case CompressionType::COMPRESSION_PFOR_DELTA: + return "PFOR"; + case CompressionType::COMPRESSION_BITPACKING: + return "BitPacking"; + case CompressionType::COMPRESSION_FSST: + return "FSST"; + default: + throw InternalException("Unrecognized compression type!"); + } +} +// LCOV_EXCL_STOP + +} // namespace duckdb + + + + +namespace duckdb { + +// LCOV_EXCL_START +string ExpressionTypeToString(ExpressionType type) { + switch (type) { + case ExpressionType::OPERATOR_CAST: + return "CAST"; + case ExpressionType::OPERATOR_NOT: + return "NOT"; + case ExpressionType::OPERATOR_IS_NULL: + return "IS_NULL"; + case ExpressionType::OPERATOR_IS_NOT_NULL: + return "IS_NOT_NULL"; + case ExpressionType::COMPARE_EQUAL: + return "EQUAL"; + case ExpressionType::COMPARE_NOTEQUAL: + return "NOTEQUAL"; + case ExpressionType::COMPARE_LESSTHAN: + return "LESSTHAN"; + case ExpressionType::COMPARE_GREATERTHAN: + return "GREATERTHAN"; + case ExpressionType::COMPARE_LESSTHANOREQUALTO: + return "LESSTHANOREQUALTO"; + case ExpressionType::COMPARE_GREATERTHANOREQUALTO: + return "GREATERTHANOREQUALTO"; + case ExpressionType::COMPARE_IN: + return "IN"; + case ExpressionType::COMPARE_DISTINCT_FROM: + return "DISTINCT_FROM"; + case ExpressionType::COMPARE_NOT_DISTINCT_FROM: + return "NOT_DISTINCT_FROM"; + case ExpressionType::CONJUNCTION_AND: + return "AND"; + case ExpressionType::CONJUNCTION_OR: + return "OR"; + case ExpressionType::VALUE_CONSTANT: + return "CONSTANT"; + case ExpressionType::VALUE_PARAMETER: + return "PARAMETER"; + case ExpressionType::VALUE_TUPLE: + return "TUPLE"; + case ExpressionType::VALUE_TUPLE_ADDRESS: + return "TUPLE_ADDRESS"; + case ExpressionType::VALUE_NULL: + return "NULL"; + case ExpressionType::VALUE_VECTOR: + return "VECTOR"; + case ExpressionType::VALUE_SCALAR: + return "SCALAR"; + case ExpressionType::AGGREGATE: + return "AGGREGATE"; + case ExpressionType::WINDOW_AGGREGATE: + return "WINDOW_AGGREGATE"; + case ExpressionType::WINDOW_RANK: + return "RANK"; + case ExpressionType::WINDOW_RANK_DENSE: + return "RANK_DENSE"; + case ExpressionType::WINDOW_PERCENT_RANK: + return "PERCENT_RANK"; + case ExpressionType::WINDOW_ROW_NUMBER: + return "ROW_NUMBER"; + case ExpressionType::WINDOW_FIRST_VALUE: + return "FIRST_VALUE"; + case ExpressionType::WINDOW_LAST_VALUE: + return "LAST_VALUE"; + case ExpressionType::WINDOW_NTH_VALUE: + return "NTH_VALUE"; + case ExpressionType::WINDOW_CUME_DIST: + return "CUME_DIST"; + case ExpressionType::WINDOW_LEAD: + return "LEAD"; + case ExpressionType::WINDOW_LAG: + return "LAG"; + case ExpressionType::WINDOW_NTILE: + return "NTILE"; + case ExpressionType::FUNCTION: + return "FUNCTION"; + case ExpressionType::CASE_EXPR: + return "CASE"; + case ExpressionType::OPERATOR_NULLIF: + return "NULLIF"; + case ExpressionType::OPERATOR_COALESCE: + return "COALESCE"; + case ExpressionType::ARRAY_EXTRACT: + return "ARRAY_EXTRACT"; + case ExpressionType::ARRAY_SLICE: + return "ARRAY_SLICE"; + case ExpressionType::STRUCT_EXTRACT: + return "STRUCT_EXTRACT"; + case ExpressionType::SUBQUERY: + return "SUBQUERY"; + case ExpressionType::STAR: + return "STAR"; + case ExpressionType::PLACEHOLDER: + return "PLACEHOLDER"; + case ExpressionType::COLUMN_REF: + return "COLUMN_REF"; + case ExpressionType::FUNCTION_REF: + return "FUNCTION_REF"; + case ExpressionType::TABLE_REF: + 
return "TABLE_REF"; + case ExpressionType::CAST: + return "CAST"; + case ExpressionType::COMPARE_NOT_IN: + return "COMPARE_NOT_IN"; + case ExpressionType::COMPARE_BETWEEN: + return "COMPARE_BETWEEN"; + case ExpressionType::COMPARE_NOT_BETWEEN: + return "COMPARE_NOT_BETWEEN"; + case ExpressionType::VALUE_DEFAULT: + return "VALUE_DEFAULT"; + case ExpressionType::BOUND_REF: + return "BOUND_REF"; + case ExpressionType::BOUND_COLUMN_REF: + return "BOUND_COLUMN_REF"; + case ExpressionType::BOUND_FUNCTION: + return "BOUND_FUNCTION"; + case ExpressionType::BOUND_AGGREGATE: + return "BOUND_AGGREGATE"; + case ExpressionType::GROUPING_FUNCTION: + return "GROUPING"; + case ExpressionType::ARRAY_CONSTRUCTOR: + return "ARRAY_CONSTRUCTOR"; + case ExpressionType::TABLE_STAR: + return "TABLE_STAR"; + case ExpressionType::BOUND_UNNEST: + return "BOUND_UNNEST"; + case ExpressionType::COLLATE: + return "COLLATE"; + case ExpressionType::POSITIONAL_REFERENCE: + return "POSITIONAL_REFERENCE"; + case ExpressionType::LAMBDA: + return "LAMBDA"; + case ExpressionType::INVALID: + break; + } + return "INVALID"; +} +// LCOV_EXCL_STOP + +string ExpressionTypeToOperator(ExpressionType type) { + switch (type) { + case ExpressionType::OPERATOR_NOT: + return "!"; + case ExpressionType::COMPARE_EQUAL: + return "="; + case ExpressionType::COMPARE_NOTEQUAL: + return "!="; + case ExpressionType::COMPARE_LESSTHAN: + return "<"; + case ExpressionType::COMPARE_GREATERTHAN: + return ">"; + case ExpressionType::COMPARE_LESSTHANOREQUALTO: + return "<="; + case ExpressionType::COMPARE_GREATERTHANOREQUALTO: + return ">="; + case ExpressionType::CONJUNCTION_AND: + return "AND"; + case ExpressionType::CONJUNCTION_OR: + return "OR"; + default: + return ""; + } +} + +ExpressionType NegateComparisionExpression(ExpressionType type) { + ExpressionType negated_type = ExpressionType::INVALID; + switch (type) { + case ExpressionType::COMPARE_EQUAL: + negated_type = ExpressionType::COMPARE_NOTEQUAL; + break; + case ExpressionType::COMPARE_NOTEQUAL: + negated_type = ExpressionType::COMPARE_EQUAL; + break; + case ExpressionType::COMPARE_LESSTHAN: + negated_type = ExpressionType::COMPARE_GREATERTHANOREQUALTO; + break; + case ExpressionType::COMPARE_GREATERTHAN: + negated_type = ExpressionType::COMPARE_LESSTHANOREQUALTO; + break; + case ExpressionType::COMPARE_LESSTHANOREQUALTO: + negated_type = ExpressionType::COMPARE_GREATERTHAN; + break; + case ExpressionType::COMPARE_GREATERTHANOREQUALTO: + negated_type = ExpressionType::COMPARE_LESSTHAN; + break; + default: + throw InternalException("Unsupported comparison type in negation"); + } + return negated_type; +} + +ExpressionType FlipComparisionExpression(ExpressionType type) { + ExpressionType flipped_type = ExpressionType::INVALID; + switch (type) { + case ExpressionType::COMPARE_NOT_DISTINCT_FROM: + case ExpressionType::COMPARE_DISTINCT_FROM: + case ExpressionType::COMPARE_NOTEQUAL: + case ExpressionType::COMPARE_EQUAL: + flipped_type = type; + break; + case ExpressionType::COMPARE_LESSTHAN: + flipped_type = ExpressionType::COMPARE_GREATERTHAN; + break; + case ExpressionType::COMPARE_GREATERTHAN: + flipped_type = ExpressionType::COMPARE_LESSTHAN; + break; + case ExpressionType::COMPARE_LESSTHANOREQUALTO: + flipped_type = ExpressionType::COMPARE_GREATERTHANOREQUALTO; + break; + case ExpressionType::COMPARE_GREATERTHANOREQUALTO: + flipped_type = ExpressionType::COMPARE_LESSTHANOREQUALTO; + break; + default: + throw InternalException("Unsupported comparison type in flip"); + } + return flipped_type; +} 
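+
+// Illustrative sketch only (hypothetical helper, not part of the upstream DuckDB
+// sources): the two helpers above serve different rewrites. NegateComparisionExpression
+// answers "what does NOT (a <op> b) become?", while FlipComparisionExpression answers
+// "which operator keeps (a <op> b) equivalent once its operands are swapped?".
+inline bool ComparisonRewriteContractHolds() {
+ // NOT (a < b) is equivalent to (a >= b).
+ bool negate_ok = NegateComparisionExpression(ExpressionType::COMPARE_LESSTHAN) ==
+ ExpressionType::COMPARE_GREATERTHANOREQUALTO;
+ // (a < b) is equivalent to (b > a) once the operands are swapped.
+ bool flip_ok = FlipComparisionExpression(ExpressionType::COMPARE_LESSTHAN) ==
+ ExpressionType::COMPARE_GREATERTHAN;
+ // Equality is symmetric, so flipping it is a no-op.
+ bool symmetric_ok = FlipComparisionExpression(ExpressionType::COMPARE_EQUAL) == ExpressionType::COMPARE_EQUAL;
+ return negate_ok && flip_ok && symmetric_ok;
+}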
+ +} // namespace duckdb + + + +namespace duckdb { + +FileCompressionType FileCompressionTypeFromString(const string &input) { + auto parameter = StringUtil::Lower(input); + if (parameter == "infer" || parameter == "auto") { + return FileCompressionType::AUTO_DETECT; + } else if (parameter == "gzip") { + return FileCompressionType::GZIP; + } else if (parameter == "zstd") { + return FileCompressionType::ZSTD; + } else if (parameter == "uncompressed" || parameter == "none" || parameter.empty()) { + return FileCompressionType::UNCOMPRESSED; + } else { + throw ParserException("Unrecognized file compression type \"%s\"", input); + } +} + +} // namespace duckdb + + +namespace duckdb { + +string JoinTypeToString(JoinType type) { + switch (type) { + case JoinType::LEFT: + return "LEFT"; + case JoinType::RIGHT: + return "RIGHT"; + case JoinType::INNER: + return "INNER"; + case JoinType::OUTER: + return "OUTER"; + case JoinType::SEMI: + return "SEMI"; + case JoinType::ANTI: + return "ANTI"; + case JoinType::SINGLE: + return "SINGLE"; + case JoinType::MARK: + return "MARK"; + case JoinType::INVALID: // LCOV_EXCL_START + break; + } + return "INVALID"; +} // LCOV_EXCL_STOP + +bool IsLeftOuterJoin(JoinType type) { + return type == JoinType::LEFT || type == JoinType::OUTER; +} + +bool IsRightOuterJoin(JoinType type) { + return type == JoinType::OUTER || type == JoinType::RIGHT; +} + +} // namespace duckdb + + +namespace duckdb { + +//===--------------------------------------------------------------------===// +// Value <--> String Utilities +//===--------------------------------------------------------------------===// +// LCOV_EXCL_START +string LogicalOperatorToString(LogicalOperatorType type) { + switch (type) { + case LogicalOperatorType::LOGICAL_GET: + return "GET"; + case LogicalOperatorType::LOGICAL_CHUNK_GET: + return "CHUNK_GET"; + case LogicalOperatorType::LOGICAL_DELIM_GET: + return "DELIM_GET"; + case LogicalOperatorType::LOGICAL_EMPTY_RESULT: + return "EMPTY_RESULT"; + case LogicalOperatorType::LOGICAL_EXPRESSION_GET: + return "EXPRESSION_GET"; + case LogicalOperatorType::LOGICAL_ANY_JOIN: + return "ANY_JOIN"; + case LogicalOperatorType::LOGICAL_COMPARISON_JOIN: + return "COMPARISON_JOIN"; + case LogicalOperatorType::LOGICAL_DELIM_JOIN: + return "DELIM_JOIN"; + case LogicalOperatorType::LOGICAL_PROJECTION: + return "PROJECTION"; + case LogicalOperatorType::LOGICAL_FILTER: + return "FILTER"; + case LogicalOperatorType::LOGICAL_AGGREGATE_AND_GROUP_BY: + return "AGGREGATE"; + case LogicalOperatorType::LOGICAL_WINDOW: + return "WINDOW"; + case LogicalOperatorType::LOGICAL_UNNEST: + return "UNNEST"; + case LogicalOperatorType::LOGICAL_LIMIT: + return "LIMIT"; + case LogicalOperatorType::LOGICAL_ORDER_BY: + return "ORDER_BY"; + case LogicalOperatorType::LOGICAL_TOP_N: + return "TOP_N"; + case LogicalOperatorType::LOGICAL_SAMPLE: + return "SAMPLE"; + case LogicalOperatorType::LOGICAL_LIMIT_PERCENT: + return "LIMIT_PERCENT"; + case LogicalOperatorType::LOGICAL_COPY_TO_FILE: + return "COPY_TO_FILE"; + case LogicalOperatorType::LOGICAL_JOIN: + return "JOIN"; + case LogicalOperatorType::LOGICAL_CROSS_PRODUCT: + return "CROSS_PRODUCT"; + case LogicalOperatorType::LOGICAL_UNION: + return "UNION"; + case LogicalOperatorType::LOGICAL_EXCEPT: + return "EXCEPT"; + case LogicalOperatorType::LOGICAL_INTERSECT: + return "INTERSECT"; + case LogicalOperatorType::LOGICAL_INSERT: + return "INSERT"; + case LogicalOperatorType::LOGICAL_DISTINCT: + return "DISTINCT"; + case LogicalOperatorType::LOGICAL_DELETE: + 
return "DELETE";
+ case LogicalOperatorType::LOGICAL_UPDATE:
+ return "UPDATE";
+ case LogicalOperatorType::LOGICAL_PREPARE:
+ return "PREPARE";
+ case LogicalOperatorType::LOGICAL_DUMMY_SCAN:
+ return "DUMMY_SCAN";
+ case LogicalOperatorType::LOGICAL_CREATE_INDEX:
+ return "CREATE_INDEX";
+ case LogicalOperatorType::LOGICAL_CREATE_TABLE:
+ return "CREATE_TABLE";
+ case LogicalOperatorType::LOGICAL_CREATE_MACRO:
+ return "CREATE_MACRO";
+ case LogicalOperatorType::LOGICAL_EXPLAIN:
+ return "EXPLAIN";
+ case LogicalOperatorType::LOGICAL_EXECUTE:
+ return "EXECUTE";
+ case LogicalOperatorType::LOGICAL_VACUUM:
+ return "VACUUM";
+ case LogicalOperatorType::LOGICAL_RECURSIVE_CTE:
+ return "REC_CTE";
+ case LogicalOperatorType::LOGICAL_CTE_REF:
+ return "CTE_SCAN";
+ case LogicalOperatorType::LOGICAL_SHOW:
+ return "SHOW";
+ case LogicalOperatorType::LOGICAL_ALTER:
+ return "ALTER";
+ case LogicalOperatorType::LOGICAL_CREATE_SEQUENCE:
+ return "CREATE_SEQUENCE";
+ case LogicalOperatorType::LOGICAL_CREATE_TYPE:
+ return "CREATE_TYPE";
+ case LogicalOperatorType::LOGICAL_CREATE_VIEW:
+ return "CREATE_VIEW";
+ case LogicalOperatorType::LOGICAL_CREATE_SCHEMA:
+ return "CREATE_SCHEMA";
+ case LogicalOperatorType::LOGICAL_DROP:
+ return "DROP";
+ case LogicalOperatorType::LOGICAL_PRAGMA:
+ return "PRAGMA";
+ case LogicalOperatorType::LOGICAL_TRANSACTION:
+ return "TRANSACTION";
+ case LogicalOperatorType::LOGICAL_EXPORT:
+ return "EXPORT";
+ case LogicalOperatorType::LOGICAL_SET:
+ return "SET";
+ case LogicalOperatorType::LOGICAL_LOAD:
+ return "LOAD";
+ case LogicalOperatorType::LOGICAL_INVALID:
+ break;
+ }
+ return "INVALID";
+}
+// LCOV_EXCL_STOP
+
+} // namespace duckdb
+
+
+
+
+
+namespace duckdb {
+
+struct DefaultOptimizerType {
+ const char *name;
+ OptimizerType type;
+};
+
+static DefaultOptimizerType internal_optimizer_types[] = {
+ {"expression_rewriter", OptimizerType::EXPRESSION_REWRITER},
+ {"filter_pullup", OptimizerType::FILTER_PULLUP},
+ {"filter_pushdown", OptimizerType::FILTER_PUSHDOWN},
+ {"regex_range", OptimizerType::REGEX_RANGE},
+ {"in_clause", OptimizerType::IN_CLAUSE},
+ {"join_order", OptimizerType::JOIN_ORDER},
+ {"deliminator", OptimizerType::DELIMINATOR},
+ {"unused_columns", OptimizerType::UNUSED_COLUMNS},
+ {"statistics_propagation", OptimizerType::STATISTICS_PROPAGATION},
+ {"common_subexpressions", OptimizerType::COMMON_SUBEXPRESSIONS},
+ {"common_aggregate", OptimizerType::COMMON_AGGREGATE},
+ {"column_lifetime", OptimizerType::COLUMN_LIFETIME},
+ {"top_n", OptimizerType::TOP_N},
+ {"reorder_filter", OptimizerType::REORDER_FILTER},
+ {nullptr, OptimizerType::INVALID}};
+
+string OptimizerTypeToString(OptimizerType type) {
+ for (idx_t i = 0; internal_optimizer_types[i].name; i++) {
+ if (internal_optimizer_types[i].type == type) {
+ return internal_optimizer_types[i].name;
+ }
+ }
+ throw InternalException("Invalid optimizer type");
+}
+
+OptimizerType OptimizerTypeFromString(const string &str) {
+ for (idx_t i = 0; internal_optimizer_types[i].name; i++) {
+ if (internal_optimizer_types[i].name == str) {
+ return internal_optimizer_types[i].type;
+ }
+ }
+ // optimizer not found, construct candidate list
+ vector<string> optimizer_names;
+ for (idx_t i = 0; internal_optimizer_types[i].name; i++) {
+ optimizer_names.emplace_back(internal_optimizer_types[i].name);
+ }
+ throw ParserException("Optimizer type \"%s\" not recognized\n%s", str,
+ StringUtil::CandidatesErrorMessage(optimizer_names, str, "Candidate optimizers"));
+}
+
+} // namespace duckdb
+
+
+namespace duckdb 
{ + +// LCOV_EXCL_START +string PhysicalOperatorToString(PhysicalOperatorType type) { + switch (type) { + case PhysicalOperatorType::TABLE_SCAN: + return "TABLE_SCAN"; + case PhysicalOperatorType::DUMMY_SCAN: + return "DUMMY_SCAN"; + case PhysicalOperatorType::CHUNK_SCAN: + return "CHUNK_SCAN"; + case PhysicalOperatorType::DELIM_SCAN: + return "DELIM_SCAN"; + case PhysicalOperatorType::ORDER_BY: + return "ORDER_BY"; + case PhysicalOperatorType::LIMIT: + return "LIMIT"; + case PhysicalOperatorType::LIMIT_PERCENT: + return "LIMIT_PERCENT"; + case PhysicalOperatorType::RESERVOIR_SAMPLE: + return "RESERVOIR_SAMPLE"; + case PhysicalOperatorType::STREAMING_SAMPLE: + return "STREAMING_SAMPLE"; + case PhysicalOperatorType::TOP_N: + return "TOP_N"; + case PhysicalOperatorType::WINDOW: + return "WINDOW"; + case PhysicalOperatorType::STREAMING_WINDOW: + return "STREAMING_WINDOW"; + case PhysicalOperatorType::UNNEST: + return "UNNEST"; + case PhysicalOperatorType::SIMPLE_AGGREGATE: + return "SIMPLE_AGGREGATE"; + case PhysicalOperatorType::HASH_GROUP_BY: + return "HASH_GROUP_BY"; + case PhysicalOperatorType::PERFECT_HASH_GROUP_BY: + return "PERFECT_HASH_GROUP_BY"; + case PhysicalOperatorType::FILTER: + return "FILTER"; + case PhysicalOperatorType::PROJECTION: + return "PROJECTION"; + case PhysicalOperatorType::COPY_TO_FILE: + return "COPY_TO_FILE"; + case PhysicalOperatorType::DELIM_JOIN: + return "DELIM_JOIN"; + case PhysicalOperatorType::BLOCKWISE_NL_JOIN: + return "BLOCKWISE_NL_JOIN"; + case PhysicalOperatorType::NESTED_LOOP_JOIN: + return "NESTED_LOOP_JOIN"; + case PhysicalOperatorType::HASH_JOIN: + return "HASH_JOIN"; + case PhysicalOperatorType::INDEX_JOIN: + return "INDEX_JOIN"; + case PhysicalOperatorType::PIECEWISE_MERGE_JOIN: + return "PIECEWISE_MERGE_JOIN"; + case PhysicalOperatorType::CROSS_PRODUCT: + return "CROSS_PRODUCT"; + case PhysicalOperatorType::UNION: + return "UNION"; + case PhysicalOperatorType::INSERT: + return "INSERT"; + case PhysicalOperatorType::DELETE_OPERATOR: + return "DELETE"; + case PhysicalOperatorType::UPDATE: + return "UPDATE"; + case PhysicalOperatorType::EMPTY_RESULT: + return "EMPTY_RESULT"; + case PhysicalOperatorType::CREATE_TABLE: + return "CREATE_TABLE"; + case PhysicalOperatorType::CREATE_TABLE_AS: + return "CREATE_TABLE_AS"; + case PhysicalOperatorType::CREATE_INDEX: + return "CREATE_INDEX"; + case PhysicalOperatorType::EXPLAIN: + return "EXPLAIN"; + case PhysicalOperatorType::EXPLAIN_ANALYZE: + return "EXPLAIN_ANALYZE"; + case PhysicalOperatorType::EXECUTE: + return "EXECUTE"; + case PhysicalOperatorType::VACUUM: + return "VACUUM"; + case PhysicalOperatorType::RECURSIVE_CTE: + return "REC_CTE"; + case PhysicalOperatorType::RECURSIVE_CTE_SCAN: + return "REC_CTE_SCAN"; + case PhysicalOperatorType::EXPRESSION_SCAN: + return "EXPRESSION_SCAN"; + case PhysicalOperatorType::ALTER: + return "ALTER"; + case PhysicalOperatorType::CREATE_SEQUENCE: + return "CREATE_SEQUENCE"; + case PhysicalOperatorType::CREATE_VIEW: + return "CREATE_VIEW"; + case PhysicalOperatorType::CREATE_SCHEMA: + return "CREATE_SCHEMA"; + case PhysicalOperatorType::CREATE_MACRO: + return "CREATE_MACRO"; + case PhysicalOperatorType::DROP: + return "DROP"; + case PhysicalOperatorType::PRAGMA: + return "PRAGMA"; + case PhysicalOperatorType::TRANSACTION: + return "TRANSACTION"; + case PhysicalOperatorType::PREPARE: + return "PREPARE"; + case PhysicalOperatorType::EXPORT: + return "EXPORT"; + case PhysicalOperatorType::SET: + return "SET"; + case PhysicalOperatorType::LOAD: + return "LOAD"; + case 
PhysicalOperatorType::INOUT_FUNCTION: + return "INOUT_FUNCTION"; + case PhysicalOperatorType::CREATE_TYPE: + return "CREATE_TYPE"; + case PhysicalOperatorType::INVALID: + break; + } + return "INVALID"; +} +// LCOV_EXCL_STOP + +} // namespace duckdb + + + + +namespace duckdb { + +// LCOV_EXCL_START +string RelationTypeToString(RelationType type) { + switch (type) { + case RelationType::TABLE_RELATION: + return "TABLE_RELATION"; + case RelationType::PROJECTION_RELATION: + return "PROJECTION_RELATION"; + case RelationType::FILTER_RELATION: + return "FILTER_RELATION"; + case RelationType::EXPLAIN_RELATION: + return "EXPLAIN_RELATION"; + case RelationType::CROSS_PRODUCT_RELATION: + return "CROSS_PRODUCT_RELATION"; + case RelationType::JOIN_RELATION: + return "JOIN_RELATION"; + case RelationType::AGGREGATE_RELATION: + return "AGGREGATE_RELATION"; + case RelationType::SET_OPERATION_RELATION: + return "SET_OPERATION_RELATION"; + case RelationType::DISTINCT_RELATION: + return "DISTINCT_RELATION"; + case RelationType::LIMIT_RELATION: + return "LIMIT_RELATION"; + case RelationType::ORDER_RELATION: + return "ORDER_RELATION"; + case RelationType::CREATE_VIEW_RELATION: + return "CREATE_VIEW_RELATION"; + case RelationType::CREATE_TABLE_RELATION: + return "CREATE_TABLE_RELATION"; + case RelationType::INSERT_RELATION: + return "INSERT_RELATION"; + case RelationType::VALUE_LIST_RELATION: + return "VALUE_LIST_RELATION"; + case RelationType::DELETE_RELATION: + return "DELETE_RELATION"; + case RelationType::UPDATE_RELATION: + return "UPDATE_RELATION"; + case RelationType::WRITE_CSV_RELATION: + return "WRITE_CSV_RELATION"; + case RelationType::READ_CSV_RELATION: + return "READ_CSV_RELATION"; + case RelationType::SUBQUERY_RELATION: + return "SUBQUERY_RELATION"; + case RelationType::TABLE_FUNCTION_RELATION: + return "TABLE_FUNCTION_RELATION"; + case RelationType::VIEW_RELATION: + return "VIEW_RELATION"; + case RelationType::QUERY_RELATION: + return "QUERY_RELATION"; + case RelationType::INVALID_RELATION: + break; + } + return "INVALID_RELATION"; +} +// LCOV_EXCL_STOP + +} // namespace duckdb + + +namespace duckdb { + +// LCOV_EXCL_START +string StatementTypeToString(StatementType type) { + switch (type) { + case StatementType::SELECT_STATEMENT: + return "SELECT"; + case StatementType::INSERT_STATEMENT: + return "INSERT"; + case StatementType::UPDATE_STATEMENT: + return "UPDATE"; + case StatementType::DELETE_STATEMENT: + return "DELETE"; + case StatementType::PREPARE_STATEMENT: + return "PREPARE"; + case StatementType::EXECUTE_STATEMENT: + return "EXECUTE"; + case StatementType::ALTER_STATEMENT: + return "ALTER"; + case StatementType::TRANSACTION_STATEMENT: + return "TRANSACTION"; + case StatementType::COPY_STATEMENT: + return "COPY"; + case StatementType::ANALYZE_STATEMENT: + return "ANALYZE"; + case StatementType::VARIABLE_SET_STATEMENT: + return "VARIABLE_SET"; + case StatementType::CREATE_FUNC_STATEMENT: + return "CREATE_FUNC"; + case StatementType::EXPLAIN_STATEMENT: + return "EXPLAIN"; + case StatementType::CREATE_STATEMENT: + return "CREATE"; + case StatementType::DROP_STATEMENT: + return "DROP"; + case StatementType::PRAGMA_STATEMENT: + return "PRAGMA"; + case StatementType::SHOW_STATEMENT: + return "SHOW"; + case StatementType::VACUUM_STATEMENT: + return "VACUUM"; + case StatementType::RELATION_STATEMENT: + return "RELATION"; + case StatementType::EXPORT_STATEMENT: + return "EXPORT"; + case StatementType::CALL_STATEMENT: + return "CALL"; + case StatementType::SET_STATEMENT: + return "SET"; + case 
StatementType::LOAD_STATEMENT:
+ return "LOAD";
+ case StatementType::INVALID_STATEMENT:
+ break;
+ }
+ return "INVALID";
+}
+// LCOV_EXCL_STOP
+
+bool StatementTypeReturnChanges(StatementType type) {
+ switch (type) {
+ case StatementType::INSERT_STATEMENT:
+ case StatementType::UPDATE_STATEMENT:
+ case StatementType::DELETE_STATEMENT:
+ return true;
+ default:
+ return false;
+ }
+}
+
+} // namespace duckdb
+
+
+
+
+
+
+namespace duckdb {
+
+Exception::Exception(const string &msg) : std::exception(), type(ExceptionType::INVALID) {
+ exception_message_ = msg;
+}
+
+Exception::Exception(ExceptionType exception_type, const string &message) : std::exception(), type(exception_type) {
+ exception_message_ = ExceptionTypeToString(exception_type) + " Error: " + message;
+}
+
+const char *Exception::what() const noexcept {
+ return exception_message_.c_str();
+}
+
+bool Exception::UncaughtException() {
+#if __cplusplus >= 201703L
+ return std::uncaught_exceptions() > 0;
+#else
+ return std::uncaught_exception();
+#endif
+}
+
+string Exception::ConstructMessageRecursive(const string &msg, vector<ExceptionFormatValue> &values) {
+ return ExceptionFormatValue::Format(msg, values);
+}
+
+string Exception::ExceptionTypeToString(ExceptionType type) {
+ switch (type) {
+ case ExceptionType::INVALID:
+ return "Invalid";
+ case ExceptionType::OUT_OF_RANGE:
+ return "Out of Range";
+ case ExceptionType::CONVERSION:
+ return "Conversion";
+ case ExceptionType::UNKNOWN_TYPE:
+ return "Unknown Type";
+ case ExceptionType::DECIMAL:
+ return "Decimal";
+ case ExceptionType::MISMATCH_TYPE:
+ return "Mismatch Type";
+ case ExceptionType::DIVIDE_BY_ZERO:
+ return "Divide by Zero";
+ case ExceptionType::OBJECT_SIZE:
+ return "Object Size";
+ case ExceptionType::INVALID_TYPE:
+ return "Invalid type";
+ case ExceptionType::SERIALIZATION:
+ return "Serialization";
+ case ExceptionType::TRANSACTION:
+ return "TransactionContext";
+ case ExceptionType::NOT_IMPLEMENTED:
+ return "Not implemented";
+ case ExceptionType::EXPRESSION:
+ return "Expression";
+ case ExceptionType::CATALOG:
+ return "Catalog";
+ case ExceptionType::PARSER:
+ return "Parser";
+ case ExceptionType::BINDER:
+ return "Binder";
+ case ExceptionType::PLANNER:
+ return "Planner";
+ case ExceptionType::SCHEDULER:
+ return "Scheduler";
+ case ExceptionType::EXECUTOR:
+ return "Executor";
+ case ExceptionType::CONSTRAINT:
+ return "Constraint";
+ case ExceptionType::INDEX:
+ return "Index";
+ case ExceptionType::STAT:
+ return "Stat";
+ case ExceptionType::CONNECTION:
+ return "Connection";
+ case ExceptionType::SYNTAX:
+ return "Syntax";
+ case ExceptionType::SETTINGS:
+ return "Settings";
+ case ExceptionType::OPTIMIZER:
+ return "Optimizer";
+ case ExceptionType::NULL_POINTER:
+ return "NullPointer";
+ case ExceptionType::IO:
+ return "IO";
+ case ExceptionType::INTERRUPT:
+ return "INTERRUPT";
+ case ExceptionType::FATAL:
+ return "FATAL";
+ case ExceptionType::INTERNAL:
+ return "INTERNAL";
+ case ExceptionType::INVALID_INPUT:
+ return "Invalid Input";
+ case ExceptionType::OUT_OF_MEMORY:
+ return "Out of Memory";
+ case ExceptionType::PERMISSION:
+ return "Permission";
+ default:
+ return "Unknown";
+ }
+}
+
+StandardException::StandardException(ExceptionType exception_type, const string &message)
+ : Exception(exception_type, message) {
+}
+
+CastException::CastException(const PhysicalType orig_type, const PhysicalType new_type)
+ : Exception(ExceptionType::CONVERSION,
+ "Type " + TypeIdToString(orig_type) + " can't be cast as " + TypeIdToString(new_type)) {
+}
+
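+// Illustrative sketch only (hypothetical helper, not part of the upstream DuckDB
+// sources): Exception prefixes every message with its type name, so the cast failure
+// above surfaces through what() as "Conversion Error: Type <orig> can't be cast as <new>".
+inline string DescribeFailedCast(PhysicalType orig_type, PhysicalType new_type) {
+ try {
+ throw CastException(orig_type, new_type);
+ } catch (const Exception &ex) {
+ return string(ex.what());
+ }
+}
+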
+CastException::CastException(const LogicalType &orig_type, const LogicalType &new_type) + : Exception(ExceptionType::CONVERSION, + "Type " + orig_type.ToString() + " can't be cast as " + new_type.ToString()) { +} + +ValueOutOfRangeException::ValueOutOfRangeException(const int64_t value, const PhysicalType orig_type, + const PhysicalType new_type) + : Exception(ExceptionType::CONVERSION, "Type " + TypeIdToString(orig_type) + " with value " + + to_string((intmax_t)value) + + " can't be cast because the value is out of range " + "for the destination type " + + TypeIdToString(new_type)) { +} + +ValueOutOfRangeException::ValueOutOfRangeException(const double value, const PhysicalType orig_type, + const PhysicalType new_type) + : Exception(ExceptionType::CONVERSION, "Type " + TypeIdToString(orig_type) + " with value " + to_string(value) + + " can't be cast because the value is out of range " + "for the destination type " + + TypeIdToString(new_type)) { +} + +ValueOutOfRangeException::ValueOutOfRangeException(const hugeint_t value, const PhysicalType orig_type, + const PhysicalType new_type) + : Exception(ExceptionType::CONVERSION, "Type " + TypeIdToString(orig_type) + " with value " + value.ToString() + + " can't be cast because the value is out of range " + "for the destination type " + + TypeIdToString(new_type)) { +} + +ValueOutOfRangeException::ValueOutOfRangeException(const PhysicalType var_type, const idx_t length) + : Exception(ExceptionType::OUT_OF_RANGE, + "The value is too long to fit into type " + TypeIdToString(var_type) + "(" + to_string(length) + ")") { +} + +ConversionException::ConversionException(const string &msg) : Exception(ExceptionType::CONVERSION, msg) { +} + +InvalidTypeException::InvalidTypeException(PhysicalType type, const string &msg) + : Exception(ExceptionType::INVALID_TYPE, "Invalid Type [" + TypeIdToString(type) + "]: " + msg) { +} + +InvalidTypeException::InvalidTypeException(const LogicalType &type, const string &msg) + : Exception(ExceptionType::INVALID_TYPE, "Invalid Type [" + type.ToString() + "]: " + msg) { +} + +TypeMismatchException::TypeMismatchException(const PhysicalType type_1, const PhysicalType type_2, const string &msg) + : Exception(ExceptionType::MISMATCH_TYPE, + "Type " + TypeIdToString(type_1) + " does not match with " + TypeIdToString(type_2) + ". " + msg) { +} + +TypeMismatchException::TypeMismatchException(const LogicalType &type_1, const LogicalType &type_2, const string &msg) + : Exception(ExceptionType::MISMATCH_TYPE, + "Type " + type_1.ToString() + " does not match with " + type_2.ToString() + ". 
" + msg) { +} + +TransactionException::TransactionException(const string &msg) : Exception(ExceptionType::TRANSACTION, msg) { +} + +NotImplementedException::NotImplementedException(const string &msg) : Exception(ExceptionType::NOT_IMPLEMENTED, msg) { +} + +OutOfRangeException::OutOfRangeException(const string &msg) : Exception(ExceptionType::OUT_OF_RANGE, msg) { +} + +CatalogException::CatalogException(const string &msg) : StandardException(ExceptionType::CATALOG, msg) { +} + +ParserException::ParserException(const string &msg) : StandardException(ExceptionType::PARSER, msg) { +} + +PermissionException::PermissionException(const string &msg) : StandardException(ExceptionType::PERMISSION, msg) { +} + +SyntaxException::SyntaxException(const string &msg) : Exception(ExceptionType::SYNTAX, msg) { +} + +ConstraintException::ConstraintException(const string &msg) : Exception(ExceptionType::CONSTRAINT, msg) { +} + +BinderException::BinderException(const string &msg) : StandardException(ExceptionType::BINDER, msg) { +} + +IOException::IOException(const string &msg) : Exception(ExceptionType::IO, msg) { +} + +SerializationException::SerializationException(const string &msg) : Exception(ExceptionType::SERIALIZATION, msg) { +} + +SequenceException::SequenceException(const string &msg) : Exception(ExceptionType::SERIALIZATION, msg) { +} + +InterruptException::InterruptException() : Exception(ExceptionType::INTERRUPT, "Interrupted!") { +} + +FatalException::FatalException(const string &msg) : Exception(ExceptionType::FATAL, msg) { +} + +InternalException::InternalException(const string &msg) : Exception(ExceptionType::INTERNAL, msg) { +} + +InvalidInputException::InvalidInputException(const string &msg) : Exception(ExceptionType::INVALID_INPUT, msg) { +} + +OutOfMemoryException::OutOfMemoryException(const string &msg) : Exception(ExceptionType::OUT_OF_MEMORY, msg) { +} + +} // namespace duckdb + + + + +// LICENSE_CHANGE_BEGIN +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #1 +// See the end of this file for a list + +/* + Formatting library for C++ + + Copyright (c) 2012 - present, Victor Zverovich + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + --- Optional exception to the license --- + + As an exception, if, as a result of your compiling your source code, portions + of this Software are embedded into a machine-executable object form of such + source code, you may redistribute such embedded portions in such object form + without including the above copyright and permission notices. + */ + +#ifndef FMT_FORMAT_H_ +#define FMT_FORMAT_H_ + + + + +// LICENSE_CHANGE_BEGIN +// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #1 +// See the end of this file for a list + +// Formatting library for C++ - the core API +// +// Copyright (c) 2012 - present, Victor Zverovich +// All rights reserved. +// +// For the license information refer to format.h. + +#ifndef FMT_CORE_H_ +#define FMT_CORE_H_ + +#include // std::FILE +#include +#include +#include +#include + +// The fmt library version in the form major * 10000 + minor * 100 + patch. +#define FMT_VERSION 60102 + +#ifdef __has_feature +# define FMT_HAS_FEATURE(x) __has_feature(x) +#else +# define FMT_HAS_FEATURE(x) 0 +#endif + +#if defined(__has_include) && !defined(__INTELLISENSE__) && \ + !(defined(__INTEL_COMPILER) && __INTEL_COMPILER < 1600) +# define FMT_HAS_INCLUDE(x) __has_include(x) +#else +# define FMT_HAS_INCLUDE(x) 0 +#endif + +#ifdef __has_cpp_attribute +# define FMT_HAS_CPP_ATTRIBUTE(x) __has_cpp_attribute(x) +#else +# define FMT_HAS_CPP_ATTRIBUTE(x) 0 +#endif + +#if defined(__GNUC__) && !defined(__clang__) +# define FMT_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) +#else +# define FMT_GCC_VERSION 0 +#endif + +#if __cplusplus >= 201103L || defined(__GXX_EXPERIMENTAL_CXX0X__) +# define FMT_HAS_GXX_CXX11 FMT_GCC_VERSION +#else +# define FMT_HAS_GXX_CXX11 0 +#endif + +#ifdef __NVCC__ +# define FMT_NVCC __NVCC__ +#else +# define FMT_NVCC 0 +#endif + +#ifdef _MSC_VER +# define FMT_MSC_VER _MSC_VER +#else +# define FMT_MSC_VER 0 +#endif + +// Check if relaxed C++14 constexpr is supported. +// GCC doesn't allow throw in constexpr until version 6 (bug 67371). +#ifndef FMT_USE_CONSTEXPR +# define FMT_USE_CONSTEXPR \ + (FMT_HAS_FEATURE(cxx_relaxed_constexpr) || FMT_MSC_VER >= 1910 || \ + (FMT_GCC_VERSION >= 600 && __cplusplus >= 201402L)) && \ + !FMT_NVCC +#endif +#if FMT_USE_CONSTEXPR +# define FMT_CONSTEXPR constexpr +# define FMT_CONSTEXPR_DECL constexpr +#else +# define FMT_CONSTEXPR inline +# define FMT_CONSTEXPR_DECL +#endif + +#ifndef FMT_OVERRIDE +# if FMT_HAS_FEATURE(cxx_override) || \ + (FMT_GCC_VERSION >= 408 && FMT_HAS_GXX_CXX11) || FMT_MSC_VER >= 1900 +# define FMT_OVERRIDE override +# else +# define FMT_OVERRIDE +# endif +#endif + +// Check if exceptions are disabled. +#ifndef FMT_EXCEPTIONS +# if (defined(__GNUC__) && !defined(__EXCEPTIONS)) || \ + FMT_MSC_VER && !_HAS_EXCEPTIONS +# define FMT_EXCEPTIONS 0 +# else +# define FMT_EXCEPTIONS 1 +# endif +#endif + +// Define FMT_USE_NOEXCEPT to make fmt use noexcept (C++11 feature). +#ifndef FMT_USE_NOEXCEPT +# define FMT_USE_NOEXCEPT 0 +#endif + +#if FMT_USE_NOEXCEPT || FMT_HAS_FEATURE(cxx_noexcept) || \ + (FMT_GCC_VERSION >= 408 && FMT_HAS_GXX_CXX11) || FMT_MSC_VER >= 1900 +# define FMT_DETECTED_NOEXCEPT noexcept +# define FMT_HAS_CXX11_NOEXCEPT 1 +#else +# define FMT_DETECTED_NOEXCEPT throw() +# define FMT_HAS_CXX11_NOEXCEPT 0 +#endif + +#ifndef FMT_NOEXCEPT +# if FMT_EXCEPTIONS || FMT_HAS_CXX11_NOEXCEPT +# define FMT_NOEXCEPT FMT_DETECTED_NOEXCEPT +# else +# define FMT_NOEXCEPT +# endif +#endif + +// [[noreturn]] is disabled on MSVC because of bogus unreachable code warnings. 
+#if FMT_EXCEPTIONS && FMT_HAS_CPP_ATTRIBUTE(noreturn) && !FMT_MSC_VER +# define FMT_NORETURN [[noreturn]] +#else +# define FMT_NORETURN +#endif + +#ifndef FMT_DEPRECATED +# if (FMT_HAS_CPP_ATTRIBUTE(deprecated) && __cplusplus >= 201402L) || \ + FMT_MSC_VER >= 1900 +# define FMT_DEPRECATED [[deprecated]] +# else +# if defined(__GNUC__) || defined(__clang__) +# define FMT_DEPRECATED __attribute__((deprecated)) +# elif FMT_MSC_VER +# define FMT_DEPRECATED __declspec(deprecated) +# else +# define FMT_DEPRECATED /* deprecated */ +# endif +# endif +#endif + +// Workaround broken [[deprecated]] in the Intel compiler and NVCC. +#if defined(__INTEL_COMPILER) || FMT_NVCC +# define FMT_DEPRECATED_ALIAS +#else +# define FMT_DEPRECATED_ALIAS FMT_DEPRECATED +#endif + +#ifndef FMT_BEGIN_NAMESPACE +# if FMT_HAS_FEATURE(cxx_inline_namespaces) || FMT_GCC_VERSION >= 404 || \ + FMT_MSC_VER >= 1900 +# define FMT_INLINE_NAMESPACE inline namespace +# define FMT_END_NAMESPACE \ + } \ + } +# else +# define FMT_INLINE_NAMESPACE namespace +# define FMT_END_NAMESPACE \ + } \ + using namespace v6; \ + } +# endif +# define FMT_BEGIN_NAMESPACE \ + namespace duckdb_fmt { \ + FMT_INLINE_NAMESPACE v6 { +#endif + +#if !defined(FMT_HEADER_ONLY) && defined(_WIN32) +# ifdef FMT_EXPORT +# define FMT_API __declspec(dllexport) +# elif defined(FMT_SHARED) +# define FMT_API __declspec(dllimport) +# define FMT_EXTERN_TEMPLATE_API FMT_API +# endif +#endif +#ifndef FMT_API +# define FMT_API +#endif +#ifndef FMT_EXTERN_TEMPLATE_API +# define FMT_EXTERN_TEMPLATE_API +#endif + +#ifndef FMT_HEADER_ONLY +# define FMT_EXTERN extern +#else +# define FMT_EXTERN +#endif + +// libc++ supports string_view in pre-c++17. +#if (FMT_HAS_INCLUDE() && \ + (__cplusplus > 201402L || defined(_LIBCPP_VERSION))) || \ + (defined(_MSVC_LANG) && _MSVC_LANG > 201402L && _MSC_VER >= 1910) +# include +# define FMT_USE_STRING_VIEW +#elif FMT_HAS_INCLUDE("experimental/string_view") && __cplusplus >= 201402L +# include +# define FMT_USE_EXPERIMENTAL_STRING_VIEW +#endif + +FMT_BEGIN_NAMESPACE + +// Implementations of enable_if_t and other types for pre-C++14 systems. +template +using enable_if_t = typename std::enable_if::type; +template +using conditional_t = typename std::conditional::type; +template using bool_constant = std::integral_constant; +template +using remove_reference_t = typename std::remove_reference::type; +template +using remove_const_t = typename std::remove_const::type; +template +using remove_cvref_t = typename std::remove_cv>::type; + +struct monostate {}; + +// An enable_if helper to be used in template parameters which results in much +// shorter symbols: https://godbolt.org/z/sWw4vP. Extra parentheses are needed +// to workaround a bug in MSVC 2019 (see #1140 and #1186). +#define FMT_ENABLE_IF(...) enable_if_t<(__VA_ARGS__), int> = 0 + +namespace internal { + +// A workaround for gcc 4.8 to make void_t work in a SFINAE context. +template struct void_t_impl { using type = void; }; + +#ifndef FMT_ASSERT +#define FMT_ASSERT(condition, message) +#endif + +#if defined(FMT_USE_STRING_VIEW) +template using std_string_view = std::basic_string_view; +#elif defined(FMT_USE_EXPERIMENTAL_STRING_VIEW) +template +using std_string_view = std::experimental::basic_string_view; +#else +template struct std_string_view {}; +#endif + +#ifdef FMT_USE_INT128 +// Do nothing. 
+#elif defined(__SIZEOF_INT128__) +# define FMT_USE_INT128 1 +using int128_t = __int128_t; +using uint128_t = __uint128_t; +#else +# define FMT_USE_INT128 0 +#endif +#if !FMT_USE_INT128 +struct int128_t {}; +struct uint128_t {}; +#endif + +// Casts a nonnegative integer to unsigned. +template +FMT_CONSTEXPR typename std::make_unsigned::type to_unsigned(Int value) { + FMT_ASSERT(value >= 0, "negative value"); + return static_cast::type>(value); +} +} // namespace internal + +template +using void_t = typename internal::void_t_impl::type; + +/** + An implementation of ``std::basic_string_view`` for pre-C++17. It provides a + subset of the API. ``fmt::basic_string_view`` is used for format strings even + if ``std::string_view`` is available to prevent issues when a library is + compiled with a different ``-std`` option than the client code (which is not + recommended). + */ +template class basic_string_view { + private: + const Char* data_; + size_t size_; + + public: + using char_type = Char; + using iterator = const Char*; + + FMT_CONSTEXPR basic_string_view() FMT_NOEXCEPT : data_(nullptr), size_(0) {} + + /** Constructs a string reference object from a C string and a size. */ + FMT_CONSTEXPR basic_string_view(const Char* s, size_t count) FMT_NOEXCEPT + : data_(s), + size_(count) {} + + /** + \rst + Constructs a string reference object from a C string computing + the size with ``std::char_traits::length``. + \endrst + */ + basic_string_view(const Char* s) + : data_(s), size_(std::char_traits::length(s)) {} + + /** Constructs a string reference from a ``std::basic_string`` object. */ + template + FMT_CONSTEXPR basic_string_view( + const std::basic_string& s) FMT_NOEXCEPT + : data_(s.data()), + size_(s.size()) {} + + template < + typename S, + FMT_ENABLE_IF(std::is_same>::value)> + FMT_CONSTEXPR basic_string_view(S s) FMT_NOEXCEPT : data_(s.data()), + size_(s.size()) {} + + /** Returns a pointer to the string data. */ + FMT_CONSTEXPR const Char* data() const { return data_; } + + /** Returns the string size. */ + FMT_CONSTEXPR size_t size() const { return size_; } + + FMT_CONSTEXPR iterator begin() const { return data_; } + FMT_CONSTEXPR iterator end() const { return data_ + size_; } + + FMT_CONSTEXPR const Char& operator[](size_t pos) const { return data_[pos]; } + + FMT_CONSTEXPR void remove_prefix(size_t n) { + data_ += n; + size_ -= n; + } + + // Lexicographically compare this string reference to other. + int compare(basic_string_view other) const { + size_t str_size = size_ < other.size_ ? size_ : other.size_; + int result = std::char_traits::compare(data_, other.data_, str_size); + if (result == 0) + result = size_ == other.size_ ? 0 : (size_ < other.size_ ? -1 : 1); + return result; + } + + friend bool operator==(basic_string_view lhs, basic_string_view rhs) { + return lhs.compare(rhs) == 0; + } + friend bool operator!=(basic_string_view lhs, basic_string_view rhs) { + return lhs.compare(rhs) != 0; + } + friend bool operator<(basic_string_view lhs, basic_string_view rhs) { + return lhs.compare(rhs) < 0; + } + friend bool operator<=(basic_string_view lhs, basic_string_view rhs) { + return lhs.compare(rhs) <= 0; + } + friend bool operator>(basic_string_view lhs, basic_string_view rhs) { + return lhs.compare(rhs) > 0; + } + friend bool operator>=(basic_string_view lhs, basic_string_view rhs) { + return lhs.compare(rhs) >= 0; + } +}; + +using string_view = basic_string_view; +using wstring_view = basic_string_view; + +#ifndef __cpp_char8_t +// A UTF-8 code unit type. 
+enum char8_t : unsigned char {}; +#endif + +/** Specifies if ``T`` is a character type. Can be specialized by users. */ +template struct is_char : std::false_type {}; +template <> struct is_char : std::true_type {}; +template <> struct is_char : std::true_type {}; +template <> struct is_char : std::true_type {}; +template <> struct is_char : std::true_type {}; +template <> struct is_char : std::true_type {}; + +/** + \rst + Returns a string view of `s`. In order to add custom string type support to + {fmt} provide an overload of `to_string_view` for it in the same namespace as + the type for the argument-dependent lookup to work. + + **Example**:: + + namespace my_ns { + inline string_view to_string_view(const my_string& s) { + return {s.data(), s.length()}; + } + } + std::string message = fmt::format(my_string("The answer is {}"), 42); + \endrst + */ +template ::value)> +inline basic_string_view to_string_view(const Char* s) { + return s; +} + +template +inline basic_string_view to_string_view( + const std::basic_string& s) { + return s; +} + +template +inline basic_string_view to_string_view(basic_string_view s) { + return s; +} + +template >::value)> +inline basic_string_view to_string_view( + internal::std_string_view s) { + return s; +} + +// A base class for compile-time strings. It is defined in the fmt namespace to +// make formatting functions visible via ADL, e.g. format(fmt("{}"), 42). +struct compile_string {}; + +template +struct is_compile_string : std::is_base_of {}; + +template ::value)> +constexpr basic_string_view to_string_view(const S& s) { + return s; +} + +namespace internal { +void to_string_view(...); +using duckdb_fmt::v6::to_string_view; + +// Specifies whether S is a string type convertible to fmt::basic_string_view. +// It should be a constexpr function but MSVC 2017 fails to compile it in +// enable_if and MSVC 2015 fails to compile it as an alias template. +template +struct is_string : std::is_class()))> { +}; + +template struct char_t_impl {}; +template struct char_t_impl::value>> { + using result = decltype(to_string_view(std::declval())); + using type = typename result::char_type; +}; + +struct error_handler { + FMT_CONSTEXPR error_handler() = default; + FMT_CONSTEXPR error_handler(const error_handler&) = default; + + // This function is intentionally not constexpr to give a compile-time error. + FMT_NORETURN FMT_API void on_error(const char* message); +}; +} // namespace internal + +/** String's character type. */ +template using char_t = typename internal::char_t_impl::type; + +/** + \rst + Parsing context consisting of a format string range being parsed and an + argument counter for automatic indexing. 
+ + You can use one of the following type aliases for common character types: + + +-----------------------+-------------------------------------+ + | Type | Definition | + +=======================+=====================================+ + | format_parse_context | basic_format_parse_context | + +-----------------------+-------------------------------------+ + | wformat_parse_context | basic_format_parse_context | + +-----------------------+-------------------------------------+ + \endrst + */ +template +class basic_format_parse_context : private ErrorHandler { + private: + basic_string_view format_str_; + int next_arg_id_; + + public: + using char_type = Char; + using iterator = typename basic_string_view::iterator; + + explicit FMT_CONSTEXPR basic_format_parse_context( + basic_string_view format_str, ErrorHandler eh = ErrorHandler()) + : ErrorHandler(eh), format_str_(format_str), next_arg_id_(0) {} + + /** + Returns an iterator to the beginning of the format string range being + parsed. + */ + FMT_CONSTEXPR iterator begin() const FMT_NOEXCEPT { + return format_str_.begin(); + } + + /** + Returns an iterator past the end of the format string range being parsed. + */ + FMT_CONSTEXPR iterator end() const FMT_NOEXCEPT { return format_str_.end(); } + + /** Advances the begin iterator to ``it``. */ + FMT_CONSTEXPR void advance_to(iterator it) { + format_str_.remove_prefix(internal::to_unsigned(it - begin())); + } + + /** + Reports an error if using the manual argument indexing; otherwise returns + the next argument index and switches to the automatic indexing. + */ + FMT_CONSTEXPR int next_arg_id() { + if (next_arg_id_ >= 0) return next_arg_id_++; + on_error("cannot switch from manual to automatic argument indexing"); + return 0; + } + + /** + Reports an error if using the automatic argument indexing; otherwise + switches to the manual indexing. + */ + FMT_CONSTEXPR void check_arg_id(int) { + if (next_arg_id_ > 0) + on_error("cannot switch from automatic to manual argument indexing"); + else + next_arg_id_ = -1; + } + + FMT_CONSTEXPR void check_arg_id(basic_string_view) {} + + FMT_CONSTEXPR void on_error(const char* message) { + ErrorHandler::on_error(message); + } + + FMT_CONSTEXPR ErrorHandler error_handler() const { return *this; } +}; + +using format_parse_context = basic_format_parse_context; +using wformat_parse_context = basic_format_parse_context; + +template +using basic_parse_context FMT_DEPRECATED_ALIAS = + basic_format_parse_context; +using parse_context FMT_DEPRECATED_ALIAS = basic_format_parse_context; +using wparse_context FMT_DEPRECATED_ALIAS = basic_format_parse_context; + +template class basic_format_arg; +template class basic_format_args; + +// A formatter for objects of type T. +template +struct formatter { + // A deleted default constructor indicates a disabled formatter. + formatter() = delete; +}; + +template +struct FMT_DEPRECATED convert_to_int + : bool_constant::value && + std::is_convertible::value> {}; + +// Specifies if T has an enabled formatter specialization. A type can be +// formattable even if it doesn't have a formatter e.g. via a conversion. +template +using has_formatter = + std::is_constructible>; + +namespace internal { + +/** A contiguous memory buffer with an optional growing ability. */ +template class buffer { + private: + T* ptr_; + std::size_t size_; + std::size_t capacity_; + + protected: + // Don't initialize ptr_ since it is not accessed to save a few cycles. 
+ buffer(std::size_t sz) FMT_NOEXCEPT : size_(sz), capacity_(sz) {} + + buffer(T* p = nullptr, std::size_t sz = 0, std::size_t cap = 0) FMT_NOEXCEPT + : ptr_(p), + size_(sz), + capacity_(cap) {} + + /** Sets the buffer data and capacity. */ + void set(T* buf_data, std::size_t buf_capacity) FMT_NOEXCEPT { + ptr_ = buf_data; + capacity_ = buf_capacity; + } + + /** Increases the buffer capacity to hold at least *capacity* elements. */ + virtual void grow(std::size_t capacity) = 0; + + public: + using value_type = T; + using const_reference = const T&; + + buffer(const buffer&) = delete; + void operator=(const buffer&) = delete; + virtual ~buffer() = default; + + T* begin() FMT_NOEXCEPT { return ptr_; } + T* end() FMT_NOEXCEPT { return ptr_ + size_; } + + /** Returns the size of this buffer. */ + std::size_t size() const FMT_NOEXCEPT { return size_; } + + /** Returns the capacity of this buffer. */ + std::size_t capacity() const FMT_NOEXCEPT { return capacity_; } + + /** Returns a pointer to the buffer data. */ + T* data() FMT_NOEXCEPT { return ptr_; } + + /** Returns a pointer to the buffer data. */ + const T* data() const FMT_NOEXCEPT { return ptr_; } + + /** + Resizes the buffer. If T is a POD type new elements may not be initialized. + */ + void resize(std::size_t new_size) { + reserve(new_size); + size_ = new_size; + } + + /** Clears this buffer. */ + void clear() { size_ = 0; } + + /** Reserves space to store at least *capacity* elements. */ + void reserve(std::size_t new_capacity) { + if (new_capacity > capacity_) grow(new_capacity); + } + + void push_back(const T& value) { + reserve(size_ + 1); + ptr_[size_++] = value; + } + + /** Appends data to the end of the buffer. */ + template void append(const U* begin, const U* end); + + T& operator[](std::size_t index) { return ptr_[index]; } + const T& operator[](std::size_t index) const { return ptr_[index]; } +}; + +// A container-backed buffer. +template +class container_buffer : public buffer { + private: + Container& container_; + + protected: + void grow(std::size_t capacity) FMT_OVERRIDE { + container_.resize(capacity); + this->set(&container_[0], capacity); + } + + public: + explicit container_buffer(Container& c) + : buffer(c.size()), container_(c) {} +}; + +// Extracts a reference to the container from back_insert_iterator. +template +inline Container& get_container(std::back_insert_iterator it) { + using bi_iterator = std::back_insert_iterator; + struct accessor : bi_iterator { + accessor(bi_iterator iter) : bi_iterator(iter) {} + using bi_iterator::container; + }; + return *accessor(it).container; +} + +template +struct fallback_formatter { + fallback_formatter() = delete; +}; + +// Specifies if T has an enabled fallback_formatter specialization. +template +using has_fallback_formatter = + std::is_constructible>; + +template struct named_arg_base; +template struct named_arg; + +enum type { + none_type, + named_arg_type, + // Integer types should go first, + int_type, + uint_type, + long_long_type, + ulong_long_type, + int128_type, + uint128_type, + bool_type, + char_type, + last_integer_type = char_type, + // followed by floating-point types. + float_type, + double_type, + long_double_type, + last_numeric_type = long_double_type, + cstring_type, + string_type, + pointer_type, + custom_type +}; + +// Maps core type T to the corresponding type enum constant. 
+template +struct type_constant : std::integral_constant {}; + +#define FMT_TYPE_CONSTANT(Type, constant) \ + template \ + struct type_constant : std::integral_constant {} + +FMT_TYPE_CONSTANT(const named_arg_base&, named_arg_type); +FMT_TYPE_CONSTANT(int, int_type); +FMT_TYPE_CONSTANT(unsigned, uint_type); +FMT_TYPE_CONSTANT(long long, long_long_type); +FMT_TYPE_CONSTANT(unsigned long long, ulong_long_type); +FMT_TYPE_CONSTANT(int128_t, int128_type); +FMT_TYPE_CONSTANT(uint128_t, uint128_type); +FMT_TYPE_CONSTANT(bool, bool_type); +FMT_TYPE_CONSTANT(Char, char_type); +FMT_TYPE_CONSTANT(float, float_type); +FMT_TYPE_CONSTANT(double, double_type); +FMT_TYPE_CONSTANT(long double, long_double_type); +FMT_TYPE_CONSTANT(const Char*, cstring_type); +FMT_TYPE_CONSTANT(basic_string_view, string_type); +FMT_TYPE_CONSTANT(const void*, pointer_type); + +FMT_CONSTEXPR bool is_integral_type(type t) { + FMT_ASSERT(t != named_arg_type, "invalid argument type"); + return t > none_type && t <= last_integer_type; +} + +FMT_CONSTEXPR bool is_arithmetic_type(type t) { + FMT_ASSERT(t != named_arg_type, "invalid argument type"); + return t > none_type && t <= last_numeric_type; +} + +template struct string_value { + const Char* data; + std::size_t size; +}; + +template struct custom_value { + using parse_context = basic_format_parse_context; + const void* value; + void (*format)(const void* arg, parse_context& parse_ctx, Context& ctx); +}; + +// A formatting argument value. +template class value { + public: + using char_type = typename Context::char_type; + + union { + int int_value; + unsigned uint_value; + long long long_long_value; + unsigned long long ulong_long_value; + int128_t int128_value; + uint128_t uint128_value; + bool bool_value; + char_type char_value; + float float_value; + double double_value; + long double long_double_value; + const void* pointer; + string_value string; + custom_value custom; + const named_arg_base* named_arg; + }; + + FMT_CONSTEXPR value(int val = 0) : int_value(val) {} + FMT_CONSTEXPR value(unsigned val) : uint_value(val) {} + value(long long val) : long_long_value(val) {} + value(unsigned long long val) : ulong_long_value(val) {} + value(int128_t val) : int128_value(val) {} + value(uint128_t val) : uint128_value(val) {} + value(float val) : float_value(val) {} + value(double val) : double_value(val) {} + value(long double val) : long_double_value(val) {} + value(bool val) : bool_value(val) {} + value(char_type val) : char_value(val) {} + value(const char_type* val) { string.data = val; } + value(basic_string_view val) { + string.data = val.data(); + string.size = val.size(); + } + value(const void* val) : pointer(val) {} + + template value(const T& val) { + custom.value = &val; + // Get the formatter type through the context to allow different contexts + // have different extension points, e.g. `formatter` for `format` and + // `printf_formatter` for `printf`. + custom.format = format_custom_arg< + T, conditional_t::value, + typename Context::template formatter_type, + fallback_formatter>>; + } + + value(const named_arg_base& val) { named_arg = &val; } + + private: + // Formats an argument of a custom type, such as a user-defined class. 
+ template + static void format_custom_arg( + const void* arg, basic_format_parse_context& parse_ctx, + Context& ctx) { + Formatter f; + parse_ctx.advance_to(f.parse(parse_ctx)); + ctx.advance_to(f.format(*static_cast(arg), ctx)); + } +}; + +template +FMT_CONSTEXPR basic_format_arg make_arg(const T& value); + +// To minimize the number of types we need to deal with, long is translated +// either to int or to long long depending on its size. +enum { long_short = sizeof(long) == sizeof(int) }; +using long_type = conditional_t; +using ulong_type = conditional_t; + +// Maps formatting arguments to core types. +template struct arg_mapper { + using char_type = typename Context::char_type; + + FMT_CONSTEXPR int map(signed char val) { return val; } + FMT_CONSTEXPR unsigned map(unsigned char val) { return val; } + FMT_CONSTEXPR int map(short val) { return val; } + FMT_CONSTEXPR unsigned map(unsigned short val) { return val; } + FMT_CONSTEXPR int map(int val) { return val; } + FMT_CONSTEXPR unsigned map(unsigned val) { return val; } + FMT_CONSTEXPR long_type map(long val) { return val; } + FMT_CONSTEXPR ulong_type map(unsigned long val) { return val; } + FMT_CONSTEXPR long long map(long long val) { return val; } + FMT_CONSTEXPR unsigned long long map(unsigned long long val) { return val; } + FMT_CONSTEXPR int128_t map(int128_t val) { return val; } + FMT_CONSTEXPR uint128_t map(uint128_t val) { return val; } + FMT_CONSTEXPR bool map(bool val) { return val; } + + template ::value)> + FMT_CONSTEXPR char_type map(T val) { + static_assert( + std::is_same::value || std::is_same::value, + "mixing character types is disallowed"); + return val; + } + + FMT_CONSTEXPR float map(float val) { return val; } + FMT_CONSTEXPR double map(double val) { return val; } + FMT_CONSTEXPR long double map(long double val) { return val; } + + FMT_CONSTEXPR const char_type* map(char_type* val) { return val; } + FMT_CONSTEXPR const char_type* map(const char_type* val) { return val; } + template ::value)> + FMT_CONSTEXPR basic_string_view map(const T& val) { + static_assert(std::is_same>::value, + "mixing character types is disallowed"); + return to_string_view(val); + } + template , T>::value && + !is_string::value)> + FMT_CONSTEXPR basic_string_view map(const T& val) { + return basic_string_view(val); + } + template < + typename T, + FMT_ENABLE_IF( + std::is_constructible, T>::value && + !std::is_constructible, T>::value && + !is_string::value && !has_formatter::value)> + FMT_CONSTEXPR basic_string_view map(const T& val) { + return std_string_view(val); + } + FMT_CONSTEXPR const char* map(const signed char* val) { + static_assert(std::is_same::value, "invalid string type"); + return reinterpret_cast(val); + } + FMT_CONSTEXPR const char* map(const unsigned char* val) { + static_assert(std::is_same::value, "invalid string type"); + return reinterpret_cast(val); + } + + FMT_CONSTEXPR const void* map(void* val) { return val; } + FMT_CONSTEXPR const void* map(const void* val) { return val; } + FMT_CONSTEXPR const void* map(std::nullptr_t val) { return val; } + template FMT_CONSTEXPR int map(const T*) { + // Formatting of arbitrary pointers is disallowed. If you want to output + // a pointer cast it to "void *" or "const void *". In particular, this + // forbids formatting of "[const] volatile char *" which is printed as bool + // by iostreams. 
+ static_assert(!sizeof(T), "formatting of non-void pointers is disallowed"); + return 0; + } + + template ::value && + !has_formatter::value && + !has_fallback_formatter::value)> + FMT_CONSTEXPR auto map(const T& val) -> decltype( + map(static_cast::type>(val))) { + return map(static_cast::type>(val)); + } + template < + typename T, + FMT_ENABLE_IF( + !is_string::value && !is_char::value && + !std::is_constructible, T>::value && + (has_formatter::value || + (has_fallback_formatter::value && + !std::is_constructible, T>::value)))> + FMT_CONSTEXPR const T& map(const T& val) { + return val; + } + + template + FMT_CONSTEXPR const named_arg_base& map( + const named_arg& val) { + auto arg = make_arg(val.value); + std::memcpy(val.data, &arg, sizeof(arg)); + return val; + } +}; + +// A type constant after applying arg_mapper. +template +using mapped_type_constant = + type_constant().map(std::declval())), + typename Context::char_type>; + +enum { packed_arg_bits = 5 }; +// Maximum number of arguments with packed types. +enum { max_packed_args = 63 / packed_arg_bits }; +enum : unsigned long long { is_unpacked_bit = 1ULL << 63 }; + +template class arg_map; +} // namespace internal + +// A formatting argument. It is a trivially copyable/constructible type to +// allow storage in basic_memory_buffer. +template class basic_format_arg { + private: + internal::value value_; + internal::type type_; + + template + friend FMT_CONSTEXPR basic_format_arg internal::make_arg( + const T& value); + + template + friend FMT_CONSTEXPR auto visit_format_arg(Visitor&& vis, + const basic_format_arg& arg) + -> decltype(vis(0)); + + friend class basic_format_args; + friend class internal::arg_map; + + using char_type = typename Context::char_type; + + public: + class handle { + public: + explicit handle(internal::custom_value custom) : custom_(custom) {} + + void format(basic_format_parse_context& parse_ctx, + Context& ctx) const { + custom_.format(custom_.value, parse_ctx, ctx); + } + + private: + internal::custom_value custom_; + }; + + FMT_CONSTEXPR basic_format_arg() : type_(internal::none_type) {} + + FMT_CONSTEXPR explicit operator bool() const FMT_NOEXCEPT { + return type_ != internal::none_type; + } + + internal::type type() const { return type_; } + + bool is_integral() const { return internal::is_integral_type(type_); } + bool is_arithmetic() const { return internal::is_arithmetic_type(type_); } +}; + +/** + \rst + Visits an argument dispatching to the appropriate visit method based on + the argument type. For example, if the argument type is ``double`` then + ``vis(value)`` will be called with the value of type ``double``. 
+ \endrst + */ +template +FMT_CONSTEXPR auto visit_format_arg(Visitor&& vis, + const basic_format_arg& arg) + -> decltype(vis(0)) { + using char_type = typename Context::char_type; + switch (arg.type_) { + case internal::none_type: + break; + case internal::named_arg_type: + FMT_ASSERT(false, "invalid argument type"); + break; + case internal::int_type: + return vis(arg.value_.int_value); + case internal::uint_type: + return vis(arg.value_.uint_value); + case internal::long_long_type: + return vis(arg.value_.long_long_value); + case internal::ulong_long_type: + return vis(arg.value_.ulong_long_value); +#if FMT_USE_INT128 + case internal::int128_type: + return vis(arg.value_.int128_value); + case internal::uint128_type: + return vis(arg.value_.uint128_value); +#else + case internal::int128_type: + case internal::uint128_type: + break; +#endif + case internal::bool_type: + return vis(arg.value_.bool_value); + case internal::char_type: + return vis(arg.value_.char_value); + case internal::float_type: + return vis(arg.value_.float_value); + case internal::double_type: + return vis(arg.value_.double_value); + case internal::long_double_type: + return vis(arg.value_.long_double_value); + case internal::cstring_type: + return vis(arg.value_.string.data); + case internal::string_type: + return vis(basic_string_view(arg.value_.string.data, + arg.value_.string.size)); + case internal::pointer_type: + return vis(arg.value_.pointer); + case internal::custom_type: + return vis(typename basic_format_arg::handle(arg.value_.custom)); + } + return vis(monostate()); +} + +namespace internal { +// A map from argument names to their values for named arguments. +template class arg_map { + private: + using char_type = typename Context::char_type; + + struct entry { + basic_string_view name; + basic_format_arg arg; + }; + + entry* map_; + unsigned size_; + + void push_back(value val) { + const auto& named = *val.named_arg; + map_[size_] = {named.name, named.template deserialize()}; + ++size_; + } + + public: + arg_map(const arg_map&) = delete; + void operator=(const arg_map&) = delete; + arg_map() : map_(nullptr), size_(0) {} + void init(const basic_format_args& args); + ~arg_map() { delete[] map_; } + + basic_format_arg find(basic_string_view name) const { + // The list is unsorted, so just return the first matching name. + for (entry *it = map_, *end = map_ + size_; it != end; ++it) { + if (it->name == name) return it->arg; + } + return {}; + } +}; + +// A type-erased reference to an std::locale to avoid heavy include. +class locale_ref { + private: + const void* locale_; // A type-erased pointer to std::locale. + + public: + locale_ref() : locale_(nullptr) {} + template explicit locale_ref(const Locale& loc); + + explicit operator bool() const FMT_NOEXCEPT { return locale_ != nullptr; } + + template Locale get() const; +}; + +template constexpr unsigned long long encode_types() { return 0; } + +template +constexpr unsigned long long encode_types() { + return mapped_type_constant::value | + (encode_types() << packed_arg_bits); +} + +template +FMT_CONSTEXPR basic_format_arg make_arg(const T& value) { + basic_format_arg arg; + arg.type_ = mapped_type_constant::value; + arg.value_ = arg_mapper().map(value); + return arg; +} + +template +inline value make_arg(const T& val) { + return arg_mapper().map(val); +} + +template +inline basic_format_arg make_arg(const T& value) { + return make_arg(value); +} +} // namespace internal + +// Formatting context. 
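The switch in visit_format_arg above is the classic visitor over a tagged union. A minimal standalone sketch of the same pattern, with made-up names and only three argument types:

#include <cstdio>

enum class arg_type { none, int_type, double_type, cstring_type };

struct arg_value {
  arg_type type = arg_type::none;
  union {
    int int_value;
    double double_value;
    const char* string_value;
  };
};

// Dispatch to the visitor overload that matches the runtime type tag.
template <typename Visitor>
auto visit_arg(Visitor&& vis, const arg_value& arg) -> decltype(vis(0)) {
  switch (arg.type) {
    case arg_type::int_type:     return vis(arg.int_value);
    case arg_type::double_type:  return vis(arg.double_value);
    case arg_type::cstring_type: return vis(arg.string_value);
    case arg_type::none:         break;
  }
  return vis(0);  // stands in for the monostate() fallback above
}

struct printer {
  void operator()(int v) const         { std::printf("int: %d\n", v); }
  void operator()(double v) const      { std::printf("double: %g\n", v); }
  void operator()(const char* v) const { std::printf("cstring: %s\n", v); }
};

int main() {
  arg_value a;
  a.type = arg_type::double_type;
  a.double_value = 2.5;
  visit_arg(printer{}, a);  // prints "double: 2.5"
}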
+template class basic_format_context { + public: + /** The character type for the output. */ + using char_type = Char; + + private: + OutputIt out_; + basic_format_args args_; + internal::arg_map map_; + internal::locale_ref loc_; + + public: + using iterator = OutputIt; + using format_arg = basic_format_arg; + template using formatter_type = formatter; + + basic_format_context(const basic_format_context&) = delete; + void operator=(const basic_format_context&) = delete; + /** + Constructs a ``basic_format_context`` object. References to the arguments are + stored in the object so make sure they have appropriate lifetimes. + */ + basic_format_context(OutputIt out, + basic_format_args ctx_args, + internal::locale_ref loc = internal::locale_ref()) + : out_(out), args_(ctx_args), loc_(loc) {} + + format_arg arg(int id) const { return args_.get(id); } + + // Checks if manual indexing is used and returns the argument with the + // specified name. + format_arg arg(basic_string_view name); + + internal::error_handler error_handler() { return {}; } + void on_error(const char* message) { error_handler().on_error(message); } + + // Returns an iterator to the beginning of the output range. + iterator out() { return out_; } + + // Advances the begin iterator to ``it``. + void advance_to(iterator it) { out_ = it; } + + internal::locale_ref locale() { return loc_; } +}; + +template +using buffer_context = + basic_format_context>, + Char>; +using format_context = buffer_context; +using wformat_context = buffer_context; + +/** + \rst + An array of references to arguments. It can be implicitly converted into + `~fmt::basic_format_args` for passing into type-erased formatting functions + such as `~fmt::vformat`. + \endrst + */ +template class format_arg_store { + private: + static const size_t num_args = sizeof...(Args); + static const bool is_packed = num_args < internal::max_packed_args; + + using value_type = conditional_t, + basic_format_arg>; + + // If the arguments are not packed, add one more element to mark the end. + value_type data_[num_args + (num_args == 0 ? 1 : 0)]; + + friend class basic_format_args; + + public: + static constexpr unsigned long long types = + is_packed ? internal::encode_types() + : internal::is_unpacked_bit | num_args; + + format_arg_store(const Args&... args) + : data_{internal::make_arg(args)...} {} +}; + +/** + \rst + Constructs an `~fmt::format_arg_store` object that contains references to + arguments and can be implicitly converted to `~fmt::format_args`. `Context` + can be omitted in which case it defaults to `~fmt::context`. + See `~fmt::arg` for lifetime considerations. + \endrst + */ +template +inline format_arg_store make_format_args( + const Args&... args) { + return {args...}; +} + +/** Formatting arguments. */ +template class basic_format_args { + public: + using size_type = int; + using format_arg = basic_format_arg; + + private: + // To reduce compiled code size per formatting function call, types of first + // max_packed_args arguments are passed in the types_ field. + unsigned long long types_; + union { + // If the number of arguments is less than max_packed_args, the argument + // values are stored in values_, otherwise they are stored in args_. + // This is done to reduce compiled code size as storing larger objects + // may require more code (at least on x86-64) even if the same amount of + // data is actually copied to stack. It saves ~10% on the bloat test. 
+ const internal::value* values_; + const format_arg* args_; + }; + + bool is_packed() const { return (types_ & internal::is_unpacked_bit) == 0; } + + internal::type type(int index) const { + int shift = index * internal::packed_arg_bits; + unsigned int mask = (1 << internal::packed_arg_bits) - 1; + return static_cast((types_ >> shift) & mask); + } + + friend class internal::arg_map; + + void set_data(const internal::value* values) { values_ = values; } + void set_data(const format_arg* args) { args_ = args; } + + format_arg do_get(int index) const { + format_arg arg; + if (!is_packed()) { + auto num_args = max_size(); + if (index < num_args) arg = args_[index]; + return arg; + } + if (index > internal::max_packed_args) return arg; + arg.type_ = type(index); + if (arg.type_ == internal::none_type) return arg; + internal::value& val = arg.value_; + val = values_[index]; + return arg; + } + + public: + basic_format_args() : types_(0) {} + + /** + \rst + Constructs a `basic_format_args` object from `~fmt::format_arg_store`. + \endrst + */ + template + basic_format_args(const format_arg_store& store) + : types_(store.types) { + set_data(store.data_); + } + + /** + \rst + Constructs a `basic_format_args` object from a dynamic set of arguments. + \endrst + */ + basic_format_args(const format_arg* args, int count) + : types_(internal::is_unpacked_bit | internal::to_unsigned(count)) { + set_data(args); + } + + /** Returns the argument at specified index. */ + format_arg get(int index) const { + format_arg arg = do_get(index); + if (arg.type_ == internal::named_arg_type) + arg = arg.value_.named_arg->template deserialize(); + return arg; + } + + int max_size() const { + unsigned long long max_packed = internal::max_packed_args; + return static_cast(is_packed() ? max_packed + : types_ & ~internal::is_unpacked_bit); + } +}; + +/** An alias to ``basic_format_args``. */ +// It is a separate type rather than an alias to make symbols readable. +struct format_args : basic_format_args { + template + format_args(Args&&... args) + : basic_format_args(std::forward(args)...) {} +}; +struct wformat_args : basic_format_args { + template + wformat_args(Args&&... args) + : basic_format_args(std::forward(args)...) {} +}; + +template struct is_contiguous : std::false_type {}; + +template +struct is_contiguous> : std::true_type {}; + +template +struct is_contiguous> : std::true_type {}; + +namespace internal { + +template +struct is_contiguous_back_insert_iterator : std::false_type {}; +template +struct is_contiguous_back_insert_iterator> + : is_contiguous {}; + +template struct named_arg_base { + basic_string_view name; + + // Serialized value. 
+ mutable char data[sizeof(basic_format_arg>)]; + + named_arg_base(basic_string_view nm) : name(nm) {} + + template basic_format_arg deserialize() const { + basic_format_arg arg; + std::memcpy(&arg, data, sizeof(basic_format_arg)); + return arg; + } +}; + +template struct named_arg : named_arg_base { + const T& value; + + named_arg(basic_string_view name, const T& val) + : named_arg_base(name), value(val) {} +}; + +template ::value)> +inline void check_format_string(const S&) { +#if defined(FMT_ENFORCE_COMPILE_STRING) + static_assert(is_compile_string::value, + "FMT_ENFORCE_COMPILE_STRING requires all format strings to " + "utilize FMT_STRING() or fmt()."); +#endif +} +template ::value)> +void check_format_string(S); + +struct view {}; +template struct bool_pack; +template +using all_true = + std::is_same, bool_pack>; + +template > +inline format_arg_store, remove_reference_t...> +make_args_checked(const S& format_str, + const remove_reference_t&... args) { + static_assert(all_true<(!std::is_base_of>() || + !std::is_reference())...>::value, + "passing views as lvalues is disallowed"); + check_format_string>...>(format_str); + return {args...}; +} + +template +std::basic_string vformat(basic_string_view format_str, + basic_format_args> args); + +template +typename buffer_context::iterator vformat_to( + buffer& buf, basic_string_view format_str, + basic_format_args> args); +} // namespace internal + +/** + \rst + Returns a named argument to be used in a formatting function. + + The named argument holds a reference and does not extend the lifetime + of its arguments. + Consequently, a dangling reference can accidentally be created. + The user should take care to only pass this function temporaries when + the named argument is itself a temporary, as per the following example. + + **Example**:: + + fmt::print("Elapsed time: {s:.2f} seconds", fmt::arg("s", 1.23)); + \endrst + */ +template > +inline internal::named_arg arg(const S& name, const T& arg) { + static_assert(internal::is_string::value, ""); + return {name, arg}; +} + +// Disable nested named arguments, e.g. ``arg("a", arg("b", 42))``. +template +void arg(S, internal::named_arg) = delete; + +/** Formats a string and writes the output to ``out``. */ +// GCC 8 and earlier cannot handle std::back_insert_iterator with +// vformat_to(...) overload, so SFINAE on iterator type instead. +template , + FMT_ENABLE_IF( + internal::is_contiguous_back_insert_iterator::value)> +OutputIt vformat_to(OutputIt out, const S& format_str, + basic_format_args> args) { + using container = remove_reference_t; + internal::container_buffer buf((internal::get_container(out))); + internal::vformat_to(buf, to_string_view(format_str), args); + return out; +} + +template ::value&& internal::is_string::value)> +inline std::back_insert_iterator format_to( + std::back_insert_iterator out, const S& format_str, + Args&&... args) { + return vformat_to( + out, to_string_view(format_str), + {internal::make_args_checked(format_str, args...)}); +} + +template > +inline std::basic_string vformat( + const S& format_str, basic_format_args> args) { + return internal::vformat(to_string_view(format_str), args); +} + +/** + \rst + Formats arguments and returns the result as a string. + + **Example**:: + + #include + std::string message = fmt::format("The answer is {}", 42); + \endrst +*/ +// Pass char_t as a default template parameter instead of using +// std::basic_string> to reduce the symbol size. +template > +inline std::basic_string format(const S& format_str, Args&&... 
args) { + return internal::vformat( + to_string_view(format_str), + {internal::make_args_checked(format_str, args...)}); +} + +FMT_END_NAMESPACE + +#endif // FMT_CORE_H_ + + +// LICENSE_CHANGE_END + + +#include +#include +#include +#include +#include +#include +#include + +#ifdef __clang__ +# define FMT_CLANG_VERSION (__clang_major__ * 100 + __clang_minor__) +#else +# define FMT_CLANG_VERSION 0 +#endif + +#ifdef __INTEL_COMPILER +# define FMT_ICC_VERSION __INTEL_COMPILER +#elif defined(__ICL) +# define FMT_ICC_VERSION __ICL +#else +# define FMT_ICC_VERSION 0 +#endif + +#ifdef __NVCC__ +# define FMT_CUDA_VERSION (__CUDACC_VER_MAJOR__ * 100 + __CUDACC_VER_MINOR__) +#else +# define FMT_CUDA_VERSION 0 +#endif + +#ifdef __has_builtin +# define FMT_HAS_BUILTIN(x) __has_builtin(x) +#else +# define FMT_HAS_BUILTIN(x) 0 +#endif + +#if FMT_HAS_CPP_ATTRIBUTE(fallthrough) && \ + (__cplusplus >= 201703 || FMT_GCC_VERSION != 0) +# define FMT_FALLTHROUGH [[fallthrough]] +#else +# define FMT_FALLTHROUGH +#endif + +#ifndef FMT_THROW +# if FMT_EXCEPTIONS +# if FMT_MSC_VER +FMT_BEGIN_NAMESPACE +namespace internal { +template inline void do_throw(const Exception& x) { + // Silence unreachable code warnings in MSVC because these are nearly + // impossible to fix in a generic code. + volatile bool b = true; + if (b) throw x; +} +} // namespace internal +FMT_END_NAMESPACE +# define FMT_THROW(x) internal::do_throw(x) +# else +# define FMT_THROW(x) throw x +# endif +# else +# define FMT_THROW(x) \ + do { \ + static_cast(sizeof(x)); \ + FMT_ASSERT(false, ""); \ + } while (false) +# endif +#endif + +#ifndef FMT_USE_USER_DEFINED_LITERALS +// For Intel and NVIDIA compilers both they and the system gcc/msc support UDLs. +# if (FMT_HAS_FEATURE(cxx_user_literals) || FMT_GCC_VERSION >= 407 || \ + FMT_MSC_VER >= 1900) && \ + (!(FMT_ICC_VERSION || FMT_CUDA_VERSION) || FMT_ICC_VERSION >= 1500 || \ + FMT_CUDA_VERSION >= 700) +# define FMT_USE_USER_DEFINED_LITERALS 1 +# else +# define FMT_USE_USER_DEFINED_LITERALS 0 +# endif +#endif + +#ifndef FMT_USE_UDL_TEMPLATE +#define FMT_USE_UDL_TEMPLATE 0 +#endif + +// __builtin_clz is broken in clang with Microsoft CodeGen: +// https://github.com/fmtlib/fmt/issues/519 +#if (FMT_GCC_VERSION || FMT_HAS_BUILTIN(__builtin_clz)) && !FMT_MSC_VER +# define FMT_BUILTIN_CLZ(n) __builtin_clz(n) +#endif +#if (FMT_GCC_VERSION || FMT_HAS_BUILTIN(__builtin_clzll)) && !FMT_MSC_VER +# define FMT_BUILTIN_CLZLL(n) __builtin_clzll(n) +#endif + +// Some compilers masquerade as both MSVC and GCC-likes or otherwise support +// __builtin_clz and __builtin_clzll, so only define FMT_BUILTIN_CLZ using the +// MSVC intrinsics if the clz and clzll builtins are not available. +#if FMT_MSC_VER && !defined(FMT_BUILTIN_CLZLL) && !defined(_MANAGED) +# include // _BitScanReverse, _BitScanReverse64 + +FMT_BEGIN_NAMESPACE +namespace internal { +// Avoid Clang with Microsoft CodeGen's -Wunknown-pragmas warning. +# ifndef __clang__ +# pragma intrinsic(_BitScanReverse) +# endif +inline uint32_t clz(uint32_t x) { + unsigned long r = 0; + _BitScanReverse(&r, x); + + FMT_ASSERT(x != 0, ""); + // Static analysis complains about using uninitialized data + // "r", but the only way that can happen is if "x" is 0, + // which the callers guarantee to not happen. 
+# pragma warning(suppress : 6102) + return 31 - r; +} +# define FMT_BUILTIN_CLZ(n) internal::clz(n) + +# if defined(_WIN64) && !defined(__clang__) +# pragma intrinsic(_BitScanReverse64) +# endif + +inline uint32_t clzll(uint64_t x) { + unsigned long r = 0; +# ifdef _WIN64 + _BitScanReverse64(&r, x); +# else + // Scan the high 32 bits. + if (_BitScanReverse(&r, static_cast(x >> 32))) return 63 - (r + 32); + + // Scan the low 32 bits. + _BitScanReverse(&r, static_cast(x)); +# endif + + FMT_ASSERT(x != 0, ""); + // Static analysis complains about using uninitialized data + // "r", but the only way that can happen is if "x" is 0, + // which the callers guarantee to not happen. +# pragma warning(suppress : 6102) + return 63 - r; +} +# define FMT_BUILTIN_CLZLL(n) internal::clzll(n) +} // namespace internal +FMT_END_NAMESPACE +#endif + +// Enable the deprecated numeric alignment. +#ifndef FMT_NUMERIC_ALIGN +# define FMT_NUMERIC_ALIGN 1 +#endif + +// Enable the deprecated percent specifier. +#ifndef FMT_DEPRECATED_PERCENT +# define FMT_DEPRECATED_PERCENT 0 +#endif + +FMT_BEGIN_NAMESPACE +namespace internal { + +// A helper function to suppress bogus "conditional expression is constant" +// warnings. +template inline T const_check(T value) { return value; } + +// An equivalent of `*reinterpret_cast(&source)` that doesn't have +// undefined behavior (e.g. due to type aliasing). +// Example: uint64_t d = bit_cast(2.718); +template +inline Dest bit_cast(const Source& source) { + static_assert(sizeof(Dest) == sizeof(Source), "size mismatch"); + Dest dest; + std::memcpy(&dest, &source, sizeof(dest)); + return dest; +} + +inline bool is_big_endian() { + auto u = 1u; + struct bytes { + char data[sizeof(u)]; + }; + return bit_cast(u).data[0] == 0; +} + +// A fallback implementation of uintptr_t for systems that lack it. +struct fallback_uintptr { + unsigned char value[sizeof(void*)]; + + fallback_uintptr() = default; + explicit fallback_uintptr(const void* p) { + *this = bit_cast(p); + if (is_big_endian()) { + for (size_t i = 0, j = sizeof(void*) - 1; i < j; ++i, --j) + std::swap(value[i], value[j]); + } + } +}; +#ifdef UINTPTR_MAX +using uintptr_t = ::uintptr_t; +inline uintptr_t to_uintptr(const void* p) { return bit_cast(p); } +#else +using uintptr_t = fallback_uintptr; +inline fallback_uintptr to_uintptr(const void* p) { + return fallback_uintptr(p); +} +#endif + +// Returns the largest possible value for type T. Same as +// std::numeric_limits::max() but shorter and not affected by the max macro. +template constexpr T max_value() { + return (std::numeric_limits::max)(); +} +template constexpr int num_bits() { + return std::numeric_limits::digits; +} +template <> constexpr int num_bits() { + return static_cast(sizeof(void*) * + std::numeric_limits::digits); +} + +// An approximation of iterator_t for pre-C++20 systems. +template +using iterator_t = decltype(std::begin(std::declval())); + +// Detect the iterator category of *any* given type in a SFINAE-friendly way. +// Unfortunately, older implementations of std::iterator_traits are not safe +// for use in a SFINAE-context. +template +struct iterator_category : std::false_type {}; + +template struct iterator_category { + using type = std::random_access_iterator_tag; +}; + +template +struct iterator_category> { + using type = typename It::iterator_category; +}; + +// Detect if *any* given type models the OutputIterator concept. 
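A standalone sketch of how bit_cast above is typically used, for example to inspect the IEEE-754 representation of a double or to probe endianness without violating strict aliasing. Names ending in _sketch are illustration-only.

#include <cstdint>
#include <cstdio>
#include <cstring>

// memcpy-based type punning, mirroring internal::bit_cast above.
template <typename Dest, typename Source>
Dest bit_cast_sketch(const Source& source) {
  static_assert(sizeof(Dest) == sizeof(Source), "size mismatch");
  Dest dest;
  std::memcpy(&dest, &source, sizeof(dest));
  return dest;
}

int main() {
  // Inspect the bit pattern of a double, the use case named in the comment above.
  std::uint64_t bits = bit_cast_sketch<std::uint64_t>(2.718);
  std::printf("0x%016llx\n", static_cast<unsigned long long>(bits));

  // Endianness probe in the spirit of is_big_endian().
  struct bytes { char data[sizeof(unsigned)]; };
  bool little_endian = bit_cast_sketch<bytes>(1u).data[0] == 1;
  std::printf("little endian: %d\n", little_endian);
}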
+template class is_output_iterator { + // Check for mutability because all iterator categories derived from + // std::input_iterator_tag *may* also meet the requirements of an + // OutputIterator, thereby falling into the category of 'mutable iterators' + // [iterator.requirements.general] clause 4. The compiler reveals this + // property only at the point of *actually dereferencing* the iterator! + template + static decltype(*(std::declval())) test(std::input_iterator_tag); + template static char& test(std::output_iterator_tag); + template static const char& test(...); + + using type = decltype(test(typename iterator_category::type{})); + + public: + static const bool value = !std::is_const>::value; +}; + +// A workaround for std::string not having mutable data() until C++17. +template inline Char* get_data(std::basic_string& s) { + return &s[0]; +} +template +inline typename Container::value_type* get_data(Container& c) { + return c.data(); +} + +#ifdef _SECURE_SCL +// Make a checked iterator to avoid MSVC warnings. +template using checked_ptr = stdext::checked_array_iterator; +template checked_ptr make_checked(T* p, std::size_t size) { + return {p, size}; +} +#else +template using checked_ptr = T*; +template inline T* make_checked(T* p, std::size_t) { return p; } +#endif + +template ::value)> +inline checked_ptr reserve( + std::back_insert_iterator& it, std::size_t n) { + Container& c = get_container(it); + std::size_t size = c.size(); + c.resize(size + n); + return make_checked(get_data(c) + size, n); +} + +template +inline Iterator& reserve(Iterator& it, std::size_t) { + return it; +} + +// An output iterator that counts the number of objects written to it and +// discards them. +class counting_iterator { + private: + std::size_t count_; + + public: + using iterator_category = std::output_iterator_tag; + using difference_type = std::ptrdiff_t; + using pointer = void; + using reference = void; + using _Unchecked_type = counting_iterator; // Mark iterator as checked. + + struct value_type { + template void operator=(const T&) {} + }; + + counting_iterator() : count_(0) {} + + std::size_t count() const { return count_; } + + counting_iterator& operator++() { + ++count_; + return *this; + } + + counting_iterator operator++(int) { + auto it = *this; + ++*this; + return it; + } + + value_type operator*() const { return {}; } +}; + +template class truncating_iterator_base { + protected: + OutputIt out_; + std::size_t limit_; + std::size_t count_; + + truncating_iterator_base(OutputIt out, std::size_t limit) + : out_(out), limit_(limit), count_(0) {} + + public: + using iterator_category = std::output_iterator_tag; + using difference_type = void; + using pointer = void; + using reference = void; + using _Unchecked_type = + truncating_iterator_base; // Mark iterator as checked. + + OutputIt base() const { return out_; } + std::size_t count() const { return count_; } +}; + +// An output iterator that truncates the output and counts the number of objects +// written to it. 
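A standalone sketch of the counting-iterator idea above: an output iterator that discards every value and only counts how many were written, which is how the formatting code can presize its output (see the size computation in float_writer further down).

#include <algorithm>
#include <cassert>
#include <cstddef>
#include <iterator>
#include <string>

class counting_char_iterator {
  std::size_t count_ = 0;

 public:
  using iterator_category = std::output_iterator_tag;
  using value_type = void;
  using difference_type = std::ptrdiff_t;
  using pointer = void;
  using reference = void;

  std::size_t count() const { return count_; }

  // Writing through the iterator discards the character...
  counting_char_iterator& operator*() { return *this; }
  counting_char_iterator& operator=(char) { return *this; }
  // ...and advancing it is what actually counts.
  counting_char_iterator& operator++() { ++count_; return *this; }
  counting_char_iterator operator++(int) { auto it = *this; ++count_; return it; }
};

int main() {
  std::string s = "hello, world";
  auto it = std::copy(s.begin(), s.end(), counting_char_iterator{});
  assert(it.count() == s.size());  // 12 characters "written", none stored
}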
+template ::value_type>::type> +class truncating_iterator; + +template +class truncating_iterator + : public truncating_iterator_base { + using traits = std::iterator_traits; + + mutable typename traits::value_type blackhole_; + + public: + using value_type = typename traits::value_type; + + truncating_iterator(OutputIt out, std::size_t limit) + : truncating_iterator_base(out, limit) {} + + truncating_iterator& operator++() { + if (this->count_++ < this->limit_) ++this->out_; + return *this; + } + + truncating_iterator operator++(int) { + auto it = *this; + ++*this; + return it; + } + + value_type& operator*() const { + return this->count_ < this->limit_ ? *this->out_ : blackhole_; + } +}; + +template +class truncating_iterator + : public truncating_iterator_base { + public: + using value_type = typename OutputIt::container_type::value_type; + + truncating_iterator(OutputIt out, std::size_t limit) + : truncating_iterator_base(out, limit) {} + + truncating_iterator& operator=(value_type val) { + if (this->count_++ < this->limit_) this->out_ = val; + return *this; + } + + truncating_iterator& operator++() { return *this; } + truncating_iterator& operator++(int) { return *this; } + truncating_iterator& operator*() { return *this; } +}; + +// A range with the specified output iterator and value type. +template +class output_range { + private: + OutputIt it_; + + public: + using value_type = T; + using iterator = OutputIt; + struct sentinel {}; + + explicit output_range(OutputIt it) : it_(it) {} + OutputIt begin() const { return it_; } + sentinel end() const { return {}; } // Sentinel is not used yet. +}; + +template +inline size_t count_code_points(basic_string_view s) { + return s.size(); +} + +// Counts the number of code points in a UTF-8 string. +inline size_t count_code_points(basic_string_view s) { + const char8_t* data = s.data(); + size_t num_code_points = 0; + for (size_t i = 0, size = s.size(); i != size; ++i) { + if ((data[i] & 0xc0) != 0x80) ++num_code_points; + } + return num_code_points; +} + +template +inline size_t code_point_index(basic_string_view s, size_t n) { + size_t size = s.size(); + return n < size ? n : size; +} + +// Calculates the index of the nth code point in a UTF-8 string. +inline size_t code_point_index(basic_string_view s, size_t n) { + const char8_t* data = s.data(); + size_t num_code_points = 0; + for (size_t i = 0, size = s.size(); i != size; ++i) { + if ((data[i] & 0xc0) != 0x80 && ++num_code_points > n) { + return i; + } + } + return s.size(); +} + +inline char8_t to_char8_t(char c) { return static_cast(c); } + +template +using needs_conversion = bool_constant< + std::is_same::value_type, + char>::value && + std::is_same::value>; + +template ::value)> +OutputIt copy_str(InputIt begin, InputIt end, OutputIt it) { + return std::copy(begin, end, it); +} + +template ::value)> +OutputIt copy_str(InputIt begin, InputIt end, OutputIt it) { + return std::transform(begin, end, it, to_char8_t); +} + +#ifndef FMT_USE_GRISU +# define FMT_USE_GRISU 1 +#endif + +template constexpr bool use_grisu() { + return FMT_USE_GRISU && std::numeric_limits::is_iec559 && + sizeof(T) <= sizeof(double); +} + +template +template +void buffer::append(const U* begin, const U* end) { + std::size_t new_size = size_ + to_unsigned(end - begin); + reserve(new_size); + std::uninitialized_copy(begin, end, make_checked(ptr_, capacity_) + size_); + size_ = new_size; +} +} // namespace internal + +// A range with an iterator appending to a buffer. 
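A standalone sketch of count_code_points above: in UTF-8, every byte whose top two bits are 10 is a continuation byte, so counting the bytes that are not of that form counts code points.

#include <cassert>
#include <cstddef>
#include <string>

std::size_t count_code_points_sketch(const std::string& s) {
  std::size_t n = 0;
  for (unsigned char c : s)
    if ((c & 0xc0) != 0x80) ++n;  // skip 0b10xxxxxx continuation bytes
  return n;
}

int main() {
  assert(count_code_points_sketch("abc") == 3);
  // U+00E9 (e with acute accent) is two bytes (0xC3 0xA9) but one code point.
  assert(count_code_points_sketch("\xC3\xA9t\xC3\xA9") == 3);
}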
+template +class buffer_range : public internal::output_range< + std::back_insert_iterator>, T> { + public: + using iterator = std::back_insert_iterator>; + using internal::output_range::output_range; + buffer_range(internal::buffer& buf) + : internal::output_range(std::back_inserter(buf)) {} +}; + +// A UTF-8 string view. +class u8string_view : public basic_string_view { + public: + u8string_view(const char* s) + : basic_string_view(reinterpret_cast(s)) {} + u8string_view(const char* s, size_t count) FMT_NOEXCEPT + : basic_string_view(reinterpret_cast(s), count) { + } +}; + +#if FMT_USE_USER_DEFINED_LITERALS +inline namespace literals { +inline u8string_view operator"" _u(const char* s, std::size_t n) { + return {s, n}; +} +} // namespace literals +#endif + +// The number of characters to store in the basic_memory_buffer object itself +// to avoid dynamic memory allocation. +enum { inline_buffer_size = 500 }; + +/** + \rst + A dynamically growing memory buffer for trivially copyable/constructible types + with the first ``SIZE`` elements stored in the object itself. + + You can use one of the following type aliases for common character types: + + +----------------+------------------------------+ + | Type | Definition | + +================+==============================+ + | memory_buffer | basic_memory_buffer | + +----------------+------------------------------+ + | wmemory_buffer | basic_memory_buffer | + +----------------+------------------------------+ + + **Example**:: + + fmt::memory_buffer out; + format_to(out, "The answer is {}.", 42); + + This will append the following output to the ``out`` object: + + .. code-block:: none + + The answer is 42. + + The output can be converted to an ``std::string`` with ``to_string(out)``. + \endrst + */ +template > +class basic_memory_buffer : private Allocator, public internal::buffer { + private: + T store_[SIZE]; + + // Deallocate memory allocated by the buffer. + void deallocate() { + T* data = this->data(); + if (data != store_) Allocator::deallocate(data, this->capacity()); + } + + protected: + void grow(std::size_t size) FMT_OVERRIDE; + + public: + using value_type = T; + using const_reference = const T&; + + explicit basic_memory_buffer(const Allocator& alloc = Allocator()) + : Allocator(alloc) { + this->set(store_, SIZE); + } + ~basic_memory_buffer() FMT_OVERRIDE { deallocate(); } + + private: + // Move data from other to this buffer. + void move(basic_memory_buffer& other) { + Allocator &this_alloc = *this, &other_alloc = other; + this_alloc = std::move(other_alloc); + T* data = other.data(); + std::size_t size = other.size(), capacity = other.capacity(); + if (data == other.store_) { + this->set(store_, capacity); + std::uninitialized_copy(other.store_, other.store_ + size, + internal::make_checked(store_, capacity)); + } else { + this->set(data, capacity); + // Set pointer to the inline array so that delete is not called + // when deallocating. + other.set(other.store_, 0); + } + this->resize(size); + } + + public: + /** + \rst + Constructs a :class:`fmt::basic_memory_buffer` object moving the content + of the other object to it. + \endrst + */ + basic_memory_buffer(basic_memory_buffer&& other) FMT_NOEXCEPT { move(other); } + + /** + \rst + Moves the content of the other ``basic_memory_buffer`` object to this one. 
+ \endrst + */ + basic_memory_buffer& operator=(basic_memory_buffer&& other) FMT_NOEXCEPT { + FMT_ASSERT(this != &other, ""); + deallocate(); + move(other); + return *this; + } + + // Returns a copy of the allocator associated with this buffer. + Allocator get_allocator() const { return *this; } +}; + +template +void basic_memory_buffer::grow(std::size_t size) { +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + if (size > 1000) throw std::runtime_error("fuzz mode - won't grow that much"); +#endif + std::size_t old_capacity = this->capacity(); + std::size_t new_capacity = old_capacity + old_capacity / 2; + if (size > new_capacity) new_capacity = size; + T* old_data = this->data(); + T* new_data = std::allocator_traits::allocate(*this, new_capacity); + // The following code doesn't throw, so the raw pointer above doesn't leak. + std::uninitialized_copy(old_data, old_data + this->size(), + internal::make_checked(new_data, new_capacity)); + this->set(new_data, new_capacity); + // deallocate must not throw according to the standard, but even if it does, + // the buffer already uses the new storage and will deallocate it in + // destructor. + if (old_data != store_) Allocator::deallocate(old_data, old_capacity); +} + +using memory_buffer = basic_memory_buffer; +using wmemory_buffer = basic_memory_buffer; + +namespace internal { + +// Returns true if value is negative, false otherwise. +// Same as `value < 0` but doesn't produce warnings if T is an unsigned type. +template ::is_signed)> +FMT_CONSTEXPR bool is_negative(T value) { + return value < 0; +} +template ::is_signed)> +FMT_CONSTEXPR bool is_negative(T) { + return false; +} + +// Smallest of uint32_t, uint64_t, uint128_t that is large enough to +// represent all values of T. +template +using uint32_or_64_or_128_t = conditional_t< + std::numeric_limits::digits <= 32, uint32_t, + conditional_t::digits <= 64, uint64_t, uint128_t>>; + +// Static data is placed in this class template for the header-only config. +template struct FMT_EXTERN_TEMPLATE_API basic_data { + static const uint64_t powers_of_10_64[]; + static const uint32_t zero_or_powers_of_10_32[]; + static const uint64_t zero_or_powers_of_10_64[]; + static const uint64_t pow10_significands[]; + static const int16_t pow10_exponents[]; + static const char digits[]; + static const char hex_digits[]; + static const char foreground_color[]; + static const char background_color[]; + static const char reset_color[5]; + static const wchar_t wreset_color[5]; + static const char signs[]; +}; + +FMT_EXTERN template struct basic_data; + +// This is a struct rather than an alias to avoid shadowing warnings in gcc. +struct data : basic_data<> {}; + +#ifdef FMT_BUILTIN_CLZLL +// Returns the number of decimal digits in n. Leading zeros are not counted +// except for n == 0 in which case count_digits returns 1. +inline int count_digits(uint64_t n) { + // Based on http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog10 + // and the benchmark https://github.com/localvoid/cxx-benchmark-count-digits. + int t = (64 - FMT_BUILTIN_CLZLL(n | 1)) * 1233 >> 12; + return t - (n < data::zero_or_powers_of_10_64[t]) + 1; +} +#else +// Fallback version of count_digits used when __builtin_clz is not available. +inline int count_digits(uint64_t n) { + int count = 1; + for (;;) { + // Integer division is slow so do it for a group of four digits instead + // of for every digit. The idea comes from the talk by Alexandrescu + // "Three Optimization Tips for C++". See speed-test for a comparison. 
+ if (n < 10) return count; + if (n < 100) return count + 1; + if (n < 1000) return count + 2; + if (n < 10000) return count + 3; + n /= 10000u; + count += 4; + } +} +#endif + +#if FMT_USE_INT128 +inline int count_digits(uint128_t n) { + int count = 1; + for (;;) { + // Integer division is slow so do it for a group of four digits instead + // of for every digit. The idea comes from the talk by Alexandrescu + // "Three Optimization Tips for C++". See speed-test for a comparison. + if (n < 10) return count; + if (n < 100) return count + 1; + if (n < 1000) return count + 2; + if (n < 10000) return count + 3; + n /= 10000U; + count += 4; + } +} +#endif + +// Counts the number of digits in n. BITS = log2(radix). +template inline int count_digits(UInt n) { + int num_digits = 0; + do { + ++num_digits; + } while ((n >>= BITS) != 0); + return num_digits; +} + +template <> int count_digits<4>(internal::fallback_uintptr n); + +#if FMT_GCC_VERSION || FMT_CLANG_VERSION +# define FMT_ALWAYS_INLINE inline __attribute__((always_inline)) +#else +# define FMT_ALWAYS_INLINE +#endif + +#ifdef FMT_BUILTIN_CLZ +// Optional version of count_digits for better performance on 32-bit platforms. +inline int count_digits(uint32_t n) { + int t = (32 - FMT_BUILTIN_CLZ(n | 1)) * 1233 >> 12; + return t - (n < data::zero_or_powers_of_10_32[t]) + 1; +} +#endif + +template FMT_API std::string grouping_impl(locale_ref loc); +template inline std::string grouping(locale_ref loc) { + return grouping_impl(loc); +} +template <> inline std::string grouping(locale_ref loc) { + return grouping_impl(loc); +} + +template FMT_API Char thousands_sep_impl(locale_ref loc); +template inline Char thousands_sep(locale_ref loc) { + return Char(thousands_sep_impl(loc)); +} +template <> inline wchar_t thousands_sep(locale_ref loc) { + return thousands_sep_impl(loc); +} + +template FMT_API Char decimal_point_impl(locale_ref loc); +template inline Char decimal_point(locale_ref loc) { + return Char(decimal_point_impl(loc)); +} +template <> inline wchar_t decimal_point(locale_ref loc) { + return decimal_point_impl(loc); +} + +// Formats a decimal unsigned integer value writing into buffer. +// add_thousands_sep is called after writing each char to add a thousands +// separator if necessary. +template +inline Char* format_decimal(Char* buffer, UInt value, int num_digits, + F add_thousands_sep) { + FMT_ASSERT(num_digits >= 0, "invalid digit count"); + buffer += num_digits; + Char* end = buffer; + while (value >= 100) { + // Integer division is slow so do it for a group of two digits instead + // of for every digit. The idea comes from the talk by Alexandrescu + // "Three Optimization Tips for C++". See speed-test for a comparison. 
+ auto index = static_cast((value % 100) * 2); + value /= 100; + *--buffer = static_cast(data::digits[index + 1]); + add_thousands_sep(buffer); + *--buffer = static_cast(data::digits[index]); + add_thousands_sep(buffer); + } + if (value < 10) { + *--buffer = static_cast('0' + value); + return end; + } + auto index = static_cast(value * 2); + *--buffer = static_cast(data::digits[index + 1]); + add_thousands_sep(buffer); + *--buffer = static_cast(data::digits[index]); + return end; +} + +template constexpr int digits10() noexcept { + return std::numeric_limits::digits10; +} +template <> constexpr int digits10() noexcept { return 38; } +template <> constexpr int digits10() noexcept { return 38; } + +template +inline Iterator format_decimal(Iterator out, UInt value, int num_digits, + F add_thousands_sep) { + FMT_ASSERT(num_digits >= 0, "invalid digit count"); + // Buffer should be large enough to hold all digits (<= digits10 + 1). + enum { max_size = digits10() + 1 }; + Char buffer[2 * max_size]; + auto end = format_decimal(buffer, value, num_digits, add_thousands_sep); + return internal::copy_str(buffer, end, out); +} + +template +inline It format_decimal(It out, UInt value, int num_digits) { + return format_decimal(out, value, num_digits, [](Char*) {}); +} + +template +inline Char* format_uint(Char* buffer, UInt value, int num_digits, + bool upper = false) { + buffer += num_digits; + Char* end = buffer; + do { + const char* digits = upper ? "0123456789ABCDEF" : data::hex_digits; + unsigned digit = (value & ((1 << BASE_BITS) - 1)); + *--buffer = static_cast(BASE_BITS < 4 ? static_cast('0' + digit) + : digits[digit]); + } while ((value >>= BASE_BITS) != 0); + return end; +} + +template +Char* format_uint(Char* buffer, internal::fallback_uintptr n, int num_digits, + bool = false) { + auto char_digits = std::numeric_limits::digits / 4; + int start = (num_digits + char_digits - 1) / char_digits - 1; + if (int start_digits = num_digits % char_digits) { + unsigned value = n.value[start--]; + buffer = format_uint(buffer, value, start_digits); + } + for (; start >= 0; --start) { + unsigned value = n.value[start]; + buffer += char_digits; + auto p = buffer; + for (int i = 0; i < char_digits; ++i) { + unsigned digit = (value & ((1 << BASE_BITS) - 1)); + *--p = static_cast(data::hex_digits[digit]); + value >>= BASE_BITS; + } + } + return buffer; +} + +template +inline It format_uint(It out, UInt value, int num_digits, bool upper = false) { + // Buffer should be large enough to hold all digits (digits / BASE_BITS + 1). + char buffer[num_bits() / BASE_BITS + 1]; + format_uint(buffer, value, num_digits, upper); + return internal::copy_str(buffer, buffer + num_digits, out); +} + +template struct null {}; + +// Workaround an array initialization issue in gcc 4.8. +template struct fill_t { + private: + Char data_[6]; + + public: + FMT_CONSTEXPR Char& operator[](size_t index) { return data_[index]; } + FMT_CONSTEXPR const Char& operator[](size_t index) const { + return data_[index]; + } + + static FMT_CONSTEXPR fill_t make() { + auto fill = fill_t(); + fill[0] = Char(' '); + return fill; + } +}; +} // namespace internal + +// We cannot use enum classes as bit fields because of a gcc bug +// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=61414. +namespace align { +enum type { none, left, right, center, numeric }; +} +using align_t = align::type; + +namespace sign { +enum type { none, minus, plus, space }; +} +using sign_t = sign::type; + +// Format specifiers for built-in and string types. 
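A standalone sketch of the format_decimal strategy above, without the thousands-separator hook: divide by 100 so each iteration emits two digits from a "00".."99" lookup table, writing backwards from the end of a fixed buffer.

#include <cassert>
#include <cstdint>
#include <string>

std::string to_decimal_sketch(std::uint64_t value) {
  // Pairs of digits for 00..99, the same idea as data::digits above.
  static const char digit_pairs[] =
      "0001020304050607080910111213141516171819"
      "2021222324252627282930313233343536373839"
      "4041424344454647484950515253545556575859"
      "6061626364656667686970717273747576777879"
      "8081828384858687888990919293949596979899";
  char buf[20];  // enough for the 20 digits of UINT64_MAX
  char* end = buf + sizeof(buf);
  char* p = end;
  while (value >= 100) {
    unsigned index = static_cast<unsigned>((value % 100) * 2);
    value /= 100;
    *--p = digit_pairs[index + 1];
    *--p = digit_pairs[index];
  }
  if (value < 10) {
    *--p = static_cast<char>('0' + value);
  } else {
    unsigned index = static_cast<unsigned>(value * 2);
    *--p = digit_pairs[index + 1];
    *--p = digit_pairs[index];
  }
  return std::string(p, end);
}

int main() {
  assert(to_decimal_sketch(0) == "0");
  assert(to_decimal_sketch(42) == "42");
  assert(to_decimal_sketch(1234567890123456789ULL) == "1234567890123456789");
}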
+template struct basic_format_specs { + int width; + int precision; + char type; + align_t align : 4; + sign_t sign : 3; + bool alt : 1; // Alternate form ('#'). + internal::fill_t fill; + + constexpr basic_format_specs() + : width(0), + precision(-1), + type(0), + align(align::none), + sign(sign::none), + alt(false), + fill(internal::fill_t::make()) {} +}; + +using format_specs = basic_format_specs; + +namespace internal { + +// A floating-point presentation format. +enum class float_format : unsigned char { + general, // General: exponent notation or fixed point based on magnitude. + exp, // Exponent notation with the default precision of 6, e.g. 1.2e-3. + fixed, // Fixed point with the default precision of 6, e.g. 0.0012. + hex +}; + +struct float_specs { + int precision; + float_format format : 8; + sign_t sign : 8; + bool upper : 1; + bool locale : 1; + bool percent : 1; + bool binary32 : 1; + bool use_grisu : 1; + bool trailing_zeros : 1; +}; + +// Writes the exponent exp in the form "[+-]d{2,3}" to buffer. +template It write_exponent(int exp, It it) { + FMT_ASSERT(-10000 < exp && exp < 10000, "exponent out of range"); + if (exp < 0) { + *it++ = static_cast('-'); + exp = -exp; + } else { + *it++ = static_cast('+'); + } + if (exp >= 100) { + const char* top = data::digits + (exp / 100) * 2; + if (exp >= 1000) *it++ = static_cast(top[0]); + *it++ = static_cast(top[1]); + exp %= 100; + } + const char* d = data::digits + exp * 2; + *it++ = static_cast(d[0]); + *it++ = static_cast(d[1]); + return it; +} + +template class float_writer { + private: + // The number is given as v = digits_ * pow(10, exp_). + const char* digits_; + int num_digits_; + int exp_; + size_t size_; + float_specs specs_; + Char decimal_point_; + + template It prettify(It it) const { + // pow(10, full_exp - 1) <= v <= pow(10, full_exp). + int full_exp = num_digits_ + exp_; + if (specs_.format == float_format::exp) { + // Insert a decimal point after the first digit and add an exponent. + *it++ = static_cast(*digits_); + int num_zeros = specs_.precision - num_digits_; + bool trailing_zeros = num_zeros > 0 && specs_.trailing_zeros; + if (num_digits_ > 1 || trailing_zeros) *it++ = decimal_point_; + it = copy_str(digits_ + 1, digits_ + num_digits_, it); + if (trailing_zeros) + it = std::fill_n(it, num_zeros, static_cast('0')); + *it++ = static_cast(specs_.upper ? 'E' : 'e'); + return write_exponent(full_exp - 1, it); + } + if (num_digits_ <= full_exp) { + // 1234e7 -> 12340000000[.0+] + it = copy_str(digits_, digits_ + num_digits_, it); + it = std::fill_n(it, full_exp - num_digits_, static_cast('0')); + if (specs_.trailing_zeros) { + *it++ = decimal_point_; + int num_zeros = specs_.precision - full_exp; + if (num_zeros <= 0) { + if (specs_.format != float_format::fixed) + *it++ = static_cast('0'); + return it; + } +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + if (num_zeros > 1000) + throw std::runtime_error("fuzz mode - avoiding excessive cpu use"); +#endif + it = std::fill_n(it, num_zeros, static_cast('0')); + } + } else if (full_exp > 0) { + // 1234e-2 -> 12.34[0+] + it = copy_str(digits_, digits_ + full_exp, it); + if (!specs_.trailing_zeros) { + // Remove trailing zeros. 
+ int num_digits = num_digits_; + while (num_digits > full_exp && digits_[num_digits - 1] == '0') + --num_digits; + if (num_digits != full_exp) *it++ = decimal_point_; + return copy_str(digits_ + full_exp, digits_ + num_digits, it); + } + *it++ = decimal_point_; + it = copy_str(digits_ + full_exp, digits_ + num_digits_, it); + if (specs_.precision > num_digits_) { + // Add trailing zeros. + int num_zeros = specs_.precision - num_digits_; + it = std::fill_n(it, num_zeros, static_cast('0')); + } + } else { + // 1234e-6 -> 0.001234 + *it++ = static_cast('0'); + int num_zeros = -full_exp; + if (specs_.precision >= 0 && specs_.precision < num_zeros) + num_zeros = specs_.precision; + int num_digits = num_digits_; + if (!specs_.trailing_zeros) + while (num_digits > 0 && digits_[num_digits - 1] == '0') --num_digits; + if (num_zeros != 0 || num_digits != 0) { + *it++ = decimal_point_; + it = std::fill_n(it, num_zeros, static_cast('0')); + it = copy_str(digits_, digits_ + num_digits, it); + } + } + return it; + } + + public: + float_writer(const char* digits, int num_digits, int exp, float_specs specs, + Char decimal_point) + : digits_(digits), + num_digits_(num_digits), + exp_(exp), + specs_(specs), + decimal_point_(decimal_point) { + int full_exp = num_digits + exp - 1; + int precision = specs.precision > 0 ? specs.precision : 16; + if (specs_.format == float_format::general && + !(full_exp >= -4 && full_exp < precision)) { + specs_.format = float_format::exp; + } + size_ = prettify(counting_iterator()).count(); + size_ += specs.sign ? 1 : 0; + } + + size_t size() const { return size_; } + size_t width() const { return size(); } + + template void operator()(It&& it) { + if (specs_.sign) *it++ = static_cast(data::signs[specs_.sign]); + it = prettify(it); + } +}; + +template +int format_float(T value, int precision, float_specs specs, buffer& buf); + +// Formats a floating-point number with snprintf. 
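The constructor of float_writer above switches from general to exp exactly when fixed notation would be unwieldy. A standalone sketch of that %g-style rule, where decimal_exp is the exponent of the leading digit (value ~ d.ddd * 10^decimal_exp) and precision falls back to 16 as in the constructor:

#include <cassert>

enum class float_notation { fixed, exponent };

// Mirror of the check in float_writer's constructor: stay in fixed notation
// only while the leading digit's exponent lies in [-4, precision).
float_notation choose_notation(int decimal_exp, int precision) {
  if (precision <= 0) precision = 16;  // same fallback as above
  return (decimal_exp >= -4 && decimal_exp < precision)
             ? float_notation::fixed
             : float_notation::exponent;
}

int main() {
  assert(choose_notation(0, 6) == float_notation::fixed);      // 1.234   -> "1.234"
  assert(choose_notation(7, 6) == float_notation::exponent);   // 1.234e7 -> "1.234e+07"
  assert(choose_notation(-4, 6) == float_notation::fixed);     // 1.2e-4  -> "0.00012"
  assert(choose_notation(-5, 6) == float_notation::exponent);  // 1.2e-5  -> "1.2e-05"
}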
+template +int snprintf_float(T value, int precision, float_specs specs, + buffer& buf); + +template T promote_float(T value) { return value; } +inline double promote_float(float value) { return value; } + +template +FMT_CONSTEXPR void handle_int_type_spec(char spec, Handler&& handler) { + switch (spec) { + case 0: + case 'd': + handler.on_dec(); + break; + case 'x': + case 'X': + handler.on_hex(); + break; + case 'b': + case 'B': + handler.on_bin(); + break; + case 'o': + handler.on_oct(); + break; + case 'n': + handler.on_num(); + break; + default: + handler.on_error(); + } +} + +template +FMT_CONSTEXPR float_specs parse_float_type_spec( + const basic_format_specs& specs, ErrorHandler&& eh = {}) { + auto result = float_specs(); + result.trailing_zeros = specs.alt; + switch (specs.type) { + case 0: + result.format = float_format::general; + result.trailing_zeros |= specs.precision != 0; + break; + case 'G': + result.upper = true; + FMT_FALLTHROUGH; + case 'g': + result.format = float_format::general; + break; + case 'E': + result.upper = true; + FMT_FALLTHROUGH; + case 'e': + result.format = float_format::exp; + result.trailing_zeros |= specs.precision != 0; + break; + case 'F': + result.upper = true; + FMT_FALLTHROUGH; + case 'f': + result.format = float_format::fixed; + result.trailing_zeros |= specs.precision != 0; + break; +#if FMT_DEPRECATED_PERCENT + case '%': + result.format = float_format::fixed; + result.percent = true; + break; +#endif + case 'A': + result.upper = true; + FMT_FALLTHROUGH; + case 'a': + result.format = float_format::hex; + break; + case 'n': + result.locale = true; + break; + default: + eh.on_error("invalid type specifier"); + break; + } + return result; +} + +template +FMT_CONSTEXPR void handle_char_specs(const basic_format_specs* specs, + Handler&& handler) { + if (!specs) return handler.on_char(); + if (specs->type && specs->type != 'c') return handler.on_int(); + if (specs->align == align::numeric || specs->sign != sign::none || specs->alt) + handler.on_error("invalid format specifier for char"); + handler.on_char(); +} + +template +FMT_CONSTEXPR void handle_cstring_type_spec(Char spec, Handler&& handler) { + if (spec == 0 || spec == 's') + handler.on_string(); + else if (spec == 'p') + handler.on_pointer(); + else + handler.on_error("invalid type specifier"); +} + +template +FMT_CONSTEXPR void check_string_type_spec(Char spec, ErrorHandler&& eh) { + if (spec != 0 && spec != 's') eh.on_error("invalid type specifier"); +} + +template +FMT_CONSTEXPR void check_pointer_type_spec(Char spec, ErrorHandler&& eh) { + if (spec != 0 && spec != 'p') eh.on_error("invalid type specifier"); +} + +template class int_type_checker : private ErrorHandler { + public: + FMT_CONSTEXPR explicit int_type_checker(ErrorHandler eh) : ErrorHandler(eh) {} + + FMT_CONSTEXPR void on_dec() {} + FMT_CONSTEXPR void on_hex() {} + FMT_CONSTEXPR void on_bin() {} + FMT_CONSTEXPR void on_oct() {} + FMT_CONSTEXPR void on_num() {} + + FMT_CONSTEXPR void on_error() { + ErrorHandler::on_error("invalid type specifier"); + } +}; + +template +class char_specs_checker : public ErrorHandler { + private: + char type_; + + public: + FMT_CONSTEXPR char_specs_checker(char type, ErrorHandler eh) + : ErrorHandler(eh), type_(type) {} + + FMT_CONSTEXPR void on_int() { + handle_int_type_spec(type_, int_type_checker(*this)); + } + FMT_CONSTEXPR void on_char() {} +}; + +template +class cstring_type_checker : public ErrorHandler { + public: + FMT_CONSTEXPR explicit cstring_type_checker(ErrorHandler eh) + : 
ErrorHandler(eh) {} + + FMT_CONSTEXPR void on_string() {} + FMT_CONSTEXPR void on_pointer() {} +}; + +template +void arg_map::init(const basic_format_args& args) { + if (map_) return; + map_ = new entry[internal::to_unsigned(args.max_size())]; + if (args.is_packed()) { + for (int i = 0;; ++i) { + internal::type arg_type = args.type(i); + if (arg_type == internal::none_type) return; + if (arg_type == internal::named_arg_type) push_back(args.values_[i]); + } + } + for (int i = 0, n = args.max_size(); i < n; ++i) { + auto type = args.args_[i].type_; + if (type == internal::named_arg_type) push_back(args.args_[i].value_); + } +} + +template struct nonfinite_writer { + sign_t sign; + const char* str; + static constexpr size_t str_size = 3; + + size_t size() const { return str_size + (sign ? 1 : 0); } + size_t width() const { return size(); } + + template void operator()(It&& it) const { + if (sign) *it++ = static_cast(data::signs[sign]); + it = copy_str(str, str + str_size, it); + } +}; + +// This template provides operations for formatting and writing data into a +// character range. +template class basic_writer { + public: + using char_type = typename Range::value_type; + using iterator = typename Range::iterator; + using format_specs = basic_format_specs; + + private: + iterator out_; // Output iterator. + locale_ref locale_; + + // Attempts to reserve space for n extra characters in the output range. + // Returns a pointer to the reserved range or a reference to out_. + auto reserve(std::size_t n) -> decltype(internal::reserve(out_, n)) { + return internal::reserve(out_, n); + } + + template struct padded_int_writer { + size_t size_; + string_view prefix; + char_type fill; + std::size_t padding; + F f; + + size_t size() const { return size_; } + size_t width() const { return size_; } + + template void operator()(It&& it) const { + if (prefix.size() != 0) + it = copy_str(prefix.begin(), prefix.end(), it); + it = std::fill_n(it, padding, fill); + f(it); + } + }; + + // Writes an integer in the format + // + // where are written by f(it). + template + void write_int(int num_digits, string_view prefix, format_specs specs, F f) { + std::size_t size = prefix.size() + to_unsigned(num_digits); + char_type fill = specs.fill[0]; + std::size_t padding = 0; + if (specs.align == align::numeric) { + auto unsiged_width = to_unsigned(specs.width); + if (unsiged_width > size) { + padding = unsiged_width - size; + size = unsiged_width; + } + } else if (specs.precision > num_digits) { + size = prefix.size() + to_unsigned(specs.precision); + padding = to_unsigned(specs.precision - num_digits); + fill = static_cast('0'); + } + if (specs.align == align::none) specs.align = align::right; + write_padded(specs, padded_int_writer{size, prefix, fill, padding, f}); + } + + // Writes a decimal integer. + template void write_decimal(Int value) { + auto abs_value = static_cast>(value); + bool negative = is_negative(value); + // Don't do -abs_value since it trips unsigned-integer-overflow sanitizer. + if (negative) abs_value = ~abs_value + 1; + int num_digits = count_digits(abs_value); + auto&& it = reserve((negative ? 1 : 0) + static_cast(num_digits)); + if (negative) *it++ = static_cast('-'); + it = format_decimal(it, abs_value, num_digits); + } + + // The handle_int_type_spec handler that writes an integer. 
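A standalone sketch of the sign handling in write_decimal above (and in int_writer below): cast to the unsigned counterpart first and negate there, so the most negative value never triggers signed-overflow undefined behavior.

#include <cassert>
#include <cstdint>
#include <limits>

std::uint32_t unsigned_abs(std::int32_t value) {
  auto abs_value = static_cast<std::uint32_t>(value);
  // Two's complement negation in unsigned arithmetic is well defined.
  if (value < 0) abs_value = 0 - abs_value;
  return abs_value;
}

int main() {
  assert(unsigned_abs(42) == 42u);
  assert(unsigned_abs(-42) == 42u);
  // Negating INT32_MIN in signed arithmetic would overflow; the unsigned
  // detour yields the correct magnitude.
  assert(unsigned_abs(std::numeric_limits<std::int32_t>::min()) == 2147483648u);
}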
+ template struct int_writer { + using unsigned_type = uint32_or_64_or_128_t; + + basic_writer& writer; + const Specs& specs; + unsigned_type abs_value; + char prefix[4]; + unsigned prefix_size; + + string_view get_prefix() const { return string_view(prefix, prefix_size); } + + int_writer(basic_writer& w, Int value, const Specs& s) + : writer(w), + specs(s), + abs_value(static_cast(value)), + prefix_size(0) { + if (is_negative(value)) { + prefix[0] = '-'; + ++prefix_size; + abs_value = 0 - abs_value; + } else if (specs.sign != sign::none && specs.sign != sign::minus) { + prefix[0] = specs.sign == sign::plus ? '+' : ' '; + ++prefix_size; + } + } + + struct dec_writer { + unsigned_type abs_value; + int num_digits; + + template void operator()(It&& it) const { + it = internal::format_decimal(it, abs_value, num_digits); + } + }; + + void on_dec() { + int num_digits = count_digits(abs_value); + writer.write_int(num_digits, get_prefix(), specs, + dec_writer{abs_value, num_digits}); + } + + struct hex_writer { + int_writer& self; + int num_digits; + + template void operator()(It&& it) const { + it = format_uint<4, char_type>(it, self.abs_value, num_digits, + self.specs.type != 'x'); + } + }; + + void on_hex() { + if (specs.alt) { + prefix[prefix_size++] = '0'; + prefix[prefix_size++] = specs.type; + } + int num_digits = count_digits<4>(abs_value); + writer.write_int(num_digits, get_prefix(), specs, + hex_writer{*this, num_digits}); + } + + template struct bin_writer { + unsigned_type abs_value; + int num_digits; + + template void operator()(It&& it) const { + it = format_uint(it, abs_value, num_digits); + } + }; + + void on_bin() { + if (specs.alt) { + prefix[prefix_size++] = '0'; + prefix[prefix_size++] = static_cast(specs.type); + } + int num_digits = count_digits<1>(abs_value); + writer.write_int(num_digits, get_prefix(), specs, + bin_writer<1>{abs_value, num_digits}); + } + + void on_oct() { + int num_digits = count_digits<3>(abs_value); + if (specs.alt && specs.precision <= num_digits && abs_value != 0) { + // Octal prefix '0' is counted as a digit, so only add it if precision + // is not greater than the number of digits. + prefix[prefix_size++] = '0'; + } + writer.write_int(num_digits, get_prefix(), specs, + bin_writer<3>{abs_value, num_digits}); + } + + enum { sep_size = 1 }; + + struct num_writer { + unsigned_type abs_value; + int size; + const std::string& groups; + char_type sep; + + template void operator()(It&& it) const { + basic_string_view s(&sep, sep_size); + // Index of a decimal digit with the least significant digit having + // index 0. 
+ int digit_index = 0; + std::string::const_iterator group = groups.cbegin(); + it = format_decimal( + it, abs_value, size, + [this, s, &group, &digit_index](char_type*& buffer) { + if (*group <= 0 || ++digit_index % *group != 0 || + *group == max_value()) + return; + if (group + 1 != groups.cend()) { + digit_index = 0; + ++group; + } + buffer -= s.size(); + std::uninitialized_copy(s.data(), s.data() + s.size(), + make_checked(buffer, s.size())); + }); + } + }; + + void on_num() { + std::string groups = grouping(writer.locale_); + if (groups.empty()) return on_dec(); + auto sep = thousands_sep(writer.locale_); + if (!sep) return on_dec(); + int num_digits = count_digits(abs_value); + int size = num_digits; + std::string::const_iterator group = groups.cbegin(); + while (group != groups.cend() && num_digits > *group && *group > 0 && + *group != max_value()) { + size += sep_size; + num_digits -= *group; + ++group; + } + if (group == groups.cend()) + size += sep_size * ((num_digits - 1) / groups.back()); + writer.write_int(size, get_prefix(), specs, + num_writer{abs_value, size, groups, sep}); + } + + FMT_NORETURN void on_error() { + FMT_THROW(duckdb::Exception("invalid type specifier")); + } + }; + + template struct str_writer { + const Char* s; + size_t size_; + + size_t size() const { return size_; } + size_t width() const { + return count_code_points(basic_string_view(s, size_)); + } + + template void operator()(It&& it) const { + it = copy_str(s, s + size_, it); + } + }; + + template struct pointer_writer { + UIntPtr value; + int num_digits; + + size_t size() const { return to_unsigned(num_digits) + 2; } + size_t width() const { return size(); } + + template void operator()(It&& it) const { + *it++ = static_cast('0'); + *it++ = static_cast('x'); + it = format_uint<4, char_type>(it, value, num_digits); + } + }; + + public: + explicit basic_writer(Range out, locale_ref loc = locale_ref()) + : out_(out.begin()), locale_(loc) {} + + iterator out() const { return out_; } + + // Writes a value in the format + // + // where is written by f(it). + template void write_padded(const format_specs& specs, F&& f) { + // User-perceived width (in code points). + unsigned width = to_unsigned(specs.width); + size_t size = f.size(); // The number of code units. + size_t num_code_points = width != 0 ? 
f.width() : size; + if (width <= num_code_points) return f(reserve(size)); + auto&& it = reserve(width + (size - num_code_points)); + char_type fill = specs.fill[0]; + std::size_t padding = width - num_code_points; + if (specs.align == align::right) { + it = std::fill_n(it, padding, fill); + f(it); + } else if (specs.align == align::center) { + std::size_t left_padding = padding / 2; + it = std::fill_n(it, left_padding, fill); + f(it); + it = std::fill_n(it, padding - left_padding, fill); + } else { + f(it); + it = std::fill_n(it, padding, fill); + } + } + + void write(int value) { write_decimal(value); } + void write(long value) { write_decimal(value); } + void write(long long value) { write_decimal(value); } + + void write(unsigned value) { write_decimal(value); } + void write(unsigned long value) { write_decimal(value); } + void write(unsigned long long value) { write_decimal(value); } + +#if FMT_USE_INT128 + void write(int128_t value) { write_decimal(value); } + void write(uint128_t value) { write_decimal(value); } +#endif + + template + void write_int(T value, const Spec& spec) { + handle_int_type_spec(spec.type, int_writer(*this, value, spec)); + } + + template ::value)> + void write(T value, format_specs specs = {}) { + float_specs fspecs = parse_float_type_spec(specs); + fspecs.sign = specs.sign; + if (std::signbit(value)) { // value < 0 is false for NaN so use signbit. + fspecs.sign = sign::minus; + value = -value; + } else if (fspecs.sign == sign::minus) { + fspecs.sign = sign::none; + } + + if (!std::isfinite(value)) { + auto str = std::isinf(value) ? (fspecs.upper ? "INF" : "inf") + : (fspecs.upper ? "NAN" : "nan"); + return write_padded(specs, nonfinite_writer{fspecs.sign, str}); + } + + if (specs.align == align::none) { + specs.align = align::right; + } else if (specs.align == align::numeric) { + if (fspecs.sign) { + auto&& it = reserve(1); + *it++ = static_cast(data::signs[fspecs.sign]); + fspecs.sign = sign::none; + if (specs.width != 0) --specs.width; + } + specs.align = align::right; + } + + memory_buffer buffer; + if (fspecs.format == float_format::hex) { + if (fspecs.sign) buffer.push_back(data::signs[fspecs.sign]); + snprintf_float(promote_float(value), specs.precision, fspecs, buffer); + write_padded(specs, str_writer{buffer.data(), buffer.size()}); + return; + } + int precision = specs.precision >= 0 || !specs.type ? specs.precision : 6; + if (fspecs.format == float_format::exp) ++precision; + if (const_check(std::is_same())) fspecs.binary32 = true; + fspecs.use_grisu = use_grisu(); + if (const_check(FMT_DEPRECATED_PERCENT) && fspecs.percent) value *= 100; + int exp = format_float(promote_float(value), precision, fspecs, buffer); + if (const_check(FMT_DEPRECATED_PERCENT) && fspecs.percent) { + buffer.push_back('%'); + --exp; // Adjust decimal place position. + } + fspecs.precision = precision; + char_type point = fspecs.locale ? 
decimal_point(locale_) + : static_cast('.'); + write_padded(specs, float_writer(buffer.data(), + static_cast(buffer.size()), + exp, fspecs, point)); + } + + void write(char value) { + auto&& it = reserve(1); + *it++ = value; + } + + template ::value)> + void write(Char value) { + auto&& it = reserve(1); + *it++ = value; + } + + void write(string_view value) { + auto&& it = reserve(value.size()); + it = copy_str(value.begin(), value.end(), it); + } + void write(wstring_view value) { + static_assert(std::is_same::value, ""); + auto&& it = reserve(value.size()); + it = std::copy(value.begin(), value.end(), it); + } + + template + void write(const Char* s, std::size_t size, const format_specs& specs) { + write_padded(specs, str_writer{s, size}); + } + + template + void write(basic_string_view s, const format_specs& specs = {}) { + const Char* data = s.data(); + std::size_t size = s.size(); + if (specs.precision >= 0 && to_unsigned(specs.precision) < size) + size = code_point_index(s, to_unsigned(specs.precision)); + write(data, size, specs); + } + + template + void write_pointer(UIntPtr value, const format_specs* specs) { + int num_digits = count_digits<4>(value); + auto pw = pointer_writer{value, num_digits}; + if (!specs) return pw(reserve(to_unsigned(num_digits) + 2)); + format_specs specs_copy = *specs; + if (specs_copy.align == align::none) specs_copy.align = align::right; + write_padded(specs_copy, pw); + } +}; + +using writer = basic_writer>; + +template struct is_integral : std::is_integral {}; +template <> struct is_integral : std::true_type {}; +template <> struct is_integral : std::true_type {}; + +template +class arg_formatter_base { + public: + using char_type = typename Range::value_type; + using iterator = typename Range::iterator; + using format_specs = basic_format_specs; + + private: + using writer_type = basic_writer; + writer_type writer_; + format_specs* specs_; + + struct char_writer { + char_type value; + + size_t size() const { return 1; } + size_t width() const { return 1; } + + template void operator()(It&& it) const { *it++ = value; } + }; + + void write_char(char_type value) { + if (specs_) + writer_.write_padded(*specs_, char_writer{value}); + else + writer_.write(value); + } + + void write_pointer(const void* p) { + writer_.write_pointer(internal::to_uintptr(p), specs_); + } + + protected: + writer_type& writer() { return writer_; } + FMT_DEPRECATED format_specs* spec() { return specs_; } + format_specs* specs() { return specs_; } + iterator out() { return writer_.out(); } + + void write(bool value) { + string_view sv(value ? "true" : "false"); + specs_ ? writer_.write(sv, *specs_) : writer_.write(sv); + } + + void write(const char_type* value) { + if (!value) { + FMT_THROW(duckdb::Exception("string pointer is null")); + } else { + auto length = std::char_traits::length(value); + basic_string_view sv(value, length); + specs_ ? 
writer_.write(sv, *specs_) : writer_.write(sv); + } + } + + public: + arg_formatter_base(Range r, format_specs* s, locale_ref loc) + : writer_(r, loc), specs_(s) {} + + iterator operator()(monostate) { + FMT_ASSERT(false, "invalid argument type"); + return out(); + } + + template ::value)> + iterator operator()(T value) { + if (specs_) + writer_.write_int(value, *specs_); + else + writer_.write(value); + return out(); + } + + iterator operator()(char_type value) { + internal::handle_char_specs( + specs_, char_spec_handler(*this, static_cast(value))); + return out(); + } + + iterator operator()(bool value) { + if (specs_ && specs_->type) return (*this)(value ? 1 : 0); + write(value != 0); + return out(); + } + + template ::value)> + iterator operator()(T value) { + writer_.write(value, specs_ ? *specs_ : format_specs()); + return out(); + } + + struct char_spec_handler : ErrorHandler { + arg_formatter_base& formatter; + char_type value; + + char_spec_handler(arg_formatter_base& f, char_type val) + : formatter(f), value(val) {} + + void on_int() { + if (formatter.specs_) + formatter.writer_.write_int(value, *formatter.specs_); + else + formatter.writer_.write(value); + } + void on_char() { formatter.write_char(value); } + }; + + struct cstring_spec_handler : internal::error_handler { + arg_formatter_base& formatter; + const char_type* value; + + cstring_spec_handler(arg_formatter_base& f, const char_type* val) + : formatter(f), value(val) {} + + void on_string() { formatter.write(value); } + void on_pointer() { formatter.write_pointer(value); } + }; + + iterator operator()(const char_type* value) { + if (!specs_) return write(value), out(); + internal::handle_cstring_type_spec(specs_->type, + cstring_spec_handler(*this, value)); + return out(); + } + + iterator operator()(basic_string_view value) { + if (specs_) { + internal::check_string_type_spec(specs_->type, internal::error_handler()); + writer_.write(value, *specs_); + } else { + writer_.write(value); + } + return out(); + } + + iterator operator()(const void* value) { + if (specs_) + check_pointer_type_spec(specs_->type, internal::error_handler()); + write_pointer(value); + return out(); + } +}; + +template FMT_CONSTEXPR bool is_name_start(Char c) { + return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || '_' == c; +} + +// Parses the range [begin, end) as an unsigned integer. This function assumes +// that the range is non-empty and the first character is a digit. +template +FMT_CONSTEXPR int parse_nonnegative_int(const Char*& begin, const Char* end, + ErrorHandler&& eh) { + FMT_ASSERT(begin != end && '0' <= *begin && *begin <= '9', ""); + if (*begin == '0') { + ++begin; + return 0; + } + unsigned value = 0; + // Convert to unsigned to prevent a warning. + constexpr unsigned max_int = max_value(); + unsigned big = max_int / 10; + do { + // Check for overflow. 
+    if (value > big) {
+      value = max_int + 1;
+      break;
+    }
+    value = value * 10 + unsigned(*begin - '0');
+    ++begin;
+  } while (begin != end && '0' <= *begin && *begin <= '9');
+  if (value > max_int) eh.on_error("number is too big");
+  return static_cast<int>(value);
+}
+
+template <typename Context> class custom_formatter {
+ private:
+  using char_type = typename Context::char_type;
+
+  basic_format_parse_context<char_type>& parse_ctx_;
+  Context& ctx_;
+
+ public:
+  explicit custom_formatter(basic_format_parse_context<char_type>& parse_ctx,
+                            Context& ctx)
+      : parse_ctx_(parse_ctx), ctx_(ctx) {}
+
+  bool operator()(typename basic_format_arg<Context>::handle h) const {
+    h.format(parse_ctx_, ctx_);
+    return true;
+  }
+
+  template <typename T> bool operator()(T) const { return false; }
+};
+
+template <typename T>
+using is_integer =
+    bool_constant<is_integral<T>::value && !std::is_same<T, bool>::value &&
+                  !std::is_same<T, char>::value &&
+                  !std::is_same<T, wchar_t>::value>;
+
+template <typename ErrorHandler> class width_checker {
+ public:
+  explicit FMT_CONSTEXPR width_checker(ErrorHandler& eh) : handler_(eh) {}
+
+  template <typename T, FMT_ENABLE_IF(is_integer<T>::value)>
+  FMT_CONSTEXPR unsigned long long operator()(T value) {
+    if (is_negative(value)) handler_.on_error("negative width");
+    return static_cast<unsigned long long>(value);
+  }
+
+  template <typename T, FMT_ENABLE_IF(!is_integer<T>::value)>
+  FMT_CONSTEXPR unsigned long long operator()(T) {
+    handler_.on_error("width is not integer");
+    return 0;
+  }
+
+ private:
+  ErrorHandler& handler_;
+};
+
+template <typename ErrorHandler> class precision_checker {
+ public:
+  explicit FMT_CONSTEXPR precision_checker(ErrorHandler& eh) : handler_(eh) {}
+
+  template <typename T, FMT_ENABLE_IF(is_integer<T>::value)>
+  FMT_CONSTEXPR unsigned long long operator()(T value) {
+    if (is_negative(value)) handler_.on_error("negative precision");
+    return static_cast<unsigned long long>(value);
+  }
+
+  template <typename T, FMT_ENABLE_IF(!is_integer<T>::value)>
+  FMT_CONSTEXPR unsigned long long operator()(T) {
+    handler_.on_error("precision is not integer");
+    return 0;
+  }
+
+ private:
+  ErrorHandler& handler_;
+};
+
+// A format specifier handler that sets fields in basic_format_specs.
+template <typename Char> class specs_setter {
+ public:
+  explicit FMT_CONSTEXPR specs_setter(basic_format_specs<Char>& specs)
+      : specs_(specs) {}
+
+  FMT_CONSTEXPR specs_setter(const specs_setter& other)
+      : specs_(other.specs_) {}
+
+  FMT_CONSTEXPR void on_align(align_t align) { specs_.align = align; }
+  FMT_CONSTEXPR void on_fill(Char fill) { specs_.fill[0] = fill; }
+  FMT_CONSTEXPR void on_plus() { specs_.sign = sign::plus; }
+  FMT_CONSTEXPR void on_minus() { specs_.sign = sign::minus; }
+  FMT_CONSTEXPR void on_space() { specs_.sign = sign::space; }
+  FMT_CONSTEXPR void on_hash() { specs_.alt = true; }
+
+  FMT_CONSTEXPR void on_zero() {
+    specs_.align = align::numeric;
+    specs_.fill[0] = Char('0');
+  }
+
+  FMT_CONSTEXPR void on_width(int width) { specs_.width = width; }
+  FMT_CONSTEXPR void on_precision(int precision) {
+    specs_.precision = precision;
+  }
+  FMT_CONSTEXPR void end_precision() {}
+
+  FMT_CONSTEXPR void on_type(Char type) {
+    specs_.type = static_cast<char>(type);
+  }
+
+ protected:
+  basic_format_specs<Char>& specs_;
+};
+
+template <typename ErrorHandler> class numeric_specs_checker {
+ public:
+  FMT_CONSTEXPR numeric_specs_checker(ErrorHandler& eh, internal::type arg_type)
+      : error_handler_(eh), arg_type_(arg_type) {}
+
+  FMT_CONSTEXPR void require_numeric_argument() {
+    if (!is_arithmetic_type(arg_type_))
+      error_handler_.on_error("format specifier requires numeric argument");
+  }
+
+  FMT_CONSTEXPR void check_sign() {
+    require_numeric_argument();
+    if (is_integral_type(arg_type_) && arg_type_ != int_type &&
+        arg_type_ != long_long_type && arg_type_ != internal::char_type) {
+      error_handler_.on_error("format specifier requires signed argument");
+    }
+  }
+
+  FMT_CONSTEXPR void check_precision() {
+    if (is_integral_type(arg_type_) || arg_type_ == internal::pointer_type)
+      error_handler_.on_error("precision not allowed for this argument type");
+  }
+
+ private:
+  ErrorHandler& error_handler_;
+  internal::type arg_type_;
+};
+
+// A format specifier handler that checks if specifiers are consistent with the
+// argument type.
+template <typename Handler> class specs_checker : public Handler {
+ public:
+  FMT_CONSTEXPR specs_checker(const Handler& handler, internal::type arg_type)
+      : Handler(handler), checker_(*this, arg_type) {}
+
+  FMT_CONSTEXPR specs_checker(const specs_checker& other)
+      : Handler(other), checker_(*this, other.arg_type_) {}
+
+  FMT_CONSTEXPR void on_align(align_t align) {
+    if (align == align::numeric) checker_.require_numeric_argument();
+    Handler::on_align(align);
+  }
+
+  FMT_CONSTEXPR void on_plus() {
+    checker_.check_sign();
+    Handler::on_plus();
+  }
+
+  FMT_CONSTEXPR void on_minus() {
+    checker_.check_sign();
+    Handler::on_minus();
+  }
+
+  FMT_CONSTEXPR void on_space() {
+    checker_.check_sign();
+    Handler::on_space();
+  }
+
+  FMT_CONSTEXPR void on_hash() {
+    checker_.require_numeric_argument();
+    Handler::on_hash();
+  }
+
+  FMT_CONSTEXPR void on_zero() {
+    checker_.require_numeric_argument();
+    Handler::on_zero();
+  }
+
+  FMT_CONSTEXPR void end_precision() { checker_.check_precision(); }
+
+ private:
+  numeric_specs_checker<Handler> checker_;
+};
+
+template