diff --git a/.clang-tidy b/.clang-tidy index 13c1b116eadb..dc1cebe94304 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -96,7 +96,6 @@ Checks: [ '-modernize-use-default-member-init', '-modernize-use-emplace', '-modernize-use-nodiscard', - '-modernize-use-override', '-modernize-use-trailing-return-type', '-performance-inefficient-string-concatenation', @@ -120,7 +119,6 @@ Checks: [ '-readability-named-parameter', '-readability-redundant-declaration', '-readability-simplify-boolean-expr', - '-readability-static-accessed-through-instance', '-readability-suspicious-call-argument', '-readability-uppercase-literal-suffix', '-readability-use-anyofallof', diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index 816bdfd4f31a..64372a90613e 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -23,6 +23,10 @@ jobs: clear-repository: true # to ensure correct digests fetch-depth: 0 # to get version filter: tree:0 + - name: Check sync PR + run: | + cd "$GITHUB_WORKSPACE/tests/ci" + python3 sync_pr.py || : - name: Python unit tests run: | cd "$GITHUB_WORKSPACE/tests/ci" diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index ff0adee14433..74ce8452de8b 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -157,7 +157,7 @@ jobs: ################################# Stage Final ################################# # FinishCheck: - if: ${{ !failure() && !cancelled() }} + if: ${{ !failure() && !cancelled() && github.event_name != 'merge_group' }} needs: [Tests_1, Tests_2] runs-on: [self-hosted, style-checker] steps: diff --git a/CHANGELOG.md b/CHANGELOG.md index 84e51c1efdfe..dd88f3ee2c79 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -123,7 +123,6 @@ * Something was wrong with Apache Hive, which is experimental and not supported. [#60262](https://github.com/ClickHouse/ClickHouse/pull/60262) ([shanfengp](https://github.com/Aed-p)). * An improvement for experimental parallel replicas: force reanalysis if parallel replicas changed [#60362](https://github.com/ClickHouse/ClickHouse/pull/60362) ([Raúl Marín](https://github.com/Algunenano)). * Fix usage of plain metadata type with new disks configuration option [#60396](https://github.com/ClickHouse/ClickHouse/pull/60396) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Don't allow to set max_parallel_replicas to 0 as it doesn't make sense [#60430](https://github.com/ClickHouse/ClickHouse/pull/60430) ([Kruglov Pavel](https://github.com/Avogar)). * Try to fix logical error 'Cannot capture column because it has incompatible type' in mapContainsKeyLike [#60451](https://github.com/ClickHouse/ClickHouse/pull/60451) ([Kruglov Pavel](https://github.com/Avogar)). * Avoid calculation of scalar subqueries for CREATE TABLE. [#60464](https://github.com/ClickHouse/ClickHouse/pull/60464) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). * Fix deadlock in parallel parsing when lots of rows are skipped due to errors [#60516](https://github.com/ClickHouse/ClickHouse/pull/60516) ([Kruglov Pavel](https://github.com/Avogar)). 
diff --git a/base/base/scope_guard.h b/base/base/scope_guard.h index 03670792d596..e6789c5cb1bb 100644 --- a/base/base/scope_guard.h +++ b/base/base/scope_guard.h @@ -29,11 +29,13 @@ class [[nodiscard]] BasicScopeGuard requires std::is_convertible_v constexpr BasicScopeGuard & operator=(BasicScopeGuard && src) // NOLINT(cppcoreguidelines-rvalue-reference-param-not-moved, cppcoreguidelines-noexcept-move-operations) { - if (this != &src) + if constexpr (std::is_same_v) { - invoke(); - function = src.release(); + if (this == &src) + return *this; } + invoke(); + function = src.release(); return *this; } diff --git a/base/base/wide_integer_impl.h b/base/base/wide_integer_impl.h index 17b1fa7cd6a5..0e98b6e5ee65 100644 --- a/base/base/wide_integer_impl.h +++ b/base/base/wide_integer_impl.h @@ -13,8 +13,6 @@ #include #include -#include - // NOLINTBEGIN(*) /// Use same extended double for all platforms @@ -22,6 +20,7 @@ #define CONSTEXPR_FROM_DOUBLE constexpr using FromDoubleIntermediateType = long double; #else +#include #include /// `wide_integer_from_builtin` can't be constexpr with non-literal `cpp_bin_float_double_extended` #define CONSTEXPR_FROM_DOUBLE @@ -309,6 +308,13 @@ struct integer::_impl constexpr uint64_t max_int = std::numeric_limits::max(); static_assert(std::is_same_v || std::is_same_v); /// Implementation specific behaviour on overflow (if we don't check here, stack overflow will triggered in bigint_cast). +#if (LDBL_MANT_DIG == 64) + if (!std::isfinite(t)) + { + self = 0; + return; + } +#else if constexpr (std::is_same_v) { if (!std::isfinite(t)) @@ -325,6 +331,7 @@ struct integer::_impl return; } } +#endif const T alpha = t / static_cast(max_int); diff --git a/base/poco/Foundation/src/pcre_compile.c b/base/poco/Foundation/src/pcre_compile.c index 3a6fafe8d569..b5f5f9a82864 100644 --- a/base/poco/Foundation/src/pcre_compile.c +++ b/base/poco/Foundation/src/pcre_compile.c @@ -4835,7 +4835,7 @@ for (;; ptr++) If the class contains characters outside the 0-255 range, a different opcode is compiled. It may optionally have a bit map for characters < 256, - but those above are are explicitly listed afterwards. A flag byte tells + but those above are explicitly listed afterwards. A flag byte tells whether the bitmap is present, and whether this is a negated class or not. In JavaScript compatibility mode, an isolated ']' causes an error. In diff --git a/base/poco/Net/include/Poco/Net/HTTPClientSession.h b/base/poco/Net/include/Poco/Net/HTTPClientSession.h index 1cef988566c5..edbb135d8c67 100644 --- a/base/poco/Net/include/Poco/Net/HTTPClientSession.h +++ b/base/poco/Net/include/Poco/Net/HTTPClientSession.h @@ -213,6 +213,19 @@ namespace Net Poco::Timespan getKeepAliveTimeout() const; /// Returns the connection timeout for HTTP connections. + void setKeepAliveMaxRequests(int max_requests); + + int getKeepAliveMaxRequests() const; + + int getKeepAliveRequest() const; + + bool isKeepAliveExpired(double reliability = 1.0) const; + /// Returns if the connection is expired with some margin as fraction of timeout as reliability + + double getKeepAliveReliability() const; + /// Returns the current fraction of keep alive timeout when connection is considered safe to use + /// It helps to avoid situation when a client uses nearly expired connection and receives NoMessageException + virtual std::ostream & sendRequest(HTTPRequest & request); /// Sends the header for the given HTTP request to /// the server. 
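The keep-alive limits declared above are negotiated through the standard `Connection` and `Keep-Alive` HTTP headers, which the rest of this patch teaches `HTTPMessage` to emit and parse. As an illustration (the concrete numbers are hypothetical, not taken from the patch), a keep-alive request or response configured with a 10 second timeout and a budget of 1000 requests would carry:

```
Connection: Keep-Alive
Keep-Alive: timeout=10, max=1000
```

The client then trims its own timeout and request budget to whatever the server advertises, and `isKeepAliveExpired()` treats the connection as stale slightly before the deadline (the 0.9 reliability factor) so that a request is not sent on a socket the server is about to close.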
@@ -345,6 +358,8 @@ namespace Net void assign(HTTPClientSession & session); + void setKeepAliveRequest(int request); + HTTPSessionFactory _proxySessionFactory; /// Factory to create HTTPClientSession to proxy. private: @@ -353,6 +368,8 @@ namespace Net Poco::UInt16 _port; ProxyConfig _proxyConfig; Poco::Timespan _keepAliveTimeout; + int _keepAliveCurrentRequest = 0; + int _keepAliveMaxRequests = 1000; Poco::Timestamp _lastRequest; bool _reconnect; bool _mustReconnect; @@ -361,6 +378,7 @@ namespace Net Poco::SharedPtr _pRequestStream; Poco::SharedPtr _pResponseStream; + static const double _defaultKeepAliveReliabilityLevel; static ProxyConfig _globalProxyConfig; HTTPClientSession(const HTTPClientSession &); @@ -450,9 +468,19 @@ namespace Net return _lastRequest; } - inline void HTTPClientSession::setLastRequest(Poco::Timestamp time) + inline double HTTPClientSession::getKeepAliveReliability() const + { + return _defaultKeepAliveReliabilityLevel; + } + + inline int HTTPClientSession::getKeepAliveMaxRequests() const + { + return _keepAliveMaxRequests; + } + + inline int HTTPClientSession::getKeepAliveRequest() const { - _lastRequest = time; + return _keepAliveCurrentRequest; } } diff --git a/base/poco/Net/include/Poco/Net/HTTPMessage.h b/base/poco/Net/include/Poco/Net/HTTPMessage.h index 0bef50803a8f..8bc95ccc1af5 100644 --- a/base/poco/Net/include/Poco/Net/HTTPMessage.h +++ b/base/poco/Net/include/Poco/Net/HTTPMessage.h @@ -120,6 +120,10 @@ namespace Net /// The value is set to "Keep-Alive" if keepAlive is /// true, or to "Close" otherwise. + void setKeepAliveTimeout(int timeout, int max_requests); + int getKeepAliveTimeout() const; + int getKeepAliveMaxRequests() const; + bool getKeepAlive() const; /// Returns true if /// * the message has a Connection header field and its value is "Keep-Alive" diff --git a/base/poco/Net/include/Poco/Net/HTTPServerParams.h b/base/poco/Net/include/Poco/Net/HTTPServerParams.h index 3c836a630a04..d614c62d57a3 100644 --- a/base/poco/Net/include/Poco/Net/HTTPServerParams.h +++ b/base/poco/Net/include/Poco/Net/HTTPServerParams.h @@ -44,7 +44,7 @@ namespace Net /// - timeout: 60 seconds /// - keepAlive: true /// - maxKeepAliveRequests: 0 - /// - keepAliveTimeout: 10 seconds + /// - keepAliveTimeout: 15 seconds void setServerName(const std::string & serverName); /// Sets the name and port (name:port) that the server uses to identify itself. diff --git a/base/poco/Net/include/Poco/Net/HTTPServerSession.h b/base/poco/Net/include/Poco/Net/HTTPServerSession.h index ec928af304fa..3df7995509a1 100644 --- a/base/poco/Net/include/Poco/Net/HTTPServerSession.h +++ b/base/poco/Net/include/Poco/Net/HTTPServerSession.h @@ -56,6 +56,8 @@ namespace Net SocketAddress serverAddress(); /// Returns the server's address. 
+ void setKeepAliveTimeout(Poco::Timespan keepAliveTimeout); + private: bool _firstRequest; Poco::Timespan _keepAliveTimeout; diff --git a/base/poco/Net/src/HTTPClientSession.cpp b/base/poco/Net/src/HTTPClientSession.cpp index 33a3dcc49014..c9899266be79 100644 --- a/base/poco/Net/src/HTTPClientSession.cpp +++ b/base/poco/Net/src/HTTPClientSession.cpp @@ -37,6 +37,7 @@ namespace Net { HTTPClientSession::ProxyConfig HTTPClientSession::_globalProxyConfig; +const double HTTPClientSession::_defaultKeepAliveReliabilityLevel = 0.9; HTTPClientSession::HTTPClientSession(): @@ -220,7 +221,41 @@ void HTTPClientSession::setGlobalProxyConfig(const ProxyConfig& config) void HTTPClientSession::setKeepAliveTimeout(const Poco::Timespan& timeout) { - _keepAliveTimeout = timeout; + if (connected()) + { + throw Poco::IllegalStateException("cannot change keep alive timeout on initiated connection, " + "That value is managed privately after connection is established."); + } + _keepAliveTimeout = timeout; +} + + +void HTTPClientSession::setKeepAliveMaxRequests(int max_requests) +{ + if (connected()) + { + throw Poco::IllegalStateException("cannot change keep alive max requests on initiated connection, " + "That value is managed privately after connection is established."); + } + _keepAliveMaxRequests = max_requests; +} + + +void HTTPClientSession::setKeepAliveRequest(int request) +{ + _keepAliveCurrentRequest = request; +} + + + +void HTTPClientSession::setLastRequest(Poco::Timestamp time) +{ + if (connected()) + { + throw Poco::IllegalStateException("cannot change last request on initiated connection, " + "That value is managed privately after connection is established."); + } + _lastRequest = time; } @@ -231,6 +266,8 @@ std::ostream& HTTPClientSession::sendRequest(HTTPRequest& request) clearException(); _responseReceived = false; + _keepAliveCurrentRequest += 1; + bool keepAlive = getKeepAlive(); if (((connected() && !keepAlive) || mustReconnect()) && !_host.empty()) { @@ -241,8 +278,10 @@ std::ostream& HTTPClientSession::sendRequest(HTTPRequest& request) { if (!connected()) reconnect(); - if (!keepAlive) - request.setKeepAlive(false); + if (!request.has(HTTPMessage::CONNECTION)) + request.setKeepAlive(keepAlive); + if (keepAlive && !request.has(HTTPMessage::CONNECTION_KEEP_ALIVE) && _keepAliveTimeout.totalSeconds() > 0) + request.setKeepAliveTimeout(_keepAliveTimeout.totalSeconds(), _keepAliveMaxRequests); if (!request.has(HTTPRequest::HOST) && !_host.empty()) request.setHost(_host, _port); if (!_proxyConfig.host.empty() && !bypassProxy()) @@ -324,6 +363,17 @@ std::istream& HTTPClientSession::receiveResponse(HTTPResponse& response) _mustReconnect = getKeepAlive() && !response.getKeepAlive(); + if (!_mustReconnect) + { + /// when server sends its keep alive timeout, client has to follow that value + auto timeout = response.getKeepAliveTimeout(); + if (timeout > 0) + _keepAliveTimeout = std::min(_keepAliveTimeout, Poco::Timespan(timeout, 0)); + auto max_requests = response.getKeepAliveMaxRequests(); + if (max_requests > 0) + _keepAliveMaxRequests = std::min(_keepAliveMaxRequests, max_requests); + } + if (!_expectResponseBody || response.getStatus() < 200 || response.getStatus() == HTTPResponse::HTTP_NO_CONTENT || response.getStatus() == HTTPResponse::HTTP_NOT_MODIFIED) _pResponseStream = new HTTPFixedLengthInputStream(*this, 0); else if (response.getChunkedTransferEncoding()) @@ -430,15 +480,18 @@ std::string HTTPClientSession::proxyRequestPrefix() const return result; } +bool 
HTTPClientSession::isKeepAliveExpired(double reliability) const +{ + Poco::Timestamp now; + return Timespan(Timestamp::TimeDiff(reliability *_keepAliveTimeout.totalMicroseconds())) <= now - _lastRequest + || _keepAliveCurrentRequest > _keepAliveMaxRequests; +} bool HTTPClientSession::mustReconnect() const { if (!_mustReconnect) - { - Poco::Timestamp now; - return _keepAliveTimeout <= now - _lastRequest; - } - else return true; + return isKeepAliveExpired(_defaultKeepAliveReliabilityLevel); + return true; } @@ -511,14 +564,21 @@ void HTTPClientSession::assign(Poco::Net::HTTPClientSession & session) if (buffered()) throw Poco::LogicException("assign to a session with not empty buffered data"); - attachSocket(session.detachSocket()); - setLastRequest(session.getLastRequest()); + poco_assert(!connected()); + setResolvedHost(session.getResolvedHost()); - setKeepAlive(session.getKeepAlive()); + setProxyConfig(session.getProxyConfig()); setTimeout(session.getConnectionTimeout(), session.getSendTimeout(), session.getReceiveTimeout()); + setKeepAlive(session.getKeepAlive()); + + setLastRequest(session.getLastRequest()); setKeepAliveTimeout(session.getKeepAliveTimeout()); - setProxyConfig(session.getProxyConfig()); + + _keepAliveMaxRequests = session._keepAliveMaxRequests; + _keepAliveCurrentRequest = session._keepAliveCurrentRequest; + + attachSocket(session.detachSocket()); session.reset(); } diff --git a/base/poco/Net/src/HTTPMessage.cpp b/base/poco/Net/src/HTTPMessage.cpp index 0cd234ee9cb3..c0083ec410c1 100644 --- a/base/poco/Net/src/HTTPMessage.cpp +++ b/base/poco/Net/src/HTTPMessage.cpp @@ -17,6 +17,7 @@ #include "Poco/NumberFormatter.h" #include "Poco/NumberParser.h" #include "Poco/String.h" +#include using Poco::NumberFormatter; @@ -179,4 +180,51 @@ bool HTTPMessage::getKeepAlive() const } +void HTTPMessage::setKeepAliveTimeout(int timeout, int max_requests) +{ + add(HTTPMessage::CONNECTION_KEEP_ALIVE, std::format("timeout={}, max={}", timeout, max_requests)); +} + + +int parseFromHeaderValues(const std::string_view header_value, const std::string_view param_name) +{ + auto param_value_pos = header_value.find(param_name); + if (param_value_pos == std::string::npos) + param_value_pos = header_value.size(); + if (param_value_pos != header_value.size()) + param_value_pos += param_name.size(); + + auto param_value_end = header_value.find(',', param_value_pos); + if (param_value_end == std::string::npos) + param_value_end = header_value.size(); + + auto timeout_value_substr = header_value.substr(param_value_pos, param_value_end - param_value_pos); + if (timeout_value_substr.empty()) + return -1; + + int value = 0; + auto [ptr, ec] = std::from_chars(timeout_value_substr.begin(), timeout_value_substr.end(), value); + + if (ec == std::errc()) + return value; + + return -1; +} + + +int HTTPMessage::getKeepAliveTimeout() const +{ + const std::string& ka_header = get(HTTPMessage::CONNECTION_KEEP_ALIVE, HTTPMessage::EMPTY); + static const std::string_view timeout_param = "timeout="; + return parseFromHeaderValues(ka_header, timeout_param); +} + + +int HTTPMessage::getKeepAliveMaxRequests() const +{ + const std::string& ka_header = get(HTTPMessage::CONNECTION_KEEP_ALIVE, HTTPMessage::EMPTY); + static const std::string_view timeout_param = "max="; + return parseFromHeaderValues(ka_header, timeout_param); +} + } } // namespace Poco::Net diff --git a/base/poco/Net/src/HTTPServerConnection.cpp b/base/poco/Net/src/HTTPServerConnection.cpp index c57984b0162e..d5eb29d31343 100644 --- 
a/base/poco/Net/src/HTTPServerConnection.cpp +++ b/base/poco/Net/src/HTTPServerConnection.cpp @@ -88,7 +88,18 @@ void HTTPServerConnection::run() pHandler->handleRequest(request, response); session.setKeepAlive(_pParams->getKeepAlive() && response.getKeepAlive() && session.canKeepAlive()); - } + + /// all that fuzz is all about to make session close with less timeout than 15s (set in HTTPServerParams c-tor) + if (_pParams->getKeepAlive() && response.getKeepAlive() && session.canKeepAlive()) + { + int value = response.getKeepAliveTimeout(); + if (value < 0) + value = request.getKeepAliveTimeout(); + if (value > 0) + session.setKeepAliveTimeout(Poco::Timespan(value, 0)); + } + + } else sendErrorResponse(session, HTTPResponse::HTTP_NOT_IMPLEMENTED); } catch (Poco::Exception&) diff --git a/base/poco/Net/src/HTTPServerSession.cpp b/base/poco/Net/src/HTTPServerSession.cpp index d4f2b24879e4..f67a63a9e0e9 100644 --- a/base/poco/Net/src/HTTPServerSession.cpp +++ b/base/poco/Net/src/HTTPServerSession.cpp @@ -33,6 +33,12 @@ HTTPServerSession::~HTTPServerSession() { } +void HTTPServerSession::setKeepAliveTimeout(Poco::Timespan keepAliveTimeout) +{ + _keepAliveTimeout = keepAliveTimeout; +} + + bool HTTPServerSession::hasMoreRequests() { diff --git a/contrib/NuRaft b/contrib/NuRaft index 08ac76ea80a3..cb5dc3c906e8 160000 --- a/contrib/NuRaft +++ b/contrib/NuRaft @@ -1 +1 @@ -Subproject commit 08ac76ea80a37f89b12109c805eafe9f1dc9b991 +Subproject commit cb5dc3c906e80f253e9ce9535807caef827cc2e0 diff --git a/contrib/arrow b/contrib/arrow index ba5c67934e82..8f36d71d1858 160000 --- a/contrib/arrow +++ b/contrib/arrow @@ -1 +1 @@ -Subproject commit ba5c67934e8274d649befcffab56731632dc5253 +Subproject commit 8f36d71d18587f1f315ec832f424183cb6519cbb diff --git a/contrib/avro-cmake/CMakeLists.txt b/contrib/avro-cmake/CMakeLists.txt index 63b3854eef90..96f740b6dd2e 100644 --- a/contrib/avro-cmake/CMakeLists.txt +++ b/contrib/avro-cmake/CMakeLists.txt @@ -59,12 +59,3 @@ target_link_libraries (_avrocpp PRIVATE boost::headers_only boost::iostreams) target_compile_definitions (_avrocpp PUBLIC SNAPPY_CODEC_AVAILABLE) target_include_directories (_avrocpp PRIVATE ${SNAPPY_INCLUDE_DIR}) target_link_libraries (_avrocpp PRIVATE ch_contrib::snappy) - -# create a symlink to include headers with -set(AVRO_INCLUDE_DIR "${CMAKE_CURRENT_BINARY_DIR}/include") -ADD_CUSTOM_TARGET(avro_symlink_headers ALL - COMMAND ${CMAKE_COMMAND} -E make_directory "${AVRO_INCLUDE_DIR}" - COMMAND ${CMAKE_COMMAND} -E create_symlink "${AVROCPP_ROOT_DIR}/api" "${AVRO_INCLUDE_DIR}/avro" -) -add_dependencies(_avrocpp avro_symlink_headers) -target_include_directories(_avrocpp SYSTEM BEFORE PUBLIC "${AVRO_INCLUDE_DIR}") diff --git a/contrib/libssh-cmake/CMakeLists.txt b/contrib/libssh-cmake/CMakeLists.txt index 7b589718140c..ecd1fccb800e 100644 --- a/contrib/libssh-cmake/CMakeLists.txt +++ b/contrib/libssh-cmake/CMakeLists.txt @@ -1,26 +1,18 @@ -option (ENABLE_SSH "Enable support for SSH keys and protocol" ${ENABLE_LIBRARIES}) +option (ENABLE_SSH "Enable support for libssh" ${ENABLE_LIBRARIES}) if (NOT ENABLE_SSH) - message(STATUS "Not using SSH") + message(STATUS "Not using libssh") return() endif() +# CMake variables needed by libssh_version.h.cmake, update them when you update libssh +set(libssh_VERSION_MAJOR 0) +set(libssh_VERSION_MINOR 9) +set(libssh_VERSION_PATCH 8) + set(LIB_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/libssh") set(LIB_BINARY_DIR "${ClickHouse_BINARY_DIR}/contrib/libssh") -# Set CMake variables which are used in 
libssh_version.h.cmake -project(libssh VERSION 0.9.8 LANGUAGES C) - -set(LIBRARY_VERSION "4.8.8") -set(LIBRARY_SOVERSION "4") - -set(CMAKE_THREAD_PREFER_PTHREADS ON) -set(THREADS_PREFER_PTHREAD_FLAG ON) - -set(WITH_ZLIB OFF) -set(WITH_SYMBOL_VERSIONING OFF) -set(WITH_SERVER ON) - set(libssh_SRCS ${LIB_SOURCE_DIR}/src/agent.c ${LIB_SOURCE_DIR}/src/auth.c @@ -28,15 +20,21 @@ set(libssh_SRCS ${LIB_SOURCE_DIR}/src/bignum.c ${LIB_SOURCE_DIR}/src/buffer.c ${LIB_SOURCE_DIR}/src/callbacks.c + ${LIB_SOURCE_DIR}/src/chachapoly.c ${LIB_SOURCE_DIR}/src/channels.c ${LIB_SOURCE_DIR}/src/client.c ${LIB_SOURCE_DIR}/src/config.c + ${LIB_SOURCE_DIR}/src/config_parser.c ${LIB_SOURCE_DIR}/src/connect.c ${LIB_SOURCE_DIR}/src/connector.c ${LIB_SOURCE_DIR}/src/curve25519.c ${LIB_SOURCE_DIR}/src/dh.c ${LIB_SOURCE_DIR}/src/ecdh.c ${LIB_SOURCE_DIR}/src/error.c + ${LIB_SOURCE_DIR}/src/external/bcrypt_pbkdf.c + ${LIB_SOURCE_DIR}/src/external/blowfish.c + ${LIB_SOURCE_DIR}/src/external/chacha.c + ${LIB_SOURCE_DIR}/src/external/poly1305.c ${LIB_SOURCE_DIR}/src/getpass.c ${LIB_SOURCE_DIR}/src/init.c ${LIB_SOURCE_DIR}/src/kdf.c @@ -55,37 +53,32 @@ set(libssh_SRCS ${LIB_SOURCE_DIR}/src/pcap.c ${LIB_SOURCE_DIR}/src/pki.c ${LIB_SOURCE_DIR}/src/pki_container_openssh.c + ${LIB_SOURCE_DIR}/src/pki_ed25519_common.c ${LIB_SOURCE_DIR}/src/poll.c - ${LIB_SOURCE_DIR}/src/session.c ${LIB_SOURCE_DIR}/src/scp.c + ${LIB_SOURCE_DIR}/src/session.c ${LIB_SOURCE_DIR}/src/socket.c ${LIB_SOURCE_DIR}/src/string.c ${LIB_SOURCE_DIR}/src/threads.c - ${LIB_SOURCE_DIR}/src/wrapper.c - ${LIB_SOURCE_DIR}/src/external/bcrypt_pbkdf.c - ${LIB_SOURCE_DIR}/src/external/blowfish.c - ${LIB_SOURCE_DIR}/src/external/chacha.c - ${LIB_SOURCE_DIR}/src/external/poly1305.c - ${LIB_SOURCE_DIR}/src/chachapoly.c - ${LIB_SOURCE_DIR}/src/config_parser.c ${LIB_SOURCE_DIR}/src/token.c - ${LIB_SOURCE_DIR}/src/pki_ed25519_common.c + ${LIB_SOURCE_DIR}/src/wrapper.c + # some files of libssh/src/ are missing - why? ${LIB_SOURCE_DIR}/src/threads/noop.c ${LIB_SOURCE_DIR}/src/threads/pthread.c + # files missing - why? 
# LIBCRYPT specific - ${libssh_SRCS} - ${LIB_SOURCE_DIR}/src/threads/libcrypto.c - ${LIB_SOURCE_DIR}/src/pki_crypto.c + ${LIB_SOURCE_DIR}/src/dh_crypto.c ${LIB_SOURCE_DIR}/src/ecdh_crypto.c ${LIB_SOURCE_DIR}/src/libcrypto.c - ${LIB_SOURCE_DIR}/src/dh_crypto.c + ${LIB_SOURCE_DIR}/src/pki_crypto.c + ${LIB_SOURCE_DIR}/src/threads/libcrypto.c - ${LIB_SOURCE_DIR}/src/options.c - ${LIB_SOURCE_DIR}/src/server.c ${LIB_SOURCE_DIR}/src/bind.c ${LIB_SOURCE_DIR}/src/bind_config.c + ${LIB_SOURCE_DIR}/src/options.c + ${LIB_SOURCE_DIR}/src/server.c ) if (NOT (ENABLE_OPENSSL OR ENABLE_OPENSSL_DYNAMIC)) @@ -94,7 +87,7 @@ endif() configure_file(${LIB_SOURCE_DIR}/include/libssh/libssh_version.h.cmake ${LIB_BINARY_DIR}/include/libssh/libssh_version.h @ONLY) -add_library(_ssh STATIC ${libssh_SRCS}) +add_library(_ssh ${libssh_SRCS}) add_library(ch_contrib::ssh ALIAS _ssh) target_link_libraries(_ssh PRIVATE OpenSSL::Crypto) diff --git a/contrib/nuraft-cmake/CMakeLists.txt b/contrib/nuraft-cmake/CMakeLists.txt index 970ca4b9ce1a..736e91e359d7 100644 --- a/contrib/nuraft-cmake/CMakeLists.txt +++ b/contrib/nuraft-cmake/CMakeLists.txt @@ -51,6 +51,12 @@ else() target_compile_definitions(_nuraft PRIVATE USE_BOOST_ASIO=1 BOOST_ASIO_STANDALONE=1) endif() +target_link_libraries (_nuraft PRIVATE clickhouse_common_io) +# We must have it PUBLIC here because some headers which depend on it directly +# included in clickhouse +target_compile_definitions(_nuraft PUBLIC USE_CLICKHOUSE_THREADS=1) +MESSAGE(STATUS "Will use clickhouse threads for NuRaft") + target_include_directories (_nuraft SYSTEM PRIVATE "${LIBRARY_DIR}/include/libnuraft") # for some reason include "asio.h" directly without "boost/" prefix. target_include_directories (_nuraft SYSTEM PRIVATE "${ClickHouse_SOURCE_DIR}/contrib/boost/boost") diff --git a/docker/keeper/Dockerfile b/docker/keeper/Dockerfile index 3daa62cb212a..346868e19c46 100644 --- a/docker/keeper/Dockerfile +++ b/docker/keeper/Dockerfile @@ -34,7 +34,7 @@ RUN arch=${TARGETARCH:-amd64} \ # lts / testing / prestable / etc ARG REPO_CHANNEL="stable" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" -ARG VERSION="24.3.1.2672" +ARG VERSION="24.3.2.23" ARG PACKAGES="clickhouse-keeper" ARG DIRECT_DOWNLOAD_URLS="" diff --git a/docker/server/Dockerfile.alpine b/docker/server/Dockerfile.alpine index ace01ae9a9f5..36f09c092f85 100644 --- a/docker/server/Dockerfile.alpine +++ b/docker/server/Dockerfile.alpine @@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \ # lts / testing / prestable / etc ARG REPO_CHANNEL="stable" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" -ARG VERSION="24.3.1.2672" +ARG VERSION="24.3.2.23" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" ARG DIRECT_DOWNLOAD_URLS="" diff --git a/docker/server/Dockerfile.ubuntu b/docker/server/Dockerfile.ubuntu index e92823b686a9..531a50efe969 100644 --- a/docker/server/Dockerfile.ubuntu +++ b/docker/server/Dockerfile.ubuntu @@ -27,7 +27,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list ARG REPO_CHANNEL="stable" ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main" -ARG VERSION="24.3.1.2672" +ARG VERSION="24.3.2.23" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" # set non-empty deb_location_url url to create a docker image diff --git a/docker/test/fuzzer/query-fuzzer-tweaks-users.xml b/docker/test/fuzzer/query-fuzzer-tweaks-users.xml index 
023f257253a4..c31d2fd7f397 100644 --- a/docker/test/fuzzer/query-fuzzer-tweaks-users.xml +++ b/docker/test/fuzzer/query-fuzzer-tweaks-users.xml @@ -26,6 +26,11 @@ 200 + + + + + diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index b9ed0561a48e..271f30d187b5 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -16,6 +16,8 @@ ln -snf "/usr/share/zoneinfo/$TZ" /etc/localtime && echo "$TZ" > /etc/timezone dpkg -i package_folder/clickhouse-common-static_*.deb dpkg -i package_folder/clickhouse-common-static-dbg_*.deb +dpkg -i package_folder/clickhouse-odbc-bridge_*.deb +dpkg -i package_folder/clickhouse-library-bridge_*.deb dpkg -i package_folder/clickhouse-server_*.deb dpkg -i package_folder/clickhouse-client_*.deb @@ -41,6 +43,8 @@ source /utils.lib if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then echo "Azure is disabled" +elif [[ -n "$USE_SHARED_CATALOG" ]] && [[ "$USE_SHARED_CATALOG" -eq 1 ]]; then + echo "Azure is disabled" else azurite-blob --blobHost 0.0.0.0 --blobPort 10000 --debug /azurite_log & fi @@ -137,6 +141,32 @@ if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]] MAX_RUN_TIME=$((MAX_RUN_TIME != 0 ? MAX_RUN_TIME : 9000)) # set to 2.5 hours if 0 (unlimited) fi +if [[ -n "$USE_SHARED_CATALOG" ]] && [[ "$USE_SHARED_CATALOG" -eq 1 ]]; then + sudo cat /etc/clickhouse-server1/config.d/filesystem_caches_path.xml \ + | sed "s|/var/lib/clickhouse/filesystem_caches/|/var/lib/clickhouse/filesystem_caches_1/|" \ + > /etc/clickhouse-server1/config.d/filesystem_caches_path.xml.tmp + mv /etc/clickhouse-server1/config.d/filesystem_caches_path.xml.tmp /etc/clickhouse-server1/config.d/filesystem_caches_path.xml + + sudo cat /etc/clickhouse-server1/config.d/filesystem_caches_path.xml \ + | sed "s|/var/lib/clickhouse/filesystem_caches/|/var/lib/clickhouse/filesystem_caches_1/|" \ + > /etc/clickhouse-server1/config.d/filesystem_caches_path.xml.tmp + mv /etc/clickhouse-server1/config.d/filesystem_caches_path.xml.tmp /etc/clickhouse-server1/config.d/filesystem_caches_path.xml + + mkdir -p /var/run/clickhouse-server1 + sudo chown clickhouse:clickhouse /var/run/clickhouse-server1 + sudo -E -u clickhouse /usr/bin/clickhouse server --config /etc/clickhouse-server1/config.xml --daemon \ + --pid-file /var/run/clickhouse-server1/clickhouse-server.pid \ + -- --path /var/lib/clickhouse1/ --logger.stderr /var/log/clickhouse-server/stderr1.log \ + --logger.log /var/log/clickhouse-server/clickhouse-server1.log --logger.errorlog /var/log/clickhouse-server/clickhouse-server1.err.log \ + --tcp_port 19000 --tcp_port_secure 19440 --http_port 18123 --https_port 18443 --interserver_http_port 19009 --tcp_with_proxy_port 19010 \ + --mysql_port 19004 --postgresql_port 19005 \ + --keeper_server.tcp_port 19181 --keeper_server.server_id 2 \ + --prometheus.port 19988 \ + --macros.replica r2 # It doesn't work :( + + MAX_RUN_TIME=$((MAX_RUN_TIME < 9000 ? MAX_RUN_TIME : 9000)) # min(MAX_RUN_TIME, 2.5 hours) + MAX_RUN_TIME=$((MAX_RUN_TIME != 0 ? MAX_RUN_TIME : 9000)) # set to 2.5 hours if 0 (unlimited) +fi # Wait for the server to start, but not for too long. 
for _ in {1..100} @@ -183,6 +213,10 @@ function run_tests() ADDITIONAL_OPTIONS+=('--s3-storage') fi + if [[ -n "$USE_SHARED_CATALOG" ]] && [[ "$USE_SHARED_CATALOG" -eq 1 ]]; then + ADDITIONAL_OPTIONS+=('--shared-catalog') + fi + if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then ADDITIONAL_OPTIONS+=('--replicated-database') # Too many tests fail for DatabaseReplicated in parallel. @@ -264,6 +298,12 @@ do echo "$err" [[ "0" != "${#err}" ]] && failed_to_save_logs=1 fi + + if [[ -n "$USE_SHARED_CATALOG" ]] && [[ "$USE_SHARED_CATALOG" -eq 1 ]]; then + err=$( { clickhouse-client --port 19000 -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.1.tsv.zst; } 2>&1 ) + echo "$err" + [[ "0" != "${#err}" ]] && failed_to_save_logs=1 + fi done # Stop server so we can safely read data with clickhouse-local. @@ -275,6 +315,10 @@ if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]] sudo clickhouse stop --pid-path /var/run/clickhouse-server2 ||: fi +if [[ -n "$USE_SHARED_CATALOG" ]] && [[ "$USE_SHARED_CATALOG" -eq 1 ]]; then + sudo clickhouse stop --pid-path /var/run/clickhouse-server1 ||: +fi + rg -Fa "" /var/log/clickhouse-server/clickhouse-server.log ||: rg -A50 -Fa "============" /var/log/clickhouse-server/stderr.log ||: zstd --threads=0 < /var/log/clickhouse-server/clickhouse-server.log > /test_output/clickhouse-server.log.zst & @@ -302,6 +346,10 @@ if [ $failed_to_save_logs -ne 0 ]; then clickhouse-local --path /var/lib/clickhouse1/ --only-system-tables --stacktrace -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.1.tsv.zst ||: clickhouse-local --path /var/lib/clickhouse2/ --only-system-tables --stacktrace -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.2.tsv.zst ||: fi + + if [[ -n "$USE_SHARED_CATALOG" ]] && [[ "$USE_SHARED_CATALOG" -eq 1 ]]; then + clickhouse-local --path /var/lib/clickhouse1/ --only-system-tables --stacktrace -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.1.tsv.zst ||: + fi done fi @@ -341,3 +389,10 @@ if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]] tar -chf /test_output/coordination1.tar /var/lib/clickhouse1/coordination ||: tar -chf /test_output/coordination2.tar /var/lib/clickhouse2/coordination ||: fi + +if [[ -n "$USE_SHARED_CATALOG" ]] && [[ "$USE_SHARED_CATALOG" -eq 1 ]]; then + rg -Fa "" /var/log/clickhouse-server/clickhouse-server1.log ||: + zstd --threads=0 < /var/log/clickhouse-server/clickhouse-server1.log > /test_output/clickhouse-server1.log.zst ||: + mv /var/log/clickhouse-server/stderr1.log /test_output/ ||: + tar -chf /test_output/coordination1.tar /var/lib/clickhouse1/coordination ||: +fi diff --git a/docs/changelogs/v24.3.2.23-lts.md b/docs/changelogs/v24.3.2.23-lts.md new file mode 100644 index 000000000000..4d59a1cedf60 --- /dev/null +++ b/docs/changelogs/v24.3.2.23-lts.md @@ -0,0 +1,29 @@ +--- +sidebar_position: 1 +sidebar_label: 2024 +--- + +# 2024 Changelog + +### ClickHouse release v24.3.2.23-lts (8b7d910960c) FIXME as compared to v24.3.1.2672-lts (2c5c589a882) + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Fix logical error in group_by_use_nulls + grouping set + analyzer + materialize/constant [#61567](https://github.com/ClickHouse/ClickHouse/pull/61567) ([Kruglov Pavel](https://github.com/Avogar)). 
+* Fix external table cannot parse data type Bool [#62115](https://github.com/ClickHouse/ClickHouse/pull/62115) ([Duc Canh Le](https://github.com/canhld94)). +* Revert "Merge pull request [#61564](https://github.com/ClickHouse/ClickHouse/issues/61564) from liuneng1994/optimize_in_single_value" [#62135](https://github.com/ClickHouse/ClickHouse/pull/62135) ([Raúl Marín](https://github.com/Algunenano)). + +#### CI Fix or Improvement (changelog entry is not required) + +* Backported in [#62030](https://github.com/ClickHouse/ClickHouse/issues/62030):. [#61869](https://github.com/ClickHouse/ClickHouse/pull/61869) ([Nikita Fomichev](https://github.com/fm4v)). +* Backported in [#62057](https://github.com/ClickHouse/ClickHouse/issues/62057): ... [#62044](https://github.com/ClickHouse/ClickHouse/pull/62044) ([Max K.](https://github.com/maxknv)). +* Backported in [#62204](https://github.com/ClickHouse/ClickHouse/issues/62204):. [#62190](https://github.com/ClickHouse/ClickHouse/pull/62190) ([Konstantin Bogdanov](https://github.com/thevar1able)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Fix some crashes with analyzer and group_by_use_nulls. [#61933](https://github.com/ClickHouse/ClickHouse/pull/61933) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix scalars create as select [#61998](https://github.com/ClickHouse/ClickHouse/pull/61998) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Ignore IfChainToMultiIfPass if returned type changed. [#62059](https://github.com/ClickHouse/ClickHouse/pull/62059) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix type for ConvertInToEqualPass [#62066](https://github.com/ClickHouse/ClickHouse/pull/62066) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Revert output Pretty in tty [#62090](https://github.com/ClickHouse/ClickHouse/pull/62090) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + diff --git a/docs/en/engines/table-engines/mergetree-family/aggregatingmergetree.md b/docs/en/engines/table-engines/mergetree-family/aggregatingmergetree.md index 62191d9b5e4c..7a449f400fdc 100644 --- a/docs/en/engines/table-engines/mergetree-family/aggregatingmergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/aggregatingmergetree.md @@ -68,6 +68,12 @@ In the results of `SELECT` query, the values of `AggregateFunction` type have im ## Example of an Aggregated Materialized View {#example-of-an-aggregated-materialized-view} +The following example assumes that you have a database named `test`, so make sure you create it if it doesn't already exist: + +```sql +CREATE DATABASE test; +``` + We will create the table `test.visits` that contain the raw data: ``` sql CREATE TABLE test.visits ( StartDate DateTime64 NOT NULL, CounterID UInt64, Sign Nullable(Int32), UserID Nullable(Int32) ) ENGINE = MergeTree ORDER BY (StartDate, CounterID); ``` +Next, we need to create an `AggregatingMergeTree` table that will store `AggregateFunction`s that keep track of the total number of visits and the number of unique users.
+ `AggregatingMergeTree` materialized view that watches the `test.visits` table, and use the `AggregateFunction` type: ``` sql -CREATE MATERIALIZED VIEW test.mv_visits -( +CREATE TABLE test.agg_visits ( StartDate DateTime64 NOT NULL, CounterID UInt64, Visits AggregateFunction(sum, Nullable(Int32)), Users AggregateFunction(uniq, Nullable(Int32)) ) -ENGINE = AggregatingMergeTree() ORDER BY (StartDate, CounterID) +ENGINE = AggregatingMergeTree() ORDER BY (StartDate, CounterID); +``` + +And then let's create a materialized view that populates `test.agg_visits` from `test.visits` : + +```sql +CREATE MATERIALIZED VIEW test.visits_mv TO test.agg_visits AS SELECT StartDate, CounterID, @@ -104,25 +117,45 @@ Inserting data into the `test.visits` table. ``` sql INSERT INTO test.visits (StartDate, CounterID, Sign, UserID) - VALUES (1667446031, 1, 3, 4) -INSERT INTO test.visits (StartDate, CounterID, Sign, UserID) - VALUES (1667446031, 1, 6, 3) + VALUES (1667446031000, 1, 3, 4), (1667446031000, 1, 6, 3); ``` -The data is inserted in both the table and the materialized view `test.mv_visits`. +The data is inserted in both `test.visits` and `test.agg_visits`. To get the aggregated data, we need to execute a query such as `SELECT ... GROUP BY ...` from the materialized view `test.mv_visits`: -``` sql +```sql SELECT StartDate, sumMerge(Visits) AS Visits, uniqMerge(Users) AS Users -FROM test.mv_visits +FROM test.agg_visits GROUP BY StartDate ORDER BY StartDate; ``` +```text +┌───────────────StartDate─┬─Visits─┬─Users─┐ +│ 2022-11-03 03:27:11.000 │ 9 │ 2 │ +└─────────────────────────┴────────┴───────┘ +``` + +And how about if we add another couple of records to `test.visits`, but this time we'll use a different timestamp for one of the records: + +```sql +INSERT INTO test.visits (StartDate, CounterID, Sign, UserID) + VALUES (1669446031000, 2, 5, 10), (1667446031000, 3, 7, 5); +``` + +If we then run the `SELECT` query again, we'll see the following output: + +```text +┌───────────────StartDate─┬─Visits─┬─Users─┐ +│ 2022-11-03 03:27:11.000 │ 16 │ 3 │ +│ 2022-11-26 07:00:31.000 │ 5 │ 1 │ +└─────────────────────────┴────────┴───────┘ +``` + ## Related Content - Blog: [Using Aggregate Combinators in ClickHouse](https://clickhouse.com/blog/aggregate-functions-combinators-in-clickhouse-for-arrays-maps-and-states) diff --git a/docs/en/engines/table-engines/special/memory.md b/docs/en/engines/table-engines/special/memory.md index 19b5c798a76b..f28157ebde2f 100644 --- a/docs/en/engines/table-engines/special/memory.md +++ b/docs/en/engines/table-engines/special/memory.md @@ -45,6 +45,11 @@ Upper and lower bounds can be specified to limit Memory engine table size, effec CREATE TABLE memory (i UInt32) ENGINE = Memory SETTINGS min_rows_to_keep = 100, max_rows_to_keep = 1000; ``` +**Modify settings** +```sql +ALTER TABLE memory MODIFY SETTING min_rows_to_keep = 100, max_rows_to_keep = 1000; +``` + **Note:** Both `bytes` and `rows` capping parameters can be set at the same time, however, the lower bounds of `max` and `min` will be adhered to. 
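To illustrate the note above: both kinds of caps can be combined on one table. The row-level settings appear in the example earlier on this page, while the byte-level names used below (`min_bytes_to_keep`, `max_bytes_to_keep`) are an assumption made for this sketch:

```sql
-- Sketch: cap the Memory table both by row count and by size in bytes.
-- The byte-level setting names are assumed, not taken from this page.
CREATE TABLE memory (i UInt32) ENGINE = Memory
SETTINGS min_rows_to_keep = 100, max_rows_to_keep = 1000,
         min_bytes_to_keep = 4096, max_bytes_to_keep = 16384;
```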
## Examples {#examples} @@ -97,3 +102,4 @@ SELECT total_bytes, total_rows FROM system.tables WHERE name = 'memory' and data │ 65536 │ 10000 │ └─────────────┴────────────┘ ``` + diff --git a/docs/en/getting-started/example-datasets/menus.md b/docs/en/getting-started/example-datasets/menus.md index 32fe62865d4e..5a35c1d45bc2 100644 --- a/docs/en/getting-started/example-datasets/menus.md +++ b/docs/en/getting-started/example-datasets/menus.md @@ -18,6 +18,9 @@ Run the command: ```bash wget https://s3.amazonaws.com/menusdata.nypl.org/gzips/2021_08_01_07_01_17_data.tgz +# Option: Validate the checksum +md5sum 2021_08_01_07_01_17_data.tgz +# Checksum should be equal to: db6126724de939a5481e3160a2d67d15 ``` Replace the link to the up to date link from http://menus.nypl.org/data if needed. diff --git a/docs/en/getting-started/example-datasets/opensky.md b/docs/en/getting-started/example-datasets/opensky.md index df28809495cf..c0b4d96725da 100644 --- a/docs/en/getting-started/example-datasets/opensky.md +++ b/docs/en/getting-started/example-datasets/opensky.md @@ -7,7 +7,7 @@ title: "Crowdsourced air traffic data from The OpenSky Network 2020" The data in this dataset is derived and cleaned from the full OpenSky dataset to illustrate the development of air traffic during the COVID-19 pandemic. It spans all flights seen by the network's more than 2500 members since 1 January 2019. More data will be periodically included in the dataset until the end of the COVID-19 pandemic. -Source: https://zenodo.org/record/5092942#.YRBCyTpRXYd +Source: https://zenodo.org/records/5092942 Martin Strohmeier, Xavier Olive, Jannis Luebbe, Matthias Schaefer, and Vincent Lenders "Crowdsourced air traffic data from the OpenSky Network 2019–2020" @@ -19,7 +19,7 @@ https://doi.org/10.5194/essd-13-357-2021 Run the command: ```bash -wget -O- https://zenodo.org/record/5092942 | grep -oP 'https://zenodo.org/record/5092942/files/flightlist_\d+_\d+\.csv\.gz' | xargs wget +wget -O- https://zenodo.org/records/5092942 | grep -oE 'https://zenodo.org/records/5092942/files/flightlist_[0-9]+_[0-9]+\.csv\.gz' | xargs wget ``` Download will take about 2 minutes with good internet connection. There are 30 files with total size of 4.3 GB. @@ -127,15 +127,15 @@ Average flight distance is around 1000 km. Query: ```sql -SELECT avg(geoDistance(longitude_1, latitude_1, longitude_2, latitude_2)) FROM opensky; +SELECT round(avg(geoDistance(longitude_1, latitude_1, longitude_2, latitude_2)), 2) FROM opensky; ``` Result: ```text -┌─avg(geoDistance(longitude_1, latitude_1, longitude_2, latitude_2))─┐ -│ 1041090.6465708319 │ -└────────────────────────────────────────────────────────────────────┘ + ┌─round(avg(geoDistance(longitude_1, latitude_1, longitude_2, latitude_2)), 2)─┐ +1. 
│ 1041090.67 │ -- 1.04 million + └──────────────────────────────────────────────────────────────────────────────┘ ``` ### Most busy origin airports and the average distance seen {#busy-airports-average-distance} diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index b6235fd11825..03cf345349e2 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -79,7 +79,7 @@ The supported formats are: | [RowBinary](#rowbinary) | ✔ | ✔ | | [RowBinaryWithNames](#rowbinarywithnamesandtypes) | ✔ | ✔ | | [RowBinaryWithNamesAndTypes](#rowbinarywithnamesandtypes) | ✔ | ✔ | -| [RowBinaryWithDefaults](#rowbinarywithdefaults) | ✔ | ✔ | +| [RowBinaryWithDefaults](#rowbinarywithdefaults) | ✔ | ✗ | | [Native](#native) | ✔ | ✔ | | [Null](#null) | ✗ | ✔ | | [XML](#xml) | ✗ | ✔ | @@ -1487,7 +1487,7 @@ Differs from [PrettySpaceNoEscapes](#prettyspacenoescapes) in that up to 10,000 - [output_format_pretty_max_value_width](/docs/en/operations/settings/settings-formats.md/#output_format_pretty_max_value_width) - Maximum width of value to display in Pretty formats. If greater - it will be cut. Default value - `10000`. - [output_format_pretty_color](/docs/en/operations/settings/settings-formats.md/#output_format_pretty_color) - use ANSI escape sequences to paint colors in Pretty formats. Default value - `true`. - [output_format_pretty_grid_charset](/docs/en/operations/settings/settings-formats.md/#output_format_pretty_grid_charset) - Charset for printing grid borders. Available charsets: ASCII, UTF-8. Default value - `UTF-8`. -- [output_format_pretty_row_numbers](/docs/en/operations/settings/settings-formats.md/#output_format_pretty_row_numbers) - Add row numbers before each row for pretty output format. Default value - `false`. +- [output_format_pretty_row_numbers](/docs/en/operations/settings/settings-formats.md/#output_format_pretty_row_numbers) - Add row numbers before each row for pretty output format. Default value - `true`. ## RowBinary {#rowbinary} @@ -2465,7 +2465,7 @@ Result: ## Npy {#data-format-npy} -This function is designed to load a NumPy array from a .npy file into ClickHouse. The NumPy file format is a binary format used for efficiently storing arrays of numerical data. During import, ClickHouse treats top level dimension as an array of rows with single column. Supported Npy data types and their corresponding type in ClickHouse: +This function is designed to load a NumPy array from a .npy file into ClickHouse. The NumPy file format is a binary format used for efficiently storing arrays of numerical data. During import, ClickHouse treats top level dimension as an array of rows with single column. 
Supported Npy data types and their corresponding type in ClickHouse: | Npy type | ClickHouse type | |:--------:|:---------------:| | b1 | UInt8 | diff --git a/docs/en/interfaces/http.md b/docs/en/interfaces/http.md index 4eeb19cefcfa..bba5cde16f1a 100644 --- a/docs/en/interfaces/http.md +++ b/docs/en/interfaces/http.md @@ -507,16 +507,18 @@ Example: ``` xml - [^/]+)(/(?P[^/]+))?]]> + [^/]+)]]> GET TEST_HEADER_VALUE - [^/]+)(/(?P[^/]+))?]]> + [^/]+)]]> predefined_query_handler - SELECT value FROM system.settings WHERE name = {name_1:String} - SELECT name, value FROM system.settings WHERE name = {name_2:String} + + SELECT name, value FROM system.settings + WHERE name IN ({name_1:String}, {name_2:String}) + @@ -524,13 +526,13 @@ Example: ``` ``` bash -$ curl -H 'XXX:TEST_HEADER_VALUE' -H 'PARAMS_XXX:max_threads' 'http://localhost:8123/query_param_with_url/1/max_threads/max_final_threads?max_threads=1&max_final_threads=2' -1 -max_final_threads 2 +$ curl -H 'XXX:TEST_HEADER_VALUE' -H 'PARAMS_XXX:max_final_threads' 'http://localhost:8123/query_param_with_url/max_threads?max_threads=1&max_final_threads=2' +max_final_threads 2 +max_threads 1 ``` :::note -In one `predefined_query_handler` only supports one `query` of an insert type. +In one `predefined_query_handler` only one `query` is supported. ::: ### dynamic_query_handler {#dynamic_query_handler} diff --git a/docs/en/operations/query-cache.md b/docs/en/operations/query-cache.md index a8532bc22b7e..7a920671fc23 100644 --- a/docs/en/operations/query-cache.md +++ b/docs/en/operations/query-cache.md @@ -67,8 +67,7 @@ SETTINGS use_query_cache = true, enable_writes_to_query_cache = false; For maximum control, it is generally recommended to provide settings `use_query_cache`, `enable_writes_to_query_cache` and `enable_reads_from_query_cache` only with specific queries. It is also possible to enable caching at user or profile level (e.g. via `SET -use_query_cache = true`) but one should keep in mind that all `SELECT` queries including monitoring or debugging queries to system tables -may return cached results then. +use_query_cache = true`) but one should keep in mind that all `SELECT` queries may return cached results then. The query cache can be cleared using statement `SYSTEM DROP QUERY CACHE`. The content of the query cache is displayed in system table [system.query_cache](system-tables/query_cache.md). The number of query cache hits and misses since database start are shown as events @@ -175,6 +174,10 @@ Also, results of queries with non-deterministic functions are not cached by defa To force caching of results of queries with non-deterministic functions regardless, use setting [query_cache_nondeterministic_function_handling](settings/settings.md#query-cache-nondeterministic-function-handling). +Results of queries that involve system tables, e.g. `system.processes` or `information_schema.tables`, are not cached by default. To force +caching of results of queries with system tables regardless, use setting +[query_cache_system_table_handling](settings/settings.md#query-cache-system-table-handling). + :::note Prior to ClickHouse v23.11, setting 'query_cache_store_results_of_queries_with_nondeterministic_functions = 0 / 1' controlled whether results of queries with non-deterministic results were cached. In newer ClickHouse versions, this setting is obsolete and has no effect. 
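A short usage sketch for the `query_cache_system_table_handling` setting described above; the specific query and value are only illustrative:

```sql
-- Query a system table with the query cache enabled; 'ignore' skips caching
-- for this statement instead of throwing an exception.
SELECT count() FROM system.processes
SETTINGS use_query_cache = true, query_cache_system_table_handling = 'ignore';
```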
diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md index 436321c8fe8f..eb93d9cda5bb 100644 --- a/docs/en/operations/server-configuration-parameters/settings.md +++ b/docs/en/operations/server-configuration-parameters/settings.md @@ -42,6 +42,19 @@ Type: UInt32 Default: 1 +## auth_use_forwarded_address + +Use originating address for authentication for clients connected through proxy. + +:::note +This setting should be used with extra caution since forwarded address can be easily spoofed - server accepting such authentication should not be accessed directly but rather exclusively through a trusted proxy. +::: + +Type: Bool + +Default: 0 + + ## background_buffer_flush_schedule_pool_size The maximum number of threads that will be used for performing flush operations for Buffer-engine tables in the background. @@ -436,7 +449,7 @@ Default: 0 Restriction on dropping partitions. If the size of a [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) table exceeds `max_partition_size_to_drop` (in bytes), you can’t drop a partition using a [DROP PARTITION](../../sql-reference/statements/alter/partition.md#drop-partitionpart) query. -This setting does not require a restart of the Clickhouse server to apply. Another way to disable the restriction is to create the `/flags/force_drop_table` file. +This setting does not require a restart of the ClickHouse server to apply. Another way to disable the restriction is to create the `/flags/force_drop_table` file. Default value: 50 GB. The value 0 means that you can drop partitions without any restrictions. @@ -518,7 +531,7 @@ Restriction on deleting tables. If the size of a [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) table exceeds `max_table_size_to_drop` (in bytes), you can’t delete it using a [DROP](../../sql-reference/statements/drop.md) query or [TRUNCATE](../../sql-reference/statements/truncate.md) query. -This setting does not require a restart of the Clickhouse server to apply. Another way to disable the restriction is to create the `/flags/force_drop_table` file. +This setting does not require a restart of the ClickHouse server to apply. Another way to disable the restriction is to create the `/flags/force_drop_table` file. Default value: 50 GB. The value 0 means that you can delete all tables without any restrictions. @@ -1570,7 +1583,7 @@ Restriction on deleting tables. If the size of a [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) table exceeds `max_table_size_to_drop` (in bytes), you can’t delete it using a [DROP](../../sql-reference/statements/drop.md) query or [TRUNCATE](../../sql-reference/statements/truncate.md) query. -This setting does not require a restart of the Clickhouse server to apply. Another way to disable the restriction is to create the `/flags/force_drop_table` file. +This setting does not require a restart of the ClickHouse server to apply. Another way to disable the restriction is to create the `/flags/force_drop_table` file. Default value: 50 GB. @@ -1588,7 +1601,7 @@ Restriction on dropping partitions. If the size of a [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) table exceeds `max_partition_size_to_drop` (in bytes), you can’t drop a partition using a [DROP PARTITION](../../sql-reference/statements/alter/partition.md#drop-partitionpart) query. -This setting does not require a restart of the Clickhouse server to apply. 
Another way to disable the restriction is to create the `/flags/force_drop_table` file. +This setting does not require a restart of the ClickHouse server to apply. Another way to disable the restriction is to create the `/flags/force_drop_table` file. Default value: 50 GB. diff --git a/docs/en/operations/settings/composable-protocols.md b/docs/en/operations/settings/composable-protocols.md new file mode 100644 index 000000000000..b68a5906abf1 --- /dev/null +++ b/docs/en/operations/settings/composable-protocols.md @@ -0,0 +1,155 @@ +--- +slug: /en/operations/settings/composable-protocols +sidebar_position: 64 +sidebar_label: Composable Protocols +--- + +# Composable Protocols + +Composable protocols allows more flexible configuration of TCP access to the ClickHouse server. This configuration can co-exist with or replace conventional configuration. + +## Composable protocols section is denoted as `protocols` in configuration xml +**Example:** +``` xml + + + +``` + +## Basic modules define protocol layers +**Example:** +``` xml + + + + + http + + + +``` +where: +- `plain_http` - name which can be referred by another layer +- `type` - denotes protocol handler which will be instantiated to process data, set of protocol handlers is predefined: + * `tcp` - native clickhouse protocol handler + * `http` - http clickhouse protocol handler + * `tls` - TLS encryption layer + * `proxy1` - PROXYv1 layer + * `mysql` - MySQL compatibility protocol handler + * `postgres` - PostgreSQL compatibility protocol handler + * `prometheus` - Prometheus protocol handler + * `interserver` - clickhouse interserver handler + +:::note +`gRPC` protocol handler is not implemented for `Composable protocols` +::: + +## Endpoint (i.e. listening port) is denoted by `` and (optional) `` tags +**Example:** +``` xml + + + + + http + + 127.0.0.1 + 8123 + + + + +``` +If `` is omitted, then `` from root config is used. + +## Layers sequence is defined by `` tag, referencing another module +**Example:** definition for HTTPS protocol +``` xml + + + + + http + + + + + tls + plain_http + 127.0.0.1 + 8443 + + + +``` + +## Endpoint can be attached to any layer +**Example:** definition for HTTP (port 8123) and HTTPS (port 8443) endpoints +``` xml + + + + http + 127.0.0.1 + 8123 + + + + tls + plain_http + 127.0.0.1 + 8443 + + + +``` + +## Additional endpoints can be defined by referencing any module and omitting `` tag +**Example:** `another_http` endpoint is defined for `plain_http` module +``` xml + + + + http + 127.0.0.1 + 8123 + + + + tls + plain_http + 127.0.0.1 + 8443 + + + + plain_http + 127.0.0.1 + 8223 + + + +``` + +## Some modules can contain specific for its layer parameters +**Example:** for TLS layer private key (`privateKeyFile`) and certificate files (`certificateFile`) can be specified +``` xml + + + + http + 127.0.0.1 + 8123 + + + + tls + plain_http + 127.0.0.1 + 8443 + another_server.key + another_server.crt + + + +``` diff --git a/docs/en/operations/settings/merge-tree-settings.md b/docs/en/operations/settings/merge-tree-settings.md index 3e411a51ff46..9327d52227f9 100644 --- a/docs/en/operations/settings/merge-tree-settings.md +++ b/docs/en/operations/settings/merge-tree-settings.md @@ -287,7 +287,7 @@ Default value: 0 (seconds) ## remote_fs_execute_merges_on_single_replica_time_threshold -When this setting has a value greater than than zero only a single replica starts the merge immediately if merged part on shared storage and `allow_remote_fs_zero_copy_replication` is enabled. 
+When this setting has a value greater than zero only a single replica starts the merge immediately if merged part on shared storage and `allow_remote_fs_zero_copy_replication` is enabled. :::note Zero-copy replication is not ready for production Zero-copy replication is disabled by default in ClickHouse version 22.8 and higher. This feature is not recommended for production use. diff --git a/docs/en/operations/settings/settings-formats.md b/docs/en/operations/settings/settings-formats.md index 831c70941147..f455fcba840a 100644 --- a/docs/en/operations/settings/settings-formats.md +++ b/docs/en/operations/settings/settings-formats.md @@ -1642,7 +1642,7 @@ Possible values: - 0 — Output without row numbers. - 1 — Output with row numbers. -Default value: `0`. +Default value: `1`. **Example** diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index e4e7be83f7da..f9fe5f1b2d32 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -1689,6 +1689,18 @@ Possible values: Default value: `throw`. +## query_cache_system_table_handling {#query-cache-system-table-handling} + +Controls how the [query cache](../query-cache.md) handles `SELECT` queries against system tables, i.e. tables in databases `system.*` and `information_schema.*`. + +Possible values: + +- `'throw'` - Throw an exception and don't cache the query result. +- `'save'` - Cache the query result. +- `'ignore'` - Don't cache the query result and don't throw an exception. + +Default value: `throw`. + ## query_cache_min_query_runs {#query-cache-min-query-runs} Minimum number of times a `SELECT` query must run before its result is stored in the [query cache](../query-cache.md). @@ -5302,7 +5314,7 @@ SETTINGS(dictionary_use_async_executor=1, max_threads=8); ## storage_metadata_write_full_object_key {#storage_metadata_write_full_object_key} When set to `true` the metadata files are written with `VERSION_FULL_OBJECT_KEY` format version. With that format full object storage key names are written to the metadata files. -When set to `false` the metadata files are written with the previous format version, `VERSION_INLINE_DATA`. With that format only suffixes of object storage key names are are written to the metadata files. The prefix for all of object storage key names is set in configurations files at `storage_configuration.disks` section. +When set to `false` the metadata files are written with the previous format version, `VERSION_INLINE_DATA`. With that format only suffixes of object storage key names are written to the metadata files. The prefix for all of object storage key names is set in configurations files at `storage_configuration.disks` section. Default value: `false`. diff --git a/docs/en/operations/storing-data.md b/docs/en/operations/storing-data.md index 9ffbb64c1ed2..2c642dd2f0b5 100644 --- a/docs/en/operations/storing-data.md +++ b/docs/en/operations/storing-data.md @@ -36,7 +36,7 @@ E.g. 
configuration option s3 https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/ - 1 + 1 ``` @@ -47,7 +47,7 @@ is equal to configuration (from `24.1`): s3 local https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/ - 1 + 1 ``` @@ -56,7 +56,7 @@ Configuration s3_plain https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/ - 1 + 1 ``` @@ -67,7 +67,7 @@ is equal to s3 plain https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/ - 1 + 1 ``` @@ -79,7 +79,7 @@ Example of full storage configuration will look like: s3 https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/ - 1 + 1 @@ -105,7 +105,7 @@ Starting with 24.1 clickhouse version, it can also look like: s3 local https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/ - 1 + 1 @@ -324,7 +324,7 @@ Configuration: s3_plain https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/ - 1 + 1 ``` @@ -337,7 +337,7 @@ Configuration: azure plain https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/ - 1 + 1 ``` diff --git a/docs/en/operations/system-tables/blob_storage_log.md b/docs/en/operations/system-tables/blob_storage_log.md index 2328f7f0346d..8c0c33a504ad 100644 --- a/docs/en/operations/system-tables/blob_storage_log.md +++ b/docs/en/operations/system-tables/blob_storage_log.md @@ -7,6 +7,7 @@ Contains logging entries with information about various blob storage operations Columns: +- `hostname` ([LowCardinality(String)](../../sql-reference/data-types/string.md)) — Hostname of the server executing the query. - `event_date` ([Date](../../sql-reference/data-types/date.md)) — Date of the event. - `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Time of the event. - `event_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — Time of the event with microseconds precision. @@ -38,6 +39,7 @@ SELECT * FROM system.blob_storage_log WHERE query_id = '7afe0450-504d-4e4b-9a80- ```text Row 1: ────── +hostname: clickhouse.eu-central1.internal event_date: 2023-10-31 event_time: 2023-10-31 16:03:40 event_time_microseconds: 2023-10-31 16:03:40.481437 diff --git a/docs/en/sql-reference/aggregate-functions/reference/cramersv.md b/docs/en/sql-reference/aggregate-functions/reference/cramersv.md index e9e2c367610d..2424ff952378 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/cramersv.md +++ b/docs/en/sql-reference/aggregate-functions/reference/cramersv.md @@ -7,26 +7,33 @@ sidebar_position: 351 [Cramer's V](https://en.wikipedia.org/wiki/Cram%C3%A9r%27s_V) (sometimes referred to as Cramer's phi) is a measure of association between two columns in a table. The result of the `cramersV` function ranges from 0 (corresponding to no association between the variables) to 1 and can reach 1 only when each value is completely determined by the other. It may be viewed as the association between two variables as a percentage of their maximum possible variation. +:::note +For a bias corrected version of Cramer's V see: [cramersVBiasCorrected](./cramersvbiascorrected.md) +::: + **Syntax** ``` sql cramersV(column1, column2) ``` -**Arguments** +**Parameters** -- `column1` and `column2` are the columns to be compared +- `column1`: first column to be compared. +- `column2`: second column to be compared. **Returned value** - a value between 0 (corresponding to no association between the columns' values) to 1 (complete association). 
-**Return type** is always [Float64](../../../sql-reference/data-types/float.md). +Type: always [Float64](../../../sql-reference/data-types/float.md). **Example** The following two columns being compared below have no association with each other, so the result of `cramersV` is 0: +Query: + ``` sql SELECT cramersV(a, b) diff --git a/docs/en/sql-reference/aggregate-functions/reference/cramersvbiascorrected.md b/docs/en/sql-reference/aggregate-functions/reference/cramersvbiascorrected.md index f5ad3a8a937a..939c04e3fdc2 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/cramersvbiascorrected.md +++ b/docs/en/sql-reference/aggregate-functions/reference/cramersvbiascorrected.md @@ -5,31 +5,31 @@ sidebar_position: 352 # cramersVBiasCorrected - Cramer's V is a measure of association between two columns in a table. The result of the [`cramersV` function](./cramersv.md) ranges from 0 (corresponding to no association between the variables) to 1 and can reach 1 only when each value is completely determined by the other. The function can be heavily biased, so this version of Cramer's V uses the [bias correction](https://en.wikipedia.org/wiki/Cram%C3%A9r%27s_V#Bias_correction). - - **Syntax** ``` sql cramersVBiasCorrected(column1, column2) ``` -**Arguments** +**Parameters** -- `column1` and `column2` are the columns to be compared +- `column1`: first column to be compared. +- `column2`: second column to be compared. **Returned value** - a value between 0 (corresponding to no association between the columns' values) to 1 (complete association). -**Return type** is always [Float64](../../../sql-reference/data-types/float.md). +Type: always [Float64](../../../sql-reference/data-types/float.md). **Example** The following two columns being compared below have a small association with each other. Notice the result of `cramersVBiasCorrected` is smaller than the result of `cramersV`: +Query: + ``` sql SELECT cramersV(a, b), diff --git a/docs/en/sql-reference/aggregate-functions/reference/uniqcombined.md b/docs/en/sql-reference/aggregate-functions/reference/uniqcombined.md index 2f3efde859d8..18f44d2fcc44 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/uniqcombined.md +++ b/docs/en/sql-reference/aggregate-functions/reference/uniqcombined.md @@ -15,9 +15,9 @@ The `uniqCombined` function is a good choice for calculating the number of diffe **Arguments** -The function takes a variable number of parameters. Parameters can be `Tuple`, `Array`, `Date`, `DateTime`, `String`, or numeric types. +- `HLL_precision`: The base-2 logarithm of the number of cells in [HyperLogLog](https://en.wikipedia.org/wiki/HyperLogLog). Optional, you can use the function as `uniqCombined(x[, ...])`. The default value for `HLL_precision` is 17, which is effectively 96 KiB of space (2^17 cells, 6 bits each). +- `X`: A variable number of parameters. Parameters can be `Tuple`, `Array`, `Date`, `DateTime`, `String`, or numeric types. -`HLL_precision` is the base-2 logarithm of the number of cells in [HyperLogLog](https://en.wikipedia.org/wiki/HyperLogLog). Optional, you can use the function as `uniqCombined(x[, ...])`. The default value for `HLL_precision` is 17, which is effectively 96 KiB of space (2^17 cells, 6 bits each). **Returned value** @@ -25,26 +25,43 @@ The function takes a variable number of parameters. 
Parameters can be `Tuple`, ` **Implementation details** -Function: +The `uniqCombined` function: - Calculates a hash (64-bit hash for `String` and 32-bit otherwise) for all parameters in the aggregate, then uses it in calculations. - - Uses a combination of three algorithms: array, hash table, and HyperLogLog with an error correction table. - - For a small number of distinct elements, an array is used. When the set size is larger, a hash table is used. For a larger number of elements, HyperLogLog is used, which will occupy a fixed amount of memory. - + - For a small number of distinct elements, an array is used. + - When the set size is larger, a hash table is used. + - For a larger number of elements, HyperLogLog is used, which will occupy a fixed amount of memory. - Provides the result deterministically (it does not depend on the query processing order). :::note -Since it uses 32-bit hash for non-`String` type, the result will have very high error for cardinalities significantly larger than `UINT_MAX` (error will raise quickly after a few tens of billions of distinct values), hence in this case you should use [uniqCombined64](../../../sql-reference/aggregate-functions/reference/uniqcombined64.md#agg_function-uniqcombined64) +Since it uses a 32-bit hash for non-`String` types, the result will have very high error for cardinalities significantly larger than `UINT_MAX` (error will raise quickly after a few tens of billions of distinct values), hence in this case you should use [uniqCombined64](../../../sql-reference/aggregate-functions/reference/uniqcombined64.md#agg_function-uniqcombined64). ::: -Compared to the [uniq](../../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniq) function, the `uniqCombined`: +Compared to the [uniq](../../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniq) function, the `uniqCombined` function: - Consumes several times less memory. - Calculates with several times higher accuracy. - Usually has slightly lower performance. In some scenarios, `uniqCombined` can perform better than `uniq`, for example, with distributed queries that transmit a large number of aggregation states over the network. +**Example** + +Query: + +```sql +SELECT uniqCombined(number) FROM numbers(1e6); +``` + +Result: + +```response +┌─uniqCombined(number)─┐ +│ 1001148 │ -- 1.00 million +└──────────────────────┘ +``` + +See the example section of [uniqCombined64](../../../sql-reference/aggregate-functions/reference/uniqcombined64.md#agg_function-uniqcombined64) for an example of the difference between `uniqCombined` and `uniqCombined64` for much larger inputs. + **See Also** - [uniq](../../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniq) diff --git a/docs/en/sql-reference/aggregate-functions/reference/uniqcombined64.md b/docs/en/sql-reference/aggregate-functions/reference/uniqcombined64.md index 9f010da57f21..b6e09bcaae34 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/uniqcombined64.md +++ b/docs/en/sql-reference/aggregate-functions/reference/uniqcombined64.md @@ -5,4 +5,78 @@ sidebar_position: 193 # uniqCombined64 -Same as [uniqCombined](../../../sql-reference/aggregate-functions/reference/uniqcombined.md#agg_function-uniqcombined), but uses 64-bit hash for all data types. +Calculates the approximate number of different argument values. 
It is the same as [uniqCombined](../../../sql-reference/aggregate-functions/reference/uniqcombined.md#agg_function-uniqcombined), but uses a 64-bit hash for all data types rather than just for the String data type. + +``` sql +uniqCombined64(HLL_precision)(x[, ...]) +``` + +**Parameters** + +- `HLL_precision`: The base-2 logarithm of the number of cells in [HyperLogLog](https://en.wikipedia.org/wiki/HyperLogLog). Optionally, you can use the function as `uniqCombined64(x[, ...])`. The default value for `HLL_precision` is 17, which is effectively 96 KiB of space (2^17 cells, 6 bits each). +- `X`: A variable number of parameters. Parameters can be `Tuple`, `Array`, `Date`, `DateTime`, `String`, or numeric types. + +**Returned value** + +- A number [UInt64](../../../sql-reference/data-types/int-uint.md)-type number. + +**Implementation details** + +The `uniqCombined64` function: +- Calculates a hash (64-bit hash for all data types) for all parameters in the aggregate, then uses it in calculations. +- Uses a combination of three algorithms: array, hash table, and HyperLogLog with an error correction table. + - For a small number of distinct elements, an array is used. + - When the set size is larger, a hash table is used. + - For a larger number of elements, HyperLogLog is used, which will occupy a fixed amount of memory. +- Provides the result deterministically (it does not depend on the query processing order). + +:::note +Since it uses 64-bit hash for all types, the result does not suffer from very high error for cardinalities significantly larger than `UINT_MAX` like [uniqCombined](../../../sql-reference/aggregate-functions/reference/uniqcombined.md) does, which uses a 32-bit hash for non-`String` types. +::: + +Compared to the [uniq](../../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniq) function, the `uniqCombined64` function: + +- Consumes several times less memory. +- Calculates with several times higher accuracy. + +**Example** + +In the example below `uniqCombined64` is run on `1e10` different numbers returning a very close approximation of the number of different argument values. + +Query: + +```sql +SELECT uniqCombined64(number) FROM numbers(1e10); +``` + +Result: + +```response +┌─uniqCombined64(number)─┐ +│ 9998568925 │ -- 10.00 billion +└────────────────────────┘ +``` + +By comparison the `uniqCombined` function returns a rather poor approximation for an input this size. 
+ +Query: + +```sql +SELECT uniqCombined(number) FROM numbers(1e10); +``` + +Result: + +```response +┌─uniqCombined(number)─┐ +│ 5545308725 │ -- 5.55 billion +└──────────────────────┘ +``` + +**See Also** + +- [uniq](../../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniq) +- [uniqCombined](../../../sql-reference/aggregate-functions/reference/uniqcombined.md) +- [uniqHLL12](../../../sql-reference/aggregate-functions/reference/uniqhll12.md#agg_function-uniqhll12) +- [uniqExact](../../../sql-reference/aggregate-functions/reference/uniqexact.md#agg_function-uniqexact) +- [uniqTheta](../../../sql-reference/aggregate-functions/reference/uniqthetasketch.md#agg_function-uniqthetasketch) diff --git a/docs/en/sql-reference/data-types/aggregatefunction.md b/docs/en/sql-reference/data-types/aggregatefunction.md index fe6d7ebe0dc4..87511a505dc9 100644 --- a/docs/en/sql-reference/data-types/aggregatefunction.md +++ b/docs/en/sql-reference/data-types/aggregatefunction.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/data-types/aggregatefunction -sidebar_position: 53 +sidebar_position: 46 sidebar_label: AggregateFunction --- diff --git a/docs/en/sql-reference/data-types/array.md b/docs/en/sql-reference/data-types/array.md index 0ee7c8de93ce..e5a8ce5d18b1 100644 --- a/docs/en/sql-reference/data-types/array.md +++ b/docs/en/sql-reference/data-types/array.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/data-types/array -sidebar_position: 52 +sidebar_position: 32 sidebar_label: Array(T) --- diff --git a/docs/en/sql-reference/data-types/boolean.md b/docs/en/sql-reference/data-types/boolean.md index 70abf767a41b..4c59bd947ded 100644 --- a/docs/en/sql-reference/data-types/boolean.md +++ b/docs/en/sql-reference/data-types/boolean.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/data-types/boolean -sidebar_position: 43 +sidebar_position: 22 sidebar_label: Boolean --- diff --git a/docs/en/sql-reference/data-types/date.md b/docs/en/sql-reference/data-types/date.md index 26e4610aec76..7adee3bbf3cf 100644 --- a/docs/en/sql-reference/data-types/date.md +++ b/docs/en/sql-reference/data-types/date.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/data-types/date -sidebar_position: 47 +sidebar_position: 12 sidebar_label: Date --- diff --git a/docs/en/sql-reference/data-types/date32.md b/docs/en/sql-reference/data-types/date32.md index 38a07cd817da..a08c931b7fc3 100644 --- a/docs/en/sql-reference/data-types/date32.md +++ b/docs/en/sql-reference/data-types/date32.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/data-types/date32 -sidebar_position: 48 +sidebar_position: 14 sidebar_label: Date32 --- diff --git a/docs/en/sql-reference/data-types/datetime.md b/docs/en/sql-reference/data-types/datetime.md index 1adff18f598e..ac9a72c2641c 100644 --- a/docs/en/sql-reference/data-types/datetime.md +++ b/docs/en/sql-reference/data-types/datetime.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/data-types/datetime -sidebar_position: 48 +sidebar_position: 16 sidebar_label: DateTime --- @@ -36,9 +36,9 @@ You can explicitly set a time zone for `DateTime`-type columns when creating a t The [clickhouse-client](../../interfaces/cli.md) applies the server time zone by default if a time zone isn’t explicitly set when initializing the data type. To use the client time zone, run `clickhouse-client` with the `--use_client_time_zone` parameter. -ClickHouse outputs values depending on the value of the [date_time_output_format](../../operations/settings/settings.md#settings-date_time_output_format) setting. 
`YYYY-MM-DD hh:mm:ss` text format by default. Additionally, you can change the output with the [formatDateTime](../../sql-reference/functions/date-time-functions.md#formatdatetime) function. +ClickHouse outputs values depending on the value of the [date_time_output_format](../../operations/settings/settings-formats.md#date_time_output_format) setting. `YYYY-MM-DD hh:mm:ss` text format by default. Additionally, you can change the output with the [formatDateTime](../../sql-reference/functions/date-time-functions.md#formatdatetime) function. -When inserting data into ClickHouse, you can use different formats of date and time strings, depending on the value of the [date_time_input_format](../../operations/settings/settings.md#settings-date_time_input_format) setting. +When inserting data into ClickHouse, you can use different formats of date and time strings, depending on the value of the [date_time_input_format](../../operations/settings/settings-formats.md#date_time_input_format) setting. ## Examples @@ -147,8 +147,8 @@ Time shifts for multiple days. Some pacific islands changed their timezone offse - [Type conversion functions](../../sql-reference/functions/type-conversion-functions.md) - [Functions for working with dates and times](../../sql-reference/functions/date-time-functions.md) - [Functions for working with arrays](../../sql-reference/functions/array-functions.md) -- [The `date_time_input_format` setting](../../operations/settings/settings-formats.md#settings-date_time_input_format) -- [The `date_time_output_format` setting](../../operations/settings/settings-formats.md#settings-date_time_output_format) +- [The `date_time_input_format` setting](../../operations/settings/settings-formats.md#date_time_input_format) +- [The `date_time_output_format` setting](../../operations/settings/settings-formats.md#date_time_output_format) - [The `timezone` server configuration parameter](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) - [The `session_timezone` setting](../../operations/settings/settings.md#session_timezone) - [Operators for working with dates and times](../../sql-reference/operators/index.md#operators-datetime) diff --git a/docs/en/sql-reference/data-types/datetime64.md b/docs/en/sql-reference/data-types/datetime64.md index 504d0e2b0a6d..ef452a723e6b 100644 --- a/docs/en/sql-reference/data-types/datetime64.md +++ b/docs/en/sql-reference/data-types/datetime64.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/data-types/datetime64 -sidebar_position: 49 +sidebar_position: 18 sidebar_label: DateTime64 --- diff --git a/docs/en/sql-reference/data-types/decimal.md b/docs/en/sql-reference/data-types/decimal.md index 2b32e72a28f9..dfdefdff5a5e 100644 --- a/docs/en/sql-reference/data-types/decimal.md +++ b/docs/en/sql-reference/data-types/decimal.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/data-types/decimal -sidebar_position: 42 +sidebar_position: 6 sidebar_label: Decimal --- diff --git a/docs/en/sql-reference/data-types/enum.md b/docs/en/sql-reference/data-types/enum.md index 02e73a0360ea..ccfeb7f3416a 100644 --- a/docs/en/sql-reference/data-types/enum.md +++ b/docs/en/sql-reference/data-types/enum.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/data-types/enum -sidebar_position: 50 +sidebar_position: 20 sidebar_label: Enum --- diff --git a/docs/en/sql-reference/data-types/fixedstring.md b/docs/en/sql-reference/data-types/fixedstring.md index a56b3fccbc17..0316df7fe348 100644 --- a/docs/en/sql-reference/data-types/fixedstring.md +++ 
b/docs/en/sql-reference/data-types/fixedstring.md @@ -1,10 +1,10 @@ --- slug: /en/sql-reference/data-types/fixedstring -sidebar_position: 45 +sidebar_position: 10 sidebar_label: FixedString(N) --- -# FixedString +# FixedString(N) A fixed-length string of `N` bytes (neither characters nor code points). diff --git a/docs/en/sql-reference/data-types/float.md b/docs/en/sql-reference/data-types/float.md index be7b2a7fcd87..23131d5b4fe3 100644 --- a/docs/en/sql-reference/data-types/float.md +++ b/docs/en/sql-reference/data-types/float.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/data-types/float -sidebar_position: 41 +sidebar_position: 4 sidebar_label: Float32, Float64 --- diff --git a/docs/en/sql-reference/data-types/geo.md b/docs/en/sql-reference/data-types/geo.md index 1d37b829dd56..7e3c32b34517 100644 --- a/docs/en/sql-reference/data-types/geo.md +++ b/docs/en/sql-reference/data-types/geo.md @@ -1,8 +1,8 @@ --- slug: /en/sql-reference/data-types/geo -sidebar_position: 62 +sidebar_position: 54 sidebar_label: Geo -title: "Geo Data Types" +title: "Geometric" --- ClickHouse supports data types for representing geographical objects — locations, lands, etc. diff --git a/docs/en/sql-reference/data-types/index.md b/docs/en/sql-reference/data-types/index.md index ffd063590fa8..fcb0b60d0226 100644 --- a/docs/en/sql-reference/data-types/index.md +++ b/docs/en/sql-reference/data-types/index.md @@ -1,10 +1,10 @@ --- slug: /en/sql-reference/data-types/ sidebar_label: List of data types -sidebar_position: 37 +sidebar_position: 1 --- -# ClickHouse Data Types +# Data Types in ClickHouse ClickHouse can store various kinds of data in table cells. This section describes the supported data types and special considerations for using and/or implementing them if any. diff --git a/docs/en/sql-reference/data-types/int-uint.md b/docs/en/sql-reference/data-types/int-uint.md index 520454a859fb..52d2982de19e 100644 --- a/docs/en/sql-reference/data-types/int-uint.md +++ b/docs/en/sql-reference/data-types/int-uint.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/data-types/int-uint -sidebar_position: 40 +sidebar_position: 2 sidebar_label: UInt8, UInt16, UInt32, UInt64, UInt128, UInt256, Int8, Int16, Int32, Int64, Int128, Int256 --- diff --git a/docs/en/sql-reference/data-types/ipv4.md b/docs/en/sql-reference/data-types/ipv4.md index 288806f47b35..637ed543e084 100644 --- a/docs/en/sql-reference/data-types/ipv4.md +++ b/docs/en/sql-reference/data-types/ipv4.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/data-types/ipv4 -sidebar_position: 59 +sidebar_position: 28 sidebar_label: IPv4 --- diff --git a/docs/en/sql-reference/data-types/ipv6.md b/docs/en/sql-reference/data-types/ipv6.md index 97959308b58d..642a7db81fc3 100644 --- a/docs/en/sql-reference/data-types/ipv6.md +++ b/docs/en/sql-reference/data-types/ipv6.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/data-types/ipv6 -sidebar_position: 60 +sidebar_position: 30 sidebar_label: IPv6 --- diff --git a/docs/en/sql-reference/data-types/json.md b/docs/en/sql-reference/data-types/json.md index fd548a0d5a28..39e37abad82c 100644 --- a/docs/en/sql-reference/data-types/json.md +++ b/docs/en/sql-reference/data-types/json.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/data-types/json -sidebar_position: 54 +sidebar_position: 26 sidebar_label: JSON --- diff --git a/docs/en/sql-reference/data-types/lowcardinality.md b/docs/en/sql-reference/data-types/lowcardinality.md index db10103282d4..133ac2bd72eb 100644 --- a/docs/en/sql-reference/data-types/lowcardinality.md +++ 
b/docs/en/sql-reference/data-types/lowcardinality.md @@ -1,10 +1,10 @@ --- slug: /en/sql-reference/data-types/lowcardinality -sidebar_position: 51 -sidebar_label: LowCardinality +sidebar_position: 42 +sidebar_label: LowCardinality(T) --- -# LowCardinality +# LowCardinality(T) Changes the internal representation of other data types to be dictionary-encoded. diff --git a/docs/en/sql-reference/data-types/map.md b/docs/en/sql-reference/data-types/map.md index e0c8b98f9f83..2c734969afcb 100644 --- a/docs/en/sql-reference/data-types/map.md +++ b/docs/en/sql-reference/data-types/map.md @@ -1,12 +1,12 @@ --- slug: /en/sql-reference/data-types/map -sidebar_position: 65 -sidebar_label: Map(key, value) +sidebar_position: 36 +sidebar_label: Map(K, V) --- -# Map(key, value) +# Map(K, V) -`Map(key, value)` data type stores `key:value` pairs. +`Map(K, V)` data type stores `key:value` pairs. **Parameters** diff --git a/docs/en/sql-reference/data-types/multiword-types.md b/docs/en/sql-reference/data-types/multiword-types.md deleted file mode 100644 index ebbe1d845447..000000000000 --- a/docs/en/sql-reference/data-types/multiword-types.md +++ /dev/null @@ -1,27 +0,0 @@ ---- -slug: /en/sql-reference/data-types/multiword-types -sidebar_position: 61 -sidebar_label: Multiword Type Names -title: "Multiword Types" ---- - -When creating tables, you can use data types with a name consisting of several words. This is implemented for better SQL compatibility. - -## Multiword Types Support - -| Multiword types | Simple types | -|----------------------------------|--------------------------------------------------------------| -| DOUBLE PRECISION | [Float64](../../sql-reference/data-types/float.md) | -| CHAR LARGE OBJECT | [String](../../sql-reference/data-types/string.md) | -| CHAR VARYING | [String](../../sql-reference/data-types/string.md) | -| CHARACTER LARGE OBJECT | [String](../../sql-reference/data-types/string.md) | -| CHARACTER VARYING | [String](../../sql-reference/data-types/string.md) | -| NCHAR LARGE OBJECT | [String](../../sql-reference/data-types/string.md) | -| NCHAR VARYING | [String](../../sql-reference/data-types/string.md) | -| NATIONAL CHARACTER LARGE OBJECT | [String](../../sql-reference/data-types/string.md) | -| NATIONAL CHARACTER VARYING | [String](../../sql-reference/data-types/string.md) | -| NATIONAL CHAR VARYING | [String](../../sql-reference/data-types/string.md) | -| NATIONAL CHARACTER | [String](../../sql-reference/data-types/string.md) | -| NATIONAL CHAR | [String](../../sql-reference/data-types/string.md) | -| BINARY LARGE OBJECT | [String](../../sql-reference/data-types/string.md) | -| BINARY VARYING | [String](../../sql-reference/data-types/string.md) | diff --git a/docs/en/sql-reference/data-types/nullable.md b/docs/en/sql-reference/data-types/nullable.md index 5504765e4a08..abcb87a0c1b1 100644 --- a/docs/en/sql-reference/data-types/nullable.md +++ b/docs/en/sql-reference/data-types/nullable.md @@ -1,7 +1,7 @@ --- slug: /en/sql-reference/data-types/nullable -sidebar_position: 55 -sidebar_label: Nullable +sidebar_position: 44 +sidebar_label: Nullable(T) --- # Nullable(T) diff --git a/docs/en/sql-reference/data-types/simpleaggregatefunction.md b/docs/en/sql-reference/data-types/simpleaggregatefunction.md index 517a28576f03..39f8409c1e1a 100644 --- a/docs/en/sql-reference/data-types/simpleaggregatefunction.md +++ b/docs/en/sql-reference/data-types/simpleaggregatefunction.md @@ -1,5 +1,7 @@ --- slug: /en/sql-reference/data-types/simpleaggregatefunction +sidebar_position: 48 
+sidebar_label: SimpleAggregateFunction --- # SimpleAggregateFunction diff --git a/docs/en/sql-reference/data-types/string.md b/docs/en/sql-reference/data-types/string.md index f891a9303e58..8a4f346fdfc8 100644 --- a/docs/en/sql-reference/data-types/string.md +++ b/docs/en/sql-reference/data-types/string.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/data-types/string -sidebar_position: 44 +sidebar_position: 8 sidebar_label: String --- @@ -13,7 +13,7 @@ When creating tables, numeric parameters for string fields can be set (e.g. `VAR Aliases: -- `String` — `LONGTEXT`, `MEDIUMTEXT`, `TINYTEXT`, `TEXT`, `LONGBLOB`, `MEDIUMBLOB`, `TINYBLOB`, `BLOB`, `VARCHAR`, `CHAR`. +- `String` — `LONGTEXT`, `MEDIUMTEXT`, `TINYTEXT`, `TEXT`, `LONGBLOB`, `MEDIUMBLOB`, `TINYBLOB`, `BLOB`, `VARCHAR`, `CHAR`, `CHAR LARGE OBJECT`, `CHAR VARYING`, `CHARACTER LARGE OBJECT`, `CHARACTER VARYING`, `NCHAR LARGE OBJECT`, `NCHAR VARYING`, `NATIONAL CHARACTER LARGE OBJECT`, `NATIONAL CHARACTER VARYING`, `NATIONAL CHAR VARYING`, `NATIONAL CHARACTER`, `NATIONAL CHAR`, `BINARY LARGE OBJECT`, `BINARY VARYING`, ## Encodings diff --git a/docs/en/sql-reference/data-types/tuple.md b/docs/en/sql-reference/data-types/tuple.md index 8f87eeca075f..0525a3b04766 100644 --- a/docs/en/sql-reference/data-types/tuple.md +++ b/docs/en/sql-reference/data-types/tuple.md @@ -1,10 +1,10 @@ --- slug: /en/sql-reference/data-types/tuple -sidebar_position: 54 +sidebar_position: 34 sidebar_label: Tuple(T1, T2, ...) --- -# Tuple(T1, T2, …) +# Tuple(T1, T2, ...) A tuple of elements, each having an individual [type](../../sql-reference/data-types/index.md#data_types). Tuple must contain at least one element. diff --git a/docs/en/sql-reference/data-types/uuid.md b/docs/en/sql-reference/data-types/uuid.md index 40f756b95888..75e163f50639 100644 --- a/docs/en/sql-reference/data-types/uuid.md +++ b/docs/en/sql-reference/data-types/uuid.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/data-types/uuid -sidebar_position: 46 +sidebar_position: 24 sidebar_label: UUID --- diff --git a/docs/en/sql-reference/data-types/variant.md b/docs/en/sql-reference/data-types/variant.md index 7d10d4b0e977..3c2b6e0a3628 100644 --- a/docs/en/sql-reference/data-types/variant.md +++ b/docs/en/sql-reference/data-types/variant.md @@ -1,10 +1,10 @@ --- slug: /en/sql-reference/data-types/variant -sidebar_position: 55 -sidebar_label: Variant +sidebar_position: 40 +sidebar_label: Variant(T1, T2, ...) --- -# Variant(T1, T2, T3, ...) +# Variant(T1, T2, ...) This type represents a union of other data types. Type `Variant(T1, T2, ..., TN)` means that each row of this type has a value of either type `T1` or `T2` or ... or `TN` or none of them (`NULL` value). @@ -190,22 +190,67 @@ SELECT toTypeName(variantType(v)) FROM test LIMIT 1; └─────────────────────────────────────────────────────────────────────┘ ``` -## Conversion between Variant column and other columns +## Conversion between a Variant column and other columns -There are 3 possible conversions that can be performed with Variant column. +There are 4 possible conversions that can be performed with a column of type `Variant`. 
-### Converting an ordinary column to a Variant column +### Converting a String column to a Variant column -It is possible to convert ordinary column with type `T` to a `Variant` column containing this type: +Conversion from `String` to `Variant` is performed by parsing a value of `Variant` type from the string value: ```sql -SELECT toTypeName(variant) as type_name, 'Hello, World!'::Variant(UInt64, String, Array(UInt64)) as variant; +SELECT '42'::Variant(String, UInt64) as variant, variantType(variant) as variant_type ``` ```text -┌─type_name──────────────────────────────┬─variant───────┐ -│ Variant(Array(UInt64), String, UInt64) │ Hello, World! │ -└────────────────────────────────────────┴───────────────┘ +┌─variant─┬─variant_type─┐ +│ 42 │ UInt64 │ +└─────────┴──────────────┘ +``` + +```sql +SELECT '[1, 2, 3]'::Variant(String, Array(UInt64)) as variant, variantType(variant) as variant_type +``` + +```text +┌─variant─┬─variant_type──┐ +│ [1,2,3] │ Array(UInt64) │ +└─────────┴───────────────┘ +``` + +```sql +SELECT CAST(map('key1', '42', 'key2', 'true', 'key3', '2020-01-01'), 'Map(String, Variant(UInt64, Bool, Date))') as map_of_variants, mapApply((k, v) -> (k, variantType(v)), map_of_variants) as map_of_variant_types``` +``` + +```text +┌─map_of_variants─────────────────────────────┬─map_of_variant_types──────────────────────────┐ +│ {'key1':42,'key2':true,'key3':'2020-01-01'} │ {'key1':'UInt64','key2':'Bool','key3':'Date'} │ +└─────────────────────────────────────────────┴───────────────────────────────────────────────┘ +``` + +### Converting an ordinary column to a Variant column + +It is possible to convert an ordinary column with type `T` to a `Variant` column containing this type: + +```sql +SELECT toTypeName(variant) as type_name, [1,2,3]::Array(UInt64)::Variant(UInt64, String, Array(UInt64)) as variant, variantType(variant) as variant_name + ``` + +```text +┌─type_name──────────────────────────────┬─variant─┬─variant_name──┐ +│ Variant(Array(UInt64), String, UInt64) │ [1,2,3] │ Array(UInt64) │ +└────────────────────────────────────────┴─────────┴───────────────┘ +``` + +Note: converting from `String` type is always performed through parsing, if you need to convert `String` column to `String` variant of a `Variant` without parsing, you can do the following: +```sql +SELECT '[1, 2, 3]'::Variant(String)::Variant(String, Array(UInt64), UInt64) as variant, variantType(variant) as variant_type +``` + +```sql +┌─variant───┬─variant_type─┐ +│ [1, 2, 3] │ String │ +└───────────┴──────────────┘ ``` ### Converting a Variant column to an ordinary column @@ -395,3 +440,37 @@ SELECT v, variantType(v) FROM test ORDER by v; │ 100 │ UInt32 │ └─────┴────────────────┘ ``` + +## JSONExtract functions with Variant + +All `JSONExtract*` functions support `Variant` type: + +```sql +SELECT JSONExtract('{"a" : [1, 2, 3]}', 'a', 'Variant(UInt32, String, Array(UInt32))') AS variant, variantType(variant) AS variant_type; +``` + +```text +┌─variant─┬─variant_type──┐ +│ [1,2,3] │ Array(UInt32) │ +└─────────┴───────────────┘ +``` + +```sql +SELECT JSONExtract('{"obj" : {"a" : 42, "b" : "Hello", "c" : [1,2,3]}}', 'obj', 'Map(String, Variant(UInt32, String, Array(UInt32)))') AS map_of_variants, mapApply((k, v) -> (k, variantType(v)), map_of_variants) AS map_of_variant_types +``` + +```text +┌─map_of_variants──────────────────┬─map_of_variant_types────────────────────────────┐ +│ {'a':42,'b':'Hello','c':[1,2,3]} │ {'a':'UInt32','b':'String','c':'Array(UInt32)'} │ 
+└──────────────────────────────────┴─────────────────────────────────────────────────┘ +``` + +```sql +SELECT JSONExtractKeysAndValues('{"a" : 42, "b" : "Hello", "c" : [1,2,3]}', 'Variant(UInt32, String, Array(UInt32))') AS variants, arrayMap(x -> (x.1, variantType(x.2)), variants) AS variant_types +``` + +```text +┌─variants───────────────────────────────┬─variant_types─────────────────────────────────────────┐ +│ [('a',42),('b','Hello'),('c',[1,2,3])] │ [('a','UInt32'),('b','String'),('c','Array(UInt32)')] │ +└────────────────────────────────────────┴───────────────────────────────────────────────────────┘ +``` diff --git a/docs/en/sql-reference/functions/array-functions.md b/docs/en/sql-reference/functions/array-functions.md index 67a4c0268518..87e733a4b0cc 100644 --- a/docs/en/sql-reference/functions/array-functions.md +++ b/docs/en/sql-reference/functions/array-functions.md @@ -774,6 +774,59 @@ Returns the number of elements for which `func(arr1[i], …, arrN[i])` returns s Note that the `arrayCount` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You can pass a lambda function to it as the first argument. +## arrayDotProduct + +Returns the dot product of two arrays. + +**Syntax** + +```sql +arrayDotProduct(vector1, vector2) +``` + +Alias: `scalarProduct`, `dotProduct` + +**Parameters** + +- `vector1`: First vector. [Array](../data-types/array.md) or [Tuple](../data-types/tuple.md) of numeric values. +- `vector2`: Second vector. [Array](../data-types/array.md) or [Tuple](../data-types/tuple.md) of numeric values. + +:::note +The sizes of the two vectors must be equal. Arrays and Tuples may also contain mixed element types. +::: + +**Returned value** + +- The dot product of the two vectors. + +Type: numeric - determined by the type of the arguments. If Arrays or Tuples contain mixed element types then the result type is the supertype. + +**Examples** + +Query: + +```sql +SELECT arrayDotProduct([1, 2, 3], [4, 5, 6]) AS res, toTypeName(res); +``` + +Result: + +```response +32 UInt16 +``` + +Query: + +```sql +SELECT dotProduct((1::UInt16, 2::UInt8, 3::Float32),(4::Int16, 5::Float32, 6::UInt8)) AS res, toTypeName(res); +``` + +Result: + +```response +32 Float64 +``` + ## countEqual(arr, x) Returns the number of elements in the array equal to x. Equivalent to arrayCount (elem -\> elem = x, arr). @@ -888,6 +941,66 @@ SELECT arrayEnumerateUniq([1, 1, 1, 2, 2, 2], [1, 1, 2, 1, 1, 2]) AS res This is necessary when using ARRAY JOIN with a nested data structure and further aggregation across multiple elements in this structure. +## arrayEnumerateUniqRanked + +Returns an array the same size as the source array, indicating for each element what its position is among elements with the same value. It allows for enumeration of a multidimensional array with the ability to specify how deep to look inside the array. + +**Syntax** + +```sql +arrayEnumerateUniqRanked(clear_depth, arr, max_array_depth) +``` + +**Parameters** + +- `clear_depth`: Enumerate elements at the specified level separately. Positive [Integer](../data-types/int-uint.md) less than or equal to `max_arr_depth`. +- `arr`: N-dimensional array to enumerate. [Array](../data-types/array.md). +- `max_array_depth`: The maximum effective depth. Positive [Integer](../data-types/int-uint.md) less than or equal to the depth of `arr`. 
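+
+**Returned value**
+
+- An array the same size as the source array, indicating for each element what its position is among elements with the same value. [Array](../data-types/array.md).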
+ +**Example** + +With `clear_depth=1` and `max_array_depth=1`, the result of `arrayEnumerateUniqRanked` is identical to that which [`arrayEnumerateUniq`](#arrayenumerateuniqarr) would give for the same array. + +Query: + +``` sql +SELECT arrayEnumerateUniqRanked(1, [1,2,1], 1); +``` + +Result: + +``` text +[1,1,2] +``` + +In this example, `arrayEnumerateUniqRanked` is used to obtain an array indicating, for each element of the multidimensional array, what its position is among elements of the same value. For the first row of the passed array,`[1,2,3]`, the corresponding result is `[1,1,1]`, indicating that this is the first time `1`,`2` and `3` are encountered. For the second row of the provided array,`[2,2,1]`, the corresponding result is `[2,3,3]`, indicating that `2` is encountered for a second and third time, and `1` is encountered for the second time. Likewise, for the third row of the provided array `[3]` the corresponding result is `[2]` indicating that `3` is encountered for the second time. + +Query: + +``` sql +SELECT arrayEnumerateUniqRanked(1, [[1,2,3],[2,2,1],[3]], 2); +``` + +Result: + +``` text +[[1,1,1],[2,3,2],[2]] +``` + +Changing `clear_depth=2`, results in elements being enumerated separately for each row. + +Query: + +``` sql +SELECT arrayEnumerateUniqRanked(2, [[1,2,3],[2,2,1],[3]], 2); +``` + +Result: + +``` text +[[1,1,1],[1,2,1],[1]] +``` + ## arrayPopBack Removes the last item from the array. @@ -1303,6 +1416,125 @@ SELECT arrayReverseSort((x, y) -> -y, [4, 3, 5], [1, 2, 3]) AS res; Same as `arrayReverseSort` with additional `limit` argument allowing partial sorting. Returns an array of the same size as the original array where elements in range `[1..limit]` are sorted in descending order. Remaining elements `(limit..N]` shall contain elements in unspecified order. +## arrayShuffle + +Returns an array of the same size as the original array containing the elements in shuffled order. +Elements are reordered in such a way that each possible permutation of those elements has equal probability of appearance. + +**Syntax** + +```sql +arrayShuffle(arr[, seed]) +``` + +**Parameters** + +- `arr`: The array to partially shuffle. [Array](../data-types/array.md). +- `seed` (optional): seed to be used with random number generation. If not provided a random one is used. [UInt or Int](../data-types/int-uint.md). + +**Returned value** + +- Array with elements shuffled. + +**Implementation details** + +:::note +This function will not materialize constants. +::: + +**Examples** + +In this example, `arrayShuffle` is used without providing a `seed` and will therefore generate one randomly itself. + +Query: + +```sql +SELECT arrayShuffle([1, 2, 3, 4]); +``` + +Note: when using [ClickHouse Fiddle](https://fiddle.clickhouse.com/), the exact response may differ due to random nature of the function. + +Result: + +```response +[1,4,2,3] +``` + +In this example, `arrayShuffle` is provided a `seed` and will produce stable results. + +Query: + +```sql +SELECT arrayShuffle([1, 2, 3, 4], 41); +``` + +Result: + +```response +[3,2,1,4] +``` + +## arrayPartialShuffle + +Given an input array of cardinality `N`, returns an array of size N where elements in the range `[1...limit]` are shuffled and the remaining elements in the range `(limit...n]` are unshuffled. + +**Syntax** + +```sql +arrayPartialShuffle(arr[, limit[, seed]]) +``` + +**Parameters** + +- `arr`: The array size `N` to partially shuffle. [Array](../data-types/array.md). 
+- `limit` (optional): The number to limit element swaps to, in the range `[1..N]`. [UInt or Int](../data-types/int-uint.md). +- `seed` (optional): The seed value to be used with random number generation. If not provided a random one is used. [UInt or Int](../data-types/int-uint.md) + +**Returned value** + +- Array with elements partially shuffled. + +**Implementation details** + +:::note +This function will not materialize constants. + +The value of `limit` should be in the range `[1..N]`. Values outside of that range are equivalent to performing full [arrayShuffle](#arrayshuffle). +::: + +**Examples** + +Note: when using [ClickHouse Fiddle](https://fiddle.clickhouse.com/), the exact response may differ due to random nature of the function. + +Query: + +```sql +SELECT arrayPartialShuffle([1, 2, 3, 4, 5, 6, 7, 8, 9, 10], 1) +``` + +Result: + +The order of elements is preserved (`[2,3,4,5], [7,8,9,10]`) except for the two shuffled elements `[1, 6]`. No `seed` is provided so the function selects its own randomly. + +```response +[6,2,3,4,5,1,7,8,9,10] +``` + +In this example, the `limit` is increased to `2` and a `seed` value is provided. The order + +Query: + +```sql +SELECT arrayPartialShuffle([1, 2, 3, 4, 5, 6, 7, 8, 9, 10], 2); +``` + +The order of elements is preserved (`[4, 5, 6, 7, 8], [10]`) except for the four shuffled elements `[1, 2, 3, 9]`. + +Result: +```response +[3,9,1,4,5,6,7,8,2,10] +``` + ## arrayUniq(arr, …) If one argument is passed, it counts the number of different elements in the array. @@ -1400,21 +1632,91 @@ Result: └────────────────────────────────┘ ``` -## arrayEnumerateDense(arr) +## arrayEnumerateDense Returns an array of the same size as the source array, indicating where each element first appears in the source array. -Example: +**Syntax** + +```sql +arrayEnumerateDense(arr) +``` + +**Example** + +Query: ``` sql SELECT arrayEnumerateDense([10, 20, 10, 30]) ``` +Result: + ``` text ┌─arrayEnumerateDense([10, 20, 10, 30])─┐ │ [1,2,1,3] │ └───────────────────────────────────────┘ ``` +## arrayEnumerateDenseRanked + +Returns an array the same size as the source array, indicating where each element first appears in the source array. It allows for enumeration of a multidimensional array with the ability to specify how deep to look inside the array. + +**Syntax** + +```sql +arrayEnumerateDenseRanked(clear_depth, arr, max_array_depth) +``` + +**Parameters** + +- `clear_depth`: Enumerate elements at the specified level separately. Positive [Integer](../data-types/int-uint.md) less than or equal to `max_arr_depth`. +- `arr`: N-dimensional array to enumerate. [Array](../data-types/array.md). +- `max_array_depth`: The maximum effective depth. Positive [Integer](../data-types/int-uint.md) less than or equal to the depth of `arr`. + +**Example** + +With `clear_depth=1` and `max_array_depth=1`, the result is identical to what [arrayEnumerateDense](#arrayenumeratedense) would give. + +Query: + +``` sql +SELECT arrayEnumerateDenseRanked(1,[10, 20, 10, 30],1); +``` + +Result: + +``` text +[1,2,1,3] +``` + +In this example, `arrayEnumerateDenseRanked` is used to obtain an array indicating, for each element of the multidimensional array, what its position is among elements of the same value. For the first row of the passed array,`[10,10,30,20]`, the corresponding first row of the result is `[1,1,2,3]`, indicating that `10` is the first number encountered in position 1 and 2, `30` the second number encountered in position 3 and `20` is the third number encountered in position 4. 
For the second row, `[40, 50, 10, 30]`, the corresponding second row of the result is `[4,5,1,2]`, indicating that `40` and `50` are the fourth and fifth numbers encountered in position 1 and 2 of that row, that another `10` (the first encountered number) is in position 3 and `30` (the second number encountered) is in the last position. + + +Query: + +``` sql +SELECT arrayEnumerateDenseRanked(1,[[10,10,30,20],[40,50,10,30]],2); +``` + +Result: + +``` text +[[1,1,2,3],[4,5,1,2]] +``` + +Changing `clear_depth=2` results in the enumeration occurring separately for each row anew. + +Query: + +``` sql +SELECT arrayEnumerateDenseRanked(2,[[10,10,30,20],[40,50,10,30]],2); +``` + +Result: + +``` text +[[1,1,2,3],[1,2,3,4]] +``` ## arrayIntersect(arr) @@ -1652,7 +1954,7 @@ flatten(array_of_arrays) Alias: `flatten`. -**Arguments** +**Parameters** - `array_of_arrays` — [Array](../../sql-reference/data-types/array.md) of arrays. For example, `[[1,2,3], [4,5]]`. @@ -1928,7 +2230,67 @@ Note that the `arrayAll` is a [higher-order function](../../sql-reference/functi Returns the first element in the `arr1` array for which `func(arr1[i], …, arrN[i])` returns something other than 0. -Note that the `arrayFirst` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You must pass a lambda function to it as the first argument, and it can’t be omitted. +## arrayFirstOrNull + +Returns the first element in the `arr1` array for which `func(arr1[i], …, arrN[i])` returns something other than 0, otherwise it returns `NULL`. + +**Syntax** + +```sql +arrayFirstOrNull(func, arr1, …) +``` + +**Parameters** + +- `func`: Lambda function. [Lambda function](../functions/#higher-order-functions---operator-and-lambdaparams-expr-function). +- `arr1`: Array to operate on. [Array](../data-types/array.md). + +**Returned value** + +- The first element in the passed array. +- Otherwise, returns `NULL` + +**Implementation details** + +Note that the `arrayFirstOrNull` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You must pass a lambda function to it as the first argument, and it can’t be omitted. + +**Example** + +Query: + +```sql +SELECT arrayFirstOrNull(x -> x >= 2, [1, 2, 3]); +``` + +Result: + +```response +2 +``` + +Query: + +```sql +SELECT arrayFirstOrNull(x -> x >= 2, emptyArrayUInt8()); +``` + +Result: + +```response +\N +``` + +Query: + +```sql +SELECT arrayLastOrNull((x,f) -> f, [1,2,3,NULL], [0,1,0,1]); +``` + +Result: + +```response +\N +``` ## arrayLast(func, arr1, …) @@ -1936,6 +2298,56 @@ Returns the last element in the `arr1` array for which `func(arr1[i], …, arrN[ Note that the `arrayLast` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You must pass a lambda function to it as the first argument, and it can’t be omitted. +## arrayLastOrNull + +Returns the last element in the `arr1` array for which `func(arr1[i], …, arrN[i])` returns something other than 0, otherwise returns `NULL`. + +**Syntax** + +```sql +arrayLastOrNull(func, arr1, …) +``` + +**Parameters** + +- `func`: Lambda function. [Lambda function](../functions/#higher-order-functions---operator-and-lambdaparams-expr-function). +- `arr1`: Array to operate on. [Array](../data-types/array.md). + +**Returned value** + +- The last element in the passed array. 
+- Otherwise, returns `NULL` + +**Implementation details** + +Note that the `arrayLastOrNull` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You must pass a lambda function to it as the first argument, and it can’t be omitted. + +**Example** + +Query: + +```sql +SELECT arrayLastOrNull(x -> x >= 2, [1, 2, 3]); +``` + +Result: + +```response +3 +``` + +Query: + +```sql +SELECT arrayLastOrNull(x -> x >= 2, emptyArrayUInt8()); +``` + +Result: + +```response +\N +``` + ## arrayFirstIndex(func, arr1, …) Returns the index of the first element in the `arr1` array for which `func(arr1[i], …, arrN[i])` returns something other than 0. diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index 4c4190043446..3bb9d4e7dbe6 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -1906,7 +1906,7 @@ Aliases: `dateAdd`, `DATE_ADD`. **Arguments** -- `unit` — The type of interval to add. [String](../../sql-reference/data-types/string.md). +- `unit` — The type of interval to add. Note: This is not a [String](../../sql-reference/data-types/string.md) and must therefore not be quoted. Possible values: - `second` @@ -1961,7 +1961,7 @@ Aliases: `dateSub`, `DATE_SUB`. **Arguments** -- `unit` — The type of interval to subtract. Note: The unit should be unquoted. +- `unit` — The type of interval to subtract. Note: This is not a [String](../../sql-reference/data-types/string.md) and must therefore not be quoted. Possible values: diff --git a/docs/en/sql-reference/functions/distance-functions.md b/docs/en/sql-reference/functions/distance-functions.md index e20c35c6b6f1..5f3514049c77 100644 --- a/docs/en/sql-reference/functions/distance-functions.md +++ b/docs/en/sql-reference/functions/distance-functions.md @@ -81,6 +81,43 @@ Result: │ 2.23606797749979 │ └──────────────────┘ ``` +## L2SquaredNorm + +Calculates the square root of the sum of the squares of the vector values (the [L2Norm](#l2norm)) squared. + +**Syntax** + +```sql +L2SquaredNorm(vector) +``` + +Alias: `normL2Squared`. + +***Arguments** + +- `vector` — [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md). + +**Returned value** + +- L2-norm squared. + +Type: [Float](../../sql-reference/data-types/float.md). + +**Example** + +Query: + +```sql +SELECT L2SquaredNorm((1, 2)); +``` + +Result: + +```text +┌─L2SquaredNorm((1, 2))─┐ +│ 5 │ +└───────────────────────┘ +``` ## LinfNorm diff --git a/docs/en/sql-reference/functions/hash-functions.md b/docs/en/sql-reference/functions/hash-functions.md index 90c7d8c2206b..1cd7eeb7c83b 100644 --- a/docs/en/sql-reference/functions/hash-functions.md +++ b/docs/en/sql-reference/functions/hash-functions.md @@ -594,6 +594,45 @@ Calculates JumpConsistentHash form a UInt64. Accepts two arguments: a UInt64-type key and the number of buckets. Returns Int32. For more information, see the link: [JumpConsistentHash](https://arxiv.org/pdf/1406.2294.pdf) +## kostikConsistentHash + +An O(1) time and space consistent hash algorithm by Konstantin 'kostik' Oblakov. Previously `yandexConsistentHash`. + +**Syntax** + +```sql +kostikConsistentHash(input, n) +``` + +Alias: `yandexConsistentHash` (left for backwards compatibility sake). + +**Parameters** + +- `input`: A UInt64-type key [UInt64](/docs/en/sql-reference/data-types/int-uint.md). +- `n`: Number of buckets. 
[UInt16](/docs/en/sql-reference/data-types/int-uint.md). + +**Returned value** + +- A [UInt16](/docs/en/sql-reference/data-types/int-uint.md) data type hash value. + +**Implementation details** + +It is efficient only if n <= 32768. + +**Example** + +Query: + +```sql +SELECT kostikConsistentHash(16045690984833335023, 2); +``` + +```response +┌─kostikConsistentHash(16045690984833335023, 2)─┐ +│ 1 │ +└───────────────────────────────────────────────┘ +``` + ## murmurHash2_32, murmurHash2_64 Produces a [MurmurHash2](https://github.com/aappleby/smhasher) hash value. @@ -1153,6 +1192,42 @@ Result: └────────────┘ ``` +## wyHash64 + +Produces a 64-bit [wyHash64](https://github.com/wangyi-fudan/wyhash) hash value. + +**Syntax** + +```sql +wyHash64(string) +``` + +**Arguments** + +- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). + +**Returned value** + +- Hash value. + +Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). + +**Example** + +Query: + +```sql +SELECT wyHash64('ClickHouse') AS Hash; +``` + +Result: + +```response +┌─────────────────Hash─┐ +│ 12336419557878201794 │ +└──────────────────────┘ +``` + ## ngramMinHash Splits a ASCII string into n-grams of `ngramsize` symbols and calculates hash values for each n-gram. Uses `hashnum` minimum hashes to calculate the minimum hash and `hashnum` maximum hashes to calculate the maximum hash. Returns a tuple with these hashes. Is case sensitive. diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md index e9f8bc6e547c..187f248e92df 100644 --- a/docs/en/sql-reference/functions/other-functions.md +++ b/docs/en/sql-reference/functions/other-functions.md @@ -916,6 +916,34 @@ Returns the larger value of a and b. Returns the server’s uptime in seconds. If executed in the context of a distributed table, this function generates a normal column with values relevant to each shard. Otherwise it produces a constant value. +**Syntax** + +``` sql +uptime() +``` + +**Returned value** + +- Time value of seconds. + +Type: [UInt32](/docs/en/sql-reference/data-types/int-uint.md). + +**Example** + +Query: + +``` sql +SELECT uptime() as Uptime; +``` + +Result: + +``` response +┌─Uptime─┐ +│ 55867 │ +└────────┘ +``` + ## version() Returns the current version of ClickHouse as a string in the form of: diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md index 573790f7ff7c..f4d667300111 100644 --- a/docs/en/sql-reference/functions/string-functions.md +++ b/docs/en/sql-reference/functions/string-functions.md @@ -254,14 +254,70 @@ Result: Converts the ASCII Latin symbols in a string to lowercase. +*Syntax** + +``` sql +lower(input) +``` + Alias: `lcase` +**Parameters** + +- `input`: A string type [String](/docs/en/sql-reference/data-types/string.md). + +**Returned value** + +- A [String](/docs/en/sql-reference/data-types/string.md) data type value. + +**Example** + +Query: + +```sql +SELECT lower('CLICKHOUSE'); +``` + +```response +┌─lower('CLICKHOUSE')─┐ +│ clickhouse │ +└─────────────────────┘ +``` + ## upper Converts the ASCII Latin symbols in a string to uppercase. +**Syntax** + +``` sql +upper(input) +``` + Alias: `ucase` +**Parameters** + +- `input`: A string type [String](/docs/en/sql-reference/data-types/string.md). + +**Returned value** + +- A [String](/docs/en/sql-reference/data-types/string.md) data type value. 
+ +**Examples** + +Query: + +``` sql +SELECT upper('clickhouse'); +``` + +``` response +┌─upper('clickhouse')─┐ +│ CLICKHOUSE │ +└─────────────────────┘ +``` + ## lowerUTF8 Converts a string to lowercase, assuming that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined. @@ -278,6 +334,34 @@ Does not detect the language, e.g. for Turkish the result might not be exactly c If the length of the UTF-8 byte sequence is different for upper and lower case of a code point, the result may be incorrect for this code point. +**Syntax** + +``` sql +upperUTF8(input) +``` + +**Parameters** + +- `input`: A string type [String](/docs/en/sql-reference/data-types/string.md). + +**Returned value** + +- A [String](/docs/en/sql-reference/data-types/string.md) data type value. + +**Example** + +Query: + +``` sql +SELECT upperUTF8('München') as Upperutf8; +``` + +``` response +┌─Upperutf8─┐ +│ MÜNCHEN │ +└───────────┘ +``` + ## isValidUTF8 Returns 1, if the set of bytes constitutes valid UTF-8-encoded text, otherwise 0. diff --git a/docs/en/sql-reference/functions/string-replace-functions.md b/docs/en/sql-reference/functions/string-replace-functions.md index c7bd16cad4aa..60fe286de258 100644 --- a/docs/en/sql-reference/functions/string-replace-functions.md +++ b/docs/en/sql-reference/functions/string-replace-functions.md @@ -193,3 +193,33 @@ Result: ## translateUTF8 Like [translate](#translate) but assumes `s`, `from` and `to` are UTF-8 encoded strings. + +**Syntax** + +``` sql +translateUTF8(s, from, to) +``` + +**Parameters** + +- `s`: A string type [String](/docs/en/sql-reference/data-types/string.md). +- `from`: A string type [String](/docs/en/sql-reference/data-types/string.md). +- `to`: A string type [String](/docs/en/sql-reference/data-types/string.md). + +**Returned value** + +- `s`: A string type [String](/docs/en/sql-reference/data-types/string.md). + +**Examples** + +Query: + +``` sql +SELECT translateUTF8('Münchener Straße', 'üß', 'us') AS res; +``` + +``` response +┌─res──────────────┐ +│ Munchener Strase │ +└──────────────────┘ +``` diff --git a/docs/en/sql-reference/functions/string-search-functions.md b/docs/en/sql-reference/functions/string-search-functions.md index 117e3818dc6c..f7e56e73520f 100644 --- a/docs/en/sql-reference/functions/string-search-functions.md +++ b/docs/en/sql-reference/functions/string-search-functions.md @@ -6,14 +6,17 @@ sidebar_label: Searching in Strings # Functions for Searching in Strings -All functions in this section search by default case-sensitively. Case-insensitive search is usually provided by separate function variants. -Note that case-insensitive search follows the lowercase-uppercase rules of the English language. E.g. Uppercased `i` in English language is -`I` whereas in Turkish language it is `İ` - results for languages other than English may be unexpected. +All functions in this section search case-sensitively by default. Case-insensitive search is usually provided by separate function variants. -Functions in this section also assume that the searched string and the search string are single-byte encoded text. If this assumption is +:::note +Case-insensitive search follows the lowercase-uppercase rules of the English language. E.g. Uppercased `i` in the English language is +`I` whereas in the Turkish language it is `İ` - results for languages other than English may be unexpected. 
+::: + +Functions in this section also assume that the searched string (referred to in this section as `haystack`) and the search string (referred to in this section as `needle`) are single-byte encoded text. If this assumption is violated, no exception is thrown and results are undefined. Search with UTF-8 encoded strings is usually provided by separate function variants. Likewise, if a UTF-8 function variant is used and the input strings are not UTF-8 encoded text, no exception is thrown and the -results are undefined. Note that no automatic Unicode normalization is performed, you can use the +results are undefined. Note that no automatic Unicode normalization is performed, however you can use the [normalizeUTF8*()](https://clickhouse.com/docs/en/sql-reference/functions/string-functions/) functions for that. [General strings functions](string-functions.md) and [functions for replacing in strings](string-replace-functions.md) are described separately. @@ -54,6 +57,8 @@ Type: `Integer`. **Examples** +Query: + ``` sql SELECT position('Hello, world!', '!'); ``` @@ -68,6 +73,8 @@ Result: Example with `start_pos` argument: +Query: + ``` sql SELECT position('Hello, world!', 'o', 1), @@ -84,6 +91,8 @@ Result: Example for `needle IN haystack` syntax: +Query: + ```sql SELECT 6 = position('/' IN s) FROM (SELECT 'Hello/World' AS s); ``` @@ -98,6 +107,8 @@ Result: Examples with empty `needle` substring: +Query: + ``` sql SELECT position('abc', ''), @@ -109,6 +120,8 @@ SELECT position('abc', '', 5) ``` +Result: + ``` text ┌─position('abc', '')─┬─position('abc', '', 0)─┬─position('abc', '', 1)─┬─position('abc', '', 2)─┬─position('abc', '', 3)─┬─position('abc', '', 4)─┬─position('abc', '', 5)─┐ │ 1 │ 1 │ 1 │ 2 │ 3 │ 4 │ 0 │ @@ -132,7 +145,23 @@ locate(needle, haystack[, start_pos]) ## positionCaseInsensitive -Like [position](#position) but searches case-insensitively. +A case insensitive invariant of [position](#position). + +**Example** + +Query: + +``` sql +SELECT position('Hello, world!', 'hello'); +``` + +Result: + +``` text +┌─position('Hello, world!', 'hello')─┐ +│ 0 │ +└────────────────────────────────────┘ +``` ## positionUTF8 @@ -142,6 +171,8 @@ Like [position](#position) but assumes `haystack` and `needle` are UTF-8 encoded Function `positionUTF8` correctly counts character `ö` (represented by two points) as a single Unicode codepoint: +Query: + ``` sql SELECT positionUTF8('Motörhead', 'r'); ``` @@ -175,14 +206,17 @@ multiSearchAllPositions(haystack, [needle1, needle2, ..., needleN]) **Arguments** - `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `needle` — Substrings to be searched. Array +- `needle` — Substrings to be searched. [Array](../../sql-reference/data-types/array.md). **Returned values** -- Array of the starting position in bytes and counting from 1 (if the substring was found) or 0 (if the substring was not found) +- Array of the starting position in bytes and counting from 1, if the substring was found. +- 0, if the substring was not found. **Example** +Query: + ``` sql SELECT multiSearchAllPositions('Hello, World!', ['hello', '!', 'world']); ``` @@ -194,45 +228,535 @@ Result: │ [0,13,0] │ └───────────────────────────────────────────────────────────────────┘ ``` +## multiSearchAllPositionsCaseInsensitive + +Like [multiSearchAllPositions](#multisearchallpositions) but ignores case. 
+ +**Syntax** + +```sql +multiSearchAllPositionsCaseInsensitive(haystack, [needle1, needle2, ..., needleN]) +``` + +**Parameters** + +- `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). +- `needle` — Substrings to be searched. [Array](../../sql-reference/data-types/array.md). + +**Returned value** + +- Array of the starting position in bytes and counting from 1 (if the substring was found). +- 0 if the substring was not found. + +**Example** + +Query: + +```sql +SELECT multiSearchAllPositionsCaseInsensitive('ClickHouse',['c','h']); +``` + +Result: + +```response +["1","6"] +``` ## multiSearchAllPositionsUTF8 -Like [multiSearchAllPositions](#multiSearchAllPositions) but assumes `haystack` and the `needle`-s are UTF-8 encoded strings. +Like [multiSearchAllPositions](#multiSearchAllPositions) but assumes `haystack` and the `needle` substrings are UTF-8 encoded strings. + +**Syntax** + +```sql +multiSearchAllPositionsUTF8(haystack, [needle1, needle2, ..., needleN]) +``` + +**Parameters** + +- `haystack` — UTF-8 encoded string in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). +- `needle` — UTF-8 encoded substrings to be searched. [Array](../../sql-reference/data-types/array.md). + +**Returned value** + +- Array of the starting position in bytes and counting from 1 (if the substring was found). +- 0 if the substring was not found. + +**Example** + +Given `ClickHouse` as a UTF-8 string, find the positions of `C` (`\x43`) and `H` (`\x48`). + +Query: + +```sql +SELECT multiSearchAllPositionsUTF8('\x43\x6c\x69\x63\x6b\x48\x6f\x75\x73\x65',['\x43','\x48']); +``` + +Result: + +```response +["1","6"] +``` + +## multiSearchAllPositionsCaseInsensitiveUTF8 + +Like [multiSearchAllPositionsUTF8](#multisearchallpositionsutf8) but ignores case. + +**Syntax** + +```sql +multiSearchAllPositionsCaseInsensitiveUTF8(haystack, [needle1, needle2, ..., needleN]) +``` + +**Parameters** + +- `haystack` — UTF-8 encoded string in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). +- `needle` — UTF-8 encoded substrings to be searched. [Array](../../sql-reference/data-types/array.md). + +**Returned value** + +- Array of the starting position in bytes and counting from 1 (if the substring was found). +- 0 if the substring was not found. + +**Example** + +Given `ClickHouse` as a UTF-8 string, find the positions of `c` (`\x63`) and `h` (`\x68`). + +Query: + +```sql +SELECT multiSearchAllPositionsCaseInsensitiveUTF8('\x43\x6c\x69\x63\x6b\x48\x6f\x75\x73\x65',['\x63','\x68']); +``` + +Result: + +```response +["1","6"] +``` ## multiSearchFirstPosition -Like `position` but returns the leftmost offset in a `haystack` string which matches any of multiple `needle` strings. +Like [`position`](#position) but returns the leftmost offset in a `haystack` string which matches any of multiple `needle` strings. + +Functions [`multiSearchFirstPositionCaseInsensitive`](#multiSearchFirstPositionCaseInsensitive), [`multiSearchFirstPositionUTF8`](#multiSearchFirstPositionUTF8) and [`multiSearchFirstPositionCaseInsensitiveUTF8`](#multiSearchFirstPositionCaseInsensitiveUTF8) provide case-insensitive and/or UTF-8 variants of this function. + +**Syntax** + +```sql +multiSearchFirstPosition(haystack, [needle1, needle2, ..., needleN]) +``` + +**Parameters** + +- `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). 
+- `needle` — Substrings to be searched. [Array](../../sql-reference/data-types/array.md). + +**Returned value** + +- Leftmost offset in a `haystack` string which matches any of multiple `needle` strings. +- 0, if there was no match. + +**Example** + +Query: + +```sql +SELECT multiSearchFirstPosition('Hello World',['llo', 'Wor', 'ld']); +``` + +Result: + +```response +3 +``` + +## multiSearchFirstPositionCaseInsensitive + +Like [`multiSearchFirstPosition`](#multiSearchFirstPosition) but ignores case. + +**Syntax** + +```sql +multiSearchFirstPositionCaseInsensitive(haystack, [needle1, needle2, ..., needleN]) +``` + +**Parameters** + +- `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). +- `needle` — Array of substrings to be searched. [Array](../../sql-reference/data-types/array.md). + +**Returned value** + +- Leftmost offset in a `haystack` string which matches any of multiple `needle` strings. +- 0, if there was no match. + +**Example** + +Query: + +```sql +SELECT multiSearchFirstPositionCaseInsensitive('HELLO WORLD',['wor', 'ld', 'ello']); +``` + +Result: + +```response +2 +``` + +## multiSearchFirstPositionUTF8 + +Like [`multiSearchFirstPosition`](#multiSearchFirstPosition) but assumes `haystack` and `needle` to be UTF-8 strings. + +**Syntax** + +```sql +multiSearchFirstPositionUTF8(haystack, [needle1, needle2, ..., needleN]) +``` + +**Parameters** + +- `haystack` — UTF-8 string in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). +- `needle` — Array of UTF-8 substrings to be searched. [Array](../../sql-reference/data-types/array.md). + +**Returned value** + +- Leftmost offset in a `haystack` string which matches any of multiple `needle` strings. +- 0, if there was no match. + +**Example** + +Find the leftmost offset in UTF-8 string `hello world` which matches any of the given needles. + +Query: + +```sql +SELECT multiSearchFirstPositionUTF8('\x68\x65\x6c\x6c\x6f\x20\x77\x6f\x72\x6c\x64',['wor', 'ld', 'ello']); +``` + +Result: + +```response +2 +``` + +## multiSearchFirstPositionCaseInsensitiveUTF8 -Functions `multiSearchFirstPositionCaseInsensitive`, `multiSearchFirstPositionUTF8` and `multiSearchFirstPositionCaseInsensitiveUTF8` provide case-insensitive and/or UTF-8 variants of this function. +Like [`multiSearchFirstPosition`](#multiSearchFirstPosition) but assumes `haystack` and `needle` to be UTF-8 strings and ignores case. **Syntax** ```sql -multiSearchFirstPosition(haystack, \[needle1, needle2, …, needlen\]) +multiSearchFirstPositionCaseInsensitiveUTF8(haystack, [needle1, needle2, ..., needleN]) +``` + +**Parameters** + +- `haystack` — UTF-8 string in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). +- `needle` — Array of UTF-8 substrings to be searched. [Array](../../sql-reference/data-types/array.md) + +**Returned value** + +- Leftmost offset in a `haystack` string which matches any of multiple `needle` strings, ignoring case. +- 0, if there was no match. + +**Example** + +Find the leftmost offset in UTF-8 string `HELLO WORLD` which matches any of the given needles. + +Query: + +```sql +SELECT multiSearchFirstPositionCaseInsensitiveUTF8('\x48\x45\x4c\x4c\x4f\x20\x57\x4f\x52\x4c\x44',['wor', 'ld', 'ello']); +``` + +Result: + +```response +2 ``` ## multiSearchFirstIndex Returns the index `i` (starting from 1) of the leftmost found needlei in the string `haystack` and 0 otherwise. 
-Functions `multiSearchFirstIndexCaseInsensitive`, `multiSearchFirstIndexUTF8` and `multiSearchFirstIndexCaseInsensitiveUTF8` provide case-insensitive and/or UTF-8 variants of this function. +Functions [`multiSearchFirstIndexCaseInsensitive`](#multiSearchFirstIndexCaseInsensitive), [`multiSearchFirstIndexUTF8`](#multiSearchFirstIndexUTF8) and [`multiSearchFirstIndexCaseInsensitiveUTF8`](#multiSearchFirstIndexCaseInsensitiveUTF8) provide case-insensitive and/or UTF-8 variants of this function. + +**Syntax** + +```sql +multiSearchFirstIndex(haystack, [needle1, needle2, ..., needleN]) +``` +**Parameters** + +- `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). +- `needle` — Substrings to be searched. [Array](../../sql-reference/data-types/array.md). + +**Returned value** + +- index (starting from 1) of the leftmost found needle. +- 0, if there was no match. + +**Example** + +Query: + +```sql +SELECT multiSearchFirstIndex('Hello World',['World','Hello']); +``` + +Result: + +```response +1 +``` + +## multiSearchFirstIndexCaseInsensitive + +Returns the index `i` (starting from 1) of the leftmost found needlei in the string `haystack` and 0 otherwise. Ignores case. + +**Syntax** + +```sql +multiSearchFirstIndexCaseInsensitive(haystack, [needle1, needle2, ..., needleN]) +``` + +**Parameters** + +- `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). +- `needle` — Substrings to be searched. [Array](../../sql-reference/data-types/array.md). + +**Returned value** + +- index (starting from 1) of the leftmost found needle. +- 0, if there was no match. + +**Example** + +Query: + +```sql +SELECT multiSearchFirstIndexCaseInsensitive('hElLo WoRlD',['World','Hello']); +``` + +Result: + +```response +1 +``` + +## multiSearchFirstIndexUTF8 + +Returns the index `i` (starting from 1) of the leftmost found needlei in the string `haystack` and 0 otherwise. Assumes `haystack` and `needle` are UTF-8 encoded strings. + +**Syntax** + +```sql +multiSearchFirstIndexUTF8(haystack, [needle1, needle2, ..., needleN]) +``` + +**Parameters** + +- `haystack` — UTF-8 string in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). +- `needle` — Array of UTF-8 substrings to be searched. [Array](../../sql-reference/data-types/array.md) + +**Returned value** + +- index (starting from 1) of the leftmost found needle. +- 0, if there was no match. + +**Example** + +Given `Hello World` as a UTF-8 string, find the first index of UTF-8 strings `Hello` and `World`. + +Query: + +```sql +SELECT multiSearchFirstIndexUTF8('\x48\x65\x6c\x6c\x6f\x20\x57\x6f\x72\x6c\x64',['\x57\x6f\x72\x6c\x64','\x48\x65\x6c\x6c\x6f']); +``` + +Result: + +```response +1 +``` + +## multiSearchFirstIndexCaseInsensitiveUTF8 + +Returns the index `i` (starting from 1) of the leftmost found needlei in the string `haystack` and 0 otherwise. Assumes `haystack` and `needle` are UTF-8 encoded strings. Ignores case. **Syntax** ```sql -multiSearchFirstIndex(haystack, \[needle1, needle2, …, needlen\]) +multiSearchFirstIndexCaseInsensitiveUTF8(haystack, [needle1, needle2, ..., needleN]) +``` + +**Parameters** + +- `haystack` — UTF-8 string in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). +- `needle` — Array of UTF-8 substrings to be searched. [Array](../../sql-reference/data-types/array.md). 
+ +**Returned value** + +- index (starting from 1) of the leftmost found needle. +- 0, if there was no match. + +**Example** + +Given `HELLO WORLD` as a UTF-8 string, find the first index of UTF-8 strings `hello` and `world`. + +Query: + +```sql +SELECT multiSearchFirstIndexCaseInsensitiveUTF8('\x48\x45\x4c\x4c\x4f\x20\x57\x4f\x52\x4c\x44',['\x68\x65\x6c\x6c\x6f','\x77\x6f\x72\x6c\x64']); +``` + +Result: + +```response +1 ``` -## multiSearchAny {#multisearchany} +## multiSearchAny Returns 1, if at least one string needlei matches the string `haystack` and 0 otherwise. -Functions `multiSearchAnyCaseInsensitive`, `multiSearchAnyUTF8` and `multiSearchAnyCaseInsensitiveUTF8` provide case-insensitive and/or UTF-8 variants of this function. +Functions [`multiSearchAnyCaseInsensitive`](#multiSearchAnyCaseInsensitive), [`multiSearchAnyUTF8`](#multiSearchAnyUTF8) and []`multiSearchAnyCaseInsensitiveUTF8`](#multiSearchAnyCaseInsensitiveUTF8) provide case-insensitive and/or UTF-8 variants of this function. + +**Syntax** + +```sql +multiSearchAny(haystack, [needle1, needle2, ..., needleN]) +``` + +**Parameters** + +- `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). +- `needle` — Substrings to be searched. [Array](../../sql-reference/data-types/array.md). + +**Returned value** + +- 1, if there was at least one match. +- 0, if there was not at least one match. + +**Example** + +Query: + +```sql +SELECT multiSearchAny('ClickHouse',['C','H']); +``` + +Result: + +```response +1 +``` + +## multiSearchAnyCaseInsensitive + +Like [multiSearchAny](#multisearchany) but ignores case. **Syntax** ```sql -multiSearchAny(haystack, \[needle1, needle2, …, needlen\]) +multiSearchAnyCaseInsensitive(haystack, [needle1, needle2, ..., needleN]) +``` + +**Parameters** + +- `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). +- `needle` — Substrings to be searched. [Array](../../sql-reference/data-types/array.md) + +**Returned value** + +- 1, if there was at least one case-insensitive match. +- 0, if there was not at least one case-insensitive match. + +**Example** + +Query: + +```sql +SELECT multiSearchAnyCaseInsensitive('ClickHouse',['c','h']); +``` + +Result: + +```response +1 +``` + +## multiSearchAnyUTF8 + +Like [multiSearchAny](#multisearchany) but assumes `haystack` and the `needle` substrings are UTF-8 encoded strings. + +*Syntax** + +```sql +multiSearchAnyUTF8(haystack, [needle1, needle2, ..., needleN]) +``` + +**Parameters** + +- `haystack` — UTF-8 string in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). +- `needle` — UTF-8 substrings to be searched. [Array](../../sql-reference/data-types/array.md). + +**Returned value** + +- 1, if there was at least one match. +- 0, if there was not at least one match. + +**Example** + +Given `ClickHouse` as a UTF-8 string, check if there are any `C` ('\x43') or `H` ('\x48') letters in the word. + +Query: + +```sql +SELECT multiSearchAnyUTF8('\x43\x6c\x69\x63\x6b\x48\x6f\x75\x73\x65',['\x43','\x48']); +``` + +Result: + +```response +1 +``` + +## multiSearchAnyCaseInsensitiveUTF8 + +Like [multiSearchAnyUTF8](#multiSearchAnyUTF8) but ignores case. + +*Syntax** + +```sql +multiSearchAnyCaseInsensitiveUTF8(haystack, [needle1, needle2, ..., needleN]) +``` + +**Parameters** + +- `haystack` — UTF-8 string in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). 
+- `needle` — UTF-8 substrings to be searched. [Array](../../sql-reference/data-types/array.md) + +**Returned value** + +- 1, if there was at least one case-insensitive match. +- 0, if there was not at least one case-insensitive match. + +**Example** + +Given `ClickHouse` as a UTF-8 string, check if there is any letter `h`(`\x68`) in the word, ignoring case. + +Query: + +```sql +SELECT multiSearchAnyCaseInsensitiveUTF8('\x43\x6c\x69\x63\x6b\x48\x6f\x75\x73\x65',['\x68']); +``` + +Result: + +```response +1 ``` ## match {#match} diff --git a/docs/en/sql-reference/functions/tuple-functions.md b/docs/en/sql-reference/functions/tuple-functions.md index b089de67e98f..8b695f1450d8 100644 --- a/docs/en/sql-reference/functions/tuple-functions.md +++ b/docs/en/sql-reference/functions/tuple-functions.md @@ -521,67 +521,300 @@ Result: └──────────────────────────────────┘ ``` -## dotProduct +## tupleConcat + +Combines tuples passed as arguments. + +``` sql +tupleConcat(tuples) +``` + +**Arguments** + +- `tuples` – Arbitrary number of arguments of [Tuple](../../sql-reference/data-types/tuple.md) type. + +**Example** + +``` sql +SELECT tupleConcat((1, 2), (3, 4), (true, false)) AS res +``` + +``` text +┌─res──────────────────┐ +│ (1,2,3,4,true,false) │ +└──────────────────────┘ +``` + +## tupleIntDiv -Calculates the scalar product of two tuples of the same size. +Does integer division of a tuple of numerators and a tuple of denominators, and returns a tuple of the quotients. **Syntax** ```sql -dotProduct(tuple1, tuple2) +tupleIntDiv(tuple_num, tuple_div) ``` -Alias: `scalarProduct`. +**Parameters** -**Arguments** +- `tuple_num`: Tuple of numerator values. [Tuple](../data-types/tuple) of numeric type. +- `tuple_div`: Tuple of divisor values. [Tuple](../data-types/tuple) of numeric type. -- `tuple1` — First tuple. [Tuple](../../sql-reference/data-types/tuple.md). -- `tuple2` — Second tuple. [Tuple](../../sql-reference/data-types/tuple.md). +**Returned value** + +- Tuple of the quotients of `tuple_num` and `tuple_div`. [Tuple](../data-types/tuple) of integer values. + +**Implementation details** + +- If either `tuple_num` or `tuple_div` contain non-integer values then the result is calculated by rounding to the nearest integer for each non-integer numerator or divisor. +- An error will be thrown for division by 0. + +**Examples** + +Query: + +``` sql +SELECT tupleIntDiv((15, 10, 5), (5, 5, 5)); +``` + +Result: + +``` text +┌─tupleIntDiv((15, 10, 5), (5, 5, 5))─┐ +│ (3,2,1) │ +└─────────────────────────────────────┘ +``` + +Query: + +``` sql +SELECT tupleIntDiv((15, 10, 5), (5.5, 5.5, 5.5)); +``` + +Result: + +``` text +┌─tupleIntDiv((15, 10, 5), (5.5, 5.5, 5.5))─┐ +│ (2,1,0) │ +└───────────────────────────────────────────┘ +``` + +## tupleIntDivOrZero + +Like [tupleIntDiv](#tupleintdiv) it does integer division of a tuple of numerators and a tuple of denominators, and returns a tuple of the quotients. It does not throw an error for 0 divisors, but rather returns the quotient as 0. + +**Syntax** + +```sql +tupleIntDivOrZero(tuple_num, tuple_div) +``` + +- `tuple_num`: Tuple of numerator values. [Tuple](../data-types/tuple) of numeric type. +- `tuple_div`: Tuple of divisor values. [Tuple](../data-types/tuple) of numeric type. **Returned value** -- Scalar product. +- Tuple of the quotients of `tuple_num` and `tuple_div`. [Tuple](../data-types/tuple) of integer values. +- Returns 0 for quotients where the divisor is 0. 
-Type: [Int/UInt](../../sql-reference/data-types/int-uint.md) or [Float](../../sql-reference/data-types/float.md). +**Implementation details** -**Example** +- If either `tuple_num` or `tuple_div` contain non-integer values then the result is calculated by rounding to the nearest integer for each non-integer numerator or divisor as in [tupleIntDiv](#tupleintdiv). + +**Examples** Query: +``` sql +SELECT tupleIntDivOrZero((5, 10, 15), (0, 0, 0)); +``` + +Result: + +``` text +┌─tupleIntDivOrZero((5, 10, 15), (0, 0, 0))─┐ +│ (0,0,0) │ +└───────────────────────────────────────────┘ +``` + +## tupleIntDivByNumber + +Does integer division of a tuple of numerators by a given denominator, and returns a tuple of the quotients. + +**Syntax** + ```sql -SELECT dotProduct((1, 2), (2, 3)); +tupleIntDivByNumber(tuple_num, div) +``` + +**Parameters** + +- `tuple_num`: Tuple of numerator values. [Tuple](../data-types/tuple) of numeric type. +- `div`: The divisor value. [Numeric](../data-types/int-uint.md) type. + +**Returned value** + +- Tuple of the quotients of `tuple_num` and `div`. [Tuple](../data-types/tuple) of integer values. + +**Implementation details** + +- If either `tuple_num` or `div` contain non-integer values then the result is calculated by rounding to the nearest integer for each non-integer numerator or divisor. +- An error will be thrown for division by 0. + +**Examples** + +Query: + +``` sql +SELECT tupleIntDivByNumber((15, 10, 5), 5); ``` Result: -```text -┌─dotProduct((1, 2), (2, 3))─┐ -│ 8 │ -└────────────────────────────┘ +``` text +┌─tupleIntDivByNumber((15, 10, 5), 5)─┐ +│ (3,2,1) │ +└─────────────────────────────────────┘ ``` -## tupleConcat +Query: -Combines tuples passed as arguments. +``` sql +SELECT tupleIntDivByNumber((15.2, 10.7, 5.5), 5.8); +``` + +Result: + +``` text +┌─tupleIntDivByNumber((15.2, 10.7, 5.5), 5.8)─┐ +│ (2,1,0) │ +└─────────────────────────────────────────────┘ +``` + +## tupleIntDivOrZeroByNumber + +Like [tupleIntDivByNumber](#tupleintdivbynumber) it does integer division of a tuple of numerators by a given denominator, and returns a tuple of the quotients. It does not throw an error for 0 divisors, but rather returns the quotient as 0. + +**Syntax** + +```sql +tupleIntDivOrZeroByNumber(tuple_num, div) +``` + +**Parameters** + +- `tuple_num`: Tuple of numerator values. [Tuple](../data-types/tuple) of numeric type. +- `div`: The divisor value. [Numeric](../data-types/int-uint.md) type. + +**Returned value** + +- Tuple of the quotients of `tuple_num` and `div`. [Tuple](../data-types/tuple) of integer values. +- Returns 0 for quotients where the divisor is 0. + +**Implementation details** + +- If either `tuple_num` or `div` contain non-integer values then the result is calculated by rounding to the nearest integer for each non-integer numerator or divisor as in [tupleIntDivByNumber](#tupleintdivbynumber). + +**Examples** + +Query: ``` sql -tupleConcat(tuples) +SELECT tupleIntDivOrZeroByNumber((15, 10, 5), 5); ``` -**Arguments** +Result: -- `tuples` – Arbitrary number of arguments of [Tuple](../../sql-reference/data-types/tuple.md) type. 
+``` text +┌─tupleIntDivOrZeroByNumber((15, 10, 5), 5)─┐ +│ (3,2,1) │ +└───────────────────────────────────────────┘ +``` -**Example** +Query: ``` sql -SELECT tupleConcat((1, 2), (3, 4), (true, false)) AS res +SELECT tupleIntDivOrZeroByNumber((15, 10, 5), 0) ``` +Result: + ``` text -┌─res──────────────────┐ -│ (1,2,3,4,true,false) │ -└──────────────────────┘ +┌─tupleIntDivOrZeroByNumber((15, 10, 5), 0)─┐ +│ (0,0,0) │ +└───────────────────────────────────────────┘ +``` + +## tupleModulo + +Returns a tuple of the moduli (remainders) of division operations of two tuples. + +**Syntax** + +```sql +tupleModulo(tuple_num, tuple_mod) +``` + +**Parameters** + +- `tuple_num`: Tuple of numerator values. [Tuple](../data-types/tuple) of numeric type. +- `tuple_div`: Tuple of modulus values. [Tuple](../data-types/tuple) of numeric type. + +**Returned value** + +- Tuple of the remainders of division of `tuple_num` and `tuple_div`. [Tuple](../data-types/tuple) of non-zero integer values. +- An error is thrown for division by zero. + +**Examples** + +Query: + +``` sql +SELECT tupleModulo((15, 10, 5), (5, 3, 2)); +``` + +Result: + +``` text +┌─tupleModulo((15, 10, 5), (5, 3, 2))─┐ +│ (0,1,1) │ +└─────────────────────────────────────┘ +``` + +## tupleModuloByNumber + +Returns a tuple of the moduli (remainders) of division operations of a tuple and a given divisor. + +**Syntax** + +```sql +tupleModuloByNumber(tuple_num, div) +``` + +**Parameters** + +- `tuple_num`: Tuple of numerator values. [Tuple](../data-types/tuple) of numeric type. +- `div`: The divisor value. [Numeric](../data-types/int-uint.md) type. + +**Returned value** + +- Tuple of the remainders of division of `tuple_num` and `div`. [Tuple](../data-types/tuple) of non-zero integer values. +- An error is thrown for division by zero. + +**Examples** + +Query: + +``` sql +SELECT tupleModuloByNumber((15, 10, 5), 2); +``` + +Result: + +``` text +┌─tupleModuloByNumber((15, 10, 5), 2)─┐ +│ (1,0,1) │ +└─────────────────────────────────────┘ ``` ## Distance functions diff --git a/docs/en/sql-reference/statements/alter/view.md b/docs/en/sql-reference/statements/alter/view.md index 59045afdeb60..e063b27424e7 100644 --- a/docs/en/sql-reference/statements/alter/view.md +++ b/docs/en/sql-reference/statements/alter/view.md @@ -8,7 +8,7 @@ sidebar_label: VIEW You can modify `SELECT` query that was specified when a [materialized view](../create/view.md#materialized) was created with the `ALTER TABLE … MODIFY QUERY` statement without interrupting ingestion process. -This command is created to change materialized view created with `TO [db.]name` clause. It does not change the structure of the underling storage table and it does not change the columns' definition of the materialized view, because of this the application of this command is very limited for materialized views are created without `TO [db.]name` clause. +This command is created to change materialized view created with `TO [db.]name` clause. It does not change the structure of the underlying storage table and it does not change the columns' definition of the materialized view, because of this the application of this command is very limited for materialized views are created without `TO [db.]name` clause. 
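As a quick orientation ahead of the documented example, here is a minimal sketch of the statement this paragraph describes; the database, view, and table names (`db.events_mv`, `db.events`, `db.events_agg`) are hypothetical:

```sql
-- Assumes a materialized view that was created with an explicit target table:
--   CREATE MATERIALIZED VIEW db.events_mv TO db.events_agg AS
--   SELECT key, count() AS cnt FROM db.events GROUP BY key;
-- Only the SELECT query is swapped out; the target table db.events_agg and the
-- view's column definitions stay as they are (the result column is still `cnt`).
ALTER TABLE db.events_mv MODIFY QUERY
SELECT key, countIf(value > 0) AS cnt
FROM db.events
GROUP BY key;
```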
**Example with TO table** diff --git a/docs/en/sql-reference/statements/drop.md b/docs/en/sql-reference/statements/drop.md index 159ab09ab946..98b849ecf3b8 100644 --- a/docs/en/sql-reference/statements/drop.md +++ b/docs/en/sql-reference/statements/drop.md @@ -20,11 +20,10 @@ DROP DATABASE [IF EXISTS] db [ON CLUSTER cluster] [SYNC] ## DROP TABLE -Deletes the table. -In case when `IF EMPTY` clause is specified server will check if table is empty only on replica that received initial query. +Deletes one or more tables. :::tip -Also see [UNDROP TABLE](/docs/en/sql-reference/statements/undrop.md) +To undo the deletion of a table, please see [UNDROP TABLE](/docs/en/sql-reference/statements/undrop.md) ::: Syntax: @@ -33,7 +32,9 @@ Syntax: DROP [TEMPORARY] TABLE [IF EXISTS] [IF EMPTY] [db1.]name_1[, [db2.]name_2, ...] [ON CLUSTER cluster] [SYNC] ``` -Note that deleting multiple tables at the same time is a non-atomic deletion. If a table fails to be deleted, subsequent tables will not be deleted. +Limitations: +- If the clause `IF EMPTY` is specified, the server checks the emptiness of the table only on the replica which received the query. +- Deleting multiple tables at once is not an atomic operation, i.e. if the deletion of a table fails, subsequent tables will not be deleted. ## DROP DICTIONARY diff --git a/docs/en/sql-reference/window-functions/index.md b/docs/en/sql-reference/window-functions/index.md index 9b2ded7b6cee..32ebc6d028f7 100644 --- a/docs/en/sql-reference/window-functions/index.md +++ b/docs/en/sql-reference/window-functions/index.md @@ -12,25 +12,23 @@ Some of the calculations that you can do are similar to those that can be done w ClickHouse supports the standard grammar for defining windows and window functions. The table below indicates whether a feature is currently supported. -| Feature | Support or workaround | +| Feature | Supported? | |------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| ad hoc window specification (`count(*) over (partition by id order by time desc)`) | supported | -| expressions involving window functions, e.g. `(count(*) over ()) / 2)` | supported | -| `WINDOW` clause (`select ... from table window w as (partition by id)`) | supported | -| `ROWS` frame | supported | -| `RANGE` frame | supported, the default | -| `INTERVAL` syntax for `DateTime` `RANGE OFFSET` frame | not supported, specify the number of seconds instead (`RANGE` works with any numeric type). | -| `GROUPS` frame | not supported | -| Calculating aggregate functions over a frame (`sum(value) over (order by time)`) | all aggregate functions are supported | -| `rank()`, `dense_rank()`, `row_number()` | supported | -| `lag/lead(value, offset)` | Not supported. Workarounds: | -| | 1) replace with `any(value) over (.... rows between preceding and preceding)`, or `following` for `lead` | -| | 2) use `lagInFrame/leadInFrame`, which are analogous, but respect the window frame. To get behavior identical to `lag/lead`, use `rows between unbounded preceding and unbounded following` | -| ntile(buckets) | Supported. Specify window like, (partition by x order by y rows between unbounded preceding and unrounded following). | +| ad hoc window specification (`count(*) over (partition by id order by time desc)`) | ✅ | +| expressions involving window functions, e.g. 
`(count(*) over ()) / 2)` | ✅ |
+| `WINDOW` clause (`select ... from table window w as (partition by id)`) | ✅ |
+| `ROWS` frame | ✅ |
+| `RANGE` frame | ✅ (the default) |
+| `INTERVAL` syntax for `DateTime` `RANGE OFFSET` frame | ❌ (specify the number of seconds instead; `RANGE` works with any numeric type) |
+| `GROUPS` frame | ❌ |
+| Calculating aggregate functions over a frame (`sum(value) over (order by time)`) | ✅ (All aggregate functions are supported) |
+| `rank()`, `dense_rank()`, `row_number()` | ✅ |
+| `lag/lead(value, offset)` | ❌ <br/> You can use one of the following workarounds: <br/> 1) `any(value) over (.... rows between <offset> preceding and <offset> preceding)`, or `following` for `lead` <br/> 2) `lagInFrame/leadInFrame`, which are analogous, but respect the window frame. To get behavior identical to `lag/lead`, use `rows between unbounded preceding and unbounded following` |
+| ntile(buckets) | ✅
Specify window like, (partition by x order by y rows between unbounded preceding and unrounded following). | ## ClickHouse-specific Window Functions -There are also the following window function that's specific to ClickHouse: +There is also the following ClickHouse specific window function: ### nonNegativeDerivative(metric_column, timestamp_column[, INTERVAL X UNITS]) @@ -89,6 +87,102 @@ These functions can be used only as a window function. Let's have a look at some examples of how window functions can be used. +### Numbering rows + +```sql +CREATE TABLE salaries +( + `team` String, + `player` String, + `salary` UInt32, + `position` String +) +Engine = Memory; + +INSERT INTO salaries FORMAT Values + ('Port Elizabeth Barbarians', 'Gary Chen', 195000, 'F'), + ('New Coreystad Archdukes', 'Charles Juarez', 190000, 'F'), + ('Port Elizabeth Barbarians', 'Michael Stanley', 150000, 'D'), + ('New Coreystad Archdukes', 'Scott Harrison', 150000, 'D'), + ('Port Elizabeth Barbarians', 'Robert George', 195000, 'M'); +``` + +```sql +SELECT player, salary, + row_number() OVER (ORDER BY salary) AS row +FROM salaries; +``` + +```text +┌─player──────────┬─salary─┬─row─┐ +│ Michael Stanley │ 150000 │ 1 │ +│ Scott Harrison │ 150000 │ 2 │ +│ Charles Juarez │ 190000 │ 3 │ +│ Gary Chen │ 195000 │ 4 │ +│ Robert George │ 195000 │ 5 │ +└─────────────────┴────────┴─────┘ +``` + +```sql +SELECT player, salary, + row_number() OVER (ORDER BY salary) AS row, + rank() OVER (ORDER BY salary) AS rank, + dense_rank() OVER (ORDER BY salary) AS denseRank +FROM salaries; +``` + +```text +┌─player──────────┬─salary─┬─row─┬─rank─┬─denseRank─┐ +│ Michael Stanley │ 150000 │ 1 │ 1 │ 1 │ +│ Scott Harrison │ 150000 │ 2 │ 1 │ 1 │ +│ Charles Juarez │ 190000 │ 3 │ 3 │ 2 │ +│ Gary Chen │ 195000 │ 4 │ 4 │ 3 │ +│ Robert George │ 195000 │ 5 │ 4 │ 3 │ +└─────────────────┴────────┴─────┴──────┴───────────┘ +``` + +### Aggregation functions + +Compare each player's salary to the average for their team. + +```sql +SELECT player, salary, team, + avg(salary) OVER (PARTITION BY team) AS teamAvg, + salary - teamAvg AS diff +FROM salaries; +``` + +```text +┌─player──────────┬─salary─┬─team──────────────────────┬─teamAvg─┬───diff─┐ +│ Charles Juarez │ 190000 │ New Coreystad Archdukes │ 170000 │ 20000 │ +│ Scott Harrison │ 150000 │ New Coreystad Archdukes │ 170000 │ -20000 │ +│ Gary Chen │ 195000 │ Port Elizabeth Barbarians │ 180000 │ 15000 │ +│ Michael Stanley │ 150000 │ Port Elizabeth Barbarians │ 180000 │ -30000 │ +│ Robert George │ 195000 │ Port Elizabeth Barbarians │ 180000 │ 15000 │ +└─────────────────┴────────┴───────────────────────────┴─────────┴────────┘ +``` + +Compare each player's salary to the maximum for their team. 
+ +```sql +SELECT player, salary, team, + max(salary) OVER (PARTITION BY team) AS teamAvg, + salary - teamAvg AS diff +FROM salaries; +``` + +```text +┌─player──────────┬─salary─┬─team──────────────────────┬─teamAvg─┬───diff─┐ +│ Charles Juarez │ 190000 │ New Coreystad Archdukes │ 190000 │ 0 │ +│ Scott Harrison │ 150000 │ New Coreystad Archdukes │ 190000 │ -40000 │ +│ Gary Chen │ 195000 │ Port Elizabeth Barbarians │ 195000 │ 0 │ +│ Michael Stanley │ 150000 │ Port Elizabeth Barbarians │ 195000 │ -45000 │ +│ Robert George │ 195000 │ Port Elizabeth Barbarians │ 195000 │ 0 │ +└─────────────────┴────────┴───────────────────────────┴─────────┴────────┘ +``` + +### Partitioning by column + ```sql CREATE TABLE wf_partition ( @@ -120,6 +214,8 @@ ORDER BY └──────────┴───────┴───────┴──────────────┘ ``` +### Frame bounding + ```sql CREATE TABLE wf_frame ( @@ -131,14 +227,19 @@ ENGINE = Memory; INSERT INTO wf_frame FORMAT Values (1,1,1), (1,2,2), (1,3,3), (1,4,4), (1,5,5); +``` --- frame is bounded by bounds of a partition (BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) +```sql +-- Frame is bounded by bounds of a partition (BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) SELECT part_key, value, order, - groupArray(value) OVER (PARTITION BY part_key ORDER BY order ASC - Rows BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS frame_values + groupArray(value) OVER ( + PARTITION BY part_key + ORDER BY order ASC + Rows BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING + ) AS frame_values FROM wf_frame ORDER BY part_key ASC, @@ -151,7 +252,9 @@ ORDER BY │ 1 │ 4 │ 4 │ [1,2,3,4,5] │ │ 1 │ 5 │ 5 │ [1,2,3,4,5] │ └──────────┴───────┴───────┴──────────────┘ +``` +```sql -- short form - no bound expression, no order by SELECT part_key, @@ -169,14 +272,19 @@ ORDER BY │ 1 │ 4 │ 4 │ [1,2,3,4,5] │ │ 1 │ 5 │ 5 │ [1,2,3,4,5] │ └──────────┴───────┴───────┴──────────────┘ +``` --- frame is bounded by the beggining of a partition and the current row +```sql +-- frame is bounded by the beginning of a partition and the current row SELECT part_key, value, order, - groupArray(value) OVER (PARTITION BY part_key ORDER BY order ASC - Rows BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS frame_values + groupArray(value) OVER ( + PARTITION BY part_key + ORDER BY order ASC + Rows BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW + ) AS frame_values FROM wf_frame ORDER BY part_key ASC, @@ -189,8 +297,10 @@ ORDER BY │ 1 │ 4 │ 4 │ [1,2,3,4] │ │ 1 │ 5 │ 5 │ [1,2,3,4,5] │ └──────────┴───────┴───────┴──────────────┘ +``` --- short form (frame is bounded by the beggining of a partition and the current row) +```sql +-- short form (frame is bounded by the beginning of a partition and the current row) SELECT part_key, value, @@ -207,8 +317,10 @@ ORDER BY │ 1 │ 4 │ 4 │ [1,2,3,4] │ │ 1 │ 5 │ 5 │ [1,2,3,4,5] │ └──────────┴───────┴───────┴──────────────┘ +``` --- frame is bounded by the beggining of a partition and the current row, but order is backward +```sql +-- frame is bounded by the beginning of a partition and the current row, but order is backward SELECT part_key, value, @@ -225,14 +337,19 @@ ORDER BY │ 1 │ 4 │ 4 │ [5,4] │ │ 1 │ 5 │ 5 │ [5] │ └──────────┴───────┴───────┴──────────────┘ +``` +```sql -- sliding frame - 1 PRECEDING ROW AND CURRENT ROW SELECT part_key, value, order, - groupArray(value) OVER (PARTITION BY part_key ORDER BY order ASC - Rows BETWEEN 1 PRECEDING AND CURRENT ROW) AS frame_values + groupArray(value) OVER ( + PARTITION BY part_key + ORDER BY order ASC + Rows BETWEEN 1 PRECEDING AND CURRENT ROW + ) AS 
frame_values FROM wf_frame ORDER BY part_key ASC, @@ -245,14 +362,19 @@ ORDER BY │ 1 │ 4 │ 4 │ [3,4] │ │ 1 │ 5 │ 5 │ [4,5] │ └──────────┴───────┴───────┴──────────────┘ +``` +```sql -- sliding frame - Rows BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING SELECT part_key, value, order, - groupArray(value) OVER (PARTITION BY part_key ORDER BY order ASC - Rows BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING) AS frame_values + groupArray(value) OVER ( + PARTITION BY part_key + ORDER BY order ASC + Rows BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING + ) AS frame_values FROM wf_frame ORDER BY part_key ASC, @@ -264,7 +386,9 @@ ORDER BY │ 1 │ 4 │ 4 │ [3,4,5] │ │ 1 │ 5 │ 5 │ [4,5] │ └──────────┴───────┴───────┴──────────────┘ +``` +```sql -- row_number does not respect the frame, so rn_1 = rn_2 = rn_3 != rn_4 SELECT part_key, @@ -278,8 +402,11 @@ SELECT FROM wf_frame WINDOW w1 AS (PARTITION BY part_key ORDER BY order DESC), - w2 AS (PARTITION BY part_key ORDER BY order DESC - Rows BETWEEN 1 PRECEDING AND CURRENT ROW) + w2 AS ( + PARTITION BY part_key + ORDER BY order DESC + Rows BETWEEN 1 PRECEDING AND CURRENT ROW + ) ORDER BY part_key ASC, value ASC; @@ -290,7 +417,9 @@ ORDER BY │ 1 │ 4 │ 4 │ [5,4] │ 2 │ 2 │ 2 │ 2 │ │ 1 │ 5 │ 5 │ [5] │ 1 │ 1 │ 1 │ 1 │ └──────────┴───────┴───────┴──────────────┴──────┴──────┴──────┴──────┘ +``` +```sql -- first_value and last_value respect the frame SELECT groupArray(value) OVER w1 AS frame_values_1, @@ -313,7 +442,9 @@ ORDER BY │ [1,2,3,4] │ 1 │ 4 │ [3,4] │ 3 │ 4 │ │ [1,2,3,4,5] │ 1 │ 5 │ [4,5] │ 4 │ 5 │ └────────────────┴───────────────┴──────────────┴────────────────┴───────────────┴──────────────┘ +``` +```sql -- second value within the frame SELECT groupArray(value) OVER w1 AS frame_values_1, @@ -330,7 +461,9 @@ ORDER BY │ [1,2,3,4] │ 2 │ │ [2,3,4,5] │ 3 │ └────────────────┴──────────────┘ +``` +```sql -- second value within the frame + Null for missing values SELECT groupArray(value) OVER w1 AS frame_values_1, @@ -351,7 +484,9 @@ ORDER BY ## Real world examples -### Maximum/total salary per department. +The following examples solve common real-world problems. + +### Maximum/total salary per department ```sql CREATE TABLE employees @@ -369,7 +504,9 @@ INSERT INTO employees FORMAT Values ('IT', 'Tim', 200), ('IT', 'Anna', 300), ('IT', 'Elen', 500); +``` +```sql SELECT department, employee_name AS emp, @@ -386,8 +523,10 @@ FROM max(salary) OVER wndw AS max_salary_per_dep, sum(salary) OVER wndw AS total_salary_per_dep FROM employees - WINDOW wndw AS (PARTITION BY department - rows BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) + WINDOW wndw AS ( + PARTITION BY department + rows BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING + ) ORDER BY department ASC, employee_name ASC @@ -403,7 +542,7 @@ FROM └────────────┴──────┴────────┴────────────────────┴──────────────────────┴──────────────────┘ ``` -### Cumulative sum. 
+### Cumulative sum ```sql CREATE TABLE warehouse @@ -421,7 +560,9 @@ INSERT INTO warehouse VALUES ('sku1', '2020-01-01', 1), ('sku1', '2020-02-01', 1), ('sku1', '2020-03-01', 1); +``` +```sql SELECT item, ts, @@ -461,13 +602,18 @@ insert into sensors values('cpu_temp', '2020-01-01 00:00:00', 87), ('cpu_temp', '2020-01-01 00:00:05', 87), ('cpu_temp', '2020-01-01 00:00:06', 87), ('cpu_temp', '2020-01-01 00:00:07', 87); +``` + +```sql SELECT metric, ts, value, - avg(value) OVER - (PARTITION BY metric ORDER BY ts ASC Rows BETWEEN 2 PRECEDING AND CURRENT ROW) - AS moving_avg_temp + avg(value) OVER ( + PARTITION BY metric + ORDER BY ts ASC + Rows BETWEEN 2 PRECEDING AND CURRENT ROW + ) AS moving_avg_temp FROM sensors ORDER BY metric ASC, @@ -536,7 +682,9 @@ insert into sensors values('ambient_temp', '2020-01-01 00:00:00', 16), ('ambient_temp', '2020-03-01 12:00:00', 16), ('ambient_temp', '2020-03-01 12:00:00', 16), ('ambient_temp', '2020-03-01 12:00:00', 16); +``` +```sql SELECT metric, ts, diff --git a/docs/ru/interfaces/http.md b/docs/ru/interfaces/http.md index be8cfbdda6c7..5f11f1b430bd 100644 --- a/docs/ru/interfaces/http.md +++ b/docs/ru/interfaces/http.md @@ -434,16 +434,18 @@ $ curl -v 'http://localhost:8123/predefined_query' ``` xml - [^/]+)(/(?P[^/]+))?]]> + [^/]+)]]> GET TEST_HEADER_VALUE - [^/]+)(/(?P[^/]+))?]]> + [^/]+)]]> predefined_query_handler - SELECT value FROM system.settings WHERE name = {name_1:String} - SELECT name, value FROM system.settings WHERE name = {name_2:String} + + SELECT name, value FROM system.settings + WHERE name IN ({name_1:String}, {name_2:String}) + @@ -451,13 +453,13 @@ $ curl -v 'http://localhost:8123/predefined_query' ``` ``` bash -$ curl -H 'XXX:TEST_HEADER_VALUE' -H 'PARAMS_XXX:max_threads' 'http://localhost:8123/query_param_with_url/1/max_threads/max_final_threads?max_threads=1&max_final_threads=2' -1 -max_final_threads 2 +$ curl -H 'XXX:TEST_HEADER_VALUE' -H 'PARAMS_XXX:max_final_threads' 'http://localhost:8123/query_param_with_url/max_threads?max_threads=1&max_final_threads=2' +max_final_threads 2 +max_threads 1 ``` :::note Предупреждение -В одном `predefined_query_handler` поддерживается только один запрос типа `INSERT`. +В одном `predefined_query_handler` поддерживается только один запрос. ::: ### dynamic_query_handler {#dynamic_query_handler} diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index a56afda641b4..f9456e34a56f 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -2776,7 +2776,7 @@ SELECT range(number) FROM system.numbers LIMIT 5 FORMAT PrettyCompactNoEscapes; - 0 — номера строк не выводятся. - 1 — номера строк выводятся. -Значение по умолчанию: `0`. +Значение по умолчанию: `1`. **Пример** @@ -2798,7 +2798,7 @@ SELECT TOP 3 name, value FROM system.settings; ``` ### output_format_pretty_color {#output_format_pretty_color} -Включает/выключает управляющие последовательности ANSI в форматах Pretty. +Включает/выключает управляющие последовательности ANSI в форматах Pretty. Возможные значения: @@ -4123,7 +4123,7 @@ SELECT sum(number) FROM numbers(10000000000) SETTINGS partial_result_on_first_ca ## session_timezone {#session_timezone} Задаёт значение часового пояса (session_timezone) по умолчанию для текущей сессии вместо [часового пояса сервера](../server-configuration-parameters/settings.md#server_configuration_parameters-timezone). 
То есть, все значения DateTime/DateTime64, для которых явно не задан часовой пояс, будут интерпретированы как относящиеся к указанной зоне. -При значении настройки `''` (пустая строка), будет совпадать с часовым поясом сервера. +При значении настройки `''` (пустая строка), будет совпадать с часовым поясом сервера. Функции `timeZone()` and `serverTimezone()` возвращают часовой пояс текущей сессии и сервера соответственно. diff --git a/docs/ru/sql-reference/data-types/datetime.md b/docs/ru/sql-reference/data-types/datetime.md index 57f24786bb70..34cd44d47095 100644 --- a/docs/ru/sql-reference/data-types/datetime.md +++ b/docs/ru/sql-reference/data-types/datetime.md @@ -120,7 +120,7 @@ FROM dt - [Функции для работы с датой и временем](../../sql-reference/functions/date-time-functions.md) - [Функции для работы с массивами](../../sql-reference/functions/array-functions.md) - [Настройка `date_time_input_format`](../../operations/settings/index.md#settings-date_time_input_format) -- [Настройка `date_time_output_format`](../../operations/settings/index.md) +- [Настройка `date_time_output_format`](../../operations/settings/index.md#settings-date_time_output_format) - [Конфигурационный параметр сервера `timezone`](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) - [Параметр `session_timezone`](../../operations/settings/settings.md#session_timezone) - [Операторы для работы с датой и временем](../../sql-reference/operators/index.md#operators-datetime) diff --git a/docs/ru/sql-reference/data-types/multiword-types.md b/docs/ru/sql-reference/data-types/multiword-types.md deleted file mode 100644 index cca2d71e480b..000000000000 --- a/docs/ru/sql-reference/data-types/multiword-types.md +++ /dev/null @@ -1,28 +0,0 @@ ---- -slug: /ru/sql-reference/data-types/multiword-types -sidebar_position: 61 -sidebar_label: Составные типы ---- - -# Составные типы {#multiword-types} - -При создании таблиц вы можете использовать типы данных с названием, состоящим из нескольких слов. Такие названия поддерживаются для лучшей совместимости с SQL. 
- -## Поддержка составных типов {#multiword-types-support} - -| Составные типы | Обычные типы | -|-------------------------------------|-----------------------------------------------------------| -| DOUBLE PRECISION | [Float64](../../sql-reference/data-types/float.md) | -| CHAR LARGE OBJECT | [String](../../sql-reference/data-types/string.md) | -| CHAR VARYING | [String](../../sql-reference/data-types/string.md) | -| CHARACTER LARGE OBJECT | [String](../../sql-reference/data-types/string.md) | -| CHARACTER VARYING | [String](../../sql-reference/data-types/string.md) | -| NCHAR LARGE OBJECT | [String](../../sql-reference/data-types/string.md) | -| NCHAR VARYING | [String](../../sql-reference/data-types/string.md) | -| NATIONAL CHARACTER LARGE OBJECT | [String](../../sql-reference/data-types/string.md) | -| NATIONAL CHARACTER VARYING | [String](../../sql-reference/data-types/string.md) | -| NATIONAL CHAR VARYING | [String](../../sql-reference/data-types/string.md) | -| NATIONAL CHARACTER | [String](../../sql-reference/data-types/string.md) | -| NATIONAL CHAR | [String](../../sql-reference/data-types/string.md) | -| BINARY LARGE OBJECT | [String](../../sql-reference/data-types/string.md) | -| BINARY VARYING | [String](../../sql-reference/data-types/string.md) | diff --git a/docs/zh/interfaces/http.md b/docs/zh/interfaces/http.md index 84ca5ed0c47e..f55cf41936f4 100644 --- a/docs/zh/interfaces/http.md +++ b/docs/zh/interfaces/http.md @@ -427,29 +427,32 @@ $ curl -v 'http://localhost:8123/predefined_query' ``` xml - [^/]+)(/(?P[^/]+))?]]> - GET + [^/]+)]]> + GET TEST_HEADER_VALUE - [^/]+)(/(?P[^/]+))?]]> + [^/]+)]]> predefined_query_handler - SELECT value FROM system.settings WHERE name = {name_1:String} - SELECT name, value FROM system.settings WHERE name = {name_2:String} + + SELECT name, value FROM system.settings + WHERE name IN ({name_1:String}, {name_2:String}) + + ``` ``` bash -$ curl -H 'XXX:TEST_HEADER_VALUE' -H 'PARAMS_XXX:max_threads' 'http://localhost:8123/query_param_with_url/1/max_threads/max_final_threads?max_threads=1&max_final_threads=2' -1 -max_final_threads 2 +$ curl -H 'XXX:TEST_HEADER_VALUE' -H 'PARAMS_XXX:max_final_threads' 'http://localhost:8123/query_param_with_url/max_threads?max_threads=1&max_final_threads=2' +max_final_threads 2 +max_threads 1 ``` :::warning -在一个`predefined_query_handler`中,只支持insert类型的一个`查询`。 +在一个`predefined_query_handler`中,只支持的一个`查询`。 ::: ### 动态查询 {#dynamic_query_handler} diff --git a/docs/zh/sql-reference/data-types/multiword-types.mdx b/docs/zh/sql-reference/data-types/multiword-types.mdx deleted file mode 100644 index 85431d47efd6..000000000000 --- a/docs/zh/sql-reference/data-types/multiword-types.mdx +++ /dev/null @@ -1,10 +0,0 @@ ---- -slug: /zh/sql-reference/data-types/multiword-types -sidebar_position: 61 -sidebar_label: Multiword Type Names -title: "Multiword Types" ---- - -import Content from '@site/docs/en/sql-reference/data-types/multiword-types.md'; - - diff --git a/packages/clickhouse-common-static-dbg.yaml b/packages/clickhouse-common-static-dbg.yaml index 96de4c17d88f..74b7fa8381bc 100644 --- a/packages/clickhouse-common-static-dbg.yaml +++ b/packages/clickhouse-common-static-dbg.yaml @@ -30,10 +30,6 @@ conflicts: contents: - src: root/usr/lib/debug/usr/bin/clickhouse.debug dst: /usr/lib/debug/usr/bin/clickhouse.debug -- src: root/usr/lib/debug/usr/bin/clickhouse-odbc-bridge.debug - dst: /usr/lib/debug/usr/bin/clickhouse-odbc-bridge.debug -- src: root/usr/lib/debug/usr/bin/clickhouse-library-bridge.debug - dst: 
/usr/lib/debug/usr/bin/clickhouse-library-bridge.debug # docs - src: ../AUTHORS dst: /usr/share/doc/clickhouse-common-static-dbg/AUTHORS diff --git a/packages/clickhouse-common-static.yaml b/packages/clickhouse-common-static.yaml index 383ad39591cd..db330f808e15 100644 --- a/packages/clickhouse-common-static.yaml +++ b/packages/clickhouse-common-static.yaml @@ -36,10 +36,6 @@ contents: dst: /usr/bin/clickhouse - src: root/usr/bin/clickhouse-extract-from-config dst: /usr/bin/clickhouse-extract-from-config -- src: root/usr/bin/clickhouse-library-bridge - dst: /usr/bin/clickhouse-library-bridge -- src: root/usr/bin/clickhouse-odbc-bridge - dst: /usr/bin/clickhouse-odbc-bridge - src: root/usr/share/bash-completion/completions dst: /usr/share/bash-completion/completions - src: root/usr/share/clickhouse diff --git a/packages/clickhouse-library-bridge.yaml b/packages/clickhouse-library-bridge.yaml new file mode 100644 index 000000000000..d041e7a26dbc --- /dev/null +++ b/packages/clickhouse-library-bridge.yaml @@ -0,0 +1,35 @@ +# package sources should be placed in ${PWD}/root +# nfpm should run from the same directory with a config +name: "clickhouse-library-bridge" +description: | + ClickHouse Library Bridge - is a separate process for loading libraries for the 'library' dictionary sources and the CatBoost library. + ClickHouse is a column-oriented database management system + that allows generating analytical data reports in real time. + +# Common packages config +arch: "${DEB_ARCH}" # amd64, arm64 +platform: "linux" +version: "${CLICKHOUSE_VERSION_STRING}" +vendor: "ClickHouse Inc." +homepage: "https://clickhouse.com" +license: "Apache" +section: "database" +priority: "optional" +maintainer: "ClickHouse Dev Team " +deb: + fields: + Source: clickhouse + +# Package specific content +contents: +- src: root/usr/bin/clickhouse-library-bridge + dst: /usr/bin/clickhouse-library-bridge +# docs +- src: ../AUTHORS + dst: /usr/share/doc/clickhouse-library-bridge/AUTHORS +- src: ../CHANGELOG.md + dst: /usr/share/doc/clickhouse-library-bridge/CHANGELOG.md +- src: ../LICENSE + dst: /usr/share/doc/clickhouse-library-bridge/LICENSE +- src: ../README.md + dst: /usr/share/doc/clickhouse-library-bridge/README.md diff --git a/packages/clickhouse-odbc-bridge.yaml b/packages/clickhouse-odbc-bridge.yaml new file mode 100644 index 000000000000..98c459c8c26b --- /dev/null +++ b/packages/clickhouse-odbc-bridge.yaml @@ -0,0 +1,35 @@ +# package sources should be placed in ${PWD}/root +# nfpm should run from the same directory with a config +name: "clickhouse-odbc-bridge" +description: | + ClickHouse ODBC Bridge - is a separate process for loading ODBC drivers and interacting with external databases using the ODBC protocol. + ClickHouse is a column-oriented database management system + that allows generating analytical data reports in real time. + +# Common packages config +arch: "${DEB_ARCH}" # amd64, arm64 +platform: "linux" +version: "${CLICKHOUSE_VERSION_STRING}" +vendor: "ClickHouse Inc." 
+homepage: "https://clickhouse.com" +license: "Apache" +section: "database" +priority: "optional" +maintainer: "ClickHouse Dev Team " +deb: + fields: + Source: clickhouse + +# Package specific content +contents: +- src: root/usr/bin/clickhouse-odbc-bridge + dst: /usr/bin/clickhouse-odbc-bridge +# docs +- src: ../AUTHORS + dst: /usr/share/doc/clickhouse-odbc-bridge/AUTHORS +- src: ../CHANGELOG.md + dst: /usr/share/doc/clickhouse-odbc-bridge/CHANGELOG.md +- src: ../LICENSE + dst: /usr/share/doc/clickhouse-odbc-bridge/LICENSE +- src: ../README.md + dst: /usr/share/doc/clickhouse-odbc-bridge/README.md diff --git a/programs/bash-completion/completions/clickhouse b/programs/bash-completion/completions/clickhouse index ff0a60c60be8..3c895a660754 100644 --- a/programs/bash-completion/completions/clickhouse +++ b/programs/bash-completion/completions/clickhouse @@ -3,7 +3,7 @@ function _clickhouse_get_utils() { local cmd=$1 && shift - "$cmd" --help |& awk '/^clickhouse.*args/ { print $2 }' + "$cmd" help |& awk '/^clickhouse.*args/ { print $2 }' } function _complete_for_clickhouse_entrypoint_bin() diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index 192f9e61891d..090dda383b33 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -687,7 +687,11 @@ bool Client::processWithFuzzing(const String & full_query) try { const char * begin = full_query.data(); - orig_ast = parseQuery(begin, begin + full_query.size(), true); + orig_ast = parseQuery(begin, begin + full_query.size(), + global_context->getSettingsRef(), + /*allow_multi_statements=*/ true, + /*is_interactive=*/ is_interactive, + /*ignore_error=*/ ignore_error); } catch (const Exception & e) { @@ -934,8 +938,8 @@ void Client::addOptions(OptionsDescription & options_description) ("user,u", po::value()->default_value("default"), "user") ("password", po::value(), "password") ("ask-password", "ask-password") - ("ssh-key-file", po::value(), "File containing ssh private key needed for authentication. 
If not set does password authentication.") - ("ssh-key-passphrase", po::value(), "Passphrase for imported ssh key.") + ("ssh-key-file", po::value(), "File containing the SSH private key for authenticate with the server.") + ("ssh-key-passphrase", po::value(), "Passphrase for the SSH private key specified by --ssh-key-file.") ("quota_key", po::value(), "A string to differentiate quotas when the user have keyed quotas configured on server") ("max_client_network_bandwidth", po::value(), "the maximum speed of data exchange over the network for the client in bytes per second.") @@ -950,6 +954,7 @@ void Client::addOptions(OptionsDescription & options_description) ("opentelemetry-tracestate", po::value(), "OpenTelemetry tracestate header as described by W3C Trace Context recommendation") ("no-warnings", "disable warnings when client connects to server") + /// TODO: Left for compatibility as it's used in upgrade check, remove after next release and use server setting ignore_drop_queries_probability ("fake-drop", "Ignore all DROP queries, should be used only for testing") ("accept-invalid-certificate", "Ignore certificate verification errors, equal to config parameters openSSL.client.invalidCertificateHandler.name=AcceptCertificateHandler and openSSL.client.verificationMode=none") ; @@ -1093,7 +1098,7 @@ void Client::processOptions(const OptionsDescription & options_description, if (options.count("no-warnings")) config().setBool("no-warnings", true); if (options.count("fake-drop")) - fake_drop = true; + config().setString("ignore_drop_queries_probability", "1"); if (options.count("accept-invalid-certificate")) { config().setString("openSSL.client.invalidCertificateHandler.name", "AcceptCertificateHandler"); diff --git a/programs/disks/DisksApp.cpp b/programs/disks/DisksApp.cpp index b7c3c7f5c97a..6c7687992216 100644 --- a/programs/disks/DisksApp.cpp +++ b/programs/disks/DisksApp.cpp @@ -166,7 +166,7 @@ int DisksApp::main(const std::vector & /*args*/) { String config_path = config().getString("config-file", getDefaultConfigFileName()); ConfigProcessor config_processor(config_path, false, false); - config_processor.setConfigPath(fs::path(config_path).parent_path()); + ConfigProcessor::setConfigPath(fs::path(config_path).parent_path()); auto loaded_config = config_processor.loadConfig(); config().add(loaded_config.configuration.duplicate(), false, false); } diff --git a/programs/format/Format.cpp b/programs/format/Format.cpp index fc73eda6815a..d4b975ce1e88 100644 --- a/programs/format/Format.cpp +++ b/programs/format/Format.cpp @@ -237,7 +237,7 @@ int mainEntryClickHouseFormat(int argc, char ** argv) ASTPtr res = parseQueryAndMovePosition( parser, pos, end, "query", multiple, cmd_settings.max_query_size, cmd_settings.max_parser_depth, cmd_settings.max_parser_backtracks); - std::unique_ptr insert_query_payload = nullptr; + std::unique_ptr insert_query_payload; /// If the query is INSERT ... VALUES, then we will try to parse the data. if (auto * insert_query = res->as(); insert_query && insert_query->data) { diff --git a/programs/install/Install.cpp b/programs/install/Install.cpp index 0ff0faff6248..f2ef3857d63c 100644 --- a/programs/install/Install.cpp +++ b/programs/install/Install.cpp @@ -46,12 +46,12 @@ INCBIN(resource_users_xml, SOURCE_DIR "/programs/server/users.xml"); * * The following steps are performed: * - * - copying the binary to binary directory (/usr/bin). + * - copying the binary to binary directory (/usr/bin/) * - creation of symlinks for tools. 
* - creation of clickhouse user and group. - * - creation of config directory (/etc/clickhouse-server). + * - creation of config directory (/etc/clickhouse-server/). * - creation of default configuration files. - * - creation of a directory for logs (/var/log/clickhouse-server). + * - creation of a directory for logs (/var/log/clickhouse-server/). * - creation of a data directory if not exists. * - setting a password for default user. * - choose an option to listen connections. @@ -226,7 +226,12 @@ int mainEntryClickHouseInstall(int argc, char ** argv) desc.add_options() ("help,h", "produce help message") ("prefix", po::value()->default_value("/"), "prefix for all paths") +#if defined (OS_DARWIN) + /// https://stackoverflow.com/a/36734569/22422288 + ("binary-path", po::value()->default_value("usr/local/bin"), "where to install binaries") +#else ("binary-path", po::value()->default_value("usr/bin"), "where to install binaries") +#endif ("config-path", po::value()->default_value("etc/clickhouse-server"), "where to install configs") ("log-path", po::value()->default_value("var/log/clickhouse-server"), "where to create log directory") ("data-path", po::value()->default_value("var/lib/clickhouse"), "directory for data") @@ -662,7 +667,6 @@ int mainEntryClickHouseInstall(int argc, char ** argv) " \n" " " << (config_dir / "server.crt").string() << "\n" " " << (config_dir / "server.key").string() << "\n" - " " << (config_dir / "dhparam.pem").string() << "\n" " \n" " \n" "\n"; @@ -1217,7 +1221,12 @@ int mainEntryClickHouseStart(int argc, char ** argv) desc.add_options() ("help,h", "produce help message") ("prefix", po::value()->default_value("/"), "prefix for all paths") +#if defined (OS_DARWIN) + /// https://stackoverflow.com/a/36734569/22422288 + ("binary-path", po::value()->default_value("usr/local/bin"), "directory with binary") +#else ("binary-path", po::value()->default_value("usr/bin"), "directory with binary") +#endif ("config-path", po::value()->default_value("etc/clickhouse-server"), "directory with configs") ("pid-path", po::value()->default_value("var/run/clickhouse-server"), "directory for pid file") ("user", po::value()->default_value(DEFAULT_CLICKHOUSE_SERVER_USER), "clickhouse user") @@ -1333,7 +1342,12 @@ int mainEntryClickHouseRestart(int argc, char ** argv) desc.add_options() ("help,h", "produce help message") ("prefix", po::value()->default_value("/"), "prefix for all paths") +#if defined (OS_DARWIN) + /// https://stackoverflow.com/a/36734569/22422288 + ("binary-path", po::value()->default_value("usr/local/bin"), "directory with binary") +#else ("binary-path", po::value()->default_value("usr/bin"), "directory with binary") +#endif ("config-path", po::value()->default_value("etc/clickhouse-server"), "directory with configs") ("pid-path", po::value()->default_value("var/run/clickhouse-server"), "directory for pid file") ("user", po::value()->default_value(DEFAULT_CLICKHOUSE_SERVER_USER), "clickhouse user") diff --git a/programs/keeper-client/KeeperClient.cpp b/programs/keeper-client/KeeperClient.cpp index 8297fab5ed98..52d825f30e6b 100644 --- a/programs/keeper-client/KeeperClient.cpp +++ b/programs/keeper-client/KeeperClient.cpp @@ -368,7 +368,7 @@ int KeeperClient::main(const std::vector & /* args */) DB::ConfigProcessor config_processor(config().getString("config-file", "config.xml")); /// This will handle a situation when clickhouse is running on the embedded config, but config.d folder is also present. 
- config_processor.registerEmbeddedConfig("config.xml", ""); + ConfigProcessor::registerEmbeddedConfig("config.xml", ""); auto clickhouse_config = config_processor.loadConfig(); Poco::Util::AbstractConfiguration::Keys keys; diff --git a/programs/library-bridge/CMakeLists.txt b/programs/library-bridge/CMakeLists.txt index 98d8848502d2..2fca10ce4d77 100644 --- a/programs/library-bridge/CMakeLists.txt +++ b/programs/library-bridge/CMakeLists.txt @@ -24,9 +24,4 @@ target_link_libraries(clickhouse-library-bridge PRIVATE set_target_properties(clickhouse-library-bridge PROPERTIES RUNTIME_OUTPUT_DIRECTORY ..) -if (SPLIT_DEBUG_SYMBOLS) - clickhouse_split_debug_symbols(TARGET clickhouse-library-bridge DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${SPLITTED_DEBUG_SYMBOLS_DIR} BINARY_PATH ../clickhouse-library-bridge) -else() - clickhouse_make_empty_debug_info_for_nfpm(TARGET clickhouse-library-bridge DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${SPLITTED_DEBUG_SYMBOLS_DIR}) - install(TARGETS clickhouse-library-bridge RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) -endif() +install(TARGETS clickhouse-library-bridge RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) diff --git a/programs/library-bridge/ExternalDictionaryLibraryUtils.h b/programs/library-bridge/ExternalDictionaryLibraryUtils.h index c9d03d27f75b..e6bf8f2a4c3b 100644 --- a/programs/library-bridge/ExternalDictionaryLibraryUtils.h +++ b/programs/library-bridge/ExternalDictionaryLibraryUtils.h @@ -35,7 +35,7 @@ class CStringsHolder ExternalDictionaryLibraryAPI::CStrings strings; // will pass pointer to lib private: - std::unique_ptr ptr_holder = nullptr; + std::unique_ptr ptr_holder; Container strings_holder; }; diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 05c9830ee2c2..a0aa6d085d8d 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -122,7 +122,7 @@ void LocalServer::initialize(Poco::Util::Application & self) { const auto config_path = config().getString("config-file", "config.xml"); ConfigProcessor config_processor(config_path, false, true); - config_processor.setConfigPath(fs::path(config_path).parent_path()); + ConfigProcessor::setConfigPath(fs::path(config_path).parent_path()); auto loaded_config = config_processor.loadConfig(); config().add(loaded_config.configuration.duplicate(), PRIO_DEFAULT, false); } @@ -413,8 +413,20 @@ void LocalServer::setupUsers() void LocalServer::connect() { connection_parameters = ConnectionParameters(config(), "localhost"); + + ReadBuffer * in; + auto table_file = config().getString("table-file", "-"); + if (table_file == "-" || table_file == "stdin") + { + in = &std_in; + } + else + { + input = std::make_unique(table_file); + in = input.get(); + } connection = LocalConnection::createConnection( - connection_parameters, global_context, need_render_progress, need_render_profile_events, server_display_name); + connection_parameters, global_context, in, need_render_progress, need_render_profile_events, server_display_name); } diff --git a/programs/local/LocalServer.h b/programs/local/LocalServer.h index ca0ce513b093..9b67aab02d4a 100644 --- a/programs/local/LocalServer.h +++ b/programs/local/LocalServer.h @@ -65,6 +65,8 @@ class LocalServer : public ClientBase, public Loggers std::optional status; std::optional temporary_directory_to_delete; + + std::unique_ptr input; }; } diff --git a/programs/main.cpp b/programs/main.cpp index 7162a18d7641..9ad8b016c824 100644 --- a/programs/main.cpp +++ 
b/programs/main.cpp @@ -487,7 +487,7 @@ int main(int argc_, char ** argv_) /// Interpret binary without argument or with arguments starts with dash /// ('-') as clickhouse-local for better usability: /// - /// clickhouse # dumps help + /// clickhouse help # dumps help /// clickhouse -q 'select 1' # use local /// clickhouse # spawn local /// clickhouse local # spawn local diff --git a/programs/odbc-bridge/CMakeLists.txt b/programs/odbc-bridge/CMakeLists.txt index d6cbe8f7215a..83839cc21acd 100644 --- a/programs/odbc-bridge/CMakeLists.txt +++ b/programs/odbc-bridge/CMakeLists.txt @@ -30,12 +30,7 @@ target_link_libraries(clickhouse-odbc-bridge PRIVATE set_target_properties(clickhouse-odbc-bridge PROPERTIES RUNTIME_OUTPUT_DIRECTORY ..) target_compile_options (clickhouse-odbc-bridge PRIVATE -Wno-reserved-id-macro -Wno-keyword-macro) -if (SPLIT_DEBUG_SYMBOLS) - clickhouse_split_debug_symbols(TARGET clickhouse-odbc-bridge DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${SPLITTED_DEBUG_SYMBOLS_DIR} BINARY_PATH ../clickhouse-odbc-bridge) -else() - clickhouse_make_empty_debug_info_for_nfpm(TARGET clickhouse-odbc-bridge DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${SPLITTED_DEBUG_SYMBOLS_DIR}) - install(TARGETS clickhouse-odbc-bridge RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) -endif() +install(TARGETS clickhouse-odbc-bridge RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) if(ENABLE_TESTS) add_subdirectory(tests) diff --git a/programs/odbc-bridge/ColumnInfoHandler.cpp b/programs/odbc-bridge/ColumnInfoHandler.cpp index 4cb15de3b2cb..5ff985b3d121 100644 --- a/programs/odbc-bridge/ColumnInfoHandler.cpp +++ b/programs/odbc-bridge/ColumnInfoHandler.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/programs/odbc-bridge/ColumnInfoHandler.h b/programs/odbc-bridge/ColumnInfoHandler.h index ca7044fdf328..610fb128c9de 100644 --- a/programs/odbc-bridge/ColumnInfoHandler.h +++ b/programs/odbc-bridge/ColumnInfoHandler.h @@ -5,7 +5,6 @@ #if USE_ODBC #include -#include #include #include diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 450e1696c115..a048bebc45b9 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -734,13 +734,17 @@ try LOG_INFO(log, "Available CPU instruction sets: {}", cpu_info); #endif + bool will_have_trace_collector = hasPHDRCache() && config().has("trace_log"); + // Initialize global thread pool. Do it before we fetch configs from zookeeper // nodes (`from_zk`), because ZooKeeper interface uses the pool. We will // ignore `max_thread_pool_size` in configs we fetch from ZK, but oh well. GlobalThreadPool::initialize( server_settings.max_thread_pool_size, server_settings.max_thread_pool_free_size, - server_settings.thread_pool_queue_size); + server_settings.thread_pool_queue_size, + will_have_trace_collector ? server_settings.global_profiler_real_time_period_ns : 0, + will_have_trace_collector ? server_settings.global_profiler_cpu_time_period_ns : 0); /// Wait for all threads to avoid possible use-after-free (for example logging objects can be already destroyed). 
SCOPE_EXIT({ Stopwatch watch; diff --git a/programs/server/config.xml b/programs/server/config.xml index ea3ead47c32f..e92381eeb1e3 100644 --- a/programs/server/config.xml +++ b/programs/server/config.xml @@ -96,7 +96,7 @@ https://{bucket}.s3.amazonaws.com - https://{bucket}.storage.googleapis.com + https://storage.googleapis.com/{bucket} https://{bucket}.oss.aliyuncs.com diff --git a/src/Access/AccessBackup.cpp b/src/Access/AccessBackup.cpp index ba89899dd8f1..1110b9c4b213 100644 --- a/src/Access/AccessBackup.cpp +++ b/src/Access/AccessBackup.cpp @@ -16,6 +16,8 @@ #include #include #include + +#include #include namespace fs = std::filesystem; diff --git a/src/Access/Authentication.cpp b/src/Access/Authentication.cpp index 47187d831548..bf1fe3feec3e 100644 --- a/src/Access/Authentication.cpp +++ b/src/Access/Authentication.cpp @@ -4,11 +4,12 @@ #include #include #include -#include #include +#include +#include #include -#include +#include "config.h" namespace DB { @@ -74,7 +75,7 @@ namespace } #if USE_SSH - bool checkSshSignature(const std::vector & keys, std::string_view signature, std::string_view original) + bool checkSshSignature(const std::vector & keys, std::string_view signature, std::string_view original) { for (const auto & key: keys) if (key.isPublic() && key.verifySignature(signature, original)) @@ -114,7 +115,11 @@ bool Authentication::areCredentialsValid( throw Authentication::Require("ClickHouse X.509 Authentication"); case AuthenticationType::SSH_KEY: - throw Authentication::Require("Ssh Keys Authentication"); +#if USE_SSH + throw Authentication::Require("SSH Keys Authentication"); +#else + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSH is disabled, because ClickHouse is built without libssh"); +#endif case AuthenticationType::MAX: break; @@ -145,7 +150,11 @@ bool Authentication::areCredentialsValid( throw Authentication::Require("ClickHouse X.509 Authentication"); case AuthenticationType::SSH_KEY: - throw Authentication::Require("Ssh Keys Authentication"); +#if USE_SSH + throw Authentication::Require("SSH Keys Authentication"); +#else + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSH is disabled, because ClickHouse is built without libssh"); +#endif case AuthenticationType::MAX: break; @@ -178,7 +187,11 @@ bool Authentication::areCredentialsValid( throw Authentication::Require("ClickHouse X.509 Authentication"); case AuthenticationType::SSH_KEY: - throw Authentication::Require("Ssh Keys Authentication"); +#if USE_SSH + throw Authentication::Require("SSH Keys Authentication"); +#else + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSH is disabled, because ClickHouse is built without libssh"); +#endif case AuthenticationType::BCRYPT_PASSWORD: return checkPasswordBcrypt(basic_credentials->getPassword(), auth_data.getPasswordHashBinary()); @@ -216,13 +229,18 @@ bool Authentication::areCredentialsValid( return auth_data.getSSLCertificateCommonNames().contains(ssl_certificate_credentials->getCommonName()); case AuthenticationType::SSH_KEY: - throw Authentication::Require("Ssh Keys Authentication"); +#if USE_SSH + throw Authentication::Require("SSH Keys Authentication"); +#else + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSH is disabled, because ClickHouse is built without libssh"); +#endif case AuthenticationType::MAX: break; } } +#if USE_SSH if (const auto * ssh_credentials = typeid_cast(&credentials)) { switch (auth_data.getType()) @@ -243,15 +261,12 @@ bool Authentication::areCredentialsValid( throw Authentication::Require("ClickHouse X.509 
Authentication"); case AuthenticationType::SSH_KEY: -#if USE_SSH return checkSshSignature(auth_data.getSSHKeys(), ssh_credentials->getSignature(), ssh_credentials->getOriginal()); -#else - throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSH is disabled, because ClickHouse is built without OpenSSL"); -#endif case AuthenticationType::MAX: break; } } +#endif if ([[maybe_unused]] const auto * always_allow_credentials = typeid_cast(&credentials)) return true; diff --git a/src/Access/AuthenticationData.cpp b/src/Access/AuthenticationData.cpp index da90a0f5842c..a4c25b438e88 100644 --- a/src/Access/AuthenticationData.cpp +++ b/src/Access/AuthenticationData.cpp @@ -105,7 +105,10 @@ bool operator ==(const AuthenticationData & lhs, const AuthenticationData & rhs) return (lhs.type == rhs.type) && (lhs.password_hash == rhs.password_hash) && (lhs.ldap_server_name == rhs.ldap_server_name) && (lhs.kerberos_realm == rhs.kerberos_realm) && (lhs.ssl_certificate_common_names == rhs.ssl_certificate_common_names) - && (lhs.ssh_keys == rhs.ssh_keys) && (lhs.http_auth_scheme == rhs.http_auth_scheme) +#if USE_SSH + && (lhs.ssh_keys == rhs.ssh_keys) +#endif + && (lhs.http_auth_scheme == rhs.http_auth_scheme) && (lhs.http_auth_server_name == rhs.http_auth_server_name); } @@ -326,7 +329,7 @@ std::shared_ptr AuthenticationData::toAST() const break; #else - throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSH is disabled, because ClickHouse is built without OpenSSL"); + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSH is disabled, because ClickHouse is built without libssh"); #endif } case AuthenticationType::HTTP: @@ -355,7 +358,7 @@ AuthenticationData AuthenticationData::fromAST(const ASTAuthenticationData & que { #if USE_SSH AuthenticationData auth_data(*query.type); - std::vector keys; + std::vector keys; size_t args_size = query.children.size(); for (size_t i = 0; i < args_size; ++i) @@ -366,7 +369,7 @@ AuthenticationData AuthenticationData::fromAST(const ASTAuthenticationData & que try { - keys.emplace_back(ssh::SSHKeyFactory::makePublicFromBase64(key_base64, type)); + keys.emplace_back(SSHKeyFactory::makePublicKeyFromBase64(key_base64, type)); } catch (const std::invalid_argument &) { @@ -377,7 +380,7 @@ AuthenticationData AuthenticationData::fromAST(const ASTAuthenticationData & que auth_data.setSSHKeys(std::move(keys)); return auth_data; #else - throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSH is disabled, because ClickHouse is built without OpenSSL"); + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSH is disabled, because ClickHouse is built without libssh"); #endif } diff --git a/src/Access/AuthenticationData.h b/src/Access/AuthenticationData.h index feef4d71d668..c97e0327b569 100644 --- a/src/Access/AuthenticationData.h +++ b/src/Access/AuthenticationData.h @@ -2,14 +2,16 @@ #include #include +#include #include #include -#include #include #include #include +#include "config.h" + namespace DB { @@ -59,8 +61,10 @@ class AuthenticationData const boost::container::flat_set & getSSLCertificateCommonNames() const { return ssl_certificate_common_names; } void setSSLCertificateCommonNames(boost::container::flat_set common_names_); - const std::vector & getSSHKeys() const { return ssh_keys; } - void setSSHKeys(std::vector && ssh_keys_) { ssh_keys = std::forward>(ssh_keys_); } +#if USE_SSH + const std::vector & getSSHKeys() const { return ssh_keys; } + void setSSHKeys(std::vector && ssh_keys_) { ssh_keys = std::forward>(ssh_keys_); } +#endif HTTPAuthenticationScheme getHTTPAuthenticationScheme() 
const { return http_auth_scheme; } void setHTTPAuthenticationScheme(HTTPAuthenticationScheme scheme) { http_auth_scheme = scheme; } @@ -94,7 +98,9 @@ class AuthenticationData String kerberos_realm; boost::container::flat_set ssl_certificate_common_names; String salt; - std::vector ssh_keys; +#if USE_SSH + std::vector ssh_keys; +#endif /// HTTP authentication properties String http_auth_server_name; HTTPAuthenticationScheme http_auth_scheme = HTTPAuthenticationScheme::BASIC; diff --git a/src/Access/Common/AuthenticationType.h b/src/Access/Common/AuthenticationType.h index 48ace3ca00a9..506c8abd3b11 100644 --- a/src/Access/Common/AuthenticationType.h +++ b/src/Access/Common/AuthenticationType.h @@ -34,8 +34,8 @@ enum class AuthenticationType /// Password is encrypted in bcrypt hash. BCRYPT_PASSWORD, - /// Server sends a random string named `challenge` which client needs to encrypt with private key. - /// The check is performed on server side by decrypting the data and comparing with the original string. + /// Server sends a random string named `challenge` to the client. The client signs it with its SSH private key. + /// The server verifies the signature against the original string using the SSH public key registered for the user. SSH_KEY, /// Authentication through HTTP protocol diff --git a/src/Access/Credentials.h b/src/Access/Credentials.h index 77b90eaaebce..d04f8a66541d 100644 --- a/src/Access/Credentials.h +++ b/src/Access/Credentials.h @@ -3,6 +3,7 @@ #include #include +#include "config.h" namespace DB { @@ -86,10 +87,11 @@ class MySQLNative41Credentials : public CredentialsWithScramble using CredentialsWithScramble::CredentialsWithScramble; }; +#if USE_SSH class SshCredentials : public Credentials { public: - explicit SshCredentials(const String& user_name_, const String& signature_, const String& original_) + SshCredentials(const String & user_name_, const String & signature_, const String & original_) : Credentials(user_name_), signature(signature_), original(original_) { is_ready = true; @@ -117,5 +119,6 @@ class SshCredentials : public Credentials String signature; String original; }; +#endif } diff --git a/src/Access/User.cpp b/src/Access/User.cpp index 39930c9cf76b..ef5cf7221130 100644 --- a/src/Access/User.cpp +++ b/src/Access/User.cpp @@ -31,7 +31,7 @@ void User::setName(const String & name_) throw Exception(ErrorCodes::BAD_ARGUMENTS, "User name is empty"); if (name_ == EncodedUserInfo::USER_INTERSERVER_MARKER) throw Exception(ErrorCodes::BAD_ARGUMENTS, "User name '{}' is reserved", name_); - if (startsWith(name_, EncodedUserInfo::SSH_KEY_AUTHENTICAION_MARKER)) + if (name_.starts_with(EncodedUserInfo::SSH_KEY_AUTHENTICAION_MARKER)) throw Exception(ErrorCodes::BAD_ARGUMENTS, "User name '{}' is reserved", name_); name = name_; } diff --git a/src/Access/UsersConfigAccessStorage.cpp b/src/Access/UsersConfigAccessStorage.cpp index b4b843fc77ea..e3c45eb45aeb 100644 --- a/src/Access/UsersConfigAccessStorage.cpp +++ b/src/Access/UsersConfigAccessStorage.cpp @@ -1,6 +1,5 @@ #include #include -#include #include #include #include @@ -10,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -214,7 +214,7 @@ namespace Poco::Util::AbstractConfiguration::Keys entries; config.keys(ssh_keys_config, entries); - std::vector keys; + std::vector keys; for (const String& entry : entries) { const auto conf_pref = ssh_keys_config + "."
+ entry + "."; @@ -237,7 +237,7 @@ namespace try { - keys.emplace_back(ssh::SSHKeyFactory::makePublicFromBase64(base64_key, type)); + keys.emplace_back(SSHKeyFactory::makePublicKeyFromBase64(base64_key, type)); } catch (const std::invalid_argument &) { @@ -249,7 +249,7 @@ namespace } user->auth_data.setSSHKeys(std::move(keys)); #else - throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSH is disabled, because ClickHouse is built without OpenSSL"); + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSH is disabled, because ClickHouse is built without libssh"); #endif } else if (has_http_auth) diff --git a/src/AggregateFunctions/AggregateFunctionAnyHeavy.cpp b/src/AggregateFunctions/AggregateFunctionAnyHeavy.cpp index 4f4d4a19cba1..ffddd46f2e38 100644 --- a/src/AggregateFunctions/AggregateFunctionAnyHeavy.cpp +++ b/src/AggregateFunctions/AggregateFunctionAnyHeavy.cpp @@ -115,34 +115,34 @@ class AggregateFunctionAnyHeavy final : public IAggregateFunctionDataHelperdata(place).add(*columns[0], row_num, arena); + data(place).add(*columns[0], row_num, arena); } void addManyDefaults(AggregateDataPtr __restrict place, const IColumn ** columns, size_t, Arena * arena) const override { - this->data(place).addManyDefaults(*columns[0], 0, arena); + data(place).addManyDefaults(*columns[0], 0, arena); } void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override { - this->data(place).add(this->data(rhs), arena); + data(place).add(data(rhs), arena); } void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional /* version */) const override { - this->data(place).write(buf, *serialization); + data(place).write(buf, *serialization); } void deserialize(AggregateDataPtr place, ReadBuffer & buf, std::optional /* version */, Arena * arena) const override { - this->data(place).read(buf, *serialization, arena); + data(place).read(buf, *serialization, arena); } bool allocatesMemoryInArena() const override { return singleValueTypeAllocatesMemoryInArena(value_type_index); } void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override { - this->data(place).insertResultInto(to); + data(place).insertResultInto(to); } }; diff --git a/src/AggregateFunctions/AggregateFunctionExponentialMovingAverage.cpp b/src/AggregateFunctions/AggregateFunctionExponentialMovingAverage.cpp index 8582c8c56fc2..3d7d6eff6084 100644 --- a/src/AggregateFunctions/AggregateFunctionExponentialMovingAverage.cpp +++ b/src/AggregateFunctions/AggregateFunctionExponentialMovingAverage.cpp @@ -54,30 +54,30 @@ class AggregateFunctionExponentialMovingAverage final { const auto & value = columns[0]->getFloat64(row_num); const auto & time = columns[1]->getFloat64(row_num); - this->data(place).add(value, time, half_decay); + data(place).add(value, time, half_decay); } void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override { - this->data(place).merge(this->data(rhs), half_decay); + data(place).merge(data(rhs), half_decay); } void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional /* version */) const override { - writeBinary(this->data(place).value, buf); - writeBinary(this->data(place).time, buf); + writeBinary(data(place).value, buf); + writeBinary(data(place).time, buf); } void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional /* version */, Arena *) const override { - readBinary(this->data(place).value, buf); - readBinary(this->data(place).time, buf); 
+ readBinary(data(place).value, buf); + readBinary(data(place).time, buf); } void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override { auto & column = assert_cast &>(to); - column.getData().push_back(this->data(place).get(half_decay)); + column.getData().push_back(data(place).get(half_decay)); } }; diff --git a/src/AggregateFunctions/AggregateFunctionFlameGraph.cpp b/src/AggregateFunctions/AggregateFunctionFlameGraph.cpp index f3d990460360..33e318b6c2fa 100644 --- a/src/AggregateFunctions/AggregateFunctionFlameGraph.cpp +++ b/src/AggregateFunctions/AggregateFunctionFlameGraph.cpp @@ -559,7 +559,7 @@ class AggregateFunctionFlameGraph final : public IAggregateFunctionDataHelperdata(place).add(ptr, allocated, trace_values.data() + prev_offset, trace_size, arena); + data(place).add(ptr, allocated, trace_values.data() + prev_offset, trace_size, arena); } void addManyDefaults( @@ -572,7 +572,7 @@ class AggregateFunctionFlameGraph final : public IAggregateFunctionDataHelperdata(place).merge(this->data(rhs), arena); + data(place).merge(data(rhs), arena); } void serialize(ConstAggregateDataPtr __restrict, WriteBuffer &, std::optional /* version */) const override @@ -590,7 +590,7 @@ class AggregateFunctionFlameGraph final : public IAggregateFunctionDataHelper(to); auto & str = assert_cast(array.getData()); - this->data(place).dumpFlameGraph(str.getChars(), str.getOffsets(), 0, 0); + data(place).dumpFlameGraph(str.getChars(), str.getOffsets(), 0, 0); array.getOffsets().push_back(str.size()); } diff --git a/src/AggregateFunctions/AggregateFunctionGroupArray.cpp b/src/AggregateFunctions/AggregateFunctionGroupArray.cpp index 6af8b1018dd6..630026521668 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupArray.cpp +++ b/src/AggregateFunctions/AggregateFunctionGroupArray.cpp @@ -89,10 +89,10 @@ struct GroupArraySamplerData chassert(lim != 0); /// With a large number of values, we will generate random numbers several times slower. 
- if (lim <= static_cast(rng.max())) + if (lim <= static_cast(pcg32_fast::max())) return rng() % lim; else - return (static_cast(rng()) * (static_cast(rng.max()) + 1ULL) + static_cast(rng())) % lim; + return (static_cast(rng()) * (static_cast(pcg32::max()) + 1ULL) + static_cast(rng())) % lim; } void randomShuffle() diff --git a/src/AggregateFunctions/AggregateFunctionKolmogorovSmirnovTest.cpp b/src/AggregateFunctions/AggregateFunctionKolmogorovSmirnovTest.cpp index 882150325be3..e1224fae2fba 100644 --- a/src/AggregateFunctions/AggregateFunctionKolmogorovSmirnovTest.cpp +++ b/src/AggregateFunctions/AggregateFunctionKolmogorovSmirnovTest.cpp @@ -293,32 +293,32 @@ class AggregateFunctionKolmogorovSmirnov final: Float64 value = columns[0]->getFloat64(row_num); UInt8 is_second = columns[1]->getUInt(row_num); if (is_second) - this->data(place).addY(value, arena); + data(place).addY(value, arena); else - this->data(place).addX(value, arena); + data(place).addX(value, arena); } void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override { - this->data(place).merge(this->data(rhs), arena); + data(place).merge(data(rhs), arena); } void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional /* version */) const override { - this->data(place).write(buf); + data(place).write(buf); } void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional /* version */, Arena * arena) const override { - this->data(place).read(buf, arena); + data(place).read(buf, arena); } void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override { - if (!this->data(place).size_x || !this->data(place).size_y) + if (!data(place).size_x || !data(place).size_y) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Aggregate function {} require both samples to be non empty", getName()); - auto [d_statistic, p_value] = this->data(place).getResult(alternative, method); + auto [d_statistic, p_value] = data(place).getResult(alternative, method); /// Because p-value is a probability. 
p_value = std::min(1.0, std::max(0.0, p_value)); diff --git a/src/AggregateFunctions/AggregateFunctionLargestTriangleThreeBuckets.cpp b/src/AggregateFunctions/AggregateFunctionLargestTriangleThreeBuckets.cpp index d5abdbc12fbe..b24b6c8996f2 100644 --- a/src/AggregateFunctions/AggregateFunctionLargestTriangleThreeBuckets.cpp +++ b/src/AggregateFunctions/AggregateFunctionLargestTriangleThreeBuckets.cpp @@ -242,7 +242,7 @@ class AggregateFunctionLargestTriangleThreeBuckets final : public IAggregateFunc { Float64 x = getFloat64DataFromColumn(columns[0], row_num, this->x_type); Float64 y = getFloat64DataFromColumn(columns[1], row_num, this->y_type); - this->data(place).add(x, y, arena); + data(place).add(x, y, arena); } Float64 getFloat64DataFromColumn(const IColumn * column, size_t row_num, TypeIndex type_index) const @@ -264,25 +264,25 @@ class AggregateFunctionLargestTriangleThreeBuckets final : public IAggregateFunc void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override { - auto & a = this->data(place); - const auto & b = this->data(rhs); + auto & a = data(place); + const auto & b = data(rhs); a.merge(b, arena); } void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional /* version */) const override { - this->data(place).write(buf); + data(place).write(buf); } void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional /* version */, Arena * arena) const override { - this->data(place).read(buf, arena); + data(place).read(buf, arena); } void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena * arena) const override { - auto res = this->data(place).getResult(total_buckets, arena); + auto res = data(place).getResult(total_buckets, arena); auto & col = assert_cast(to); auto & col_offsets = assert_cast(col.getOffsetsColumn()); diff --git a/src/AggregateFunctions/AggregateFunctionMannWhitney.cpp b/src/AggregateFunctions/AggregateFunctionMannWhitney.cpp index a70da7b35d57..e7bc5df335f7 100644 --- a/src/AggregateFunctions/AggregateFunctionMannWhitney.cpp +++ b/src/AggregateFunctions/AggregateFunctionMannWhitney.cpp @@ -205,35 +205,35 @@ class AggregateFunctionMannWhitney final: UInt8 is_second = columns[1]->getUInt(row_num); if (is_second) - this->data(place).addY(value, arena); + data(place).addY(value, arena); else - this->data(place).addX(value, arena); + data(place).addX(value, arena); } void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override { - auto & a = this->data(place); - const auto & b = this->data(rhs); + auto & a = data(place); + const auto & b = data(rhs); a.merge(b, arena); } void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional /* version */) const override { - this->data(place).write(buf); + data(place).write(buf); } void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional /* version */, Arena * arena) const override { - this->data(place).read(buf, arena); + data(place).read(buf, arena); } void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override { - if (!this->data(place).size_x || !this->data(place).size_y) + if (!data(place).size_x || !data(place).size_y) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Aggregate function {} require both samples to be non empty", getName()); - auto [u_statistic, p_value] = this->data(place).getResult(alternative, continuity_correction); + auto [u_statistic, p_value] = 
data(place).getResult(alternative, continuity_correction); /// Because p-value is a probability. p_value = std::min(1.0, std::max(0.0, p_value)); diff --git a/src/AggregateFunctions/AggregateFunctionRankCorrelation.cpp b/src/AggregateFunctions/AggregateFunctionRankCorrelation.cpp index d338808c7170..0c4726734ce9 100644 --- a/src/AggregateFunctions/AggregateFunctionRankCorrelation.cpp +++ b/src/AggregateFunctions/AggregateFunctionRankCorrelation.cpp @@ -66,31 +66,31 @@ class AggregateFunctionRankCorrelation : { Float64 new_x = columns[0]->getFloat64(row_num); Float64 new_y = columns[1]->getFloat64(row_num); - this->data(place).addX(new_x, arena); - this->data(place).addY(new_y, arena); + data(place).addX(new_x, arena); + data(place).addY(new_y, arena); } void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override { - auto & a = this->data(place); - const auto & b = this->data(rhs); + auto & a = data(place); + const auto & b = data(rhs); a.merge(b, arena); } void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional /* version */) const override { - this->data(place).write(buf); + data(place).write(buf); } void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional /* version */, Arena * arena) const override { - this->data(place).read(buf, arena); + data(place).read(buf, arena); } void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override { - auto answer = this->data(place).getResult(); + auto answer = data(place).getResult(); auto & column = static_cast &>(to); column.getData().push_back(answer); diff --git a/src/AggregateFunctions/AggregateFunctionRetention.cpp b/src/AggregateFunctions/AggregateFunctionRetention.cpp index 5eaa1a7a39c2..e9b46e62c146 100644 --- a/src/AggregateFunctions/AggregateFunctionRetention.cpp +++ b/src/AggregateFunctions/AggregateFunctionRetention.cpp @@ -102,24 +102,24 @@ class AggregateFunctionRetention final auto event = assert_cast *>(columns[i])->getData()[row_num]; if (event) { - this->data(place).add(i); + data(place).add(i); } } } void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override { - this->data(place).merge(this->data(rhs)); + data(place).merge(data(rhs)); } void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional /* version */) const override { - this->data(place).serialize(buf); + data(place).serialize(buf); } void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional /* version */, Arena *) const override { - this->data(place).deserialize(buf); + data(place).deserialize(buf); } void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override @@ -130,13 +130,13 @@ class AggregateFunctionRetention final ColumnArray::Offset current_offset = data_to.size(); data_to.resize(current_offset + events_size); - const bool first_flag = this->data(place).events.test(0); + const bool first_flag = data(place).events.test(0); data_to[current_offset] = first_flag; ++current_offset; for (size_t i = 1; i < events_size; ++i) { - data_to[current_offset] = (first_flag && this->data(place).events.test(i)); + data_to[current_offset] = (first_flag && data(place).events.test(i)); ++current_offset; } diff --git a/src/AggregateFunctions/AggregateFunctionSimpleLinearRegression.cpp b/src/AggregateFunctions/AggregateFunctionSimpleLinearRegression.cpp index 75d2fe595d84..ce2f7ee195db 100644 --- 
a/src/AggregateFunctions/AggregateFunctionSimpleLinearRegression.cpp +++ b/src/AggregateFunctions/AggregateFunctionSimpleLinearRegression.cpp @@ -123,22 +123,22 @@ class AggregateFunctionSimpleLinearRegression final : public IAggregateFunctionD Float64 x = columns[0]->getFloat64(row_num); Float64 y = columns[1]->getFloat64(row_num); - this->data(place).add(x, y); + data(place).add(x, y); } void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override { - this->data(place).merge(this->data(rhs)); + data(place).merge(data(rhs)); } void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional /* version */) const override { - this->data(place).serialize(buf); + data(place).serialize(buf); } void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional /* version */, Arena *) const override { - this->data(place).deserialize(buf); + data(place).deserialize(buf); } static DataTypePtr createResultType() @@ -168,8 +168,8 @@ class AggregateFunctionSimpleLinearRegression final : public IAggregateFunctionD IColumn & to, Arena *) const override { - Float64 k = this->data(place).getK(); - Float64 b = this->data(place).getB(k); + Float64 k = data(place).getK(); + Float64 b = data(place).getB(k); auto & col_tuple = assert_cast(to); auto & col_k = assert_cast &>(col_tuple.getColumn(0)); diff --git a/src/AggregateFunctions/AggregateFunctionSingleValueOrNull.cpp b/src/AggregateFunctions/AggregateFunctionSingleValueOrNull.cpp index b14af34c5fc1..0625e37d1b0f 100644 --- a/src/AggregateFunctions/AggregateFunctionSingleValueOrNull.cpp +++ b/src/AggregateFunctions/AggregateFunctionSingleValueOrNull.cpp @@ -120,7 +120,7 @@ class AggregateFunctionSingleValueOrNull final void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override { - this->data(place).add(*columns[0], row_num, arena); + data(place).add(*columns[0], row_num, arena); } void addBatchSinglePlace( @@ -131,7 +131,7 @@ class AggregateFunctionSingleValueOrNull final Arena * arena, ssize_t if_argument_pos) const override { - if (this->data(place).isNull()) + if (data(place).isNull()) return; IAggregateFunctionDataHelper::addBatchSinglePlace( row_begin, row_end, place, columns, arena, if_argument_pos); @@ -146,7 +146,7 @@ class AggregateFunctionSingleValueOrNull final Arena * arena, ssize_t if_argument_pos) const override { - if (this->data(place).isNull()) + if (data(place).isNull()) return; IAggregateFunctionDataHelper::addBatchSinglePlaceNotNull( row_begin, row_end, place, columns, null_map, arena, if_argument_pos); @@ -154,29 +154,29 @@ class AggregateFunctionSingleValueOrNull final void addManyDefaults(AggregateDataPtr __restrict place, const IColumn ** columns, size_t, Arena * arena) const override { - this->data(place).add(*columns[0], 0, arena); + data(place).add(*columns[0], 0, arena); } void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override { - this->data(place).add(this->data(rhs), arena); + data(place).add(data(rhs), arena); } void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional /* version */) const override { - this->data(place).write(buf, *serialization); + data(place).write(buf, *serialization); } void deserialize(AggregateDataPtr place, ReadBuffer & buf, std::optional /* version */, Arena * arena) const override { - this->data(place).read(buf, *serialization, arena); + data(place).read(buf, *serialization, arena); } bool 
allocatesMemoryInArena() const override { return singleValueTypeAllocatesMemoryInArena(value_type_index); } void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override { - this->data(place).insertResultInto(to); + data(place).insertResultInto(to); } }; diff --git a/src/AggregateFunctions/AggregateFunctionStatistics.cpp b/src/AggregateFunctions/AggregateFunctionStatistics.cpp index e9d9b7409cad..15fede94fe76 100644 --- a/src/AggregateFunctions/AggregateFunctionStatistics.cpp +++ b/src/AggregateFunctions/AggregateFunctionStatistics.cpp @@ -150,13 +150,13 @@ class AggregateFunctionVariance final Float64 getResult(ConstAggregateDataPtr __restrict place) const { - const auto & data = this->data(place); + const auto & dt = data(place); switch (kind) { - case VarKind::varSampStable: return getVarSamp(data.m2, data.count); - case VarKind::stddevSampStable: return getStddevSamp(data.m2, data.count); - case VarKind::varPopStable: return getVarPop(data.m2, data.count); - case VarKind::stddevPopStable: return getStddevPop(data.m2, data.count); + case VarKind::varSampStable: return getVarSamp(dt.m2, dt.count); + case VarKind::stddevSampStable: return getStddevSamp(dt.m2, dt.count); + case VarKind::varPopStable: return getVarPop(dt.m2, dt.count); + case VarKind::stddevPopStable: return getStddevPop(dt.m2, dt.count); } } @@ -182,22 +182,22 @@ class AggregateFunctionVariance final void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override { - this->data(place).update(*columns[0], row_num); + data(place).update(*columns[0], row_num); } void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override { - this->data(place).mergeWith(this->data(rhs)); + data(place).mergeWith(data(rhs)); } void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional /* version */) const override { - this->data(place).serialize(buf); + data(place).serialize(buf); } void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional /* version */, Arena *) const override { - this->data(place).deserialize(buf); + data(place).deserialize(buf); } void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override diff --git a/src/AggregateFunctions/Combinators/AggregateFunctionNull.h b/src/AggregateFunctions/Combinators/AggregateFunctionNull.h index 306e293cae74..9d13b77664dd 100644 --- a/src/AggregateFunctions/Combinators/AggregateFunctionNull.h +++ b/src/AggregateFunctions/Combinators/AggregateFunctionNull.h @@ -491,7 +491,7 @@ class AggregateFunctionNullVariadic final : public AggregateFunctionNullBase< std::vector nullable_filters; const IColumn * nested_columns[number_of_arguments]; - std::unique_ptr final_flags = nullptr; + std::unique_ptr final_flags; const UInt8 * final_flags_ptr = nullptr; if (if_argument_pos >= 0) diff --git a/src/AggregateFunctions/DDSketch.h b/src/AggregateFunctions/DDSketch.h index 65ce73226966..dae935bd43d3 100644 --- a/src/AggregateFunctions/DDSketch.h +++ b/src/AggregateFunctions/DDSketch.h @@ -147,6 +147,8 @@ class DDSketchDenseLogarithmic negative_store->merge(other.negative_store.get()); } + /// NOLINTBEGIN(readability-static-accessed-through-instance) + void serialize(WriteBuffer& buf) const { // Write the mapping @@ -201,6 +203,8 @@ class DDSketchDenseLogarithmic count = static_cast(negative_store->count + zero_count + store->count); } + /// NOLINTEND(readability-static-accessed-through-instance) + private: 
std::unique_ptr mapping; std::unique_ptr store; diff --git a/src/AggregateFunctions/DDSketch/Store.h b/src/AggregateFunctions/DDSketch/Store.h index 428b2a6c9b8f..f12c3875dcf2 100644 --- a/src/AggregateFunctions/DDSketch/Store.h +++ b/src/AggregateFunctions/DDSketch/Store.h @@ -87,6 +87,8 @@ class DDSketchDenseStore count += other->count; } + /// NOLINTBEGIN(readability-static-accessed-through-instance) + void serialize(WriteBuffer& buf) const { @@ -179,6 +181,8 @@ class DDSketchDenseStore } } + /// NOLINTEND(readability-static-accessed-through-instance) + private: UInt32 chunk_size; DDSketchEncoding enc; diff --git a/src/AggregateFunctions/IAggregateFunction.h b/src/AggregateFunctions/IAggregateFunction.h index 97e0e89aee98..ee227db6d9d5 100644 --- a/src/AggregateFunctions/IAggregateFunction.h +++ b/src/AggregateFunctions/IAggregateFunction.h @@ -1,17 +1,18 @@ #pragma once +#include #include #include #include #include #include #include +#include #include #include #include #include #include -#include #include "config.h" @@ -46,13 +47,6 @@ class IWindowFunction; using DataTypePtr = std::shared_ptr; using DataTypes = std::vector; -using AggregateDataPtr = char *; -using AggregateDataPtrs = std::vector; -using ConstAggregateDataPtr = const char *; - -class IAggregateFunction; -using AggregateFunctionPtr = std::shared_ptr; - struct AggregateFunctionProperties; /** Aggregate functions interface. diff --git a/src/AggregateFunctions/IAggregateFunction_fwd.h b/src/AggregateFunctions/IAggregateFunction_fwd.h new file mode 100644 index 000000000000..7c78e32c6528 --- /dev/null +++ b/src/AggregateFunctions/IAggregateFunction_fwd.h @@ -0,0 +1,14 @@ +#pragma once + +#include +#include + +namespace DB +{ +using AggregateDataPtr = char *; +using AggregateDataPtrs = std::vector; +using ConstAggregateDataPtr = const char *; + +class IAggregateFunction; +using AggregateFunctionPtr = std::shared_ptr; +} diff --git a/src/AggregateFunctions/ReservoirSampler.h b/src/AggregateFunctions/ReservoirSampler.h index 37fc05a2e4ce..7b6ef1b2dc0e 100644 --- a/src/AggregateFunctions/ReservoirSampler.h +++ b/src/AggregateFunctions/ReservoirSampler.h @@ -258,10 +258,10 @@ class ReservoirSampler chassert(limit > 0); /// With a large number of values, we will generate random numbers several times slower. 
- if (limit <= static_cast(rng.max())) + if (limit <= static_cast(pcg32_fast::max())) return rng() % limit; else - return (static_cast(rng()) * (static_cast(rng.max()) + 1ULL) + static_cast(rng())) % limit; + return (static_cast(rng()) * (static_cast(pcg32_fast::max()) + 1ULL) + static_cast(rng())) % limit; } void sortIfNeeded() diff --git a/src/AggregateFunctions/SingleValueData.cpp b/src/AggregateFunctions/SingleValueData.cpp index 72eaf36e254e..a14caf00f730 100644 --- a/src/AggregateFunctions/SingleValueData.cpp +++ b/src/AggregateFunctions/SingleValueData.cpp @@ -579,7 +579,7 @@ std::optional SingleValueDataFixed::getGreatestIndexNotNullIf( return std::nullopt; for (size_t i = index + 1; i < row_end; i++) - if ((!if_map || if_map[i] != 0) && (!null_map || null_map[i] == 0) && (vec[i] < vec[index])) + if ((!if_map || if_map[i] != 0) && (!null_map || null_map[i] == 0) && (vec[i] > vec[index])) index = i; return {index}; } diff --git a/src/Analyzer/ArrayJoinNode.cpp b/src/Analyzer/ArrayJoinNode.cpp index ee6bd80150d1..59389d4f2a86 100644 --- a/src/Analyzer/ArrayJoinNode.cpp +++ b/src/Analyzer/ArrayJoinNode.cpp @@ -1,14 +1,12 @@ #include - +#include +#include +#include #include #include -#include - -#include #include - -#include -#include +#include +#include namespace DB { @@ -33,13 +31,13 @@ void ArrayJoinNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_stat getJoinExpressionsNode()->dumpTreeImpl(buffer, format_state, indent + 4); } -bool ArrayJoinNode::isEqualImpl(const IQueryTreeNode & rhs) const +bool ArrayJoinNode::isEqualImpl(const IQueryTreeNode & rhs, CompareOptions) const { const auto & rhs_typed = assert_cast(rhs); return is_left == rhs_typed.is_left; } -void ArrayJoinNode::updateTreeHashImpl(HashState & state) const +void ArrayJoinNode::updateTreeHashImpl(HashState & state, CompareOptions) const { state.update(is_left); } diff --git a/src/Analyzer/ArrayJoinNode.h b/src/Analyzer/ArrayJoinNode.h index 89cb0b7b8c10..1772e2b3ca07 100644 --- a/src/Analyzer/ArrayJoinNode.h +++ b/src/Analyzer/ArrayJoinNode.h @@ -93,9 +93,9 @@ class ArrayJoinNode final : public IQueryTreeNode void dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const override; protected: - bool isEqualImpl(const IQueryTreeNode & rhs) const override; + bool isEqualImpl(const IQueryTreeNode & rhs, CompareOptions) const override; - void updateTreeHashImpl(HashState & state) const override; + void updateTreeHashImpl(HashState & state, CompareOptions) const override; QueryTreeNodePtr cloneImpl() const override; diff --git a/src/Analyzer/ColumnNode.cpp b/src/Analyzer/ColumnNode.cpp index 3d9f5d1640ef..2b514a85121b 100644 --- a/src/Analyzer/ColumnNode.cpp +++ b/src/Analyzer/ColumnNode.cpp @@ -1,14 +1,12 @@ #include - -#include - +#include +#include #include #include -#include - #include +#include +#include -#include namespace DB { @@ -70,20 +68,26 @@ void ColumnNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & state, size_t } } -bool ColumnNode::isEqualImpl(const IQueryTreeNode & rhs) const +bool ColumnNode::isEqualImpl(const IQueryTreeNode & rhs, CompareOptions compare_options) const { const auto & rhs_typed = assert_cast(rhs); - return column == rhs_typed.column; + if (column.name != rhs_typed.column.name) + return false; + + return !compare_options.compare_types || column.type->equals(*rhs_typed.column.type); } -void ColumnNode::updateTreeHashImpl(HashState & hash_state) const +void ColumnNode::updateTreeHashImpl(HashState & hash_state, CompareOptions compare_options) const { 
hash_state.update(column.name.size()); hash_state.update(column.name); - const auto & column_type_name = column.type->getName(); - hash_state.update(column_type_name.size()); - hash_state.update(column_type_name); + if (compare_options.compare_types) + { + const auto & column_type_name = column.type->getName(); + hash_state.update(column_type_name.size()); + hash_state.update(column_type_name); + } } QueryTreeNodePtr ColumnNode::cloneImpl() const diff --git a/src/Analyzer/ColumnNode.h b/src/Analyzer/ColumnNode.h index 46e7c8eb5007..f6fac5ce7f9a 100644 --- a/src/Analyzer/ColumnNode.h +++ b/src/Analyzer/ColumnNode.h @@ -131,9 +131,9 @@ class ColumnNode final : public IQueryTreeNode void dumpTreeImpl(WriteBuffer & buffer, FormatState & state, size_t indent) const override; protected: - bool isEqualImpl(const IQueryTreeNode & rhs) const override; + bool isEqualImpl(const IQueryTreeNode & rhs, CompareOptions) const override; - void updateTreeHashImpl(HashState & hash_state) const override; + void updateTreeHashImpl(HashState & hash_state, CompareOptions) const override; QueryTreeNodePtr cloneImpl() const override; diff --git a/src/Analyzer/ColumnTransformers.cpp b/src/Analyzer/ColumnTransformers.cpp index 3a6b9e3b2916..356344c1aec9 100644 --- a/src/Analyzer/ColumnTransformers.cpp +++ b/src/Analyzer/ColumnTransformers.cpp @@ -74,13 +74,13 @@ void ApplyColumnTransformerNode::dumpTreeImpl(WriteBuffer & buffer, FormatState expression_node->dumpTreeImpl(buffer, format_state, indent + 4); } -bool ApplyColumnTransformerNode::isEqualImpl(const IQueryTreeNode & rhs) const +bool ApplyColumnTransformerNode::isEqualImpl(const IQueryTreeNode & rhs, CompareOptions) const { const auto & rhs_typed = assert_cast(rhs); return apply_transformer_type == rhs_typed.apply_transformer_type; } -void ApplyColumnTransformerNode::updateTreeHashImpl(IQueryTreeNode::HashState & hash_state) const +void ApplyColumnTransformerNode::updateTreeHashImpl(IQueryTreeNode::HashState & hash_state, CompareOptions) const { hash_state.update(static_cast(getTransformerType())); hash_state.update(static_cast(getApplyTransformerType())); @@ -178,7 +178,7 @@ void ExceptColumnTransformerNode::dumpTreeImpl(WriteBuffer & buffer, FormatState } } -bool ExceptColumnTransformerNode::isEqualImpl(const IQueryTreeNode & rhs) const +bool ExceptColumnTransformerNode::isEqualImpl(const IQueryTreeNode & rhs, CompareOptions) const { const auto & rhs_typed = assert_cast(rhs); if (except_transformer_type != rhs_typed.except_transformer_type || @@ -198,7 +198,7 @@ bool ExceptColumnTransformerNode::isEqualImpl(const IQueryTreeNode & rhs) const return column_matcher->pattern() == rhs_column_matcher->pattern(); } -void ExceptColumnTransformerNode::updateTreeHashImpl(IQueryTreeNode::HashState & hash_state) const +void ExceptColumnTransformerNode::updateTreeHashImpl(IQueryTreeNode::HashState & hash_state, CompareOptions) const { hash_state.update(static_cast(getTransformerType())); hash_state.update(static_cast(getExceptTransformerType())); @@ -302,13 +302,13 @@ void ReplaceColumnTransformerNode::dumpTreeImpl(WriteBuffer & buffer, FormatStat } } -bool ReplaceColumnTransformerNode::isEqualImpl(const IQueryTreeNode & rhs) const +bool ReplaceColumnTransformerNode::isEqualImpl(const IQueryTreeNode & rhs, CompareOptions) const { const auto & rhs_typed = assert_cast(rhs); return is_strict == rhs_typed.is_strict && replacements_names == rhs_typed.replacements_names; } -void ReplaceColumnTransformerNode::updateTreeHashImpl(IQueryTreeNode::HashState & hash_state) const 
+void ReplaceColumnTransformerNode::updateTreeHashImpl(IQueryTreeNode::HashState & hash_state, CompareOptions) const { hash_state.update(static_cast(getTransformerType())); diff --git a/src/Analyzer/ColumnTransformers.h b/src/Analyzer/ColumnTransformers.h index 8fa8e28f1947..9ae1f14575b7 100644 --- a/src/Analyzer/ColumnTransformers.h +++ b/src/Analyzer/ColumnTransformers.h @@ -137,9 +137,9 @@ class ApplyColumnTransformerNode final : public IColumnTransformerNode void dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const override; protected: - bool isEqualImpl(const IQueryTreeNode & rhs) const override; + bool isEqualImpl(const IQueryTreeNode & rhs, CompareOptions) const override; - void updateTreeHashImpl(IQueryTreeNode::HashState & hash_state) const override; + void updateTreeHashImpl(IQueryTreeNode::HashState & hash_state, CompareOptions) const override; QueryTreeNodePtr cloneImpl() const override; @@ -214,9 +214,9 @@ class ExceptColumnTransformerNode final : public IColumnTransformerNode void dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const override; protected: - bool isEqualImpl(const IQueryTreeNode & rhs) const override; + bool isEqualImpl(const IQueryTreeNode & rhs, CompareOptions) const override; - void updateTreeHashImpl(IQueryTreeNode::HashState & hash_state) const override; + void updateTreeHashImpl(IQueryTreeNode::HashState & hash_state, CompareOptions) const override; QueryTreeNodePtr cloneImpl() const override; @@ -290,9 +290,9 @@ class ReplaceColumnTransformerNode final : public IColumnTransformerNode void dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const override; protected: - bool isEqualImpl(const IQueryTreeNode & rhs) const override; + bool isEqualImpl(const IQueryTreeNode & rhs, CompareOptions) const override; - void updateTreeHashImpl(IQueryTreeNode::HashState & hash_state) const override; + void updateTreeHashImpl(IQueryTreeNode::HashState & hash_state, CompareOptions) const override; QueryTreeNodePtr cloneImpl() const override; diff --git a/src/Analyzer/ConstantNode.cpp b/src/Analyzer/ConstantNode.cpp index e26500a9886f..46c1f7fb1edf 100644 --- a/src/Analyzer/ConstantNode.cpp +++ b/src/Analyzer/ConstantNode.cpp @@ -126,17 +126,29 @@ void ConstantNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state } } -bool ConstantNode::isEqualImpl(const IQueryTreeNode & rhs) const +void ConstantNode::convertToNullable() +{ + constant_value = std::make_shared(constant_value->getValue(), makeNullableSafe(constant_value->getType())); +} + +bool ConstantNode::isEqualImpl(const IQueryTreeNode & rhs, CompareOptions compare_options) const { const auto & rhs_typed = assert_cast(rhs); - return *constant_value == *rhs_typed.constant_value && value_string == rhs_typed.value_string; + + if (value_string != rhs_typed.value_string || constant_value->getValue() != rhs_typed.constant_value->getValue()) + return false; + + return !compare_options.compare_types || constant_value->getType()->equals(*rhs_typed.constant_value->getType()); } -void ConstantNode::updateTreeHashImpl(HashState & hash_state) const +void ConstantNode::updateTreeHashImpl(HashState & hash_state, CompareOptions compare_options) const { - auto type_name = constant_value->getType()->getName(); - hash_state.update(type_name.size()); - hash_state.update(type_name); + if (compare_options.compare_types) + { + auto type_name = constant_value->getType()->getName(); + hash_state.update(type_name.size()); + 
hash_state.update(type_name); + } hash_state.update(value_string.size()); hash_state.update(value_string); diff --git a/src/Analyzer/ConstantNode.h b/src/Analyzer/ConstantNode.h index 98a8eb782776..0c88862b8792 100644 --- a/src/Analyzer/ConstantNode.h +++ b/src/Analyzer/ConstantNode.h @@ -87,17 +87,14 @@ class ConstantNode final : public IQueryTreeNode mask_id = id; } - void convertToNullable() override - { - constant_value = std::make_shared(constant_value->getValue(), makeNullableSafe(constant_value->getType())); - } + void convertToNullable() override; void dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const override; protected: - bool isEqualImpl(const IQueryTreeNode & rhs) const override; + bool isEqualImpl(const IQueryTreeNode & rhs, CompareOptions compare_options) const override; - void updateTreeHashImpl(HashState & hash_state) const override; + void updateTreeHashImpl(HashState & hash_state, CompareOptions compare_options) const override; QueryTreeNodePtr cloneImpl() const override; diff --git a/src/Analyzer/ConstantValue.h b/src/Analyzer/ConstantValue.h index a9e2ffd9e658..335072b92867 100644 --- a/src/Analyzer/ConstantValue.h +++ b/src/Analyzer/ConstantValue.h @@ -34,14 +34,4 @@ class ConstantValue DataTypePtr data_type; }; -inline bool operator==(const ConstantValue & lhs, const ConstantValue & rhs) -{ - return lhs.getValue() == rhs.getValue() && lhs.getType()->equals(*rhs.getType()); -} - -inline bool operator!=(const ConstantValue & lhs, const ConstantValue & rhs) -{ - return !(lhs == rhs); -} - } diff --git a/src/Analyzer/FunctionNode.cpp b/src/Analyzer/FunctionNode.cpp index e902ac2274e6..f13842cf67cc 100644 --- a/src/Analyzer/FunctionNode.cpp +++ b/src/Analyzer/FunctionNode.cpp @@ -142,7 +142,7 @@ void FunctionNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state } } -bool FunctionNode::isEqualImpl(const IQueryTreeNode & rhs) const +bool FunctionNode::isEqualImpl(const IQueryTreeNode & rhs, CompareOptions compare_options) const { const auto & rhs_typed = assert_cast(rhs); if (function_name != rhs_typed.function_name || isAggregateFunction() != rhs_typed.isAggregateFunction() @@ -150,6 +150,9 @@ bool FunctionNode::isEqualImpl(const IQueryTreeNode & rhs) const || nulls_action != rhs_typed.nulls_action) return false; + if (!compare_options.compare_types) + return true; + if (isResolved() != rhs_typed.isResolved()) return false; if (!isResolved()) @@ -168,7 +171,7 @@ bool FunctionNode::isEqualImpl(const IQueryTreeNode & rhs) const return true; } -void FunctionNode::updateTreeHashImpl(HashState & hash_state) const +void FunctionNode::updateTreeHashImpl(HashState & hash_state, CompareOptions compare_options) const { hash_state.update(function_name.size()); hash_state.update(function_name); @@ -177,6 +180,9 @@ void FunctionNode::updateTreeHashImpl(HashState & hash_state) const hash_state.update(isWindowFunction()); hash_state.update(nulls_action); + if (!compare_options.compare_types) + return; + if (!isResolved()) return; diff --git a/src/Analyzer/FunctionNode.h b/src/Analyzer/FunctionNode.h index 0ff3e6896327..8d14b7eeb0dd 100644 --- a/src/Analyzer/FunctionNode.h +++ b/src/Analyzer/FunctionNode.h @@ -208,9 +208,9 @@ class FunctionNode final : public IQueryTreeNode void dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const override; protected: - bool isEqualImpl(const IQueryTreeNode & rhs) const override; + bool isEqualImpl(const IQueryTreeNode & rhs, CompareOptions compare_options) const override; - 
void updateTreeHashImpl(HashState & hash_state) const override; + void updateTreeHashImpl(HashState & hash_state, CompareOptions compare_options) const override; QueryTreeNodePtr cloneImpl() const override; diff --git a/src/Analyzer/HashUtils.h b/src/Analyzer/HashUtils.h index eb6aac88fe96..77ade7a4705b 100644 --- a/src/Analyzer/HashUtils.h +++ b/src/Analyzer/HashUtils.h @@ -11,37 +11,37 @@ namespace DB * Example of usage: * std::unordered_map map; */ -template +template struct QueryTreeNodeWithHash { QueryTreeNodeWithHash(QueryTreeNodePtrType node_) /// NOLINT : node(std::move(node_)) - , hash(node->getTreeHash({.compare_aliases = compare_aliases})) + , hash(node->getTreeHash({.compare_aliases = compare_aliases, .compare_types = compare_types})) {} QueryTreeNodePtrType node = nullptr; CityHash_v1_0_2::uint128 hash; }; -template -inline bool operator==(const QueryTreeNodeWithHash & lhs, const QueryTreeNodeWithHash & rhs) +template +inline bool operator==(const QueryTreeNodeWithHash & lhs, const QueryTreeNodeWithHash & rhs) { - return lhs.hash == rhs.hash && lhs.node->isEqual(*rhs.node, {.compare_aliases = compare_aliases}); + return lhs.hash == rhs.hash && lhs.node->isEqual(*rhs.node, {.compare_aliases = compare_aliases, .compare_types = compare_types}); } -template -inline bool operator!=(const QueryTreeNodeWithHash & lhs, const QueryTreeNodeWithHash & rhs) +template +inline bool operator!=(const QueryTreeNodeWithHash & lhs, const QueryTreeNodeWithHash & rhs) { return !(lhs == rhs); } using QueryTreeNodePtrWithHash = QueryTreeNodeWithHash; -using QueryTreeNodePtrWithHashWithoutAlias = QueryTreeNodeWithHash; +using QueryTreeNodePtrWithHashIgnoreTypes = QueryTreeNodeWithHash; using QueryTreeNodeRawPtrWithHash = QueryTreeNodeWithHash; using QueryTreeNodeConstRawPtrWithHash = QueryTreeNodeWithHash; using QueryTreeNodePtrWithHashSet = std::unordered_set; -using QueryTreeNodePtrWithHashWithoutAliasSet = std::unordered_set; +using QueryTreeNodePtrWithHashIgnoreTypesSet = std::unordered_set; using QueryTreeNodeConstRawPtrWithHashSet = std::unordered_set; template @@ -52,10 +52,10 @@ using QueryTreeNodeConstRawPtrWithHashMap = std::unordered_map -struct std::hash> +template +struct std::hash> { - size_t operator()(const DB::QueryTreeNodeWithHash & node_with_hash) const + size_t operator()(const DB::QueryTreeNodeWithHash & node_with_hash) const { return node_with_hash.hash.low64; } diff --git a/src/Analyzer/IQueryTreeNode.cpp b/src/Analyzer/IQueryTreeNode.cpp index 7815b93c3aca..cd085babf384 100644 --- a/src/Analyzer/IQueryTreeNode.cpp +++ b/src/Analyzer/IQueryTreeNode.cpp @@ -107,7 +107,7 @@ bool IQueryTreeNode::isEqual(const IQueryTreeNode & rhs, CompareOptions compare_ } if (lhs_node_to_compare->getNodeType() != rhs_node_to_compare->getNodeType() || - !lhs_node_to_compare->isEqualImpl(*rhs_node_to_compare)) + !lhs_node_to_compare->isEqualImpl(*rhs_node_to_compare, compare_options)) return false; if (compare_options.compare_aliases && lhs_node_to_compare->alias != rhs_node_to_compare->alias) @@ -207,7 +207,7 @@ IQueryTreeNode::Hash IQueryTreeNode::getTreeHash(CompareOptions compare_options) hash_state.update(node_to_process->alias); } - node_to_process->updateTreeHashImpl(hash_state); + node_to_process->updateTreeHashImpl(hash_state, compare_options); hash_state.update(node_to_process->children.size()); diff --git a/src/Analyzer/IQueryTreeNode.h b/src/Analyzer/IQueryTreeNode.h index 92e34616c4d7..fc2cb2c53f66 100644 --- a/src/Analyzer/IQueryTreeNode.h +++ b/src/Analyzer/IQueryTreeNode.h @@ -97,6 
+97,7 @@ class IQueryTreeNode : public TypePromotion struct CompareOptions { bool compare_aliases = true; + bool compare_types = true; }; /** Is tree equal to other tree with node root. @@ -104,7 +105,7 @@ class IQueryTreeNode : public TypePromotion * With default compare options aliases of query tree nodes are compared during isEqual call. * Original ASTs of query tree nodes are not compared during isEqual call. */ - bool isEqual(const IQueryTreeNode & rhs, CompareOptions compare_options = { .compare_aliases = true }) const; + bool isEqual(const IQueryTreeNode & rhs, CompareOptions compare_options = { .compare_aliases = true, .compare_types = true }) const; using Hash = CityHash_v1_0_2::uint128; using HashState = SipHash; @@ -114,7 +115,7 @@ class IQueryTreeNode : public TypePromotion * Alias of query tree node is part of query tree hash. * Original AST is not part of query tree hash. */ - Hash getTreeHash(CompareOptions compare_options = { .compare_aliases = true }) const; + Hash getTreeHash(CompareOptions compare_options = { .compare_aliases = true, .compare_types = true }) const; /// Get a deep copy of the query tree QueryTreeNodePtr clone() const; @@ -264,12 +265,12 @@ class IQueryTreeNode : public TypePromotion /** Subclass must compare its internal state with rhs node internal state and do not compare children or weak pointers to other * query tree nodes. */ - virtual bool isEqualImpl(const IQueryTreeNode & rhs) const = 0; + virtual bool isEqualImpl(const IQueryTreeNode & rhs, CompareOptions compare_options) const = 0; /** Subclass must update tree hash with its internal state and do not update tree hash for children or weak pointers to other * query tree nodes. */ - virtual void updateTreeHashImpl(HashState & hash_state) const = 0; + virtual void updateTreeHashImpl(HashState & hash_state, CompareOptions compare_options) const = 0; /** Subclass must clone its internal state and do not clone children or weak pointers to other * query tree nodes. 
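For illustration, a minimal usage sketch of the CompareOptions extension declared above; it is not part of any hunk in this patch. The isEqual/getTreeHash signatures are taken from the IQueryTreeNode.h hunk itself, while the ConstantNode(Field, DataTypePtr) constructor and the concrete data types are assumptions made only for the sake of the example.

    #include <Analyzer/ConstantNode.h>
    #include <DataTypes/DataTypesNumber.h>
    #include <Core/Field.h>
    #include <memory>

    void compareOptionsSketch()
    {
        using namespace DB;

        /// Two constants with the same value but different data types (assumed constructor).
        auto lhs = std::make_shared<ConstantNode>(Field(42u), std::make_shared<DataTypeUInt8>());
        auto rhs = std::make_shared<ConstantNode>(Field(42u), std::make_shared<DataTypeUInt64>());

        bool strict  = lhs->isEqual(*rhs);                                                     /// false: data types are compared by default
        bool relaxed = lhs->isEqual(*rhs, {.compare_aliases = true, .compare_types = false});  /// true: data types are ignored
        auto hash_ignoring_types = lhs->getTreeHash({.compare_aliases = true, .compare_types = false});
    }

The QueryTreeNodePtrWithHashIgnoreTypes alias added in HashUtils.h appears to build similar options into its hashing, which is what lets the nullable GROUP BY keys tracked in QueryAnalysisPass.cpp match expressions whose types are later wrapped in Nullable.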
diff --git a/src/Analyzer/IdentifierNode.cpp b/src/Analyzer/IdentifierNode.cpp index 88b3daacb125..181e75a57fd8 100644 --- a/src/Analyzer/IdentifierNode.cpp +++ b/src/Analyzer/IdentifierNode.cpp @@ -38,13 +38,13 @@ void IdentifierNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_sta } } -bool IdentifierNode::isEqualImpl(const IQueryTreeNode & rhs) const +bool IdentifierNode::isEqualImpl(const IQueryTreeNode & rhs, CompareOptions) const { const auto & rhs_typed = assert_cast(rhs); return identifier == rhs_typed.identifier && table_expression_modifiers == rhs_typed.table_expression_modifiers; } -void IdentifierNode::updateTreeHashImpl(HashState & state) const +void IdentifierNode::updateTreeHashImpl(HashState & state, CompareOptions) const { const auto & identifier_name = identifier.getFullName(); state.update(identifier_name.size()); diff --git a/src/Analyzer/IdentifierNode.h b/src/Analyzer/IdentifierNode.h index 872bb14d5128..1b07f0b3765c 100644 --- a/src/Analyzer/IdentifierNode.h +++ b/src/Analyzer/IdentifierNode.h @@ -53,9 +53,9 @@ class IdentifierNode final : public IQueryTreeNode void dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const override; protected: - bool isEqualImpl(const IQueryTreeNode & rhs) const override; + bool isEqualImpl(const IQueryTreeNode & rhs, CompareOptions) const override; - void updateTreeHashImpl(HashState & state) const override; + void updateTreeHashImpl(HashState & state, CompareOptions) const override; QueryTreeNodePtr cloneImpl() const override; diff --git a/src/Analyzer/InterpolateNode.cpp b/src/Analyzer/InterpolateNode.cpp index d78993c7b855..e4f7e22b8039 100644 --- a/src/Analyzer/InterpolateNode.cpp +++ b/src/Analyzer/InterpolateNode.cpp @@ -28,13 +28,13 @@ void InterpolateNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_st getInterpolateExpression()->dumpTreeImpl(buffer, format_state, indent + 4); } -bool InterpolateNode::isEqualImpl(const IQueryTreeNode &) const +bool InterpolateNode::isEqualImpl(const IQueryTreeNode &, CompareOptions) const { /// No state in interpolate node return true; } -void InterpolateNode::updateTreeHashImpl(HashState &) const +void InterpolateNode::updateTreeHashImpl(HashState &, CompareOptions) const { /// No state in interpolate node } diff --git a/src/Analyzer/InterpolateNode.h b/src/Analyzer/InterpolateNode.h index c45800ebaaff..9269d3924f5b 100644 --- a/src/Analyzer/InterpolateNode.h +++ b/src/Analyzer/InterpolateNode.h @@ -53,9 +53,9 @@ class InterpolateNode final : public IQueryTreeNode void dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const override; protected: - bool isEqualImpl(const IQueryTreeNode & rhs) const override; + bool isEqualImpl(const IQueryTreeNode & rhs, CompareOptions) const override; - void updateTreeHashImpl(HashState & hash_state) const override; + void updateTreeHashImpl(HashState & hash_state, CompareOptions) const override; QueryTreeNodePtr cloneImpl() const override; diff --git a/src/Analyzer/JoinNode.cpp b/src/Analyzer/JoinNode.cpp index 9b61c8b19d0d..6cb33d805564 100644 --- a/src/Analyzer/JoinNode.cpp +++ b/src/Analyzer/JoinNode.cpp @@ -1,16 +1,14 @@ #include #include - +#include +#include #include #include -#include - -#include -#include #include +#include +#include #include - -#include +#include namespace DB { @@ -81,13 +79,13 @@ void JoinNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, si } } -bool JoinNode::isEqualImpl(const IQueryTreeNode & rhs) const +bool 
JoinNode::isEqualImpl(const IQueryTreeNode & rhs, CompareOptions) const { const auto & rhs_typed = assert_cast(rhs); return locality == rhs_typed.locality && strictness == rhs_typed.strictness && kind == rhs_typed.kind; } -void JoinNode::updateTreeHashImpl(HashState & state) const +void JoinNode::updateTreeHashImpl(HashState & state, CompareOptions) const { state.update(locality); state.update(strictness); diff --git a/src/Analyzer/JoinNode.h b/src/Analyzer/JoinNode.h index 4f071e03856f..734162d95469 100644 --- a/src/Analyzer/JoinNode.h +++ b/src/Analyzer/JoinNode.h @@ -142,9 +142,9 @@ class JoinNode final : public IQueryTreeNode void dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const override; protected: - bool isEqualImpl(const IQueryTreeNode & rhs) const override; + bool isEqualImpl(const IQueryTreeNode & rhs, CompareOptions) const override; - void updateTreeHashImpl(HashState & state) const override; + void updateTreeHashImpl(HashState & state, CompareOptions) const override; QueryTreeNodePtr cloneImpl() const override; diff --git a/src/Analyzer/LambdaNode.cpp b/src/Analyzer/LambdaNode.cpp index 4be4d69c1907..bca2616d85a9 100644 --- a/src/Analyzer/LambdaNode.cpp +++ b/src/Analyzer/LambdaNode.cpp @@ -46,13 +46,13 @@ void LambdaNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, getExpression()->dumpTreeImpl(buffer, format_state, indent + 4); } -bool LambdaNode::isEqualImpl(const IQueryTreeNode & rhs) const +bool LambdaNode::isEqualImpl(const IQueryTreeNode & rhs, CompareOptions) const { const auto & rhs_typed = assert_cast(rhs); return argument_names == rhs_typed.argument_names; } -void LambdaNode::updateTreeHashImpl(HashState & state) const +void LambdaNode::updateTreeHashImpl(HashState & state, CompareOptions) const { state.update(argument_names.size()); for (const auto & argument_name : argument_names) diff --git a/src/Analyzer/LambdaNode.h b/src/Analyzer/LambdaNode.h index ea44a7e8187e..0b2882125f0e 100644 --- a/src/Analyzer/LambdaNode.h +++ b/src/Analyzer/LambdaNode.h @@ -97,9 +97,9 @@ class LambdaNode final : public IQueryTreeNode void dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const override; protected: - bool isEqualImpl(const IQueryTreeNode & rhs) const override; + bool isEqualImpl(const IQueryTreeNode & rhs, CompareOptions) const override; - void updateTreeHashImpl(HashState & state) const override; + void updateTreeHashImpl(HashState & state, CompareOptions) const override; QueryTreeNodePtr cloneImpl() const override; diff --git a/src/Analyzer/ListNode.cpp b/src/Analyzer/ListNode.cpp index 799c471d6859..217cd6cefa33 100644 --- a/src/Analyzer/ListNode.cpp +++ b/src/Analyzer/ListNode.cpp @@ -38,13 +38,13 @@ void ListNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, si } } -bool ListNode::isEqualImpl(const IQueryTreeNode &) const +bool ListNode::isEqualImpl(const IQueryTreeNode &, CompareOptions) const { /// No state return true; } -void ListNode::updateTreeHashImpl(HashState &) const +void ListNode::updateTreeHashImpl(HashState &, CompareOptions) const { /// No state } diff --git a/src/Analyzer/ListNode.h b/src/Analyzer/ListNode.h index 5b1abc36ae96..379919f190fb 100644 --- a/src/Analyzer/ListNode.h +++ b/src/Analyzer/ListNode.h @@ -51,9 +51,9 @@ class ListNode final : public IQueryTreeNode const_iterator end() const { return children.end(); } protected: - bool isEqualImpl(const IQueryTreeNode & rhs) const override; + bool isEqualImpl(const IQueryTreeNode & rhs, 
CompareOptions) const override; - void updateTreeHashImpl(HashState &) const override; + void updateTreeHashImpl(HashState &, CompareOptions) const override; QueryTreeNodePtr cloneImpl() const override; diff --git a/src/Analyzer/MatcherNode.cpp b/src/Analyzer/MatcherNode.cpp index f573b83e5383..341c4b8eec75 100644 --- a/src/Analyzer/MatcherNode.cpp +++ b/src/Analyzer/MatcherNode.cpp @@ -160,7 +160,7 @@ void MatcherNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, } } -bool MatcherNode::isEqualImpl(const IQueryTreeNode & rhs) const +bool MatcherNode::isEqualImpl(const IQueryTreeNode & rhs, CompareOptions) const { const auto & rhs_typed = assert_cast(rhs); if (matcher_type != rhs_typed.matcher_type || @@ -181,7 +181,7 @@ bool MatcherNode::isEqualImpl(const IQueryTreeNode & rhs) const return columns_matcher->pattern() == rhs_columns_matcher->pattern(); } -void MatcherNode::updateTreeHashImpl(HashState & hash_state) const +void MatcherNode::updateTreeHashImpl(HashState & hash_state, CompareOptions) const { hash_state.update(static_cast(matcher_type)); diff --git a/src/Analyzer/MatcherNode.h b/src/Analyzer/MatcherNode.h index d6f077e224be..a7ec7d984c66 100644 --- a/src/Analyzer/MatcherNode.h +++ b/src/Analyzer/MatcherNode.h @@ -135,9 +135,9 @@ class MatcherNode final : public IQueryTreeNode void dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const override; protected: - bool isEqualImpl(const IQueryTreeNode & rhs) const override; + bool isEqualImpl(const IQueryTreeNode & rhs, CompareOptions) const override; - void updateTreeHashImpl(HashState & hash_state) const override; + void updateTreeHashImpl(HashState & hash_state, CompareOptions) const override; QueryTreeNodePtr cloneImpl() const override; diff --git a/src/Analyzer/Passes/CrossToInnerJoinPass.cpp b/src/Analyzer/Passes/CrossToInnerJoinPass.cpp index d0a5656d3344..3e2a2055fdba 100644 --- a/src/Analyzer/Passes/CrossToInnerJoinPass.cpp +++ b/src/Analyzer/Passes/CrossToInnerJoinPass.cpp @@ -15,6 +15,7 @@ #include #include +#include namespace DB @@ -61,47 +62,7 @@ const QueryTreeNodePtr & getEquiArgument(const QueryTreeNodePtr & cond, size_t i return func->getArguments().getNodes()[index]; } - -/// Check that node has only one source and return it. 
-/// {_, false} - multiple sources -/// {nullptr, true} - no sources -/// {source, true} - single source -std::pair getExpressionSource(const QueryTreeNodePtr & node) -{ - if (const auto * column = node->as()) - { - auto source = column->getColumnSourceOrNull(); - if (!source) - return {nullptr, false}; - return {source.get(), true}; - } - - if (const auto * func = node->as()) - { - const IQueryTreeNode * source = nullptr; - const auto & args = func->getArguments().getNodes(); - for (const auto & arg : args) - { - auto [arg_source, is_ok] = getExpressionSource(arg); - if (!is_ok) - return {nullptr, false}; - - if (!source) - source = arg_source; - else if (arg_source && !source->isEqual(*arg_source)) - return {nullptr, false}; - } - return {source, true}; - - } - - if (node->as()) - return {nullptr, true}; - - return {nullptr, false}; -} - -bool findInTableExpression(const IQueryTreeNode * source, const QueryTreeNodePtr & table_expression) +bool findInTableExpression(const QueryTreeNodePtr & source, const QueryTreeNodePtr & table_expression) { if (!source) return true; @@ -115,7 +76,6 @@ bool findInTableExpression(const IQueryTreeNode * source, const QueryTreeNodePtr || findInTableExpression(source, join_node->getRightTableExpression()); } - return false; } @@ -169,10 +129,10 @@ class CrossToInnerJoinVisitor : public InDepthQueryTreeVisitorWithContext; - explicit JoinOnLogicalExpressionOptimizerVisitor(ContextPtr context) + explicit JoinOnLogicalExpressionOptimizerVisitor(const JoinNode * join_node_, ContextPtr context) : Base(std::move(context)) + , join_node(join_node_) {} void enterImpl(QueryTreeNodePtr & node) @@ -55,10 +56,11 @@ class JoinOnLogicalExpressionOptimizerVisitor : public InDepthQueryTreeVisitorWi } private: + const JoinNode * join_node; bool need_rerun_resolve = false; /// Returns true if type of some operand is changed and parent function needs to be re-resolved - static bool tryOptimizeIsNotDistinctOrIsNull(QueryTreeNodePtr & node, const ContextPtr & context) + bool tryOptimizeIsNotDistinctOrIsNull(QueryTreeNodePtr & node, const ContextPtr & context) { auto & function_node = node->as(); chassert(function_node.getFunctionName() == "or"); @@ -93,6 +95,21 @@ class JoinOnLogicalExpressionOptimizerVisitor : public InDepthQueryTreeVisitorWi const auto & func_name = argument_function->getFunctionName(); if (func_name == "equals" || func_name == "isNotDistinctFrom") { + const auto & argument_nodes = argument_function->getArguments().getNodes(); + if (argument_nodes.size() != 2) + continue; + /// We can rewrite to a <=> b only if we are joining on a and b, + /// because the function is not yet implemented for other cases. 
+ auto first_src = getExpressionSource(argument_nodes[0]); + auto second_src = getExpressionSource(argument_nodes[1]); + if (!first_src || !second_src) + continue; + const auto & lhs_join = *join_node->getLeftTableExpression(); + const auto & rhs_join = *join_node->getRightTableExpression(); + bool arguments_from_both_sides = (first_src->isEqual(lhs_join) && second_src->isEqual(rhs_join)) || + (first_src->isEqual(rhs_join) && second_src->isEqual(lhs_join)); + if (!arguments_from_both_sides) + continue; equals_functions_indices.push_back(or_operands.size() - 1); } else if (func_name == "and") @@ -231,7 +248,7 @@ class LogicalExpressionOptimizerVisitor : public InDepthQueryTreeVisitorWithCont /// Operator <=> is not supported outside of JOIN ON section if (join_node->hasJoinExpression()) { - JoinOnLogicalExpressionOptimizerVisitor join_on_visitor(getContext()); + JoinOnLogicalExpressionOptimizerVisitor join_on_visitor(join_node, getContext()); join_on_visitor.visit(join_node->getJoinExpression()); } return; diff --git a/src/Analyzer/Passes/OptimizeGroupByInjectiveFunctionsPass.cpp b/src/Analyzer/Passes/OptimizeGroupByInjectiveFunctionsPass.cpp index 618932025253..a30ad2a1590f 100644 --- a/src/Analyzer/Passes/OptimizeGroupByInjectiveFunctionsPass.cpp +++ b/src/Analyzer/Passes/OptimizeGroupByInjectiveFunctionsPass.cpp @@ -12,24 +12,6 @@ namespace DB namespace { -const std::unordered_set possibly_injective_function_names -{ - "dictGet", - "dictGetString", - "dictGetUInt8", - "dictGetUInt16", - "dictGetUInt32", - "dictGetUInt64", - "dictGetInt8", - "dictGetInt16", - "dictGetInt32", - "dictGetInt64", - "dictGetFloat32", - "dictGetFloat64", - "dictGetDate", - "dictGetDateTime" -}; - class OptimizeGroupByInjectiveFunctionsVisitor : public InDepthQueryTreeVisitorWithContext { using Base = InDepthQueryTreeVisitorWithContext; diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index f5474ddb662a..aedf860f5be4 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -776,7 +776,7 @@ struct IdentifierResolveScope /// Table expression node to data std::unordered_map table_expression_node_to_data; - QueryTreeNodePtrWithHashWithoutAliasSet nullable_group_by_keys; + QueryTreeNodePtrWithHashIgnoreTypesSet nullable_group_by_keys; /// Here we count the number of nullable GROUP BY keys we met resolving expression. /// E.g. for a query `SELECT tuple(tuple(number)) FROM numbers(10) GROUP BY (number, tuple(number)) with cube` /// both `number` and `tuple(number)` would be in nullable_group_by_keys. @@ -1940,8 +1940,7 @@ std::vector QueryAnalyzer::collectIdentifierTypoHints(const Identifier & for (const auto & valid_identifier : valid_identifiers) prompting_strings.push_back(valid_identifier.getFullName()); - NamePrompter<1> prompter; - return prompter.getHints(unresolved_identifier.getFullName(), prompting_strings); + return NamePrompter<1>::getHints(unresolved_identifier.getFullName(), prompting_strings); } /** Wrap expression node in tuple element function calls for nested paths. 
@@ -2276,6 +2275,10 @@ void QueryAnalyzer::mergeWindowWithParentWindow(const QueryTreeNodePtr & window_ */ void QueryAnalyzer::replaceNodesWithPositionalArguments(QueryTreeNodePtr & node_list, const QueryTreeNodes & projection_nodes, IdentifierResolveScope & scope) { + const auto & settings = scope.context->getSettingsRef(); + if (!settings.enable_positional_arguments || scope.context->getClientInfo().query_kind != ClientInfo::QueryKind::INITIAL_QUERY) + return; + auto & node_list_typed = node_list->as(); for (auto & node : node_list_typed.getNodes()) @@ -2288,7 +2291,8 @@ void QueryAnalyzer::replaceNodesWithPositionalArguments(QueryTreeNodePtr & node_ auto * constant_node = (*node_to_replace)->as(); if (!constant_node - || (constant_node->getValue().getType() != Field::Types::UInt64 && constant_node->getValue().getType() != Field::Types::Int64)) + || (constant_node->getValue().getType() != Field::Types::UInt64 + && constant_node->getValue().getType() != Field::Types::Int64)) continue; UInt64 pos; @@ -3993,9 +3997,15 @@ IdentifierResolveResult QueryAnalyzer::tryResolveIdentifierInParentScopes(const } else if (resolved_identifier->as()) { - lookup_result.resolved_identifier = resolved_identifier; return lookup_result; } + else if (auto * resolved_function = resolved_identifier->as()) + { + /// Special case: scalar subquery was executed and replaced by __getScalar function. + /// Handle it as a constant. + if (resolved_function->getFunctionName() == "__getScalar") + return lookup_result; + } throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Resolve identifier '{}' from parent scope only supported for constants and CTE. Actual {} node type {}. In scope {}", @@ -5794,7 +5804,7 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi return result_projection_names; } - FunctionOverloadResolverPtr function = UserDefinedExecutableFunctionFactory::instance().tryGet(function_name, scope.context, parameters); + FunctionOverloadResolverPtr function = UserDefinedExecutableFunctionFactory::instance().tryGet(function_name, scope.context, parameters); /// NOLINT(readability-static-accessed-through-instance) bool is_executable_udf = true; IdentifierResolveScope::ResolvedFunctionsCache * function_cache = nullptr; @@ -5824,7 +5834,7 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi { std::vector possible_function_names; - auto function_names = UserDefinedExecutableFunctionFactory::instance().getRegisteredNames(scope.context); + auto function_names = UserDefinedExecutableFunctionFactory::instance().getRegisteredNames(scope.context); /// NOLINT(readability-static-accessed-through-instance) possible_function_names.insert(possible_function_names.end(), function_names.begin(), function_names.end()); function_names = UserDefinedSQLFunctionFactory::instance().getAllRegisteredNames(); @@ -5842,8 +5852,7 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi possible_function_names.push_back(name); } - NamePrompter<2> name_prompter; - auto hints = name_prompter.getHints(function_name, possible_function_names); + auto hints = NamePrompter<2>::getHints(function_name, possible_function_names); throw Exception(ErrorCodes::UNKNOWN_FUNCTION, "Function with name '{}' does not exists. 
In scope {}{}", @@ -6083,7 +6092,9 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi * Example: SELECT toTypeName(sum(number)) FROM numbers(10); */ if (column && isColumnConst(*column) && !typeid_cast(column.get())->getDataColumn().isDummy() && - (!hasAggregateFunctionNodes(node) && !hasFunctionNode(node, "arrayJoin"))) + !hasAggregateFunctionNodes(node) && !hasFunctionNode(node, "arrayJoin") && + /// Sanity check: do not convert large columns to constants + column->byteSize() < 1_MiB) { /// Replace function node with result constant node Field column_constant_value; @@ -6155,12 +6166,6 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, Id return resolved_expression_it->second; } - bool is_nullable_group_by_key = scope.nullable_group_by_keys.contains(node) && !scope.expressions_in_resolve_process_stack.hasAggregateFunction(); - if (is_nullable_group_by_key) - ++scope.found_nullable_group_by_key_in_scope; - - SCOPE_EXIT(scope.found_nullable_group_by_key_in_scope -= is_nullable_group_by_key); - String node_alias = node->getAlias(); ProjectionNames result_projection_names; @@ -6452,10 +6457,14 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, Id validateTreeSize(node, scope.context->getSettingsRef().max_expanded_ast_elements, node_to_tree_size); - if (is_nullable_group_by_key && scope.found_nullable_group_by_key_in_scope == 1) + if (!scope.expressions_in_resolve_process_stack.hasAggregateFunction()) { - node = node->clone(); - node->convertToNullable(); + auto it = scope.nullable_group_by_keys.find(node); + if (it != scope.nullable_group_by_keys.end()) + { + node = it->node->clone(); + node->convertToNullable(); + } } /** Update aliases after expression node was resolved. @@ -6675,52 +6684,45 @@ void expandTuplesInList(QueryTreeNodes & key_list) */ void QueryAnalyzer::resolveGroupByNode(QueryNode & query_node_typed, IdentifierResolveScope & scope) { - const auto & settings = scope.context->getSettingsRef(); - if (query_node_typed.isGroupByWithGroupingSets()) { - QueryTreeNodes nullable_group_by_keys; for (auto & grouping_sets_keys_list_node : query_node_typed.getGroupBy().getNodes()) { - if (settings.enable_positional_arguments) - replaceNodesWithPositionalArguments(grouping_sets_keys_list_node, query_node_typed.getProjection().getNodes(), scope); + replaceNodesWithPositionalArguments(grouping_sets_keys_list_node, query_node_typed.getProjection().getNodes(), scope); + + resolveExpressionNodeList(grouping_sets_keys_list_node, scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/); // Remove redundant calls to `tuple` function. It simplifies checking if expression is an aggregation key. 
// It's required to support queries like: SELECT number FROM numbers(3) GROUP BY (number, number % 2) auto & group_by_list = grouping_sets_keys_list_node->as().getNodes(); expandTuplesInList(group_by_list); - - if (scope.group_by_use_nulls) - for (const auto & group_by_elem : group_by_list) - nullable_group_by_keys.push_back(group_by_elem->clone()); - - resolveExpressionNodeList(grouping_sets_keys_list_node, scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/); } - for (auto & nullable_group_by_key : nullable_group_by_keys) - scope.nullable_group_by_keys.insert(std::move(nullable_group_by_key)); + if (scope.group_by_use_nulls) + { + for (const auto & grouping_set : query_node_typed.getGroupBy().getNodes()) + { + for (const auto & group_by_elem : grouping_set->as()->getNodes()) + scope.nullable_group_by_keys.insert(group_by_elem); + } + } } else { - if (settings.enable_positional_arguments) - replaceNodesWithPositionalArguments(query_node_typed.getGroupByNode(), query_node_typed.getProjection().getNodes(), scope); + replaceNodesWithPositionalArguments(query_node_typed.getGroupByNode(), query_node_typed.getProjection().getNodes(), scope); + + resolveExpressionNodeList(query_node_typed.getGroupByNode(), scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/); // Remove redundant calls to `tuple` function. It simplifies checking if expression is an aggregation key. // It's required to support queries like: SELECT number FROM numbers(3) GROUP BY (number, number % 2) auto & group_by_list = query_node_typed.getGroupBy().getNodes(); expandTuplesInList(group_by_list); - QueryTreeNodes nullable_group_by_keys; if (scope.group_by_use_nulls) { for (const auto & group_by_elem : query_node_typed.getGroupBy().getNodes()) - nullable_group_by_keys.push_back(group_by_elem->clone()); + scope.nullable_group_by_keys.insert(group_by_elem); } - - resolveExpressionNodeList(query_node_typed.getGroupByNode(), scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/); - - for (auto & nullable_group_by_key : nullable_group_by_keys) - scope.nullable_group_by_keys.insert(std::move(nullable_group_by_key)); } } @@ -7854,8 +7856,6 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier if (query_node_typed.isCTE()) cte_in_resolve_process.insert(query_node_typed.getCTEName()); - const auto & settings = scope.context->getSettingsRef(); - bool is_rollup_or_cube = query_node_typed.isGroupByWithRollup() || query_node_typed.isGroupByWithCube(); if (query_node_typed.isGroupByWithGroupingSets() @@ -8031,6 +8031,16 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier if (query_node_typed.hasGroupBy()) resolveGroupByNode(query_node_typed, scope); + if (scope.group_by_use_nulls) + { + resolved_expressions.clear(); + /// Clone is needed cause aliases share subtrees. 
+ /// If not clone, the same (shared) subtree could be resolved again with different (Nullable) type + /// See 03023_group_by_use_nulls_analyzer_crashes + for (auto & [_, node] : scope.alias_name_to_expression_node) + node = node->clone(); + } + if (query_node_typed.hasHaving()) resolveExpressionNode(query_node_typed.getHaving(), scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/); @@ -8039,8 +8049,9 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier if (query_node_typed.hasOrderBy()) { - if (settings.enable_positional_arguments) - replaceNodesWithPositionalArguments(query_node_typed.getOrderByNode(), query_node_typed.getProjection().getNodes(), scope); + replaceNodesWithPositionalArguments(query_node_typed.getOrderByNode(), query_node_typed.getProjection().getNodes(), scope); + + const auto & settings = scope.context->getSettingsRef(); expandOrderByAll(query_node_typed, settings); resolveSortNodeList(query_node_typed.getOrderByNode(), scope); @@ -8063,8 +8074,7 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier if (query_node_typed.hasLimitBy()) { - if (settings.enable_positional_arguments) - replaceNodesWithPositionalArguments(query_node_typed.getLimitByNode(), query_node_typed.getProjection().getNodes(), scope); + replaceNodesWithPositionalArguments(query_node_typed.getLimitByNode(), query_node_typed.getProjection().getNodes(), scope); resolveExpressionNodeList(query_node_typed.getLimitByNode(), scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/); } diff --git a/src/Analyzer/Passes/UniqToCountPass.cpp b/src/Analyzer/Passes/UniqToCountPass.cpp index d7d11e9a5802..b801865c9a5a 100644 --- a/src/Analyzer/Passes/UniqToCountPass.cpp +++ b/src/Analyzer/Passes/UniqToCountPass.cpp @@ -29,7 +29,8 @@ NamesAndTypes extractProjectionColumnsForGroupBy(const QueryNode * query_node) return {}; NamesAndTypes result; - for (const auto & group_by_ele : query_node->getGroupByNode()->getChildren()) + const auto & group_by_elements = query_node->getGroupByNode()->getChildren(); + for (const auto & group_by_element : group_by_elements) { const auto & projection_columns = query_node->getProjectionColumns(); const auto & projection_nodes = query_node->getProjection().getNodes(); @@ -38,10 +39,18 @@ NamesAndTypes extractProjectionColumnsForGroupBy(const QueryNode * query_node) for (size_t i = 0; i < projection_columns.size(); i++) { - if (projection_nodes[i]->isEqual(*group_by_ele)) + if (projection_nodes[i]->isEqual(*group_by_element)) + { result.push_back(projection_columns[i]); + break; + } } } + /// If some group by keys are not matched, we cannot apply optimization, + /// because prefix of group by keys may not be unique. 
+ if (result.size() != group_by_elements.size()) + return {}; + return result; } diff --git a/src/Analyzer/QueryNode.cpp b/src/Analyzer/QueryNode.cpp index bc7a29247e44..f1361c328dbf 100644 --- a/src/Analyzer/QueryNode.cpp +++ b/src/Analyzer/QueryNode.cpp @@ -247,7 +247,7 @@ void QueryNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, s } } -bool QueryNode::isEqualImpl(const IQueryTreeNode & rhs) const +bool QueryNode::isEqualImpl(const IQueryTreeNode & rhs, CompareOptions) const { const auto & rhs_typed = assert_cast(rhs); @@ -266,7 +266,7 @@ bool QueryNode::isEqualImpl(const IQueryTreeNode & rhs) const settings_changes == rhs_typed.settings_changes; } -void QueryNode::updateTreeHashImpl(HashState & state) const +void QueryNode::updateTreeHashImpl(HashState & state, CompareOptions) const { state.update(is_subquery); state.update(is_cte); diff --git a/src/Analyzer/QueryNode.h b/src/Analyzer/QueryNode.h index 6f9067908dde..af187df72a80 100644 --- a/src/Analyzer/QueryNode.h +++ b/src/Analyzer/QueryNode.h @@ -589,9 +589,9 @@ class QueryNode final : public IQueryTreeNode void dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const override; protected: - bool isEqualImpl(const IQueryTreeNode & rhs) const override; + bool isEqualImpl(const IQueryTreeNode & rhs, CompareOptions) const override; - void updateTreeHashImpl(HashState &) const override; + void updateTreeHashImpl(HashState &, CompareOptions) const override; QueryTreeNodePtr cloneImpl() const override; diff --git a/src/Analyzer/QueryTreeBuilder.cpp b/src/Analyzer/QueryTreeBuilder.cpp index df80f46b3cd1..a4f204724320 100644 --- a/src/Analyzer/QueryTreeBuilder.cpp +++ b/src/Analyzer/QueryTreeBuilder.cpp @@ -444,8 +444,8 @@ QueryTreeNodePtr QueryTreeBuilder::buildSortList(const ASTPtr & order_by_express nulls_sort_direction = order_by_element.nulls_direction == 1 ? 
SortDirection::ASCENDING : SortDirection::DESCENDING; std::shared_ptr collator; - if (order_by_element.collation) - collator = std::make_shared(order_by_element.collation->as().value.get()); + if (order_by_element.getCollation()) + collator = std::make_shared(order_by_element.getCollation()->as().value.get()); const auto & sort_expression_ast = order_by_element.children.at(0); auto sort_expression = buildExpression(sort_expression_ast, context); @@ -455,12 +455,12 @@ QueryTreeNodePtr QueryTreeBuilder::buildSortList(const ASTPtr & order_by_express std::move(collator), order_by_element.with_fill); - if (order_by_element.fill_from) - sort_node->getFillFrom() = buildExpression(order_by_element.fill_from, context); - if (order_by_element.fill_to) - sort_node->getFillTo() = buildExpression(order_by_element.fill_to, context); - if (order_by_element.fill_step) - sort_node->getFillStep() = buildExpression(order_by_element.fill_step, context); + if (order_by_element.getFillFrom()) + sort_node->getFillFrom() = buildExpression(order_by_element.getFillFrom(), context); + if (order_by_element.getFillTo()) + sort_node->getFillTo() = buildExpression(order_by_element.getFillTo(), context); + if (order_by_element.getFillStep()) + sort_node->getFillStep() = buildExpression(order_by_element.getFillStep(), context); list_node->getNodes().push_back(std::move(sort_node)); } @@ -558,7 +558,7 @@ QueryTreeNodePtr QueryTreeBuilder::buildExpression(const ASTPtr & expression, co } else if (const auto * function = expression->as()) { - if (function->is_lambda_function) + if (function->is_lambda_function || isASTLambdaFunction(*function)) { const auto & lambda_arguments_and_expression = function->arguments->as().children; auto & lambda_arguments_tuple = lambda_arguments_and_expression.at(0)->as(); diff --git a/src/Analyzer/SortNode.cpp b/src/Analyzer/SortNode.cpp index 8e9913af442c..25c9c726d70d 100644 --- a/src/Analyzer/SortNode.cpp +++ b/src/Analyzer/SortNode.cpp @@ -71,7 +71,7 @@ void SortNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, si } } -bool SortNode::isEqualImpl(const IQueryTreeNode & rhs) const +bool SortNode::isEqualImpl(const IQueryTreeNode & rhs, CompareOptions) const { const auto & rhs_typed = assert_cast(rhs); if (sort_direction != rhs_typed.sort_direction || @@ -89,7 +89,7 @@ bool SortNode::isEqualImpl(const IQueryTreeNode & rhs) const return collator->getLocale() == rhs_typed.collator->getLocale(); } -void SortNode::updateTreeHashImpl(HashState & hash_state) const +void SortNode::updateTreeHashImpl(HashState & hash_state, CompareOptions) const { hash_state.update(sort_direction); /// use some determined value if `nulls_sort_direction` is `nullopt` @@ -120,17 +120,18 @@ ASTPtr SortNode::toASTImpl(const ConvertToASTOptions & options) const result->nulls_direction_was_explicitly_specified = nulls_sort_direction.has_value(); - result->with_fill = with_fill; - result->fill_from = hasFillFrom() ? getFillFrom()->toAST(options) : nullptr; - result->fill_to = hasFillTo() ? getFillTo()->toAST(options) : nullptr; - result->fill_step = hasFillStep() ? 
getFillStep()->toAST(options) : nullptr; result->children.push_back(getExpression()->toAST(options)); if (collator) - { - result->children.push_back(std::make_shared(Field(collator->getLocale()))); - result->collation = result->children.back(); - } + result->setCollation(std::make_shared(Field(collator->getLocale()))); + + result->with_fill = with_fill; + if (hasFillFrom()) + result->setFillFrom(getFillFrom()->toAST(options)); + if (hasFillTo()) + result->setFillTo(getFillTo()->toAST(options)); + if (hasFillStep()) + result->setFillStep(getFillStep()->toAST(options)); return result; } diff --git a/src/Analyzer/SortNode.h b/src/Analyzer/SortNode.h index b860fd19a900..4d1f6f7c0f0d 100644 --- a/src/Analyzer/SortNode.h +++ b/src/Analyzer/SortNode.h @@ -131,9 +131,9 @@ class SortNode final : public IQueryTreeNode void dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const override; protected: - bool isEqualImpl(const IQueryTreeNode & rhs) const override; + bool isEqualImpl(const IQueryTreeNode & rhs, CompareOptions) const override; - void updateTreeHashImpl(HashState & hash_state) const override; + void updateTreeHashImpl(HashState & hash_state, CompareOptions) const override; QueryTreeNodePtr cloneImpl() const override; diff --git a/src/Analyzer/TableFunctionNode.cpp b/src/Analyzer/TableFunctionNode.cpp index e5158a06373c..87d2fdcffb52 100644 --- a/src/Analyzer/TableFunctionNode.cpp +++ b/src/Analyzer/TableFunctionNode.cpp @@ -82,7 +82,7 @@ void TableFunctionNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_ } } -bool TableFunctionNode::isEqualImpl(const IQueryTreeNode & rhs) const +bool TableFunctionNode::isEqualImpl(const IQueryTreeNode & rhs, CompareOptions) const { const auto & rhs_typed = assert_cast(rhs); if (table_function_name != rhs_typed.table_function_name) @@ -97,7 +97,7 @@ bool TableFunctionNode::isEqualImpl(const IQueryTreeNode & rhs) const return table_expression_modifiers == rhs_typed.table_expression_modifiers; } -void TableFunctionNode::updateTreeHashImpl(HashState & state) const +void TableFunctionNode::updateTreeHashImpl(HashState & state, CompareOptions) const { state.update(table_function_name.size()); state.update(table_function_name); diff --git a/src/Analyzer/TableFunctionNode.h b/src/Analyzer/TableFunctionNode.h index 69237ac84167..98121ef95c59 100644 --- a/src/Analyzer/TableFunctionNode.h +++ b/src/Analyzer/TableFunctionNode.h @@ -155,9 +155,9 @@ class TableFunctionNode : public IQueryTreeNode void dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const override; protected: - bool isEqualImpl(const IQueryTreeNode & rhs) const override; + bool isEqualImpl(const IQueryTreeNode & rhs, CompareOptions) const override; - void updateTreeHashImpl(HashState & state) const override; + void updateTreeHashImpl(HashState & state, CompareOptions) const override; QueryTreeNodePtr cloneImpl() const override; diff --git a/src/Analyzer/TableNode.cpp b/src/Analyzer/TableNode.cpp index f899c1ae6fe9..daf5db08551d 100644 --- a/src/Analyzer/TableNode.cpp +++ b/src/Analyzer/TableNode.cpp @@ -52,14 +52,14 @@ void TableNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, s } } -bool TableNode::isEqualImpl(const IQueryTreeNode & rhs) const +bool TableNode::isEqualImpl(const IQueryTreeNode & rhs, CompareOptions) const { const auto & rhs_typed = assert_cast(rhs); return storage_id == rhs_typed.storage_id && table_expression_modifiers == rhs_typed.table_expression_modifiers && temporary_table_name == 
rhs_typed.temporary_table_name; } -void TableNode::updateTreeHashImpl(HashState & state) const +void TableNode::updateTreeHashImpl(HashState & state, CompareOptions) const { if (!temporary_table_name.empty()) { diff --git a/src/Analyzer/TableNode.h b/src/Analyzer/TableNode.h index b0bf91fa01b9..2d66167acd15 100644 --- a/src/Analyzer/TableNode.h +++ b/src/Analyzer/TableNode.h @@ -100,9 +100,9 @@ class TableNode : public IQueryTreeNode void dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const override; protected: - bool isEqualImpl(const IQueryTreeNode & rhs) const override; + bool isEqualImpl(const IQueryTreeNode & rhs, CompareOptions) const override; - void updateTreeHashImpl(HashState & state) const override; + void updateTreeHashImpl(HashState & state, CompareOptions) const override; QueryTreeNodePtr cloneImpl() const override; diff --git a/src/Analyzer/UnionNode.cpp b/src/Analyzer/UnionNode.cpp index c60031265544..9bc2a197d9a6 100644 --- a/src/Analyzer/UnionNode.cpp +++ b/src/Analyzer/UnionNode.cpp @@ -145,7 +145,7 @@ void UnionNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, s getQueriesNode()->dumpTreeImpl(buffer, format_state, indent + 4); } -bool UnionNode::isEqualImpl(const IQueryTreeNode & rhs) const +bool UnionNode::isEqualImpl(const IQueryTreeNode & rhs, CompareOptions) const { const auto & rhs_typed = assert_cast(rhs); @@ -153,7 +153,7 @@ bool UnionNode::isEqualImpl(const IQueryTreeNode & rhs) const union_mode == rhs_typed.union_mode; } -void UnionNode::updateTreeHashImpl(HashState & state) const +void UnionNode::updateTreeHashImpl(HashState & state, CompareOptions) const { state.update(is_subquery); state.update(is_cte); diff --git a/src/Analyzer/UnionNode.h b/src/Analyzer/UnionNode.h index 7686b73f5e06..189951f6375d 100644 --- a/src/Analyzer/UnionNode.h +++ b/src/Analyzer/UnionNode.h @@ -143,9 +143,9 @@ class UnionNode final : public IQueryTreeNode void dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const override; protected: - bool isEqualImpl(const IQueryTreeNode & rhs) const override; + bool isEqualImpl(const IQueryTreeNode & rhs, CompareOptions) const override; - void updateTreeHashImpl(HashState &) const override; + void updateTreeHashImpl(HashState &, CompareOptions) const override; QueryTreeNodePtr cloneImpl() const override; diff --git a/src/Analyzer/Utils.cpp b/src/Analyzer/Utils.cpp index 8ccf95deadc7..4299050a0949 100644 --- a/src/Analyzer/Utils.cpp +++ b/src/Analyzer/Utils.cpp @@ -760,4 +760,74 @@ QueryTreeNodePtr createCastFunction(QueryTreeNodePtr node, DataTypePtr result_ty return function_node; } +/** Returns: + * {_, false} - multiple sources + * {nullptr, true} - no sources (for constants) + * {source, true} - single source + */ +std::pair getExpressionSourceImpl(const QueryTreeNodePtr & node) +{ + if (const auto * column = node->as()) + { + auto source = column->getColumnSourceOrNull(); + if (!source) + return {nullptr, false}; + return {source, true}; + } + + if (const auto * func = node->as()) + { + QueryTreeNodePtr source = nullptr; + const auto & args = func->getArguments().getNodes(); + for (const auto & arg : args) + { + auto [arg_source, is_ok] = getExpressionSourceImpl(arg); + if (!is_ok) + return {nullptr, false}; + + if (!source) + source = arg_source; + else if (arg_source && !source->isEqual(*arg_source)) + return {nullptr, false}; + } + return {source, true}; + + } + + if (node->as()) + return {nullptr, true}; + + return {nullptr, false}; +} + +QueryTreeNodePtr 
getExpressionSource(const QueryTreeNodePtr & node) +{ + auto [source, is_ok] = getExpressionSourceImpl(node); + if (!is_ok) + return nullptr; + return source; +} + +QueryTreeNodePtr buildSubqueryToReadColumnsFromTableExpression(QueryTreeNodePtr table_node, const ContextPtr & context) +{ + const auto & storage_snapshot = table_node->as()->getStorageSnapshot(); + auto columns_to_select = storage_snapshot->getColumns(GetColumnsOptions(GetColumnsOptions::Ordinary)); + size_t columns_to_select_size = columns_to_select.size(); + auto column_nodes_to_select = std::make_shared(); + column_nodes_to_select->getNodes().reserve(columns_to_select_size); + NamesAndTypes projection_columns; + projection_columns.reserve(columns_to_select_size); + for (auto & column : columns_to_select) + { + column_nodes_to_select->getNodes().emplace_back(std::make_shared(column, table_node)); + projection_columns.emplace_back(column.name, column.type); + } + auto subquery_for_table = std::make_shared(Context::createCopy(context)); + subquery_for_table->setIsSubquery(true); + subquery_for_table->getProjectionNode() = std::move(column_nodes_to_select); + subquery_for_table->getJoinTree() = std::move(table_node); + subquery_for_table->resolveProjectionColumns(std::move(projection_columns)); + return subquery_for_table; +} + } diff --git a/src/Analyzer/Utils.h b/src/Analyzer/Utils.h index 8e32ef0464c1..93d439453c6c 100644 --- a/src/Analyzer/Utils.h +++ b/src/Analyzer/Utils.h @@ -105,4 +105,10 @@ NameSet collectIdentifiersFullNames(const QueryTreeNodePtr & node); /// Wrap node into `_CAST` function QueryTreeNodePtr createCastFunction(QueryTreeNodePtr node, DataTypePtr result_type, ContextPtr context); +/// Checks that node has only one source and returns it +QueryTreeNodePtr getExpressionSource(const QueryTreeNodePtr & node); + +/// Build subquery which we execute for `IN table` function. 
+QueryTreeNodePtr buildSubqueryToReadColumnsFromTableExpression(QueryTreeNodePtr table_node, const ContextPtr & context); + } diff --git a/src/Analyzer/WindowNode.cpp b/src/Analyzer/WindowNode.cpp index 0fbe7c51bc7c..defcdbb790fa 100644 --- a/src/Analyzer/WindowNode.cpp +++ b/src/Analyzer/WindowNode.cpp @@ -1,11 +1,9 @@ #include - -#include - -#include #include - +#include #include +#include +#include namespace DB { @@ -80,14 +78,14 @@ void WindowNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, } } -bool WindowNode::isEqualImpl(const IQueryTreeNode & rhs) const +bool WindowNode::isEqualImpl(const IQueryTreeNode & rhs, CompareOptions) const { const auto & rhs_typed = assert_cast(rhs); return window_frame == rhs_typed.window_frame && parent_window_name == rhs_typed.parent_window_name; } -void WindowNode::updateTreeHashImpl(HashState & hash_state) const +void WindowNode::updateTreeHashImpl(HashState & hash_state, CompareOptions) const { hash_state.update(window_frame.is_default); hash_state.update(window_frame.type); diff --git a/src/Analyzer/WindowNode.h b/src/Analyzer/WindowNode.h index 30e1128b93c7..febbc02bedce 100644 --- a/src/Analyzer/WindowNode.h +++ b/src/Analyzer/WindowNode.h @@ -169,9 +169,9 @@ class WindowNode final : public IQueryTreeNode void dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const override; protected: - bool isEqualImpl(const IQueryTreeNode & rhs) const override; + bool isEqualImpl(const IQueryTreeNode & rhs, CompareOptions) const override; - void updateTreeHashImpl(HashState & hash_state) const override; + void updateTreeHashImpl(HashState & hash_state, CompareOptions) const override; QueryTreeNodePtr cloneImpl() const override; diff --git a/src/Analyzer/tests/gtest_query_tree_node.cpp b/src/Analyzer/tests/gtest_query_tree_node.cpp index cf1ce78e4232..01556c9f9216 100644 --- a/src/Analyzer/tests/gtest_query_tree_node.cpp +++ b/src/Analyzer/tests/gtest_query_tree_node.cpp @@ -22,12 +22,12 @@ class SourceNode final : public IQueryTreeNode { } - bool isEqualImpl(const IQueryTreeNode &) const override + bool isEqualImpl(const IQueryTreeNode &, CompareOptions) const override { return true; } - void updateTreeHashImpl(HashState &) const override + void updateTreeHashImpl(HashState &, CompareOptions) const override { } diff --git a/src/Backups/BackupEntriesCollector.cpp b/src/Backups/BackupEntriesCollector.cpp index c71ce195388c..58dac0c0843a 100644 --- a/src/Backups/BackupEntriesCollector.cpp +++ b/src/Backups/BackupEntriesCollector.cpp @@ -1,22 +1,25 @@ +#include +#include #include #include -#include -#include #include #include +#include #include #include #include #include #include #include -#include #include #include +#include #include #include + +#include #include -#include + #include namespace fs = std::filesystem; @@ -122,7 +125,7 @@ BackupEntries BackupEntriesCollector::run() = BackupSettings::Util::filterHostIDs(backup_settings.cluster_host_ids, backup_settings.shard_num, backup_settings.replica_num); /// Do renaming in the create queries according to the renaming config. - renaming_map = makeRenamingMapFromBackupQuery(backup_query_elements); + renaming_map = BackupUtils::makeRenamingMap(backup_query_elements); /// Calculate the root path for collecting backup entries, it's either empty or has the format "shards//replicas//". 
calculateRootPathInBackup(); @@ -567,17 +570,16 @@ std::vector> BackupEntriesCollector::findTablesInD checkIsQueryCancelled(); - auto filter_by_table_name = [my_database_info = &database_info](const String & table_name) + auto filter_by_table_name = [&](const String & table_name) { - /// We skip inner tables of materialized views. - if (table_name.starts_with(".inner_id.")) + if (BackupUtils::isInnerTable(database_name, table_name)) return false; - if (my_database_info->tables.contains(table_name)) + if (database_info.tables.contains(table_name)) return true; - if (my_database_info->all_tables) - return !my_database_info->except_table_names.contains(table_name); + if (database_info.all_tables) + return !database_info.except_table_names.contains(table_name); return false; }; diff --git a/src/Backups/BackupIO_S3.cpp b/src/Backups/BackupIO_S3.cpp index 2063af2061cc..4b7e3d1e775a 100644 --- a/src/Backups/BackupIO_S3.cpp +++ b/src/Backups/BackupIO_S3.cpp @@ -124,11 +124,12 @@ BackupReaderS3::BackupReaderS3( bool allow_s3_native_copy, const ReadSettings & read_settings_, const WriteSettings & write_settings_, - const ContextPtr & context_) + const ContextPtr & context_, + bool is_internal_backup) : BackupReaderDefault(read_settings_, write_settings_, getLogger("BackupReaderS3")) , s3_uri(s3_uri_) , data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::S3, MetadataStorageType::None, s3_uri.endpoint, false, false} - , s3_settings(context_->getStorageS3Settings().getSettings(s3_uri.uri.toString(), context_->getUserName())) + , s3_settings(context_->getStorageS3Settings().getSettings(s3_uri.uri.toString(), context_->getUserName(), /*ignore_user=*/is_internal_backup)) { auto & request_settings = s3_settings.request_settings; request_settings.updateFromSettings(context_->getSettingsRef()); @@ -214,11 +215,12 @@ BackupWriterS3::BackupWriterS3( const String & storage_class_name, const ReadSettings & read_settings_, const WriteSettings & write_settings_, - const ContextPtr & context_) + const ContextPtr & context_, + bool is_internal_backup) : BackupWriterDefault(read_settings_, write_settings_, getLogger("BackupWriterS3")) , s3_uri(s3_uri_) , data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::S3, MetadataStorageType::None, s3_uri.endpoint, false, false} - , s3_settings(context_->getStorageS3Settings().getSettings(s3_uri.uri.toString(), context_->getUserName())) + , s3_settings(context_->getStorageS3Settings().getSettings(s3_uri.uri.toString(), context_->getUserName(), /*ignore_user=*/is_internal_backup)) { auto & request_settings = s3_settings.request_settings; request_settings.updateFromSettings(context_->getSettingsRef()); diff --git a/src/Backups/BackupIO_S3.h b/src/Backups/BackupIO_S3.h index 57108d122ea5..f81eb975df3f 100644 --- a/src/Backups/BackupIO_S3.h +++ b/src/Backups/BackupIO_S3.h @@ -18,7 +18,15 @@ namespace DB class BackupReaderS3 : public BackupReaderDefault { public: - BackupReaderS3(const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, bool allow_s3_native_copy, const ReadSettings & read_settings_, const WriteSettings & write_settings_, const ContextPtr & context_); + BackupReaderS3( + const S3::URI & s3_uri_, + const String & access_key_id_, + const String & secret_access_key_, + bool allow_s3_native_copy, + const ReadSettings & read_settings_, + const WriteSettings & write_settings_, + const ContextPtr & context_, + bool is_internal_backup); ~BackupReaderS3() override; bool fileExists(const String & 
file_name) override; @@ -41,7 +49,16 @@ class BackupReaderS3 : public BackupReaderDefault class BackupWriterS3 : public BackupWriterDefault { public: - BackupWriterS3(const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, bool allow_s3_native_copy, const String & storage_class_name, const ReadSettings & read_settings_, const WriteSettings & write_settings_, const ContextPtr & context_); + BackupWriterS3( + const S3::URI & s3_uri_, + const String & access_key_id_, + const String & secret_access_key_, + bool allow_s3_native_copy, + const String & storage_class_name, + const ReadSettings & read_settings_, + const WriteSettings & write_settings_, + const ContextPtr & context_, + bool is_internal_backup); ~BackupWriterS3() override; bool fileExists(const String & file_name) override; diff --git a/src/Backups/BackupUtils.cpp b/src/Backups/BackupUtils.cpp index 6efca053f059..fb448fb64adf 100644 --- a/src/Backups/BackupUtils.cpp +++ b/src/Backups/BackupUtils.cpp @@ -8,10 +8,10 @@ #include -namespace DB +namespace DB::BackupUtils { -DDLRenamingMap makeRenamingMapFromBackupQuery(const ASTBackupQuery::Elements & elements) +DDLRenamingMap makeRenamingMap(const ASTBackupQuery::Elements & elements) { DDLRenamingMap map; @@ -120,4 +120,15 @@ bool compareRestoredDatabaseDef(const IAST & restored_database_create_query, con return compareRestoredTableDef(restored_database_create_query, create_query_from_backup, global_context); } +bool isInnerTable(const QualifiedTableName & table_name) +{ + return isInnerTable(table_name.database, table_name.table); +} + +bool isInnerTable(const String & /* database_name */, const String & table_name) +{ + /// We skip inner tables of materialized views. + return table_name.starts_with(".inner.") || table_name.starts_with(".inner_id."); +} + } diff --git a/src/Backups/BackupUtils.h b/src/Backups/BackupUtils.h index 7976de818e25..ba889eccc48e 100644 --- a/src/Backups/BackupUtils.h +++ b/src/Backups/BackupUtils.h @@ -9,9 +9,13 @@ namespace DB class IBackup; class AccessRightsElements; class DDLRenamingMap; +struct QualifiedTableName; + +namespace BackupUtils +{ /// Initializes a DDLRenamingMap from a BACKUP or RESTORE query. -DDLRenamingMap makeRenamingMapFromBackupQuery(const ASTBackupQuery::Elements & elements); +DDLRenamingMap makeRenamingMap(const ASTBackupQuery::Elements & elements); /// Returns access required to execute BACKUP query. AccessRightsElements getRequiredAccessToBackup(const ASTBackupQuery::Elements & elements); @@ -20,4 +24,10 @@ AccessRightsElements getRequiredAccessToBackup(const ASTBackupQuery::Elements & bool compareRestoredTableDef(const IAST & restored_table_create_query, const IAST & create_query_from_backup, const ContextPtr & global_context); bool compareRestoredDatabaseDef(const IAST & restored_database_create_query, const IAST & create_query_from_backup, const ContextPtr & global_context); +/// Returns true if this table should be skipped while making a backup because it's an inner table. +bool isInnerTable(const QualifiedTableName & table_name); +bool isInnerTable(const String & database_name, const String & table_name); + +} + } diff --git a/src/Backups/BackupsWorker.cpp b/src/Backups/BackupsWorker.cpp index 96fe770227c8..bb04c72dd3c0 100644 --- a/src/Backups/BackupsWorker.cpp +++ b/src/Backups/BackupsWorker.cpp @@ -27,6 +27,8 @@ #include #include +#include + namespace CurrentMetrics { @@ -562,7 +564,7 @@ void BackupsWorker::doBackup( /// Checks access rights if this is not ON CLUSTER query. 
/// (If this is ON CLUSTER query executeDDLQueryOnCluster() will check access rights later.) - auto required_access = getRequiredAccessToBackup(backup_query->elements); + auto required_access = BackupUtils::getRequiredAccessToBackup(backup_query->elements); if (!on_cluster) context->checkAccess(required_access); @@ -940,6 +942,7 @@ void BackupsWorker::doRestore( backup_open_params.use_same_s3_credentials_for_base_backup = restore_settings.use_same_s3_credentials_for_base_backup; backup_open_params.read_settings = getReadSettingsForRestore(context); backup_open_params.write_settings = getWriteSettingsForRestore(context); + backup_open_params.is_internal_backup = restore_settings.internal; BackupPtr backup = BackupFactory::instance().createBackup(backup_open_params); String current_database = context->getCurrentDatabase(); diff --git a/src/Backups/RestorerFromBackup.cpp b/src/Backups/RestorerFromBackup.cpp index ed1d5b8a103d..ec0a717cfcd6 100644 --- a/src/Backups/RestorerFromBackup.cpp +++ b/src/Backups/RestorerFromBackup.cpp @@ -24,6 +24,9 @@ #include #include #include + +#include + #include #include @@ -121,7 +124,7 @@ void RestorerFromBackup::run(Mode mode) restore_settings.cluster_host_ids, restore_settings.shard_num, restore_settings.replica_num); /// Do renaming in the create queries according to the renaming config. - renaming_map = makeRenamingMapFromBackupQuery(restore_query_elements); + renaming_map = BackupUtils::makeRenamingMap(restore_query_elements); /// Calculate the root path in the backup for restoring, it's either empty or has the format "shards//replicas//". findRootPathsInBackup(); @@ -343,12 +346,12 @@ void RestorerFromBackup::findDatabasesAndTablesInBackup() { case ASTBackupQuery::ElementType::TABLE: { - findTableInBackup({element.database_name, element.table_name}, element.partitions); + findTableInBackup({element.database_name, element.table_name}, /* skip_if_inner_table= */ false, element.partitions); break; } case ASTBackupQuery::ElementType::TEMPORARY_TABLE: { - findTableInBackup({DatabaseCatalog::TEMPORARY_DATABASE, element.table_name}, element.partitions); + findTableInBackup({DatabaseCatalog::TEMPORARY_DATABASE, element.table_name}, /* skip_if_inner_table= */ false, element.partitions); break; } case ASTBackupQuery::ElementType::DATABASE: @@ -367,14 +370,14 @@ void RestorerFromBackup::findDatabasesAndTablesInBackup() LOG_INFO(log, "Will restore {} databases and {} tables", getNumDatabases(), getNumTables()); } -void RestorerFromBackup::findTableInBackup(const QualifiedTableName & table_name_in_backup, const std::optional & partitions) +void RestorerFromBackup::findTableInBackup(const QualifiedTableName & table_name_in_backup, bool skip_if_inner_table, const std::optional & partitions) { schedule( - [this, table_name_in_backup, partitions]() { findTableInBackupImpl(table_name_in_backup, partitions); }, + [this, table_name_in_backup, skip_if_inner_table, partitions]() { findTableInBackupImpl(table_name_in_backup, skip_if_inner_table, partitions); }, "Restore_FindTbl"); } -void RestorerFromBackup::findTableInBackupImpl(const QualifiedTableName & table_name_in_backup, const std::optional & partitions) +void RestorerFromBackup::findTableInBackupImpl(const QualifiedTableName & table_name_in_backup, bool skip_if_inner_table, const std::optional & partitions) { bool is_temporary_table = (table_name_in_backup.database == DatabaseCatalog::TEMPORARY_DATABASE); @@ -419,6 +422,10 @@ void RestorerFromBackup::findTableInBackupImpl(const QualifiedTableName & table_ = 
*root_path_in_use / "data" / escapeForFileName(table_name_in_backup.database) / escapeForFileName(table_name_in_backup.table); } + QualifiedTableName table_name = renaming_map.getNewTableName(table_name_in_backup); + if (skip_if_inner_table && BackupUtils::isInnerTable(table_name)) + return; + auto read_buffer = backup->readFile(*metadata_path); String create_query_str; readStringUntilEOF(create_query_str, *read_buffer); @@ -429,8 +436,6 @@ void RestorerFromBackup::findTableInBackupImpl(const QualifiedTableName & table_ renameDatabaseAndTableNameInCreateQuery(create_table_query, renaming_map, context->getGlobalContext()); String create_table_query_str = serializeAST(*create_table_query); - QualifiedTableName table_name = renaming_map.getNewTableName(table_name_in_backup); - bool is_predefined_table = DatabaseCatalog::instance().isPredefinedTable(StorageID{table_name.database, table_name.table}); auto table_dependencies = getDependenciesFromCreateQuery(context, table_name, create_table_query); bool table_has_data = backup->hasFiles(data_path_in_backup); @@ -565,7 +570,7 @@ void RestorerFromBackup::findDatabaseInBackupImpl(const String & database_name_i if (except_table_names.contains({database_name_in_backup, table_name_in_backup})) continue; - findTableInBackup({database_name_in_backup, table_name_in_backup}, /* partitions= */ {}); + findTableInBackup({database_name_in_backup, table_name_in_backup}, /* skip_if_inner_table= */ true, /* partitions= */ {}); } } @@ -764,7 +769,7 @@ void RestorerFromBackup::checkDatabase(const String & database_name) ASTPtr existing_database_def = database->getCreateDatabaseQuery(); ASTPtr database_def_from_backup = database_info.create_database_query; - if (!compareRestoredDatabaseDef(*existing_database_def, *database_def_from_backup, context->getGlobalContext())) + if (!BackupUtils::compareRestoredDatabaseDef(*existing_database_def, *database_def_from_backup, context->getGlobalContext())) { throw Exception( ErrorCodes::CANNOT_RESTORE_DATABASE, @@ -935,7 +940,7 @@ void RestorerFromBackup::checkTable(const QualifiedTableName & table_name) { ASTPtr existing_table_def = database->getCreateTableQuery(resolved_id.table_name, context); ASTPtr table_def_from_backup = table_info.create_table_query; - if (!compareRestoredTableDef(*existing_table_def, *table_def_from_backup, context->getGlobalContext())) + if (!BackupUtils::compareRestoredTableDef(*existing_table_def, *table_def_from_backup, context->getGlobalContext())) { throw Exception( ErrorCodes::CANNOT_RESTORE_TABLE, diff --git a/src/Backups/RestorerFromBackup.h b/src/Backups/RestorerFromBackup.h index 238569ac7554..7b36eea0ba07 100644 --- a/src/Backups/RestorerFromBackup.h +++ b/src/Backups/RestorerFromBackup.h @@ -92,8 +92,8 @@ class RestorerFromBackup : private boost::noncopyable void findRootPathsInBackup(); void findDatabasesAndTablesInBackup(); - void findTableInBackup(const QualifiedTableName & table_name_in_backup, const std::optional & partitions); - void findTableInBackupImpl(const QualifiedTableName & table_name_in_backup, const std::optional & partitions); + void findTableInBackup(const QualifiedTableName & table_name_in_backup, bool skip_if_inner_table, const std::optional & partitions); + void findTableInBackupImpl(const QualifiedTableName & table_name_in_backup, bool skip_if_inner_table, const std::optional & partitions); void findDatabaseInBackup(const String & database_name_in_backup, const std::set & except_table_names); void findDatabaseInBackupImpl(const String & database_name_in_backup, const 
std::set & except_table_names); void findEverythingInBackup(const std::set & except_database_names, const std::set & except_table_names); diff --git a/src/Backups/registerBackupEngineS3.cpp b/src/Backups/registerBackupEngineS3.cpp index fed5c6b4d22c..c34dbe273f5e 100644 --- a/src/Backups/registerBackupEngineS3.cpp +++ b/src/Backups/registerBackupEngineS3.cpp @@ -110,7 +110,8 @@ void registerBackupEngineS3(BackupFactory & factory) params.allow_s3_native_copy, params.read_settings, params.write_settings, - params.context); + params.context, + params.is_internal_backup); return std::make_unique( params.backup_info, @@ -129,7 +130,8 @@ void registerBackupEngineS3(BackupFactory & factory) params.s3_storage_class, params.read_settings, params.write_settings, - params.context); + params.context, + params.is_internal_backup); return std::make_unique( params.backup_info, diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 73aa409e9958..da17bc1f41f3 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -85,7 +85,6 @@ add_headers_and_sources(clickhouse_common_io Common) add_headers_and_sources(clickhouse_common_io Common/HashTable) add_headers_and_sources(clickhouse_common_io Common/Scheduler) add_headers_and_sources(clickhouse_common_io Common/Scheduler/Nodes) -add_headers_and_sources(clickhouse_common_io Common/SSH) add_headers_and_sources(clickhouse_common_io IO) add_headers_and_sources(clickhouse_common_io IO/Archives) add_headers_and_sources(clickhouse_common_io IO/S3) @@ -99,7 +98,6 @@ add_headers_and_sources(clickhouse_compression Core) #Included these specific files to avoid linking grpc add_glob(clickhouse_compression_headers Server/ServerType.h) add_glob(clickhouse_compression_sources Server/ServerType.cpp) -add_headers_and_sources(clickhouse_compression Common/SSH) add_library(clickhouse_compression ${clickhouse_compression_headers} ${clickhouse_compression_sources}) @@ -370,8 +368,7 @@ if (TARGET ch_contrib::crc32-vpmsum) endif() if (TARGET ch_contrib::ssh) - target_link_libraries(clickhouse_common_io PUBLIC ch_contrib::ssh) - target_link_libraries(clickhouse_compression PUBLIC ch_contrib::ssh) + target_link_libraries(clickhouse_common_io PUBLIC ch_contrib::ssh) endif() dbms_target_link_libraries(PUBLIC ch_contrib::abseil_swiss_tables) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 7a3192d1d9cf..d243a1d74b9e 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -329,12 +329,11 @@ void ClientBase::setupSignalHandler() } -ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, bool allow_multi_statements) const +ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, const Settings & settings, bool allow_multi_statements, bool is_interactive, bool ignore_error) { std::unique_ptr parser; ASTPtr res; - const auto & settings = global_context->getSettingsRef(); size_t max_length = 0; if (!allow_multi_statements) @@ -343,11 +342,11 @@ ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, bool allow_mu const Dialect & dialect = settings.dialect; if (dialect == Dialect::kusto) - parser = std::make_unique(end, global_context->getSettings().allow_settings_after_format_in_insert); + parser = std::make_unique(end, settings.allow_settings_after_format_in_insert); else if (dialect == Dialect::prql) parser = std::make_unique(max_length, settings.max_parser_depth, settings.max_parser_backtracks); else - parser = std::make_unique(end, global_context->getSettings().allow_settings_after_format_in_insert); + parser = 
std::make_unique(end, settings.allow_settings_after_format_in_insert); if (is_interactive || ignore_error) { @@ -712,11 +711,20 @@ void ClientBase::adjustSettings() settings.input_format_values_allow_data_after_semicolon.changed = false; } - /// If pager is specified then output_format_pretty_max_rows is ignored, this should be handled by pager. - if (!pager.empty() && !global_context->getSettingsRef().output_format_pretty_max_rows.changed) + /// Do not limit pretty format output in case of --pager specified. + if (!pager.empty()) { - settings.output_format_pretty_max_rows = std::numeric_limits::max(); - settings.output_format_pretty_max_rows.changed = false; + if (!global_context->getSettingsRef().output_format_pretty_max_rows.changed) + { + settings.output_format_pretty_max_rows = std::numeric_limits::max(); + settings.output_format_pretty_max_rows.changed = false; + } + + if (!global_context->getSettingsRef().output_format_pretty_max_value_width.changed) + { + settings.output_format_pretty_max_value_width = std::numeric_limits::max(); + settings.output_format_pretty_max_value_width.changed = false; + } } global_context->setSettings(settings); @@ -907,7 +915,11 @@ void ClientBase::processTextAsSingleQuery(const String & full_query) /// Some parts of a query (result output and formatting) are executed /// client-side. Thus we need to parse the query. const char * begin = full_query.data(); - auto parsed_query = parseQuery(begin, begin + full_query.size(), false); + auto parsed_query = parseQuery(begin, begin + full_query.size(), + global_context->getSettingsRef(), + /*allow_multi_statements=*/ false, + is_interactive, + ignore_error); if (!parsed_query) return; @@ -949,12 +961,8 @@ void ClientBase::processTextAsSingleQuery(const String & full_query) processError(full_query); } - void ClientBase::processOrdinaryQuery(const String & query_to_execute, ASTPtr parsed_query) { - if (fake_drop && parsed_query->as()) - return; - auto query = query_to_execute; /// Rewrite query only when we have query parameters. @@ -1964,7 +1972,7 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin } /// INSERT query for which data transfer is needed (not an INSERT SELECT or input()) is processed separately. 
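Note on the adjustSettings() hunk above: the two pretty-format limits are lifted for --pager only when the user has not changed them explicitly, and the relaxed values are re-marked as unchanged so they do not look like user overrides. A minimal standalone sketch of that pattern, using a simplified placeholder setting type rather than the real Settings class:

#include <cstdint>
#include <limits>

struct UInt64Setting
{
    uint64_t value = 10000;
    bool changed = false;          // true once the user sets it explicitly
};

void adjustPrettyLimitsForPager(UInt64Setting & max_rows, UInt64Setting & max_value_width, bool pager_enabled)
{
    if (!pager_enabled)
        return;

    if (!max_rows.changed)
    {
        max_rows.value = std::numeric_limits<uint64_t>::max();   // let the pager do the limiting
        max_rows.changed = false;   // redundant for this plain struct; the real setting type marks itself changed on assignment
    }

    if (!max_value_width.changed)
    {
        max_value_width.value = std::numeric_limits<uint64_t>::max();
        max_value_width.changed = false;
    }
}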
- if (insert && (!insert->select || input_function) && !insert->watch && !is_async_insert_with_inlined_data) + if (insert && (!insert->select || input_function) && !is_async_insert_with_inlined_data) { if (input_function && insert->format.empty()) throw Exception(ErrorCodes::INVALID_USAGE_OF_INPUT, "FORMAT must be specified for function input()"); @@ -2084,7 +2092,11 @@ MultiQueryProcessingStage ClientBase::analyzeMultiQueryText( this_query_end = this_query_begin; try { - parsed_query = parseQuery(this_query_end, all_queries_end, true); + parsed_query = parseQuery(this_query_end, all_queries_end, + global_context->getSettingsRef(), + /*allow_multi_statements=*/ true, + is_interactive, + ignore_error); } catch (Exception & e) { diff --git a/src/Client/ClientBase.h b/src/Client/ClientBase.h index 9ec87ababfc9..710a72a3238b 100644 --- a/src/Client/ClientBase.h +++ b/src/Client/ClientBase.h @@ -73,6 +73,7 @@ class ClientBase : public Poco::Util::Application, public IHints<2> void init(int argc, char ** argv); std::vector getAllRegisteredNames() const override { return cmd_options; } + static ASTPtr parseQuery(const char *& pos, const char * end, const Settings & settings, bool allow_multi_statements, bool is_interactive, bool ignore_error); protected: void runInteractive(); @@ -98,7 +99,6 @@ class ClientBase : public Poco::Util::Application, public IHints<2> ASTPtr parsed_query, std::optional echo_query_ = {}, bool report_error = false); static void adjustQueryEnd(const char *& this_query_end, const char * all_queries_end, uint32_t max_parser_depth, uint32_t max_parser_backtracks); - ASTPtr parseQuery(const char *& pos, const char * end, bool allow_multi_statements) const; static void setupSignalHandler(); bool executeMultiQuery(const String & all_queries_text); @@ -315,8 +315,6 @@ class ClientBase : public Poco::Util::Application, public IHints<2> QueryProcessingStage::Enum query_processing_stage; ClientInfo::QueryKind query_kind; - bool fake_drop = false; - struct HostAndPort { String host; diff --git a/src/Client/Connection.cpp b/src/Client/Connection.cpp index 180942e6b838..4cc6c0b61af3 100644 --- a/src/Client/Connection.cpp +++ b/src/Client/Connection.cpp @@ -67,7 +67,7 @@ Connection::~Connection() = default; Connection::Connection(const String & host_, UInt16 port_, const String & default_database_, const String & user_, const String & password_, - const ssh::SSHKey & ssh_private_key_, + [[maybe_unused]] const SSHKey & ssh_private_key_, const String & quota_key_, const String & cluster_, const String & cluster_secret_, @@ -76,7 +76,9 @@ Connection::Connection(const String & host_, UInt16 port_, Protocol::Secure secure_) : host(host_), port(port_), default_database(default_database_) , user(user_), password(password_) +#if USE_SSH , ssh_private_key(ssh_private_key_) +#endif , quota_key(quota_key_) , cluster(cluster_) , cluster_secret(cluster_secret_) @@ -141,7 +143,7 @@ void Connection::connect(const ConnectionTimeouts & timeouts) async_callback(socket->impl()->sockfd(), connection_timeout, AsyncEventTimeoutType::CONNECT, description, AsyncTaskExecutor::READ | AsyncTaskExecutor::WRITE | AsyncTaskExecutor::ERROR); if (auto err = socket->impl()->socketError()) - socket->impl()->error(err); // Throws an exception + socket->impl()->error(err); // Throws an exception /// NOLINT(readability-static-accessed-through-instance) socket->setBlocking(true); } @@ -276,17 +278,6 @@ void Connection::disconnect() } -String Connection::packStringForSshSign(String challenge) -{ - String message; - 
message.append(std::to_string(DBMS_TCP_PROTOCOL_VERSION)); - message.append(default_database); - message.append(user); - message.append(challenge); - return message; -} - - void Connection::sendHello() { /** Disallow control characters in user controlled parameters @@ -334,10 +325,10 @@ void Connection::sendHello() #endif } #if USE_SSH - /// Just inform server that we will authenticate using SSH keys. else if (!ssh_private_key.isEmpty()) { - writeStringBinary(fmt::format("{}{}", EncodedUserInfo::SSH_KEY_AUTHENTICAION_MARKER, user), *out); + /// Inform server that we will authenticate using SSH keys. + writeStringBinary(String(EncodedUserInfo::SSH_KEY_AUTHENTICAION_MARKER) + user, *out); writeStringBinary(password, *out); performHandshakeForSSHAuth(); @@ -361,9 +352,9 @@ void Connection::sendAddendum() } +#if USE_SSH void Connection::performHandshakeForSSHAuth() { -#if USE_SSH String challenge; { writeVarUInt(Protocol::Client::SSHChallengeRequest, *out); @@ -388,12 +379,23 @@ void Connection::performHandshakeForSSHAuth() } writeVarUInt(Protocol::Client::SSHChallengeResponse, *out); - String to_sign = packStringForSshSign(challenge); + + auto pack_string_for_ssh_sign = [&](String challenge_) + { + String message; + message.append(std::to_string(DBMS_TCP_PROTOCOL_VERSION)); + message.append(default_database); + message.append(user); + message.append(challenge_); + return message; + }; + + String to_sign = pack_string_for_ssh_sign(challenge); String signature = ssh_private_key.signString(to_sign); writeStringBinary(signature, *out); out->next(); -#endif } +#endif void Connection::receiveHello(const Poco::Timespan & handshake_timeout) diff --git a/src/Client/Connection.h b/src/Client/Connection.h index 5d0411027a1c..2cd325afed29 100644 --- a/src/Client/Connection.h +++ b/src/Client/Connection.h @@ -1,10 +1,9 @@ #pragma once - #include -#include #include +#include #include #include @@ -53,7 +52,7 @@ class Connection : public IServerConnection Connection(const String & host_, UInt16 port_, const String & default_database_, const String & user_, const String & password_, - const ssh::SSHKey & ssh_private_key_, + const SSHKey & ssh_private_key_, const String & quota_key_, const String & cluster_, const String & cluster_secret_, @@ -170,7 +169,9 @@ class Connection : public IServerConnection String default_database; String user; String password; - ssh::SSHKey ssh_private_key; +#if USE_SSH + SSHKey ssh_private_key; +#endif String quota_key; /// For inter-server authorization @@ -265,9 +266,10 @@ class Connection : public IServerConnection void connect(const ConnectionTimeouts & timeouts); void sendHello(); - String packStringForSshSign(String challenge); +#if USE_SSH void performHandshakeForSSHAuth(); +#endif void sendAddendum(); void receiveHello(const Poco::Timespan & handshake_timeout); diff --git a/src/Client/ConnectionParameters.cpp b/src/Client/ConnectionParameters.cpp index 16911f97e84a..774f3375f63f 100644 --- a/src/Client/ConnectionParameters.cpp +++ b/src/Client/ConnectionParameters.cpp @@ -1,11 +1,10 @@ #include "ConnectionParameters.h" -#include + #include #include #include #include #include -#include #include #include #include @@ -88,19 +87,19 @@ ConnectionParameters::ConnectionParameters(const Poco::Util::AbstractConfigurati } else { - std::string prompt{"Enter your private key passphrase (leave empty for no passphrase): "}; + std::string prompt{"Enter your SSH private key passphrase (leave empty for no passphrase): "}; char buf[1000] = {}; if (auto * result = 
readpassphrase(prompt.c_str(), buf, sizeof(buf), 0)) passphrase = result; } - ssh::SSHKey key = ssh::SSHKeyFactory::makePrivateFromFile(filename, passphrase); + SSHKey key = SSHKeyFactory::makePrivateKeyFromFile(filename, passphrase); if (!key.isPrivate()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Found public key in file: {} but expected private", filename); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "File {} did not contain a private key (is it a public key?)", filename); ssh_private_key = std::move(key); #else - throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSH is disabled, because ClickHouse is built without OpenSSL"); + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSH is disabled, because ClickHouse is built without libssh"); #endif } diff --git a/src/Client/ConnectionParameters.h b/src/Client/ConnectionParameters.h index 5f375f09c83f..f23522d48b3b 100644 --- a/src/Client/ConnectionParameters.h +++ b/src/Client/ConnectionParameters.h @@ -1,9 +1,10 @@ #pragma once -#include +#include #include #include -#include + +#include namespace Poco::Util { @@ -20,7 +21,7 @@ struct ConnectionParameters std::string user; std::string password; std::string quota_key; - ssh::SSHKey ssh_private_key; + SSHKey ssh_private_key; Protocol::Secure security = Protocol::Secure::Disable; Protocol::Compression compression = Protocol::Compression::Enable; ConnectionTimeouts timeouts; diff --git a/src/Client/ConnectionPool.h b/src/Client/ConnectionPool.h index 574c4992d752..d35c25524616 100644 --- a/src/Client/ConnectionPool.h +++ b/src/Client/ConnectionPool.h @@ -123,7 +123,7 @@ class ConnectionPool : public IConnectionPool, private PoolBase { return std::make_shared( host, port, - default_database, user, password, ssh::SSHKey(), quota_key, + default_database, user, password, SSHKey(), quota_key, cluster, cluster_secret, client_name, compression, secure); } diff --git a/src/Client/ConnectionPoolWithFailover.cpp b/src/Client/ConnectionPoolWithFailover.cpp index 11bdb6108caf..fb895d17763f 100644 --- a/src/Client/ConnectionPoolWithFailover.cpp +++ b/src/Client/ConnectionPoolWithFailover.cpp @@ -207,11 +207,17 @@ std::vector ConnectionPoolWithFailover::g max_entries = nested_pools.size(); } else if (pool_mode == PoolMode::GET_ONE) + { max_entries = 1; + } else if (pool_mode == PoolMode::GET_MANY) + { max_entries = settings.max_parallel_replicas; + } else + { throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Unknown pool allocation mode"); + } if (!priority_func) priority_func = makeGetPriorityFunc(settings); diff --git a/src/Client/HedgedConnectionsFactory.cpp b/src/Client/HedgedConnectionsFactory.cpp index f5b074a02579..0fa2bc129242 100644 --- a/src/Client/HedgedConnectionsFactory.cpp +++ b/src/Client/HedgedConnectionsFactory.cpp @@ -82,7 +82,7 @@ std::vector HedgedConnectionsFactory::getManyConnections(PoolMode } case PoolMode::GET_MANY: { - max_entries = max_parallel_replicas; + max_entries = std::min(max_parallel_replicas, shuffled_pools.size()); break; } } diff --git a/src/Client/HedgedConnectionsFactory.h b/src/Client/HedgedConnectionsFactory.h index 2b7ec3f3fe56..c5bcbdf0689c 100644 --- a/src/Client/HedgedConnectionsFactory.h +++ b/src/Client/HedgedConnectionsFactory.h @@ -158,7 +158,7 @@ class HedgedConnectionsFactory /// checking the number of requested replicas that are still in process). 
size_t requested_connections_count = 0; - const size_t max_parallel_replicas = 0; + const size_t max_parallel_replicas = 1; const bool skip_unavailable_shards = false; }; diff --git a/src/Client/LocalConnection.cpp b/src/Client/LocalConnection.cpp index afcaa4d60985..c7494e316057 100644 --- a/src/Client/LocalConnection.cpp +++ b/src/Client/LocalConnection.cpp @@ -1,11 +1,18 @@ #include "LocalConnection.h" +#include +#include #include #include #include +#include #include #include #include #include +#include +#include +#include +#include #include #include #include @@ -22,12 +29,13 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -LocalConnection::LocalConnection(ContextPtr context_, bool send_progress_, bool send_profile_events_, const String & server_display_name_) +LocalConnection::LocalConnection(ContextPtr context_, ReadBuffer * in_, bool send_progress_, bool send_profile_events_, const String & server_display_name_) : WithContext(context_) , session(getContext(), ClientInfo::Interface::LOCAL) , send_progress(send_progress_) , send_profile_events(send_profile_events_) , server_display_name(server_display_name_) + , in(in_) { /// Authenticate and create a context to execute queries. session.authenticate("default", "", Poco::Net::SocketAddress{}); @@ -130,6 +138,57 @@ void LocalConnection::sendQuery( next_packet_type.reset(); + /// Prepare input() function + query_context->setInputInitializer([this] (ContextPtr context, const StoragePtr & input_storage) + { + if (context != query_context) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected context in Input initializer"); + + auto metadata_snapshot = input_storage->getInMemoryMetadataPtr(); + Block sample = metadata_snapshot->getSampleBlock(); + + next_packet_type = Protocol::Server::Data; + state->block = sample; + + String current_format = "Values"; + const char * begin = state->query.data(); + auto parsed_query = ClientBase::parseQuery(begin, begin + state->query.size(), + context->getSettingsRef(), + /*allow_multi_statements=*/ false, + /*is_interactive=*/ false, + /*ignore_error=*/ false); + if (const auto * insert = parsed_query->as()) + { + if (!insert->format.empty()) + current_format = insert->format; + } + + auto source = context->getInputFormat(current_format, *in, sample, context->getSettingsRef().max_insert_block_size); + Pipe pipe(source); + + auto columns_description = metadata_snapshot->getColumns(); + if (columns_description.hasDefaults()) + { + pipe.addSimpleTransform([&](const Block & header) + { + return std::make_shared(header, columns_description, *source, context); + }); + } + + state->input_pipeline = std::make_unique(std::move(pipe)); + state->input_pipeline_executor = std::make_unique(*state->input_pipeline); + + }); + query_context->setInputBlocksReaderCallback([this] (ContextPtr context) -> Block + { + if (context != query_context) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected context in InputBlocksReader"); + + Block block; + state->input_pipeline_executor->pull(block); + return block; + }); + try { state->io = executeQuery(state->query, query_context, QueryFlags{}, state->stage).second; @@ -537,11 +596,12 @@ void LocalConnection::sendMergeTreeReadTaskResponse(const ParallelReadResponse & ServerConnectionPtr LocalConnection::createConnection( const ConnectionParameters &, ContextPtr current_context, + ReadBuffer * in, bool send_progress, bool send_profile_events, const String & server_display_name) { - return std::make_unique(current_context, send_progress, send_profile_events, 
server_display_name); + return std::make_unique(current_context, in, send_progress, send_profile_events, server_display_name); } diff --git a/src/Client/LocalConnection.h b/src/Client/LocalConnection.h index 9c2d0a81d8d5..a23450709adf 100644 --- a/src/Client/LocalConnection.h +++ b/src/Client/LocalConnection.h @@ -15,6 +15,8 @@ namespace DB class PullingAsyncPipelineExecutor; class PushingAsyncPipelineExecutor; class PushingPipelineExecutor; +class QueryPipeline; +class ReadBuffer; /// State of query processing. struct LocalQueryState @@ -31,6 +33,10 @@ struct LocalQueryState std::unique_ptr executor; std::unique_ptr pushing_executor; std::unique_ptr pushing_async_executor; + /// For sending data for input() function. + std::unique_ptr input_pipeline; + std::unique_ptr input_pipeline_executor; + InternalProfileEventsQueuePtr profile_queue; std::unique_ptr exception; @@ -64,7 +70,11 @@ class LocalConnection : public IServerConnection, WithContext { public: explicit LocalConnection( - ContextPtr context_, bool send_progress_ = false, bool send_profile_events_ = false, const String & server_display_name_ = ""); + ContextPtr context_, + ReadBuffer * in_, + bool send_progress_, + bool send_profile_events_, + const String & server_display_name_); ~LocalConnection() override; @@ -73,6 +83,7 @@ class LocalConnection : public IServerConnection, WithContext static ServerConnectionPtr createConnection( const ConnectionParameters & connection_parameters, ContextPtr current_context, + ReadBuffer * in = nullptr, bool send_progress = false, bool send_profile_events = false, const String & server_display_name = ""); @@ -158,5 +169,7 @@ class LocalConnection : public IServerConnection, WithContext String current_database; ProfileEvents::ThreadIdToCountersSnapshot last_sent_snapshots; + + ReadBuffer * in; }; } diff --git a/src/Columns/ColumnAggregateFunction.cpp b/src/Columns/ColumnAggregateFunction.cpp index 801aa8a91bb6..f7e6b1a1ccc9 100644 --- a/src/Columns/ColumnAggregateFunction.cpp +++ b/src/Columns/ColumnAggregateFunction.cpp @@ -1,7 +1,13 @@ #include +#include #include #include +#include +#include +#include +#include +#include #include #include #include @@ -11,10 +17,6 @@ #include #include #include -#include -#include -#include -#include namespace DB @@ -109,6 +111,11 @@ ConstArenas concatArenas(const ConstArenas & array, ConstArenaPtr arena) } +std::string ColumnAggregateFunction::getName() const +{ + return "AggregateFunction(" + func->getName() + ")"; +} + MutableColumnPtr ColumnAggregateFunction::convertToValues(MutableColumnPtr column) { /** If the aggregate function returns an unfinalized/unfinished state, diff --git a/src/Columns/ColumnAggregateFunction.h b/src/Columns/ColumnAggregateFunction.h index ae7c5f0b54e5..a75b27e835c2 100644 --- a/src/Columns/ColumnAggregateFunction.h +++ b/src/Columns/ColumnAggregateFunction.h @@ -1,17 +1,9 @@ #pragma once -#include - +#include #include -#include - #include - -#include -#include -#include - -#include +#include namespace DB { @@ -26,6 +18,12 @@ using ArenaPtr = std::shared_ptr; using ConstArenaPtr = std::shared_ptr; using ConstArenas = std::vector; +class Context; +using ContextPtr = std::shared_ptr; + +struct ColumnWithTypeAndName; +using ColumnsWithTypeAndName = std::vector; + /** Column of states of aggregate functions. * Presented as an array of pointers to the states of aggregate functions (data). 
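The ColumnAggregateFunction changes above trim the header down to forward declarations and move getName() out of line, so the heavy aggregate-function headers are needed only in the .cpp file. A small sketch of the same header-hygiene pattern with hypothetical names (Engine/Widget are stand-ins, not ClickHouse classes):

// engine.h (assumed for the sketch)
#pragma once
#include <string>

class Engine
{
public:
    std::string getName() const { return "Engine"; }
};

// widget.h
#pragma once
#include <memory>
#include <string>

class Engine;                                  // forward declaration is enough for a smart-pointer member
using EnginePtr = std::shared_ptr<Engine>;

class Widget
{
public:
    std::string getName() const;               // defined out of line, where Engine is a complete type
private:
    EnginePtr engine;
};

// widget.cpp
#include "widget.h"
#include "engine.h"                            // full Engine definition is needed only here

std::string Widget::getName() const
{
    return "Widget(" + engine->getName() + ")";
}

Everything that includes widget.h no longer has to recompile when engine.h changes, which is the point of moving the one-line getName() body out of the header.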
@@ -121,7 +119,7 @@ class ColumnAggregateFunction final : public COWHelpergetName() + ")"; } + std::string getName() const override; const char * getFamilyName() const override { return "AggregateFunction"; } TypeIndex getDataType() const override { return TypeIndex::AggregateFunction; } diff --git a/src/Columns/ColumnObject.cpp b/src/Columns/ColumnObject.cpp index 0a4f90c22625..90ef974010cb 100644 --- a/src/Columns/ColumnObject.cpp +++ b/src/Columns/ColumnObject.cpp @@ -940,7 +940,7 @@ void ColumnObject::addNestedSubcolumn(const PathInData & key, const FieldInfo & if (nested_node) { /// Find any leaf of Nested subcolumn. - const auto * leaf = subcolumns.findLeaf(nested_node, [&](const auto &) { return true; }); + const auto * leaf = Subcolumns::findLeaf(nested_node, [&](const auto &) { return true; }); assert(leaf); /// Recreate subcolumn with default values and the same sizes of arrays. @@ -983,7 +983,7 @@ const ColumnObject::Subcolumns::Node * ColumnObject::getLeafOfTheSameNested(cons while (current_node) { /// Try to find the first Nested up to the current node. - const auto * node_nested = subcolumns.findParent(current_node, + const auto * node_nested = Subcolumns::findParent(current_node, [](const auto & candidate) { return candidate.isNested(); }); if (!node_nested) @@ -993,7 +993,7 @@ const ColumnObject::Subcolumns::Node * ColumnObject::getLeafOfTheSameNested(cons /// for the last rows. /// If there are no leaves, skip current node and find /// the next node up to the current. - leaf = subcolumns.findLeaf(node_nested, + leaf = Subcolumns::findLeaf(node_nested, [&](const auto & candidate) { return candidate.data.size() > old_size; diff --git a/src/Columns/ColumnObject.h b/src/Columns/ColumnObject.h index a06235a45001..e2936b27994f 100644 --- a/src/Columns/ColumnObject.h +++ b/src/Columns/ColumnObject.h @@ -1,12 +1,10 @@ #pragma once +#include #include #include -#include -#include -#include -#include #include +#include #include diff --git a/src/Columns/ColumnSparse.cpp b/src/Columns/ColumnSparse.cpp index b9a173fd92c5..3c08ebbf8b45 100644 --- a/src/Columns/ColumnSparse.cpp +++ b/src/Columns/ColumnSparse.cpp @@ -346,7 +346,7 @@ ColumnPtr ColumnSparse::filter(const Filter & filt, ssize_t) const } auto res_values = values->filter(values_filter, values_result_size_hint); - return this->create(res_values, std::move(res_offsets), res_offset); + return create(res_values, std::move(res_offsets), res_offset); } void ColumnSparse::expand(const Filter & mask, bool inverted) diff --git a/src/Common/AsynchronousMetrics.cpp b/src/Common/AsynchronousMetrics.cpp index 0b9be18c84ee..ab54b180fbfc 100644 --- a/src/Common/AsynchronousMetrics.cpp +++ b/src/Common/AsynchronousMetrics.cpp @@ -671,7 +671,7 @@ void AsynchronousMetrics::update(TimePoint update_time, bool force_update) ReadableSize(rss), ReadableSize(difference)); - total_memory_tracker.setRSS(rss, free_memory_in_allocator_arenas); + MemoryTracker::setRSS(rss, free_memory_in_allocator_arenas); } } diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp index af609fabb8f3..97a339b2bace 100644 --- a/src/Common/ErrorCodes.cpp +++ b/src/Common/ErrorCodes.cpp @@ -597,6 +597,7 @@ M(716, CANNOT_FORGET_PARTITION) \ M(717, EXPERIMENTAL_FEATURE_ERROR) \ M(718, TOO_SLOW_PARSING) \ + M(719, QUERY_CACHE_USED_WITH_SYSTEM_TABLE) \ \ M(900, DISTRIBUTED_CACHE_ERROR) \ M(901, CANNOT_USE_DISTRIBUTED_CACHE) \ diff --git a/src/Common/FileChecker.cpp b/src/Common/FileChecker.cpp index 098ea4b1ac46..5ecbe44530bc 100644 --- a/src/Common/FileChecker.cpp +++ 
b/src/Common/FileChecker.cpp @@ -10,6 +10,8 @@ #include #include +#include + namespace fs = std::filesystem; diff --git a/src/Common/HTTPConnectionPool.cpp b/src/Common/HTTPConnectionPool.cpp index cd2505df7f35..167aeee68f33 100644 --- a/src/Common/HTTPConnectionPool.cpp +++ b/src/Common/HTTPConnectionPool.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -83,17 +84,15 @@ namespace } - size_t roundUp(size_t x, size_t rounding) + constexpr size_t roundUp(size_t x, size_t rounding) { chassert(rounding > 0); - return (x + (rounding - 1)) / rounding * rounding; - } - - - Poco::Timespan divide(const Poco::Timespan span, int divisor) - { - return Poco::Timespan(Poco::Timestamp::TimeDiff(span.totalMicroseconds() / divisor)); + return (x + rounding) / rounding * rounding; } + static_assert(roundUp(10000, 100) == 10100); + static_assert(roundUp(10001, 100) == 10100); + static_assert(roundUp(10099, 100) == 10100); + static_assert(roundUp(10100, 100) == 10200); } namespace DB @@ -202,8 +201,9 @@ class ConnectionGroup if (total_connections_in_group >= limits.warning_limit && total_connections_in_group >= mute_warning_until) { - LOG_WARNING(log, "Too many active sessions in group {}, count {}, warning limit {}", type, total_connections_in_group, limits.warning_limit); - mute_warning_until = roundUp(total_connections_in_group, limits.warning_step); + mute_warning_until = roundUp(total_connections_in_group, HTTPConnectionPools::Limits::warning_step); + LOG_WARNING(log, "Too many active sessions in group {}, count {}, warning limit {}, next warning at {}", + type, total_connections_in_group, limits.warning_limit, mute_warning_until); } } @@ -213,7 +213,8 @@ class ConnectionGroup --total_connections_in_group; - const size_t reduced_warning_limit = limits.warning_limit > 10 ? limits.warning_limit - 10 : 1; + const size_t gap = 20; + const size_t reduced_warning_limit = limits.warning_limit > gap ? limits.warning_limit - gap : 1; if (mute_warning_until > 0 && total_connections_in_group < reduced_warning_limit) { LOG_WARNING(log, "Sessions count is OK in the group {}, count {}", type, total_connections_in_group); @@ -273,9 +274,15 @@ class EndpointConnectionPool : public std::enable_shared_from_this; + using Session::mustReconnect; + + void markAsExpired() + { + isExpired = true; + } + void reconnect() override { - ProfileEvents::increment(metrics.reset); Session::close(); if (auto lock = pool.lock()) @@ -283,6 +290,7 @@ class EndpointConnectionPool : public std::enable_shared_from_thisgetConnection(timeouts); Session::assign(*new_connection); + Session::setKeepAliveRequest(Session::getKeepAliveRequest() + 1); } else { @@ -304,6 +312,12 @@ class EndpointConnectionPool : public std::enable_shared_from_thisatConnectionDestroy(*this); - else - ProfileEvents::increment(metrics.reset); + group->atConnectionDestroy(); + + if (!isExpired) + if (auto lock = pool.lock()) + lock->atConnectionDestroy(*this); CurrentMetrics::sub(metrics.active_count); } @@ -404,10 +420,18 @@ class EndpointConnectionPool : public std::enable_shared_from_this - explicit PooledConnection(EndpointConnectionPool::WeakPtr pool_, IHTTPConnectionPoolForEndpoint::Metrics metrics_, Args &&... args) - : Session(args...), pool(std::move(pool_)), metrics(std::move(metrics_)) + explicit PooledConnection( + EndpointConnectionPool::WeakPtr pool_, + ConnectionGroup::Ptr group_, + IHTTPConnectionPoolForEndpoint::Metrics metrics_, + Args &&... args) + : Session(std::forward(args)...) 
+ , pool(std::move(pool_)) + , group(group_) + , metrics(std::move(metrics_)) { CurrentMetrics::add(metrics.active_count); + group->atConnectionCreate(); } template @@ -433,10 +457,12 @@ class EndpointConnectionPool : public std::enable_shared_from_this expired_connections; SCOPE_EXIT({ @@ -494,8 +519,9 @@ class EndpointConnectionPool : public std::enable_shared_from_this expired_connections; SCOPE_EXIT({ @@ -535,25 +560,29 @@ class EndpointConnectionPool : public std::enable_shared_from_this & expired_connections, Poco::Timestamp now) TSA_REQUIRES(mutex) + size_t wipeExpiredImpl(std::vector & expired_connections) TSA_REQUIRES(mutex) { + SCOPE_EXIT({ + CurrentMetrics::sub(getMetrics().stored_count, expired_connections.size()); + ProfileEvents::increment(getMetrics().expired, expired_connections.size()); + }); + + auto isSoftLimitReached = group->isSoftLimitReached(); while (!stored_connections.empty()) { auto connection = stored_connections.top(); - if (!isExpired(now, connection)) + if (!isExpired(connection, isSoftLimitReached)) return stored_connections.size(); stored_connections.pop(); + connection->markAsExpired(); expired_connections.push_back(connection); } - CurrentMetrics::sub(getMetrics().stored_count, expired_connections.size()); - ProfileEvents::increment(getMetrics().expired, expired_connections.size()); - return stored_connections.size(); } @@ -569,57 +598,53 @@ class EndpointConnectionPool : public std::enable_shared_from_thisisSoftLimitReached()) - return now > (connection->getLastRequest() + divide(connection->getKeepAliveTimeout(), 10)); - return now > connection->getLastRequest() + connection->getKeepAliveTimeout(); + if (isSoftLimitReached) + return connection->isKeepAliveExpired(0.1); + return connection->isKeepAliveExpired(0.8); } - ConnectionPtr allocateNewConnection() + + ConnectionPtr prepareNewConnection(const ConnectionTimeouts & timeouts) { - ConnectionPtr connection = PooledConnection::create(this->getWeakFromThis(), getMetrics(), host, port); + auto connection = PooledConnection::create(this->getWeakFromThis(), group, getMetrics(), host, port); + connection->setKeepAlive(true); + setTimeouts(*connection, timeouts); if (!proxy_configuration.isEmpty()) { connection->setProxyConfig(proxyConfigurationToPocoProxyConfig(proxy_configuration)); } - group->atConnectionCreate(); - - return connection; - } - - ConnectionPtr prepareNewConnection(const ConnectionTimeouts & timeouts) - { auto address = HostResolversPool::instance().getResolver(host)->resolve(); - - auto session = allocateNewConnection(); - - setTimeouts(*session, timeouts); - session->setResolvedHost(*address); + connection->setResolvedHost(*address); try { auto timer = CurrentThread::getProfileEvents().timer(getMetrics().elapsed_microseconds); - session->doConnect(); + connection->doConnect(); } catch (...) 
{ address.setFail(); ProfileEvents::increment(getMetrics().errors); - session->reset(); + connection->reset(); throw; } ProfileEvents::increment(getMetrics().created); - return session; + return connection; } void atConnectionDestroy(PooledConnection & connection) { - group->atConnectionDestroy(); + if (connection.getKeepAliveRequest() >= connection.getKeepAliveMaxRequests()) + { + ProfileEvents::increment(getMetrics().expired, 1); + return; + } if (!connection.connected() || connection.mustReconnect() || !connection.isCompleted() || connection.buffered() || group->isStoreLimitReached()) @@ -628,17 +653,17 @@ class EndpointConnectionPool : public std::enable_shared_from_thisgetWeakFromThis(), group, getMetrics(), host, port); connection_to_store->assign(connection); - CurrentMetrics::add(getMetrics().stored_count, 1); - ProfileEvents::increment(getMetrics().preserved, 1); - { MemoryTrackerSwitcher switcher{&total_memory_tracker}; std::lock_guard lock(mutex); stored_connections.push(connection_to_store); } + + CurrentMetrics::add(getMetrics().stored_count, 1); + ProfileEvents::increment(getMetrics().preserved, 1); } @@ -726,14 +751,13 @@ createConnectionPool(ConnectionGroup::Ptr group, std::string host, UInt16 port, class HTTPConnectionPools::Impl { private: - const size_t DEFAULT_WIPE_TIMEOUT_SECONDS = 5 * 60; + const size_t DEFAULT_WIPE_TIMEOUT_SECONDS = 10 * 60; const Poco::Timespan wipe_timeout = Poco::Timespan(DEFAULT_WIPE_TIMEOUT_SECONDS, 0); ConnectionGroup::Ptr disk_group = std::make_shared(HTTPConnectionGroupType::DISK); ConnectionGroup::Ptr storage_group = std::make_shared(HTTPConnectionGroupType::STORAGE); ConnectionGroup::Ptr http_group = std::make_shared(HTTPConnectionGroupType::HTTP); - /// If multiple mutexes are held simultaneously, /// they should be locked in this order: /// HTTPConnectionPools::mutex, then EndpointConnectionPool::mutex, then ConnectionGroup::mutex. 
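The pool logic above replaces the old hand-rolled arithmetic with connection->isKeepAliveExpired(reliability). Judging by the code it replaces (timeout/10 when the soft limit is reached, the full timeout otherwise) and the 0.1/0.8 arguments now passed, the reliability argument appears to be the fraction of the keep-alive timeout after which a pooled connection is treated as expired. A standalone sketch of that check with simplified types; the real connection additionally tracks a per-connection request count against the keep-alive max-requests limit:

#include <chrono>

struct ConnectionState
{
    std::chrono::steady_clock::time_point last_request;
    std::chrono::milliseconds keep_alive_timeout{10'000};
};

// Expired once `reliability * keep_alive_timeout` has elapsed since the last request,
// so the pool never hands out a connection that is about to be closed by the server.
bool isKeepAliveExpiredSketch(const ConnectionState & connection, double reliability, std::chrono::steady_clock::time_point now)
{
    auto margin = std::chrono::duration_cast<std::chrono::milliseconds>(connection.keep_alive_timeout * reliability);
    return now > connection.last_request + margin;
}

// wipeExpiredImpl() above effectively calls this with 0.1 when the soft connection limit is reached
// (drop idle connections aggressively) and 0.8 otherwise.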
diff --git a/src/Common/HashTable/HashMap.h b/src/Common/HashTable/HashMap.h index 5f4cb3968228..dc601bf13198 100644 --- a/src/Common/HashTable/HashMap.h +++ b/src/Common/HashTable/HashMap.h @@ -207,7 +207,7 @@ class HashMapTable : public HashTable void ALWAYS_INLINE mergeToViaEmplace(Self & that, Func && func) { DB::PrefetchingHelper prefetching; - size_t prefetch_look_ahead = prefetching.getInitialLookAheadValue(); + size_t prefetch_look_ahead = DB::PrefetchingHelper::getInitialLookAheadValue(); size_t i = 0; auto prefetch_it = advanceIterator(this->begin(), prefetch_look_ahead); @@ -216,10 +216,10 @@ class HashMapTable : public HashTable { if constexpr (prefetch) { - if (i == prefetching.iterationsToMeasure()) + if (i == DB::PrefetchingHelper::iterationsToMeasure()) { prefetch_look_ahead = prefetching.calcPrefetchLookAhead(); - prefetch_it = advanceIterator(prefetch_it, prefetch_look_ahead - prefetching.getInitialLookAheadValue()); + prefetch_it = advanceIterator(prefetch_it, prefetch_look_ahead - DB::PrefetchingHelper::getInitialLookAheadValue()); } if (prefetch_it != end) diff --git a/src/Common/QueryProfiler.cpp b/src/Common/QueryProfiler.cpp index 61d4d7d609c6..ca79b9433b59 100644 --- a/src/Common/QueryProfiler.cpp +++ b/src/Common/QueryProfiler.cpp @@ -211,23 +211,13 @@ void Timer::cleanup() #endif template -QueryProfilerBase::QueryProfilerBase(UInt64 thread_id, int clock_type, UInt32 period, int pause_signal_) +QueryProfilerBase::QueryProfilerBase([[maybe_unused]] UInt64 thread_id, [[maybe_unused]] int clock_type, [[maybe_unused]] UInt32 period, [[maybe_unused]] int pause_signal_) : log(getLogger("QueryProfiler")) , pause_signal(pause_signal_) { #if defined(SANITIZER) - UNUSED(thread_id); - UNUSED(clock_type); - UNUSED(period); - UNUSED(pause_signal); - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "QueryProfiler disabled because they cannot work under sanitizers"); #elif defined(__APPLE__) - UNUSED(thread_id); - UNUSED(clock_type); - UNUSED(period); - UNUSED(pause_signal); - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "QueryProfiler cannot work on OSX"); #else /// Sanity check. 
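Several call sites in these hunks (for example DB::PrefetchingHelper::getInitialLookAheadValue and the earlier MemoryTracker::setRSS) switch from calling static members through an instance to the qualified Class:: form. Both spellings are valid C++; a tiny sketch with a hypothetical stand-in type showing the equivalence:

#include <cstddef>

struct PrefetchingHelperLike                    // hypothetical stand-in, not the real class
{
    static size_t getInitialLookAheadValue() { return 64; }
};

size_t viaInstance(const PrefetchingHelperLike & helper)
{
    return helper.getInitialLookAheadValue();   // legal, but hides that no instance state is involved
}

size_t viaClass()
{
    return PrefetchingHelperLike::getInitialLookAheadValue();   // qualified form used after this change
}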
@@ -261,6 +251,20 @@ QueryProfilerBase::QueryProfilerBase(UInt64 thread_id, int clock_t #endif } + +template +void QueryProfilerBase::setPeriod([[maybe_unused]] UInt32 period_) +{ +#if defined(SANITIZER) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "QueryProfiler disabled because they cannot work under sanitizers"); +#elif defined(__APPLE__) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "QueryProfiler cannot work on OSX"); +#else + timer.set(period_); +#endif + +} + template QueryProfilerBase::~QueryProfilerBase() { diff --git a/src/Common/QueryProfiler.h b/src/Common/QueryProfiler.h index 254b11137ccb..ea4cc73bca63 100644 --- a/src/Common/QueryProfiler.h +++ b/src/Common/QueryProfiler.h @@ -57,6 +57,8 @@ class QueryProfilerBase QueryProfilerBase(UInt64 thread_id, int clock_type, UInt32 period, int pause_signal_); ~QueryProfilerBase(); + void setPeriod(UInt32 period_); + private: void cleanup(); diff --git a/src/Common/SSH/Wrappers.cpp b/src/Common/SSHWrapper.cpp similarity index 66% rename from src/Common/SSH/Wrappers.cpp rename to src/Common/SSHWrapper.cpp index a9b9f758c6e3..0ed266f215cd 100644 --- a/src/Common/SSH/Wrappers.cpp +++ b/src/Common/SSHWrapper.cpp @@ -1,4 +1,5 @@ -#include +#include + # if USE_SSH # include @@ -10,6 +11,14 @@ # pragma clang diagnostic pop +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LIBSSH_ERROR; +} + namespace { @@ -18,17 +27,19 @@ class SSHString public: explicit SSHString(std::string_view input) { - string = ssh_string_new(input.size()); - ssh_string_fill(string, input.data(), input.size()); + if (string = ssh_string_new(input.size()); string == nullptr) + throw Exception(ErrorCodes::LIBSSH_ERROR, "Can't create SSHString"); + if (int rc = ssh_string_fill(string, input.data(), input.size()); rc != SSH_OK) + throw Exception(ErrorCodes::LIBSSH_ERROR, "Can't create SSHString"); } - explicit SSHString(ssh_string c_other) { string = c_other; } + explicit SSHString(ssh_string other) { string = other; } ssh_string get() { return string; } String toString() { - return String(ssh_string_get_char(string), ssh_string_len(string)); + return {ssh_string_get_char(string), ssh_string_len(string)}; } ~SSHString() @@ -42,46 +53,28 @@ class SSHString } -namespace DB -{ - -namespace ErrorCodes -{ - extern const int LIBSSH_ERROR; -} - -namespace ssh -{ - -SSHKey SSHKeyFactory::makePrivateFromFile(String filename, String passphrase) +SSHKey SSHKeyFactory::makePrivateKeyFromFile(String filename, String passphrase) { ssh_key key; - int rc = ssh_pki_import_privkey_file(filename.c_str(), passphrase.c_str(), nullptr, nullptr, &key); - if (rc != SSH_OK) - { + if (int rc = ssh_pki_import_privkey_file(filename.c_str(), passphrase.c_str(), nullptr, nullptr, &key); rc != SSH_OK) throw Exception(ErrorCodes::LIBSSH_ERROR, "Can't import SSH private key from file"); - } return SSHKey(key); } -SSHKey SSHKeyFactory::makePublicFromFile(String filename) +SSHKey SSHKeyFactory::makePublicKeyFromFile(String filename) { ssh_key key; - int rc = ssh_pki_import_pubkey_file(filename.c_str(), &key); - if (rc != SSH_OK) + if (int rc = ssh_pki_import_pubkey_file(filename.c_str(), &key); rc != SSH_OK) throw Exception(ErrorCodes::LIBSSH_ERROR, "Can't import SSH public key from file"); - return SSHKey(key); } -SSHKey SSHKeyFactory::makePublicFromBase64(String base64_key, String type_name) +SSHKey SSHKeyFactory::makePublicKeyFromBase64(String base64_key, String type_name) { ssh_key key; auto key_type = ssh_key_type_from_name(type_name.c_str()); - int rc = 
ssh_pki_import_pubkey_base64(base64_key.c_str(), key_type, &key); - if (rc != SSH_OK) + if (int rc = ssh_pki_import_pubkey_base64(base64_key.c_str(), key_type, &key); rc != SSH_OK) throw Exception(ErrorCodes::LIBSSH_ERROR, "Bad SSH public key provided"); - return SSHKey(key); } @@ -90,6 +83,12 @@ SSHKey::SSHKey(const SSHKey & other) key = ssh_key_dup(other.key); } +SSHKey::SSHKey(SSHKey && other) noexcept +{ + key = other.key; + other.key = nullptr; +} + SSHKey & SSHKey::operator=(const SSHKey & other) { ssh_key_free(key); @@ -119,13 +118,11 @@ bool SSHKey::isEqual(const SSHKey & other) const String SSHKey::signString(std::string_view input) const { SSHString input_str(input); - ssh_string c_output = nullptr; - int rc = pki_sign_string(key, input_str.get(), &c_output); - if (rc != SSH_OK) + ssh_string output = nullptr; + if (int rc = pki_sign_string(key, input_str.get(), &output); rc != SSH_OK) throw Exception(ErrorCodes::LIBSSH_ERROR, "Error singing with ssh key"); - - SSHString output(c_output); - return output.toString(); + SSHString output_str(output); + return output_str.toString(); } bool SSHKey::verifySignature(std::string_view signature, std::string_view original) const @@ -149,18 +146,15 @@ namespace { struct CStringDeleter { - [[maybe_unused]] void operator()(char * ptr) const { std::free(ptr); } + void operator()(char * ptr) const { std::free(ptr); } }; } String SSHKey::getBase64() const { char * buf = nullptr; - int rc = ssh_pki_export_pubkey_base64(key, &buf); - - if (rc != SSH_OK) + if (int rc = ssh_pki_export_pubkey_base64(key, &buf); rc != SSH_OK) throw DB::Exception(DB::ErrorCodes::LIBSSH_ERROR, "Failed to export public key to base64"); - /// Create a String from cstring, which makes a copy of the first one and requires freeing memory after it /// This is to safely manage buf memory std::unique_ptr buf_ptr(buf); @@ -177,7 +171,6 @@ SSHKey::~SSHKey() ssh_key_free(key); // it's safe free from libssh } -} } #endif diff --git a/src/Common/SSH/Wrappers.h b/src/Common/SSHWrapper.h similarity index 73% rename from src/Common/SSH/Wrappers.h rename to src/Common/SSHWrapper.h index 699bba2b0424..b6f0c577edcd 100644 --- a/src/Common/SSH/Wrappers.h +++ b/src/Common/SSHWrapper.h @@ -1,20 +1,18 @@ #pragma once + #include + +#include +#include + #include "config.h" -#if USE_SSH -# include -# include +#if USE_SSH using ssh_key = struct ssh_key_struct *; namespace DB { -namespace ssh -{ - -class SSHKeyFactory; - class SSHKey { public: @@ -22,11 +20,7 @@ class SSHKey ~SSHKey(); SSHKey(const SSHKey & other); - SSHKey(SSHKey && other) noexcept - { - key = other.key; - other.key = nullptr; - } + SSHKey(SSHKey && other) noexcept; SSHKey & operator=(const SSHKey & other); SSHKey & operator=(SSHKey && other) noexcept; @@ -43,7 +37,7 @@ class SSHKey String getBase64() const; String getKeyType() const; - friend SSHKeyFactory; + friend class SSHKeyFactory; private: explicit SSHKey(ssh_key key_) : key(key_) { } ssh_key key = nullptr; @@ -56,17 +50,14 @@ class SSHKeyFactory /// The check whether the path is allowed to read for ClickHouse has /// (e.g. a file is inside `user_files` directory) /// to be done outside of this functions. 
- static SSHKey makePrivateFromFile(String filename, String passphrase); - static SSHKey makePublicFromFile(String filename); - static SSHKey makePublicFromBase64(String base64_key, String type_name); + static SSHKey makePrivateKeyFromFile(String filename, String passphrase); + static SSHKey makePublicKeyFromFile(String filename); + static SSHKey makePublicKeyFromBase64(String base64_key, String type_name); }; -} } #else -namespace ssh -{ class SSHKey { public: @@ -74,5 +65,4 @@ class SSHKey [[ noreturn ]] bool isEmpty() { std::terminate(); } [[ noreturn ]] String signString(std::string_view) const { std::terminate(); } }; -} #endif diff --git a/src/Common/StackTrace.cpp b/src/Common/StackTrace.cpp index 436b85ff30ba..4200161f8e80 100644 --- a/src/Common/StackTrace.cpp +++ b/src/Common/StackTrace.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -18,13 +19,10 @@ #include #include #include -#include #include #include #include -#include "config.h" - #include #if defined(OS_DARWIN) @@ -366,7 +364,7 @@ String demangleAndCollapseNames(std::optional file, const char if (file.has_value()) { std::string_view file_copy = file.value(); - if (auto trim_pos = file_copy.find_last_of('/'); trim_pos != file_copy.npos) + if (auto trim_pos = file_copy.find_last_of('/'); trim_pos != std::string_view::npos) file_copy.remove_suffix(file_copy.size() - trim_pos); if (file_copy.ends_with("functional")) return "?"; @@ -481,7 +479,15 @@ void StackTrace::toStringEveryLine(void ** frame_pointers_raw, size_t offset, si toStringEveryLineImpl(true, {frame_pointers, offset, size}, std::move(callback)); } -using StackTraceCache = std::map>; +struct CacheEntry +{ + std::mutex mutex; + std::optional stacktrace_string; +}; + +using CacheEntryPtr = std::shared_ptr; + +using StackTraceCache = std::map>; static StackTraceCache & cacheInstance() { @@ -489,27 +495,47 @@ static StackTraceCache & cacheInstance() return cache; } -static std::mutex stacktrace_cache_mutex; +static DB::SharedMutex stacktrace_cache_mutex; String toStringCached(const StackTrace::FramePointers & pointers, size_t offset, size_t size) { + const StackTraceRefTriple key{pointers, offset, size}; + /// Calculation of stack trace text is extremely slow. - /// We use simple cache because otherwise the server could be overloaded by trash queries. + /// We use cache because otherwise the server could be overloaded by trash queries. /// Note that this cache can grow unconditionally, but practically it should be small. 
- std::lock_guard lock{stacktrace_cache_mutex}; - StackTraceCache & cache = cacheInstance(); - const StackTraceRefTriple key{pointers, offset, size}; + CacheEntryPtr cache_entry; - if (auto it = cache.find(key); it != cache.end()) - return it->second; - else + // Optimistic try for cache hit to avoid any contention whatsoever, should be the main hot code route + { + std::shared_lock read_lock{stacktrace_cache_mutex}; + if (auto it = cache.find(key); it != cache.end()) + cache_entry = it->second; + } + + // Create a new entry in case of a cache miss + if (!cache_entry) + { + std::unique_lock write_lock{stacktrace_cache_mutex}; + + // We should recheck because `shared_lock` was released before we acquired `write_lock` + if (auto it = cache.find(key); it != cache.end()) + cache_entry = it->second; // Another thread managed to created this entry before us + else + cache_entry = cache.emplace(StackTraceTriple{pointers, offset, size}, std::make_shared()).first->second; + } + + // Do not hold `stacktrace_cache_mutex` while running possibly slow calculation of stack trace text + std::scoped_lock lock(cache_entry->mutex); + if (!cache_entry->stacktrace_string.has_value()) { DB::WriteBufferFromOwnString out; toStringEveryLineImpl(false, key, [&](std::string_view str) { out << str << '\n'; }); - - return cache.emplace(StackTraceTriple{pointers, offset, size}, out.str()).first->second; + cache_entry->stacktrace_string = out.str(); } + + return *cache_entry->stacktrace_string; } std::string StackTrace::toString() const diff --git a/src/Common/ThreadPool.cpp b/src/Common/ThreadPool.cpp index 3c2e6228421e..eaee070c44f5 100644 --- a/src/Common/ThreadPool.cpp +++ b/src/Common/ThreadPool.cpp @@ -490,8 +490,9 @@ void ThreadPoolImpl::worker(typename std::list::iterator thread_ template class ThreadPoolImpl; -template class ThreadPoolImpl>; -template class ThreadFromGlobalPoolImpl; +template class ThreadPoolImpl>; +template class ThreadFromGlobalPoolImpl; +template class ThreadFromGlobalPoolImpl; std::unique_ptr GlobalThreadPool::the_instance; @@ -500,7 +501,9 @@ GlobalThreadPool::GlobalThreadPool( size_t max_threads_, size_t max_free_threads_, size_t queue_size_, - const bool shutdown_on_exception_) + const bool shutdown_on_exception_, + UInt64 global_profiler_real_time_period_ns_, + UInt64 global_profiler_cpu_time_period_ns_) : FreeThreadPool( CurrentMetrics::GlobalThread, CurrentMetrics::GlobalThreadActive, @@ -509,10 +512,12 @@ GlobalThreadPool::GlobalThreadPool( max_free_threads_, queue_size_, shutdown_on_exception_) + , global_profiler_real_time_period_ns(global_profiler_real_time_period_ns_) + , global_profiler_cpu_time_period_ns(global_profiler_cpu_time_period_ns_) { } -void GlobalThreadPool::initialize(size_t max_threads, size_t max_free_threads, size_t queue_size) +void GlobalThreadPool::initialize(size_t max_threads, size_t max_free_threads, size_t queue_size, UInt64 global_profiler_real_time_period_ns, UInt64 global_profiler_cpu_time_period_ns) { if (the_instance) { @@ -520,7 +525,7 @@ void GlobalThreadPool::initialize(size_t max_threads, size_t max_free_threads, s "The global thread pool is initialized twice"); } - the_instance.reset(new GlobalThreadPool(max_threads, max_free_threads, queue_size, false /*shutdown_on_exception*/)); + the_instance.reset(new GlobalThreadPool(max_threads, max_free_threads, queue_size, false /*shutdown_on_exception*/, global_profiler_real_time_period_ns, global_profiler_cpu_time_period_ns)); } GlobalThreadPool & GlobalThreadPool::instance() diff --git 
a/src/Common/ThreadPool.h b/src/Common/ThreadPool.h index 31e4eabf63b8..528f782caf25 100644 --- a/src/Common/ThreadPool.h +++ b/src/Common/ThreadPool.h @@ -172,10 +172,21 @@ class GlobalThreadPool : public FreeThreadPool, private boost::noncopyable size_t max_threads_, size_t max_free_threads_, size_t queue_size_, - bool shutdown_on_exception_); + bool shutdown_on_exception_, + UInt64 global_profiler_real_time_period_ns_, + UInt64 global_profiler_cpu_time_period_ns_); public: - static void initialize(size_t max_threads = 10000, size_t max_free_threads = 1000, size_t queue_size = 10000); + UInt64 global_profiler_real_time_period_ns; + UInt64 global_profiler_cpu_time_period_ns; + + static void initialize( + size_t max_threads = 10000, + size_t max_free_threads = 1000, + size_t queue_size = 10000, + UInt64 global_profiler_real_time_period_ns_ = 0, + UInt64 global_profiler_cpu_time_period_ns_ = 0); + static GlobalThreadPool & instance(); static void shutdown(); }; @@ -187,7 +198,7 @@ class GlobalThreadPool : public FreeThreadPool, private boost::noncopyable * NOTE: User code should use 'ThreadFromGlobalPool' declared below instead of directly using this class. * */ -template +template class ThreadFromGlobalPoolImpl : boost::noncopyable { public: @@ -197,11 +208,15 @@ class ThreadFromGlobalPoolImpl : boost::noncopyable explicit ThreadFromGlobalPoolImpl(Function && func, Args &&... args) : state(std::make_shared()) { + UInt64 global_profiler_real_time_period = GlobalThreadPool::instance().global_profiler_real_time_period_ns; + UInt64 global_profiler_cpu_time_period = GlobalThreadPool::instance().global_profiler_cpu_time_period_ns; /// NOTE: /// - If this will throw an exception, the destructor won't be called /// - this pointer cannot be passed in the lambda, since after detach() it will not be valid GlobalThreadPool::instance().scheduleOrThrow([ my_state = state, + global_profiler_real_time_period, + global_profiler_cpu_time_period, my_func = std::forward(func), my_args = std::make_tuple(std::forward(args)...)]() mutable /// mutable is needed to destroy capture { @@ -220,6 +235,12 @@ class ThreadFromGlobalPoolImpl : boost::noncopyable /// Thread status holds raw pointer on query context, thus it always must be destroyed /// before sending signal that permits to join this thread. DB::ThreadStatus thread_status; + if constexpr (global_trace_collector_allowed) + { + if (unlikely(global_profiler_real_time_period != 0 || global_profiler_cpu_time_period != 0)) + thread_status.initGlobalProfiler(global_profiler_real_time_period, global_profiler_cpu_time_period); + } + std::apply(function, arguments); }, {}, // default priority @@ -305,11 +326,12 @@ class ThreadFromGlobalPoolImpl : boost::noncopyable /// you need to use class, or you need to use ThreadFromGlobalPool below. /// /// See the comments of ThreadPool below to know how it works. -using ThreadFromGlobalPoolNoTracingContextPropagation = ThreadFromGlobalPoolImpl; +using ThreadFromGlobalPoolNoTracingContextPropagation = ThreadFromGlobalPoolImpl; /// An alias of thread that execute jobs/tasks on global thread pool by implicit passing tracing context on current thread to underlying worker as parent tracing context. /// If jobs/tasks are directly scheduled by using APIs of this class, you need to use this class or you need to use class above. 
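The two new trailing parameters of GlobalThreadPool::initialize() shown above carry the global profiler periods into every thread created through ThreadFromGlobalPool: the scheduled lambda captures them and calls thread_status.initGlobalProfiler() when a period is non-zero and the alias permits trace collection. A hedged usage sketch, assuming the usual src-relative include path; the one-second period values are purely illustrative:

#include <Common/ThreadPool.h>

void initGlobalPoolWithProfiler()
{
    GlobalThreadPool::initialize(
        /* max_threads = */ 10000,
        /* max_free_threads = */ 1000,
        /* queue_size = */ 10000,
        /* global_profiler_real_time_period_ns = */ 1'000'000'000,   /// example: sample wall-clock time once per second
        /* global_profiler_cpu_time_period_ns = */ 1'000'000'000);   /// example: sample CPU time once per second
}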
-using ThreadFromGlobalPool = ThreadFromGlobalPoolImpl; +using ThreadFromGlobalPool = ThreadFromGlobalPoolImpl; +using ThreadFromGlobalPoolWithoutTraceCollector = ThreadFromGlobalPoolImpl; /// Recommended thread pool for the case when multiple thread pools are created and destroyed. /// diff --git a/src/Common/ThreadPool_fwd.h b/src/Common/ThreadPool_fwd.h index 2782acc9c516..fea4e59f0879 100644 --- a/src/Common/ThreadPool_fwd.h +++ b/src/Common/ThreadPool_fwd.h @@ -3,11 +3,12 @@ template class ThreadPoolImpl; -template +template class ThreadFromGlobalPoolImpl; -using ThreadFromGlobalPoolNoTracingContextPropagation = ThreadFromGlobalPoolImpl; +using ThreadFromGlobalPoolNoTracingContextPropagation = ThreadFromGlobalPoolImpl; -using ThreadFromGlobalPool = ThreadFromGlobalPoolImpl; +using ThreadFromGlobalPool = ThreadFromGlobalPoolImpl; +using ThreadFromGlobalPoolWithoutTraceCollector = ThreadFromGlobalPoolImpl; using ThreadPool = ThreadPoolImpl; diff --git a/src/Common/ThreadStatus.cpp b/src/Common/ThreadStatus.cpp index cf50d305e956..ad96018a17e7 100644 --- a/src/Common/ThreadStatus.cpp +++ b/src/Common/ThreadStatus.cpp @@ -96,7 +96,7 @@ ThreadStatus::ThreadStatus(bool check_current_thread_on_destruction_) stack_t altstack_description{}; altstack_description.ss_sp = alt_stack.getData(); altstack_description.ss_flags = 0; - altstack_description.ss_size = alt_stack.getSize(); + altstack_description.ss_size = ThreadStack::getSize(); if (0 != sigaltstack(&altstack_description, nullptr)) { @@ -124,6 +124,26 @@ ThreadStatus::ThreadStatus(bool check_current_thread_on_destruction_) #endif } +void ThreadStatus::initGlobalProfiler([[maybe_unused]] UInt64 global_profiler_real_time_period, [[maybe_unused]] UInt64 global_profiler_cpu_time_period) +{ +#if !defined(SANITIZER) && !defined(CLICKHOUSE_KEEPER_STANDALONE_BUILD) && !defined(__APPLE__) + try + { + if (global_profiler_real_time_period > 0) + query_profiler_real = std::make_unique(thread_id, + /* period= */ static_cast(global_profiler_real_time_period)); + + if (global_profiler_cpu_time_period > 0) + query_profiler_cpu = std::make_unique(thread_id, + /* period= */ static_cast(global_profiler_cpu_time_period)); + } + catch (...) 
+ { + tryLogCurrentException("ThreadStatus", "Cannot initialize GlobalProfiler"); + } +#endif +} + ThreadGroupPtr ThreadStatus::getThreadGroup() const { chassert(current_thread == this); diff --git a/src/Common/ThreadStatus.h b/src/Common/ThreadStatus.h index 48b52f8aa6ef..0c02ab8fdb0c 100644 --- a/src/Common/ThreadStatus.h +++ b/src/Common/ThreadStatus.h @@ -307,6 +307,8 @@ class ThreadStatus : public boost::noncopyable void flushUntrackedMemory(); + void initGlobalProfiler(UInt64 global_profiler_real_time_period, UInt64 global_profiler_cpu_time_period); + private: void applyGlobalSettings(); void applyQuerySettings(); diff --git a/src/Common/ZooKeeper/ZooKeeperCommon.cpp b/src/Common/ZooKeeper/ZooKeeperCommon.cpp index 4634eae77593..48bb510e5892 100644 --- a/src/Common/ZooKeeper/ZooKeeperCommon.cpp +++ b/src/Common/ZooKeeper/ZooKeeperCommon.cpp @@ -403,7 +403,7 @@ void ZooKeeperSetACLRequest::readImpl(ReadBuffer & in) std::string ZooKeeperSetACLRequest::toStringImpl() const { - return fmt::format("path = {}\n", "version = {}", path, version); + return fmt::format("path = {}\nversion = {}", path, version); } void ZooKeeperSetACLResponse::writeImpl(WriteBuffer & out) const @@ -457,7 +457,7 @@ void ZooKeeperCheckRequest::readImpl(ReadBuffer & in) std::string ZooKeeperCheckRequest::toStringImpl() const { - return fmt::format("path = {}\n", "version = {}", path, version); + return fmt::format("path = {}\nversion = {}", path, version); } void ZooKeeperErrorResponse::readImpl(ReadBuffer & in) diff --git a/src/Common/ZooKeeper/ZooKeeperConstants.h b/src/Common/ZooKeeper/ZooKeeperConstants.h index a5c1d21eda6a..1d9830505f8e 100644 --- a/src/Common/ZooKeeper/ZooKeeperConstants.h +++ b/src/Common/ZooKeeper/ZooKeeperConstants.h @@ -2,6 +2,7 @@ #include #include +#include namespace Coordination @@ -64,3 +65,12 @@ static constexpr int32_t DEFAULT_OPERATION_TIMEOUT_MS = 10000; static constexpr int32_t DEFAULT_CONNECTION_TIMEOUT_MS = 1000; } + +/// This is used by fmt::format to print OpNum as strings. +/// All OpNum values should be in range [min, max] to be printed. 
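To make the effect of the enum_range specialization that follows concrete, here is a minimal standalone sketch. It assumes only that magic_enum is available; the demo enum and its values are illustrative and are not the real Coordination::OpNum list.

```cpp
#include <cstdint>
#include <iostream>
#include <magic_enum.hpp>

enum class DemoOpNum : int32_t
{
    Close = -11,
    Create = 1,
    FilteredList = 500,   /// outside magic_enum's default reflection range
};

/// magic_enum only reflects enumerator values inside a compile-time range
/// (roughly [-128, 128] by default), so enums with large values need a wider one.
template <>
struct magic_enum::customize::enum_range<DemoOpNum>
{
    static constexpr int min = -100;
    static constexpr int max = 1000;
};

int main()
{
    /// With the widened range all three names resolve; without the specialization,
    /// enum_name(DemoOpNum::FilteredList) would return an empty string_view.
    std::cout << magic_enum::enum_name(DemoOpNum::Close) << '\n';
    std::cout << magic_enum::enum_name(DemoOpNum::Create) << '\n';
    std::cout << magic_enum::enum_name(DemoOpNum::FilteredList) << '\n';
}
```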
+template <> +struct magic_enum::customize::enum_range +{ + static constexpr int min = -100; + static constexpr int max = 1000; +}; diff --git a/src/Common/examples/encrypt_decrypt.cpp b/src/Common/examples/encrypt_decrypt.cpp index c7f949195c87..18d4e50be8ac 100644 --- a/src/Common/examples/encrypt_decrypt.cpp +++ b/src/Common/examples/encrypt_decrypt.cpp @@ -35,9 +35,9 @@ int main(int argc, char ** argv) DB::CompressionCodecEncrypted::Configuration::instance().load(*loaded_config.configuration, "encryption_codecs"); if (action == "-e") - std::cout << processor.encryptValue(codec_name, value) << std::endl; + std::cout << DB::ConfigProcessor::encryptValue(codec_name, value) << std::endl; else if (action == "-d") - std::cout << processor.decryptValue(codec_name, value) << std::endl; + std::cout << DB::ConfigProcessor::decryptValue(codec_name, value) << std::endl; else std::cerr << "Unknown action: " << action << std::endl; } diff --git a/src/Common/examples/parallel_aggregation.cpp b/src/Common/examples/parallel_aggregation.cpp index 20f5f1c5224b..7094690a3a88 100644 --- a/src/Common/examples/parallel_aggregation.cpp +++ b/src/Common/examples/parallel_aggregation.cpp @@ -205,7 +205,7 @@ static void aggregate4(Map & local_map, MapTwoLevel & global_map, Mutex * mutexe else { size_t hash_value = global_map.hash(*it); - size_t bucket = global_map.getBucketFromHash(hash_value); + size_t bucket = MapTwoLevel::getBucketFromHash(hash_value); if (mutexes[bucket].try_lock()) { diff --git a/src/Common/randomDelay.cpp b/src/Common/randomDelay.cpp new file mode 100644 index 000000000000..7f6f30849198 --- /dev/null +++ b/src/Common/randomDelay.cpp @@ -0,0 +1,39 @@ +#include + +#include +#include +#include + + +void randomDelayForMaxMilliseconds(uint64_t milliseconds, LoggerPtr log, const char * start_of_message) +{ + if (milliseconds) + { + auto count = randomNumber() % milliseconds; + + if (log) + { + if (start_of_message && !*start_of_message) + start_of_message = nullptr; + + LOG_TEST(log, "{}{}Sleeping for {} milliseconds", + (start_of_message ? start_of_message : ""), + (start_of_message ? ": " : ""), + count); + } + + sleepForMilliseconds(count); + + if (log) + { + LOG_TEST(log, "{}{}Awaking after sleeping", + (start_of_message ? start_of_message : ""), + (start_of_message ? ": " : "")); + } + } +} + +void randomDelayForMaxSeconds(uint64_t seconds, LoggerPtr log, const char * start_of_message) +{ + randomDelayForMaxMilliseconds(seconds * 1000, log, start_of_message); +} diff --git a/src/Common/randomDelay.h b/src/Common/randomDelay.h new file mode 100644 index 000000000000..99f218cc8a17 --- /dev/null +++ b/src/Common/randomDelay.h @@ -0,0 +1,8 @@ +#pragma once + +#include + +/// Sleeps for random duration between 0 and a specified number of milliseconds, optionally outputs a logging message about that. +/// This function can be used to add random delays in tests. 
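As a usage illustration for the two helpers declared next, the hedged sketch below injects a bounded random pause into a test-only code path. The include paths and the getLogger() call follow the surrounding sources, but the call site itself is hypothetical.

```cpp
#include <Common/randomDelay.h>
#include <Common/logger_useful.h>

void simulateSlowReplicaForTests()
{
    /// Sleep for anywhere between 0 and 5000 ms and log the chosen duration,
    /// prefixing the message with the given label.
    randomDelayForMaxMilliseconds(5000, getLogger("simulateSlowReplicaForTests"), "simulateSlowReplicaForTests");

    /// Or, without logging, sleep for up to 2 seconds.
    randomDelayForMaxSeconds(2);
}
```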
+void randomDelayForMaxMilliseconds(uint64_t milliseconds, LoggerPtr log = nullptr, const char * start_of_message = nullptr); +void randomDelayForMaxSeconds(uint64_t seconds, LoggerPtr log = nullptr, const char * start_of_message = nullptr); diff --git a/src/Common/tests/gtest_connection_pool.cpp b/src/Common/tests/gtest_connection_pool.cpp index dcc3c11fd529..cc091d12bb0d 100644 --- a/src/Common/tests/gtest_connection_pool.cpp +++ b/src/Common/tests/gtest_connection_pool.cpp @@ -2,7 +2,6 @@ #include #include -#include #include #include #include @@ -17,6 +16,40 @@ namespace { +template +class SafeHandler +{ +public: + using Ptr = std::shared_ptr>; + + SafeHandler() = default; + SafeHandler(SafeHandler&) = delete; + SafeHandler& operator=(SafeHandler&) = delete; + + T get() + { + std::lock_guard lock(mutex); + return obj; + } + + void set(T && options_) + { + std::lock_guard lock(mutex); + obj = std::move(options_); + } + +protected: + std::mutex mutex; + T obj = {}; +}; + +struct RequestOptions +{ + size_t slowdown_receive = 0; + int overwrite_keep_alive_timeout = 0; + int overwrite_keep_alive_max_requests = 10; +}; + size_t stream_copy_n(std::istream & in, std::ostream & out, std::size_t count = std::numeric_limits::max()) { const size_t buffer_size = 4096; @@ -47,13 +80,21 @@ size_t stream_copy_n(std::istream & in, std::ostream & out, std::size_t count = class MockRequestHandler : public Poco::Net::HTTPRequestHandler { public: - explicit MockRequestHandler(std::shared_ptr> slowdown_) - : slowdown(std::move(slowdown_)) + explicit MockRequestHandler(SafeHandler::Ptr options_) + : options(options_) { } void handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response) override { + int value = request.getKeepAliveTimeout(); + ASSERT_GT(value, 0); + + auto params = options->get(); + + if (params.overwrite_keep_alive_timeout > 0) + response.setKeepAliveTimeout(params.overwrite_keep_alive_timeout, params.overwrite_keep_alive_max_requests); + response.setStatus(Poco::Net::HTTPResponse::HTTP_OK); auto size = request.getContentLength(); if (size > 0) @@ -61,28 +102,29 @@ class MockRequestHandler : public Poco::Net::HTTPRequestHandler else response.setChunkedTransferEncoding(true); // or chunk encoding - sleepForSeconds(*slowdown); + if (params.slowdown_receive > 0) + sleepForSeconds(params.slowdown_receive); stream_copy_n(request.stream(), response.send(), size); } - std::shared_ptr> slowdown; + SafeHandler::Ptr options; }; class HTTPRequestHandlerFactory : public Poco::Net::HTTPRequestHandlerFactory { public: - explicit HTTPRequestHandlerFactory(std::shared_ptr> slowdown_) - : slowdown(std::move(slowdown_)) + explicit HTTPRequestHandlerFactory(SafeHandler::Ptr options_) + : options(options_) { } Poco::Net::HTTPRequestHandler * createRequestHandler(const Poco::Net::HTTPServerRequest &) override { - return new MockRequestHandler(slowdown); + return new MockRequestHandler(options); } - std::shared_ptr> slowdown; + SafeHandler::Ptr options; }; } @@ -94,6 +136,8 @@ class ConnectionPoolTest : public testing::Test { protected: ConnectionPoolTest() { + options = std::make_shared>(); + startServer(); } @@ -102,7 +146,7 @@ class ConnectionPoolTest : public testing::Test { DB::HTTPConnectionPools::Limits def_limits{}; DB::HTTPConnectionPools::instance().setLimits(def_limits, def_limits, def_limits); - setSlowDown(0); + options->set(RequestOptions()); DB::HTTPConnectionPools::instance().dropCache(); DB::CurrentThread::getProfileEvents().reset(); @@ -129,7 +173,7 @@ class 
ConnectionPoolTest : public testing::Test { void startServer() { server_data.reset(); - server_data.handler_factory = new HTTPRequestHandlerFactory(slowdown_receive); + server_data.handler_factory = new HTTPRequestHandlerFactory(options); server_data.server = std::make_unique( server_data.handler_factory, server_data.port); @@ -143,11 +187,21 @@ class ConnectionPoolTest : public testing::Test { void setSlowDown(size_t seconds) { - *slowdown_receive = seconds; + auto opt = options->get(); + opt.slowdown_receive = seconds; + options->set(std::move(opt)); + } + + void setOverWriteKeepAlive(size_t seconds, int max_requests) + { + auto opt = options->get(); + opt.overwrite_keep_alive_timeout = int(seconds); + opt.overwrite_keep_alive_max_requests= max_requests; + options->set(std::move(opt)); } DB::ConnectionTimeouts timeouts; - std::shared_ptr> slowdown_receive = std::make_shared>(0); + SafeHandler::Ptr options; struct ServerData { @@ -182,7 +236,7 @@ class ConnectionPoolTest : public testing::Test { void wait_until(std::function pred) { while (!pred()) - sleepForMilliseconds(250); + sleepForMilliseconds(10); } void echoRequest(String data, HTTPSession & session) @@ -245,45 +299,52 @@ TEST_F(ConnectionPoolTest, CanRequest) ASSERT_EQ(0, getServer().currentConnections()); ASSERT_EQ(1, getServer().totalConnections()); - ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().created]); + auto metrics = pool->getMetrics(); + + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.created]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.preserved]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.reused]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.reset]); + + ASSERT_EQ(1, CurrentMetrics::get(metrics.active_count)); + ASSERT_EQ(0, CurrentMetrics::get(metrics.stored_count)); } TEST_F(ConnectionPoolTest, CanPreserve) { auto pool = getPool(); + auto metrics = pool->getMetrics(); { auto connection = pool->getConnection(timeouts); } - ASSERT_EQ(1, CurrentMetrics::get(pool->getMetrics().active_count)); - ASSERT_EQ(1, CurrentMetrics::get(pool->getMetrics().stored_count)); + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.created]); + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.preserved]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.reused]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.reset]); + + ASSERT_EQ(1, CurrentMetrics::get(metrics.active_count)); + ASSERT_EQ(1, CurrentMetrics::get(metrics.stored_count)); wait_until([&] () { return getServer().currentConnections() == 1; }); ASSERT_EQ(1, getServer().currentConnections()); - - ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().created]); - ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().preserved]); - ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[pool->getMetrics().reused]); } TEST_F(ConnectionPoolTest, CanReuse) { auto pool = getPool(); + auto metrics = pool->getMetrics(); { auto connection = pool->getConnection(timeouts); - // DB::setReuseTag(*connection); } - ASSERT_EQ(1, CurrentMetrics::get(pool->getMetrics().active_count)); - ASSERT_EQ(1, CurrentMetrics::get(pool->getMetrics().stored_count)); - { auto connection = pool->getConnection(timeouts); - ASSERT_EQ(1, CurrentMetrics::get(pool->getMetrics().active_count)); - ASSERT_EQ(0, CurrentMetrics::get(pool->getMetrics().stored_count)); + ASSERT_EQ(1, CurrentMetrics::get(metrics.active_count)); + ASSERT_EQ(0, 
CurrentMetrics::get(metrics.stored_count)); wait_until([&] () { return getServer().currentConnections() == 1; }); ASSERT_EQ(1, getServer().currentConnections()); @@ -293,6 +354,11 @@ TEST_F(ConnectionPoolTest, CanReuse) ASSERT_EQ(1, getServer().totalConnections()); ASSERT_EQ(1, getServer().currentConnections()); + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.created]); + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.preserved]); + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.reused]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.reset]); + connection->reset(); } @@ -303,15 +369,16 @@ TEST_F(ConnectionPoolTest, CanReuse) ASSERT_EQ(0, getServer().currentConnections()); ASSERT_EQ(1, getServer().totalConnections()); - ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().created]); - ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().preserved]); - ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().reused]); + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.created]); + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.preserved]); + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.reused]); + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.reset]); } TEST_F(ConnectionPoolTest, CanReuse10) { auto pool = getPool(); - + auto metrics = pool->getMetrics(); for (int i = 0; i < 10; ++i) { @@ -328,16 +395,23 @@ TEST_F(ConnectionPoolTest, CanReuse10) ASSERT_EQ(0, getServer().currentConnections()); ASSERT_EQ(1, getServer().totalConnections()); - ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().created]); - ASSERT_EQ(10, DB::CurrentThread::getProfileEvents()[pool->getMetrics().preserved]); - ASSERT_EQ(10, DB::CurrentThread::getProfileEvents()[pool->getMetrics().reused]); + + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.created]); + ASSERT_EQ(10, DB::CurrentThread::getProfileEvents()[metrics.preserved]); + ASSERT_EQ(10, DB::CurrentThread::getProfileEvents()[metrics.reused]); + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.reset]); + + ASSERT_EQ(0, CurrentMetrics::get(metrics.active_count)); + ASSERT_EQ(0, CurrentMetrics::get(metrics.stored_count)); } TEST_F(ConnectionPoolTest, CanReuse5) { - timeouts.withHTTPKeepAliveTimeout(1); + auto ka = Poco::Timespan(1, 0); // 1 seconds + timeouts.withHTTPKeepAliveTimeout(ka); auto pool = getPool(); + auto metrics = pool->getMetrics(); std::vector connections; connections.reserve(5); @@ -347,11 +421,14 @@ TEST_F(ConnectionPoolTest, CanReuse5) } connections.clear(); - ASSERT_EQ(5, DB::CurrentThread::getProfileEvents()[pool->getMetrics().created]); - ASSERT_EQ(5, DB::CurrentThread::getProfileEvents()[pool->getMetrics().preserved]); - ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[pool->getMetrics().reused]); - ASSERT_EQ(5, CurrentMetrics::get(pool->getMetrics().active_count)); - ASSERT_EQ(5, CurrentMetrics::get(pool->getMetrics().stored_count)); + ASSERT_EQ(5, DB::CurrentThread::getProfileEvents()[metrics.created]); + ASSERT_EQ(5, DB::CurrentThread::getProfileEvents()[metrics.preserved]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.reused]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.reset]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.expired]); + + ASSERT_EQ(5, CurrentMetrics::get(metrics.active_count)); + ASSERT_EQ(5, CurrentMetrics::get(metrics.stored_count)); wait_until([&] () { return 
getServer().currentConnections() == 5; }); ASSERT_EQ(5, getServer().currentConnections()); @@ -363,35 +440,56 @@ TEST_F(ConnectionPoolTest, CanReuse5) echoRequest("Hello", *connection); } - ASSERT_EQ(5, getServer().totalConnections()); + ASSERT_EQ(5, DB::CurrentThread::getProfileEvents()[metrics.created]); + ASSERT_EQ(10, DB::CurrentThread::getProfileEvents()[metrics.preserved]); + ASSERT_EQ(5, DB::CurrentThread::getProfileEvents()[metrics.reused]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.reset]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.expired]); - ASSERT_EQ(5, DB::CurrentThread::getProfileEvents()[pool->getMetrics().created]); - ASSERT_EQ(10, DB::CurrentThread::getProfileEvents()[pool->getMetrics().preserved]); - ASSERT_EQ(5, DB::CurrentThread::getProfileEvents()[pool->getMetrics().reused]); - ASSERT_EQ(5, CurrentMetrics::get(pool->getMetrics().active_count)); - ASSERT_EQ(5, CurrentMetrics::get(pool->getMetrics().stored_count)); + ASSERT_EQ(5, CurrentMetrics::get(metrics.active_count)); + ASSERT_EQ(5, CurrentMetrics::get(metrics.stored_count)); + + /// wait until all connections are timeouted + wait_until([&] () { return getServer().currentConnections() == 0; }); + + { + // just to trigger pool->wipeExpired(); + auto connection = pool->getConnection(timeouts); + connection->reset(); + } + + ASSERT_EQ(6, DB::CurrentThread::getProfileEvents()[metrics.created]); + ASSERT_EQ(10, DB::CurrentThread::getProfileEvents()[metrics.preserved]); + ASSERT_EQ(5, DB::CurrentThread::getProfileEvents()[metrics.reused]); + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.reset]); + ASSERT_EQ(5, DB::CurrentThread::getProfileEvents()[metrics.expired]); + + ASSERT_EQ(0, CurrentMetrics::get(metrics.active_count)); + ASSERT_EQ(0, CurrentMetrics::get(metrics.stored_count)); } TEST_F(ConnectionPoolTest, CanReconnectAndCreate) { auto pool = getPool(); + auto metrics = pool->getMetrics(); std::vector in_use; - const size_t count = 2; + const size_t count = 3; for (int i = 0; i < count; ++i) { auto connection = pool->getConnection(timeouts); - // DB::setReuseTag(*connection); in_use.push_back(connection); } - ASSERT_EQ(count, DB::CurrentThread::getProfileEvents()[pool->getMetrics().created]); - ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[pool->getMetrics().preserved]); - ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[pool->getMetrics().reused]); + ASSERT_EQ(count, DB::CurrentThread::getProfileEvents()[metrics.created]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.preserved]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.reused]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.reset]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.expired]); - ASSERT_EQ(count, CurrentMetrics::get(pool->getMetrics().active_count)); - ASSERT_EQ(0, CurrentMetrics::get(pool->getMetrics().stored_count)); + ASSERT_EQ(count, CurrentMetrics::get(metrics.active_count)); + ASSERT_EQ(0, CurrentMetrics::get(metrics.stored_count)); auto connection = std::move(in_use.back()); in_use.pop_back(); @@ -402,28 +500,39 @@ TEST_F(ConnectionPoolTest, CanReconnectAndCreate) echoRequest("Hello", *connection); - connection->reset(); - - wait_until([&] () { return getServer().currentConnections() == 1; }); - ASSERT_EQ(1, getServer().currentConnections()); - ASSERT_EQ(count+1, getServer().totalConnections()); + ASSERT_EQ(count+1, DB::CurrentThread::getProfileEvents()[metrics.created]); + ASSERT_EQ(0, 
DB::CurrentThread::getProfileEvents()[metrics.preserved]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.reused]); + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.reset]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.expired]); - ASSERT_EQ(count+1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().created]); - ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[pool->getMetrics().preserved]); - ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[pool->getMetrics().reused]); + ASSERT_EQ(count, CurrentMetrics::get(metrics.active_count)); + ASSERT_EQ(0, CurrentMetrics::get(metrics.stored_count)); } TEST_F(ConnectionPoolTest, CanReconnectAndReuse) { + auto ka = Poco::Timespan(1, 0); // 1 seconds + timeouts.withHTTPKeepAliveTimeout(ka); + auto pool = getPool(); + auto metrics = pool->getMetrics(); std::vector in_use; - const size_t count = 2; + const size_t count = 3; + for (int i = 0; i < count; ++i) + { + auto connection = pool->getConnection(timeouts); + /// make some request in order to show to the server the keep alive headers + echoRequest("Hello", *connection); + in_use.push_back(std::move(connection)); + } + in_use.clear(); + for (int i = 0; i < count; ++i) { auto connection = pool->getConnection(timeouts); - // DB::setReuseTag(*connection); in_use.push_back(std::move(connection)); } @@ -441,11 +550,16 @@ TEST_F(ConnectionPoolTest, CanReconnectAndReuse) wait_until([&] () { return getServer().currentConnections() == 0; }); ASSERT_EQ(0, getServer().currentConnections()); - ASSERT_EQ(2, getServer().totalConnections()); + ASSERT_EQ(count, getServer().totalConnections()); + + ASSERT_EQ(count, DB::CurrentThread::getProfileEvents()[metrics.created]); + ASSERT_EQ(count + count - 1, DB::CurrentThread::getProfileEvents()[metrics.preserved]); + ASSERT_EQ(count + 1, DB::CurrentThread::getProfileEvents()[metrics.reused]); + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.reset]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.expired]); - ASSERT_EQ(count, DB::CurrentThread::getProfileEvents()[pool->getMetrics().created]); - ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().preserved]); - ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().reused]); + ASSERT_EQ(count-1, CurrentMetrics::get(metrics.active_count)); + ASSERT_EQ(count-2, CurrentMetrics::get(metrics.stored_count)); } TEST_F(ConnectionPoolTest, ReceiveTimeout) @@ -454,6 +568,7 @@ TEST_F(ConnectionPoolTest, ReceiveTimeout) timeouts.withReceiveTimeout(1); auto pool = getPool(); + auto metrics = pool->getMetrics(); { auto connection = pool->getConnection(timeouts); @@ -462,10 +577,14 @@ TEST_F(ConnectionPoolTest, ReceiveTimeout) ); } - ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().created]); - ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[pool->getMetrics().preserved]); - ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[pool->getMetrics().reused]); - ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().reset]); + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.created]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.preserved]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.reused]); + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.reset]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.expired]); + + ASSERT_EQ(0, CurrentMetrics::get(metrics.active_count)); + ASSERT_EQ(0, CurrentMetrics::get(metrics.stored_count)); { 
timeouts.withReceiveTimeout(3); @@ -475,10 +594,14 @@ TEST_F(ConnectionPoolTest, ReceiveTimeout) ); } - ASSERT_EQ(2, DB::CurrentThread::getProfileEvents()[pool->getMetrics().created]); - ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().preserved]); - ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[pool->getMetrics().reused]); - ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().reset]); + ASSERT_EQ(2, DB::CurrentThread::getProfileEvents()[metrics.created]); + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.preserved]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.reused]); + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.reset]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.expired]); + + ASSERT_EQ(1, CurrentMetrics::get(metrics.active_count)); + ASSERT_EQ(1, CurrentMetrics::get(metrics.stored_count)); { /// timeouts have effect for reused session @@ -489,10 +612,14 @@ TEST_F(ConnectionPoolTest, ReceiveTimeout) ); } - ASSERT_EQ(2, DB::CurrentThread::getProfileEvents()[pool->getMetrics().created]); - ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().preserved]); - ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().reused]); - ASSERT_EQ(2, DB::CurrentThread::getProfileEvents()[pool->getMetrics().reset]); + ASSERT_EQ(2, DB::CurrentThread::getProfileEvents()[metrics.created]); + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.preserved]); + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.reused]); + ASSERT_EQ(2, DB::CurrentThread::getProfileEvents()[metrics.reset]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.expired]); + + ASSERT_EQ(0, CurrentMetrics::get(metrics.active_count)); + ASSERT_EQ(0, CurrentMetrics::get(metrics.stored_count)); } TEST_F(ConnectionPoolTest, ReadWriteBufferFromHTTP) @@ -500,6 +627,7 @@ TEST_F(ConnectionPoolTest, ReadWriteBufferFromHTTP) std::string_view message = "Hello ReadWriteBufferFromHTTP"; auto uri = Poco::URI(getServerUrl()); auto metrics = DB::HTTPConnectionPools::instance().getPool(DB::HTTPConnectionGroupType::HTTP, uri, DB::ProxyConfiguration{})->getMetrics(); + Poco::Net::HTTPBasicCredentials empty_creds; auto buf_from_http = DB::BuilderRWBufferFromHTTP(uri) .withConnectionGroup(DB::HTTPConnectionGroupType::HTTP) @@ -527,6 +655,7 @@ TEST_F(ConnectionPoolTest, ReadWriteBufferFromHTTP) ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.preserved]); ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.reused]); ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.reset]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.expired]); ASSERT_EQ(1, CurrentMetrics::get(metrics.active_count)); ASSERT_EQ(1, CurrentMetrics::get(metrics.stored_count)); @@ -538,23 +667,26 @@ TEST_F(ConnectionPoolTest, HardLimit) DB::HTTPConnectionPools::instance().setLimits(zero_limits, zero_limits, zero_limits); auto pool = getPool(); + auto metrics = pool->getMetrics(); { auto connection = pool->getConnection(timeouts); } - ASSERT_EQ(0, CurrentMetrics::get(pool->getMetrics().active_count)); - ASSERT_EQ(0, CurrentMetrics::get(pool->getMetrics().stored_count)); - + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.created]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.preserved]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.reused]); + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.reset]); + ASSERT_EQ(0, 
DB::CurrentThread::getProfileEvents()[metrics.expired]); - ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().created]); - ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[pool->getMetrics().preserved]); - ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().reset]); + ASSERT_EQ(0, CurrentMetrics::get(metrics.active_count)); + ASSERT_EQ(0, CurrentMetrics::get(metrics.stored_count)); } TEST_F(ConnectionPoolTest, NoReceiveCall) { auto pool = getPool(); + auto metrics = pool->getMetrics(); { auto connection = pool->getConnection(timeouts); @@ -570,11 +702,209 @@ TEST_F(ConnectionPoolTest, NoReceiveCall) connection->flushRequest(); } - ASSERT_EQ(0, CurrentMetrics::get(pool->getMetrics().active_count)); - ASSERT_EQ(0, CurrentMetrics::get(pool->getMetrics().stored_count)); + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.created]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.preserved]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.reused]); + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.reset]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.expired]); + ASSERT_EQ(0, CurrentMetrics::get(metrics.active_count)); + ASSERT_EQ(0, CurrentMetrics::get(metrics.stored_count)); +} - ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().created]); - ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[pool->getMetrics().preserved]); - ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().reset]); +TEST_F(ConnectionPoolTest, ReconnectedWhenConnectionIsHoldTooLong) +{ + auto ka = Poco::Timespan(1, 0); // 1 seconds + timeouts.withHTTPKeepAliveTimeout(ka); + + auto pool = getPool(); + auto metrics = pool->getMetrics(); + + { + auto connection = pool->getConnection(timeouts); + + echoRequest("Hello", *connection); + + auto fake_ka = Poco::Timespan(30 * 1000 * 1000); // 30 seconds + timeouts.withHTTPKeepAliveTimeout(fake_ka); + DB::setTimeouts(*connection, timeouts); // new keep alive timeout has no effect + + wait_until([&] () { return getServer().currentConnections() == 0; }); + + ASSERT_EQ(1, connection->connected()); + ASSERT_EQ(1, connection->getKeepAlive()); + ASSERT_EQ(1000, connection->getKeepAliveTimeout().totalMilliseconds()); + ASSERT_EQ(1, connection->isKeepAliveExpired(connection->getKeepAliveReliability())); + + echoRequest("Hello", *connection); + } + + + ASSERT_EQ(2, DB::CurrentThread::getProfileEvents()[metrics.created]); + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.preserved]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.reused]); + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.reset]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.expired]); + + ASSERT_EQ(1, CurrentMetrics::get(metrics.active_count)); + ASSERT_EQ(1, CurrentMetrics::get(metrics.stored_count)); +} + +TEST_F(ConnectionPoolTest, ReconnectedWhenConnectionIsNearlyExpired) +{ + auto ka = Poco::Timespan(1, 0); // 1 seconds + timeouts.withHTTPKeepAliveTimeout(ka); + + auto pool = getPool(); + auto metrics = pool->getMetrics(); + + { + { + auto connection = pool->getConnection(timeouts); + echoRequest("Hello", *connection); + } + + sleepForMilliseconds(900); + + { + auto connection = pool->getConnection(timeouts); + echoRequest("Hello", *connection); + } + } + + ASSERT_EQ(2, DB::CurrentThread::getProfileEvents()[metrics.created]); + ASSERT_EQ(2, DB::CurrentThread::getProfileEvents()[metrics.preserved]); + ASSERT_EQ(0, 
DB::CurrentThread::getProfileEvents()[metrics.reused]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.reset]); + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.expired]); + + ASSERT_EQ(1, CurrentMetrics::get(metrics.active_count)); + ASSERT_EQ(1, CurrentMetrics::get(metrics.stored_count)); +} + +TEST_F(ConnectionPoolTest, ServerOverwriteKeepAlive) +{ + auto ka = Poco::Timespan(30, 0); // 30 seconds + timeouts.withHTTPKeepAliveTimeout(ka); + + auto pool = getPool(); + auto metrics = pool->getMetrics(); + + { + auto connection = pool->getConnection(timeouts); + echoRequest("Hello", *connection); + ASSERT_EQ(30, timeouts.http_keep_alive_timeout.totalSeconds()); + ASSERT_EQ(30, connection->getKeepAliveTimeout().totalSeconds()); + } + + { + setOverWriteKeepAlive(1, 10); + auto connection = pool->getConnection(timeouts); + echoRequest("Hello", *connection); + ASSERT_EQ(30, timeouts.http_keep_alive_timeout.totalSeconds()); + ASSERT_EQ(1, connection->getKeepAliveTimeout().totalSeconds()); + } + + { + // server do not overwrite it in the following requests but client has to remember last agreed value + setOverWriteKeepAlive(0, 0); + auto connection = pool->getConnection(timeouts); + echoRequest("Hello", *connection); + ASSERT_EQ(30, timeouts.http_keep_alive_timeout.totalSeconds()); + ASSERT_EQ(1, connection->getKeepAliveTimeout().totalSeconds()); + } + + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.created]); + ASSERT_EQ(3, DB::CurrentThread::getProfileEvents()[metrics.preserved]); + ASSERT_EQ(2, DB::CurrentThread::getProfileEvents()[metrics.reused]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.reset]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.expired]); + + ASSERT_EQ(1, CurrentMetrics::get(metrics.active_count)); + ASSERT_EQ(1, CurrentMetrics::get(metrics.stored_count)); +} + +TEST_F(ConnectionPoolTest, MaxRequests) +{ + auto ka = Poco::Timespan(30, 0); // 30 seconds + timeouts.withHTTPKeepAliveTimeout(ka); + auto max_requests = 5; + timeouts.http_keep_alive_max_requests = max_requests; + + auto pool = getPool(); + auto metrics = pool->getMetrics(); + + for (int i = 1; i <= max_requests - 1; ++i) + { + auto connection = pool->getConnection(timeouts); + echoRequest("Hello", *connection); + ASSERT_EQ(30, connection->getKeepAliveTimeout().totalSeconds()); + ASSERT_EQ(max_requests, connection->getKeepAliveMaxRequests()); + ASSERT_EQ(i, connection->getKeepAliveRequest()); + } + + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.created]); + ASSERT_EQ(max_requests-1, DB::CurrentThread::getProfileEvents()[metrics.preserved]); + ASSERT_EQ(max_requests-2, DB::CurrentThread::getProfileEvents()[metrics.reused]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.reset]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.expired]); + + ASSERT_EQ(1, CurrentMetrics::get(metrics.active_count)); + ASSERT_EQ(1, CurrentMetrics::get(metrics.stored_count)); + + { + auto connection = pool->getConnection(timeouts); + echoRequest("Hello", *connection); + ASSERT_EQ(30, connection->getKeepAliveTimeout().totalSeconds()); + ASSERT_EQ(max_requests, connection->getKeepAliveMaxRequests()); + ASSERT_EQ(max_requests, connection->getKeepAliveRequest()); + } + + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.created]); + ASSERT_EQ(max_requests-1, DB::CurrentThread::getProfileEvents()[metrics.preserved]); + ASSERT_EQ(max_requests-1, DB::CurrentThread::getProfileEvents()[metrics.reused]); + ASSERT_EQ(0, 
DB::CurrentThread::getProfileEvents()[metrics.reset]); + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.expired]); + + ASSERT_EQ(0, CurrentMetrics::get(metrics.active_count)); + ASSERT_EQ(0, CurrentMetrics::get(metrics.stored_count)); +} + + +TEST_F(ConnectionPoolTest, ServerOverwriteMaxRequests) +{ + auto ka = Poco::Timespan(30, 0); // 30 seconds + timeouts.withHTTPKeepAliveTimeout(ka); + + auto pool = getPool(); + auto metrics = pool->getMetrics(); + + { + auto connection = pool->getConnection(timeouts); + echoRequest("Hello", *connection); + ASSERT_EQ(30, connection->getKeepAliveTimeout().totalSeconds()); + ASSERT_EQ(1000, connection->getKeepAliveMaxRequests()); + ASSERT_EQ(1, connection->getKeepAliveRequest()); + } + + auto max_requests = 3; + setOverWriteKeepAlive(5, max_requests); + + for (int i = 2; i <= 10*max_requests; ++i) + { + auto connection = pool->getConnection(timeouts); + echoRequest("Hello", *connection); + ASSERT_EQ(5, connection->getKeepAliveTimeout().totalSeconds()); + ASSERT_EQ(max_requests, connection->getKeepAliveMaxRequests()); + ASSERT_EQ(((i-1) % max_requests) + 1, connection->getKeepAliveRequest()); + } + + ASSERT_EQ(10, DB::CurrentThread::getProfileEvents()[metrics.created]); + ASSERT_EQ(10*max_requests-10, DB::CurrentThread::getProfileEvents()[metrics.preserved]); + ASSERT_EQ(10*max_requests-10, DB::CurrentThread::getProfileEvents()[metrics.reused]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.reset]); + ASSERT_EQ(10, DB::CurrentThread::getProfileEvents()[metrics.expired]); + + ASSERT_EQ(0, CurrentMetrics::get(metrics.active_count)); + ASSERT_EQ(0, CurrentMetrics::get(metrics.stored_count)); } diff --git a/src/Compression/CachedCompressedReadBuffer.cpp b/src/Compression/CachedCompressedReadBuffer.cpp index 0febfca75cc8..3476f436eeb2 100644 --- a/src/Compression/CachedCompressedReadBuffer.cpp +++ b/src/Compression/CachedCompressedReadBuffer.cpp @@ -38,7 +38,7 @@ void CachedCompressedReadBuffer::prefetch(Priority priority) bool CachedCompressedReadBuffer::nextImpl() { /// Let's check for the presence of a decompressed block in the cache, grab the ownership of this block, if it exists. 
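The comment above describes a check-then-load access to the uncompressed cache. A toy sketch of that getOrSet() idiom follows; the DemoCache class is a simplified stand-in and not the real UncompressedCache, which additionally handles sizes, eviction and concurrent access.

```cpp
#include <functional>
#include <iostream>
#include <memory>
#include <string>
#include <unordered_map>
#include <utility>

template <typename Key, typename Mapped>
class DemoCache
{
public:
    using MappedPtr = std::shared_ptr<Mapped>;

    /// Returns the cached value for `key`, calling `load` only on a cache miss.
    /// The bool tells the caller whether the loader was actually invoked.
    std::pair<MappedPtr, bool> getOrSet(const Key & key, std::function<MappedPtr()> load)
    {
        if (auto it = cells.find(key); it != cells.end())
            return {it->second, false};

        MappedPtr value = load();
        cells.emplace(key, value);
        return {value, true};
    }

private:
    std::unordered_map<Key, MappedPtr> cells;
};

int main()
{
    DemoCache<std::string, std::string> cache;

    auto load = []
    {
        std::cout << "decompressing block\n";
        return std::make_shared<std::string>("decompressed data");
    };

    auto [first, loaded_first] = cache.getOrSet("file.bin:0", load);    /// miss: loader runs
    auto [second, loaded_second] = cache.getOrSet("file.bin:0", load);  /// hit: same cell, loader skipped

    std::cout << std::boolalpha << loaded_first << ' ' << loaded_second << '\n';  /// prints: true false
}
```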
- UInt128 key = cache->hash(path, file_pos); + UInt128 key = UncompressedCache::hash(path, file_pos); owned_cell = cache->getOrSet(key, [&]() { diff --git a/src/Compression/tests/gtest_compressionCodec.cpp b/src/Compression/tests/gtest_compressionCodec.cpp index 16573e035e04..f3f6345a5b5f 100644 --- a/src/Compression/tests/gtest_compressionCodec.cpp +++ b/src/Compression/tests/gtest_compressionCodec.cpp @@ -483,7 +483,7 @@ void testTranscoding(Timer & timer, ICompressionCodec & codec, const CodecTestSe ASSERT_TRUE(EqualByteContainers(test_sequence.data_type->getSizeOfValueInMemory(), source_data, decoded)); - const auto header_size = codec.getHeaderSize(); + const auto header_size = ICompressionCodec::getHeaderSize(); const auto compression_ratio = (encoded_size - header_size) / (source_data.size() * 1.0); if (expected_compression_ratio) diff --git a/src/Coordination/FourLetterCommand.cpp b/src/Coordination/FourLetterCommand.cpp index d7fa5abe7421..25254e10441e 100644 --- a/src/Coordination/FourLetterCommand.cpp +++ b/src/Coordination/FourLetterCommand.cpp @@ -592,7 +592,7 @@ String RecalculateCommand::run() String CleanResourcesCommand::run() { - keeper_dispatcher.cleanResources(); + KeeperDispatcher::cleanResources(); return "ok"; } diff --git a/src/Core/Block.cpp b/src/Core/Block.cpp index dfd60b994f4c..77dbad5443eb 100644 --- a/src/Core/Block.cpp +++ b/src/Core/Block.cpp @@ -1,19 +1,17 @@ -#include -#include - -#include - -#include -#include - -#include - +#include #include #include #include +#include +#include +#include +#include +#include +#include +#include #include -#include + #include diff --git a/src/Core/Defines.h b/src/Core/Defines.h index a8dd26519c2f..f2142bc764d4 100644 --- a/src/Core/Defines.h +++ b/src/Core/Defines.h @@ -54,6 +54,7 @@ static constexpr auto DEFAULT_COUNT_OF_HTTP_CONNECTIONS_PER_ENDPOINT = 15; static constexpr auto DEFAULT_TCP_KEEP_ALIVE_TIMEOUT = 290; static constexpr auto DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT = 30; +static constexpr auto DEFAULT_HTTP_KEEP_ALIVE_MAX_REQUEST = 1000; static constexpr auto DBMS_DEFAULT_PATH = "/var/lib/clickhouse/"; diff --git a/src/Core/Field.cpp b/src/Core/Field.cpp index 9c058d619024..73f0703f21e4 100644 --- a/src/Core/Field.cpp +++ b/src/Core/Field.cpp @@ -22,6 +22,12 @@ namespace ErrorCodes extern const int DECIMAL_OVERFLOW; } +template +T DecimalField::getScaleMultiplier() const +{ + return DecimalUtils::scaleMultiplier(scale); +} + inline Field getBinaryValue(UInt8 type, ReadBuffer & buf) { switch (static_cast(type)) @@ -627,5 +633,9 @@ std::string_view Field::getTypeName() const return fieldTypeToString(which); } - +template class DecimalField; +template class DecimalField; +template class DecimalField; +template class DecimalField; +template class DecimalField; } diff --git a/src/Core/Field.h b/src/Core/Field.h index aed5fab21065..4424d669c4dc 100644 --- a/src/Core/Field.h +++ b/src/Core/Field.h @@ -8,7 +8,6 @@ #include #include -#include #include #include #include @@ -151,7 +150,7 @@ class DecimalField operator T() const { return dec; } /// NOLINT T getValue() const { return dec; } - T getScaleMultiplier() const { return DecimalUtils::scaleMultiplier(scale); } + T getScaleMultiplier() const; UInt32 getScale() const { return scale; } template @@ -200,6 +199,12 @@ class DecimalField UInt32 scale; }; +extern template class DecimalField; +extern template class DecimalField; +extern template class DecimalField; +extern template class DecimalField; +extern template class DecimalField; + template constexpr bool is_decimal_field = 
false; template <> constexpr inline bool is_decimal_field> = true; template <> constexpr inline bool is_decimal_field> = true; @@ -893,11 +898,13 @@ NearestFieldType> & Field::get() template auto & Field::safeGet() { - const Types::Which requested = TypeToEnum>>::value; + const Types::Which target = TypeToEnum>>::value; - if (which != requested) + /// We allow converting int64 <-> uint64, int64 <-> bool, uint64 <-> bool in safeGet(). + if (target != which + && (!isInt64OrUInt64orBoolFieldType(target) || !isInt64OrUInt64orBoolFieldType(which))) throw Exception(ErrorCodes::BAD_GET, - "Bad get: has {}, requested {}", getTypeName(), requested); + "Bad get: has {}, requested {}", getTypeName(), target); return get(); } diff --git a/src/Core/Protocol.h b/src/Core/Protocol.h index 441e22f4a164..481071547534 100644 --- a/src/Core/Protocol.h +++ b/src/Core/Protocol.h @@ -56,10 +56,11 @@ namespace DB namespace EncodedUserInfo { -/// Marker of the inter-server secret (passed in the user name) +/// Marker for the inter-server secret (passed as the user name) /// (anyway user cannot be started with a whitespace) const char USER_INTERSERVER_MARKER[] = " INTERSERVER SECRET "; -/// Marker of the SSH keys based authentication (passed in the user name) + +/// Marker for SSH-keys-based authentication (passed as the user name) const char SSH_KEY_AUTHENTICAION_MARKER[] = " SSH KEY AUTHENTICATION "; }; @@ -160,8 +161,8 @@ namespace Protocol ReadTaskResponse = 9, /// A filename to read from s3 (used in s3Cluster) MergeTreeReadTaskResponse = 10, /// Coordinator's decision with a modified set of mark ranges allowed to read - SSHChallengeRequest = 11, /// Request for SSH signature challenge - SSHChallengeResponse = 12, /// Request for SSH signature challenge + SSHChallengeRequest = 11, /// Request SSH signature challenge + SSHChallengeResponse = 12, /// Reply to SSH signature challenge MAX = SSHChallengeResponse, }; diff --git a/src/Core/ServerSettings.h b/src/Core/ServerSettings.h index 6608a35a5a2c..46e2dc649a67 100644 --- a/src/Core/ServerSettings.h +++ b/src/Core/ServerSettings.h @@ -128,15 +128,17 @@ namespace DB M(Bool, format_alter_operations_with_parentheses, false, "If enabled, each operation in alter queries will be surrounded with parentheses in formatted queries to make them less ambiguous.", 0) \ M(String, default_replica_path, "/clickhouse/tables/{uuid}/{shard}", "The path to the table in ZooKeeper", 0) \ M(String, default_replica_name, "{replica}", "The replica name in ZooKeeper", 0) \ - M(UInt64, disk_connections_soft_limit, 1000, "Connections above this limit have significantly shorter time to live. The limit applies to the disks connections.", 0) \ + M(UInt64, disk_connections_soft_limit, 5000, "Connections above this limit have significantly shorter time to live. The limit applies to the disks connections.", 0) \ M(UInt64, disk_connections_warn_limit, 10000, "Warning massages are written to the logs if number of in-use connections are higher than this limit. The limit applies to the disks connections.", 0) \ - M(UInt64, disk_connections_store_limit, 12000, "Connections above this limit reset after use. Set to 0 to turn connection cache off. The limit applies to the disks connections.", 0) \ + M(UInt64, disk_connections_store_limit, 30000, "Connections above this limit reset after use. Set to 0 to turn connection cache off. The limit applies to the disks connections.", 0) \ M(UInt64, storage_connections_soft_limit, 100, "Connections above this limit have significantly shorter time to live. 
The limit applies to the storages connections.", 0) \ M(UInt64, storage_connections_warn_limit, 1000, "Warning massages are written to the logs if number of in-use connections are higher than this limit. The limit applies to the storages connections.", 0) \ M(UInt64, storage_connections_store_limit, 5000, "Connections above this limit reset after use. Set to 0 to turn connection cache off. The limit applies to the storages connections.", 0) \ M(UInt64, http_connections_soft_limit, 100, "Connections above this limit have significantly shorter time to live. The limit applies to the http connections which do not belong to any disk or storage.", 0) \ M(UInt64, http_connections_warn_limit, 1000, "Warning massages are written to the logs if number of in-use connections are higher than this limit. The limit applies to the http connections which do not belong to any disk or storage.", 0) \ M(UInt64, http_connections_store_limit, 5000, "Connections above this limit reset after use. Set to 0 to turn connection cache off. The limit applies to the http connections which do not belong to any disk or storage.", 0) \ + M(UInt64, global_profiler_real_time_period_ns, 0, "Period for real clock timer of global profiler (in nanoseconds). Set 0 value to turn off the real clock global profiler. Recommended value is at least 10000000 (100 times a second) for single queries or 1000000000 (once a second) for cluster-wide profiling.", 0) \ + M(UInt64, global_profiler_cpu_time_period_ns, 0, "Period for CPU clock timer of global profiler (in nanoseconds). Set 0 value to turn off the CPU clock global profiler. Recommended value is at least 10000000 (100 times a second) for single queries or 1000000000 (once a second) for cluster-wide profiling.", 0) \ /// If you add a setting which can be updated at runtime, please update 'changeable_settings' map in StorageSystemServerSettings.cpp diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 67c90de835be..9f2ee9dbace9 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -188,7 +188,7 @@ class IColumn; \ M(Bool, group_by_use_nulls, false, "Treat columns mentioned in ROLLUP, CUBE or GROUPING SETS as Nullable", 0) \ \ - M(UInt64, max_parallel_replicas, 1, "The maximum number of replicas of each shard used when the query is executed. For consistency (to get different parts of the same partition), this option only works for the specified sampling key. The lag of the replicas is not controlled.", 0) \ + M(NonZeroUInt64, max_parallel_replicas, 1, "The maximum number of replicas of each shard used when the query is executed. For consistency (to get different parts of the same partition), this option only works for the specified sampling key. The lag of the replicas is not controlled. Should be always greater than 0", 0) \ M(UInt64, parallel_replicas_count, 0, "This is internal setting that should not be used directly and represents an implementation detail of the 'parallel replicas' mode. This setting will be automatically set up by the initiator server for distributed queries to the number of parallel replicas participating in query processing.", 0) \ M(UInt64, parallel_replica_offset, 0, "This is internal setting that should not be used directly and represents an implementation detail of the 'parallel replicas' mode. 
This setting will be automatically set up by the initiator server for distributed queries to the index of the replica participating in query processing among parallel replicas.", 0) \ M(String, parallel_replicas_custom_key, "", "Custom key assigning work to replicas when parallel replicas are used.", 0) \ @@ -670,6 +670,7 @@ class IColumn; M(Bool, enable_writes_to_query_cache, true, "Enable storing results of SELECT queries in the query cache", 0) \ M(Bool, enable_reads_from_query_cache, true, "Enable reading results of SELECT queries from the query cache", 0) \ M(QueryCacheNondeterministicFunctionHandling, query_cache_nondeterministic_function_handling, QueryCacheNondeterministicFunctionHandling::Throw, "How the query cache handles queries with non-deterministic functions, e.g. now()", 0) \ + M(QueryCacheSystemTableHandling, query_cache_system_table_handling, QueryCacheSystemTableHandling::Throw, "How the query cache handles queries against system tables, i.e. tables in databases 'system.*' and 'information_schema.*'", 0) \ M(UInt64, query_cache_max_size_in_bytes, 0, "The maximum amount of memory (in bytes) the current user may allocate in the query cache. 0 means unlimited. ", 0) \ M(UInt64, query_cache_max_entries, 0, "The maximum number of query results the current user may store in the query cache. 0 means unlimited.", 0) \ M(UInt64, query_cache_min_query_runs, 0, "Minimum number a SELECT query must run before its result is stored in the query cache", 0) \ @@ -870,6 +871,7 @@ class IColumn; M(Bool, optimize_uniq_to_count, true, "Rewrite uniq and its variants(except uniqUpTo) to count if subquery has distinct or group by clause.", 0) \ M(Bool, use_variant_as_common_type, false, "Use Variant as a result type for if/multiIf in case when there is no common type for arguments", 0) \ M(Bool, enable_order_by_all, true, "Enable sorting expression ORDER BY ALL.", 0) \ + M(Float, ignore_drop_queries_probability, 0, "If enabled, server will ignore all DROP table queries with specified probability (for Memory and JOIN engines it will replcase DROP to TRUNCATE). Used for testing purposes", 0) \ M(Bool, traverse_shadow_remote_data_paths, false, "Traverse shadow directory when query system.remote_data_paths", 0) \ M(Bool, geo_distance_returns_float64_on_float64_arguments, true, "If all four arguments to `geoDistance`, `greatCircleDistance`, `greatCircleAngle` functions are Float64, return Float64 and use double precision for internal calculations. In previous ClickHouse versions, the functions always returned Float32.", 0) \ M(Bool, allow_get_client_http_header, false, "Allow to use the function `getClientHTTPHeader` which lets to obtain a value of an the current HTTP request's header. It is not enabled by default for security reasons, because some headers, such as `Cookie`, could contain sensitive info. 
Note that the `X-ClickHouse-*` and `Authentication` headers are always restricted and cannot be obtained with this function.", 0) \ @@ -1011,6 +1013,7 @@ class IColumn; M(Char, input_format_hive_text_fields_delimiter, '\x01', "Delimiter between fields in Hive Text File", 0) \ M(Char, input_format_hive_text_collection_items_delimiter, '\x02', "Delimiter between collection(array or map) items in Hive Text File", 0) \ M(Char, input_format_hive_text_map_keys_delimiter, '\x03', "Delimiter between a pair of map key/values in Hive Text File", 0) \ + M(Bool, input_format_hive_text_allow_variable_number_of_columns, true, "Ignore extra columns in Hive Text input (if file has more columns than expected) and treat missing fields in Hive Text input as default values", 0) \ M(UInt64, input_format_msgpack_number_of_columns, 0, "The number of columns in inserted MsgPack data. Used for automatic schema inference from data.", 0) \ M(MsgPackUUIDRepresentation, output_format_msgpack_uuid_representation, FormatSettings::MsgPackUUIDRepresentation::EXT, "The way how to output UUID in MsgPack format.", 0) \ M(UInt64, input_format_max_rows_to_read_for_schema_inference, 25000, "The maximum rows of data to read for automatic schema inference", 0) \ diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index cc66b271a7f2..f7d8c84312cf 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -86,9 +86,12 @@ namespace SettingsChangesHistory static std::map settings_changes_history = { {"24.4", {{"input_format_json_throw_on_bad_escape_sequence", true, true, "Allow to save JSON strings with bad escape sequences"}, + {"ignore_drop_queries_probability", 0, 0, "Allow to ignore drop queries in server with specified probability for testing purposes"}, {"lightweight_deletes_sync", 2, 2, "The same as 'mutation_sync', but controls only execution of lightweight deletes"}, + {"query_cache_system_table_handling", "save", "throw", "The query cache no longer caches results of queries against system tables"}, + {"input_format_hive_text_allow_variable_number_of_columns", false, true, "Ignore extra columns in Hive Text input (if file has more columns than expected) and treat missing fields in Hive Text input as default values."}, {"optimize_in_single_value", false, true, "Optimize in single value, convert in to equals."}, - }}, + }}, {"24.3", {{"s3_connect_timeout_ms", 1000, 1000, "Introduce new dedicated setting for s3 connection timeout"}, {"allow_experimental_shared_merge_tree", false, true, "The setting is obsolete"}, {"use_page_cache_for_disks_without_file_cache", false, false, "Added userspace page cache"}, diff --git a/src/Core/SettingsEnums.cpp b/src/Core/SettingsEnums.cpp index c3f0715ad68d..0caf6e8d6098 100644 --- a/src/Core/SettingsEnums.cpp +++ b/src/Core/SettingsEnums.cpp @@ -2,6 +2,8 @@ #include #include +#include + namespace DB { @@ -85,6 +87,10 @@ IMPLEMENT_SETTING_ENUM(QueryCacheNondeterministicFunctionHandling, ErrorCodes::B {"save", QueryCacheNondeterministicFunctionHandling::Save}, {"ignore", QueryCacheNondeterministicFunctionHandling::Ignore}}) +IMPLEMENT_SETTING_ENUM(QueryCacheSystemTableHandling, ErrorCodes::BAD_ARGUMENTS, + {{"throw", QueryCacheSystemTableHandling::Throw}, + {"save", QueryCacheSystemTableHandling::Save}, + {"ignore", QueryCacheSystemTableHandling::Ignore}}) IMPLEMENT_SETTING_ENUM(DateTimeInputFormat, ErrorCodes::BAD_ARGUMENTS, {{"basic", FormatSettings::DateTimeInputFormat::Basic}, diff --git a/src/Core/SettingsEnums.h 
b/src/Core/SettingsEnums.h index 0aa8216bb857..b17ff11d4282 100644 --- a/src/Core/SettingsEnums.h +++ b/src/Core/SettingsEnums.h @@ -184,6 +184,15 @@ enum class QueryCacheNondeterministicFunctionHandling DECLARE_SETTING_ENUM(QueryCacheNondeterministicFunctionHandling) +/// How the query cache handles queries against system tables, tables in databases 'system.*' and 'information_schema.*' +enum class QueryCacheSystemTableHandling +{ + Throw, + Save, + Ignore +}; + +DECLARE_SETTING_ENUM(QueryCacheSystemTableHandling) DECLARE_SETTING_ENUM_WITH_RENAME(DateTimeInputFormat, FormatSettings::DateTimeInputFormat) diff --git a/src/Core/SettingsFields.cpp b/src/Core/SettingsFields.cpp index 001d3e09dc97..caa8b3fdffd8 100644 --- a/src/Core/SettingsFields.cpp +++ b/src/Core/SettingsFields.cpp @@ -575,4 +575,40 @@ void SettingFieldCustom::readBinary(ReadBuffer & in) parseFromString(str); } +SettingFieldNonZeroUInt64::SettingFieldNonZeroUInt64(UInt64 x) : SettingFieldUInt64(x) +{ + checkValueNonZero(); +} + +SettingFieldNonZeroUInt64::SettingFieldNonZeroUInt64(const DB::Field & f) : SettingFieldUInt64(f) +{ + checkValueNonZero(); +} + +SettingFieldNonZeroUInt64 & SettingFieldNonZeroUInt64::operator=(UInt64 x) +{ + SettingFieldUInt64::operator=(x); + checkValueNonZero(); + return *this; +} + +SettingFieldNonZeroUInt64 & SettingFieldNonZeroUInt64::operator=(const DB::Field & f) +{ + SettingFieldUInt64::operator=(f); + checkValueNonZero(); + return *this; +} + +void SettingFieldNonZeroUInt64::parseFromString(const String & str) +{ + SettingFieldUInt64::parseFromString(str); + checkValueNonZero(); +} + +void SettingFieldNonZeroUInt64::checkValueNonZero() const +{ + if (value == 0) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "A setting's value has to be greater than 0"); +} + } diff --git a/src/Core/SettingsFields.h b/src/Core/SettingsFields.h index cef30bb19163..64854e46ab5e 100644 --- a/src/Core/SettingsFields.h +++ b/src/Core/SettingsFields.h @@ -1,13 +1,12 @@ #pragma once -#include -#include -#include -#include -#include -#include #include #include +#include +#include +#include +#include +#include namespace DB @@ -516,4 +515,19 @@ struct SettingFieldCustom void readBinary(ReadBuffer & in); }; +struct SettingFieldNonZeroUInt64 : public SettingFieldUInt64 +{ +public: + explicit SettingFieldNonZeroUInt64(UInt64 x = 1); + explicit SettingFieldNonZeroUInt64(const Field & f); + + SettingFieldNonZeroUInt64 & operator=(UInt64 x); + SettingFieldNonZeroUInt64 & operator=(const Field & f); + + void parseFromString(const String & str); + +private: + void checkValueNonZero() const; +}; + } diff --git a/src/Daemon/BaseDaemon.cpp b/src/Daemon/BaseDaemon.cpp index cc22db3969c4..bdac6e34444a 100644 --- a/src/Daemon/BaseDaemon.cpp +++ b/src/Daemon/BaseDaemon.cpp @@ -332,6 +332,7 @@ class SignalListener : public Poco::Runnable const std::vector & thread_frame_pointers, UInt32 thread_num, ThreadStatus * thread_ptr) const + try { ThreadStatus thread_status; @@ -519,7 +520,7 @@ class SignalListener : public Poco::Runnable } } - /// ClickHouse Keeper does not link to some part of Settings. + /// ClickHouse Keeper does not link to some parts of Settings. #ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD /// List changed settings. if (!query_id.empty()) @@ -537,12 +538,18 @@ class SignalListener : public Poco::Runnable } #endif - /// When everything is done, we will try to send these error messages to client. + /// When everything is done, we will try to send these error messages to the client. 
if (thread_ptr) thread_ptr->onFatalError(); fatal_error_printed.test_and_set(); } + catch (...) + { + /// onFault is called from the std::thread, and it should catch all exceptions; otherwise, you can get unrelated fatal errors. + PreformattedMessage message = getCurrentExceptionMessageAndPattern(true); + LOG_FATAL(getLogger(__PRETTY_FUNCTION__), message); + } }; @@ -665,7 +672,7 @@ void BaseDaemon::reloadConfiguration() */ config_path = config().getString("config-file", getDefaultConfigFileName()); ConfigProcessor config_processor(config_path, false, true); - config_processor.setConfigPath(fs::path(config_path).parent_path()); + ConfigProcessor::setConfigPath(fs::path(config_path).parent_path()); loaded_config = config_processor.loadConfig(/* allow_zk_includes = */ true); if (last_configuration != nullptr) diff --git a/src/DataTypes/DataTypeAggregateFunction.cpp b/src/DataTypes/DataTypeAggregateFunction.cpp index 14a3c6a4248e..ef7d86d2a812 100644 --- a/src/DataTypes/DataTypeAggregateFunction.cpp +++ b/src/DataTypes/DataTypeAggregateFunction.cpp @@ -15,6 +15,7 @@ #include #include +#include #include #include #include @@ -32,6 +33,11 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } +String DataTypeAggregateFunction::getFunctionName() const +{ + return function->getName(); +} + String DataTypeAggregateFunction::doGetName() const { @@ -52,6 +58,25 @@ size_t DataTypeAggregateFunction::getVersion() const return function->getDefaultVersion(); } +DataTypePtr DataTypeAggregateFunction::getReturnType() const +{ + return function->getResultType(); +} + +DataTypePtr DataTypeAggregateFunction::getReturnTypeToPredict() const +{ + return function->getReturnTypeToPredict(); +} + +bool DataTypeAggregateFunction::isVersioned() const +{ + return function->isVersioned(); +} + +void DataTypeAggregateFunction::updateVersionFromRevision(size_t revision, bool if_empty) const +{ + setVersion(function->getVersionFromRevision(revision), if_empty); +} String DataTypeAggregateFunction::getNameImpl(bool with_version) const { diff --git a/src/DataTypes/DataTypeAggregateFunction.h b/src/DataTypes/DataTypeAggregateFunction.h index 7d1bb355ccf9..8b4b3d6ee4cf 100644 --- a/src/DataTypes/DataTypeAggregateFunction.h +++ b/src/DataTypes/DataTypeAggregateFunction.h @@ -1,7 +1,7 @@ #pragma once -#include - +#include +#include #include @@ -39,7 +39,7 @@ class DataTypeAggregateFunction final : public IDataType { } - String getFunctionName() const { return function->getName(); } + String getFunctionName() const; AggregateFunctionPtr getFunction() const { return function; } String doGetName() const override; @@ -51,8 +51,8 @@ class DataTypeAggregateFunction final : public IDataType bool canBeInsideNullable() const override { return false; } - DataTypePtr getReturnType() const { return function->getResultType(); } - DataTypePtr getReturnTypeToPredict() const { return function->getReturnTypeToPredict(); } + DataTypePtr getReturnType() const; + DataTypePtr getReturnTypeToPredict() const; DataTypes getArgumentsDataTypes() const { return argument_types; } MutableColumnPtr createColumn() const override; @@ -69,7 +69,7 @@ class DataTypeAggregateFunction final : public IDataType SerializationPtr doGetDefaultSerialization() const override; bool supportsSparseSerialization() const override { return false; } - bool isVersioned() const { return function->isVersioned(); } + bool isVersioned() const; /// Version is not empty only if it was parsed from AST or implicitly cast to 0 or version according /// to server revision. 
@@ -84,10 +84,7 @@ class DataTypeAggregateFunction final : public IDataType version = version_; } - void updateVersionFromRevision(size_t revision, bool if_empty) const - { - setVersion(function->getVersionFromRevision(revision), if_empty); - } + void updateVersionFromRevision(size_t revision, bool if_empty) const; }; void setVersionToAggregateFunctions(DataTypePtr & type, bool if_empty, std::optional revision = std::nullopt); diff --git a/src/DataTypes/DataTypeCustomSimpleAggregateFunction.cpp b/src/DataTypes/DataTypeCustomSimpleAggregateFunction.cpp index ee9870eb0efd..cae9622bcb93 100644 --- a/src/DataTypes/DataTypeCustomSimpleAggregateFunction.cpp +++ b/src/DataTypes/DataTypeCustomSimpleAggregateFunction.cpp @@ -7,6 +7,7 @@ #include #include +#include #include #include #include diff --git a/src/DataTypes/DataTypeCustomSimpleAggregateFunction.h b/src/DataTypes/DataTypeCustomSimpleAggregateFunction.h index 926dfd9cc828..bdabb465fe56 100644 --- a/src/DataTypes/DataTypeCustomSimpleAggregateFunction.h +++ b/src/DataTypes/DataTypeCustomSimpleAggregateFunction.h @@ -1,13 +1,18 @@ #pragma once +#include +#include #include -#include #include namespace DB { +class IDataType; +using DataTypePtr = std::shared_ptr; +using DataTypes = std::vector; + /** The type SimpleAggregateFunction(fct, type) is meant to be used in an AggregatingMergeTree. It behaves like a standard * data type but when rows are merged, an aggregation function is applied. * diff --git a/src/DataTypes/DataTypeDate32.cpp b/src/DataTypes/DataTypeDate32.cpp index 762552bcb4cd..343e498d303c 100644 --- a/src/DataTypes/DataTypeDate32.cpp +++ b/src/DataTypes/DataTypeDate32.cpp @@ -18,7 +18,7 @@ SerializationPtr DataTypeDate32::doGetDefaultSerialization() const Field DataTypeDate32::getDefault() const { - return -static_cast(DateLUT::instance().getDayNumOffsetEpoch()); + return -static_cast(DateLUT::instance().getDayNumOffsetEpoch()); /// NOLINT(readability-static-accessed-through-instance) } void registerDataTypeDate32(DataTypeFactory & factory) diff --git a/src/DataTypes/Serializations/SerializationAggregateFunction.cpp b/src/DataTypes/Serializations/SerializationAggregateFunction.cpp index 6f09ed31e22d..640d2c419d49 100644 --- a/src/DataTypes/Serializations/SerializationAggregateFunction.cpp +++ b/src/DataTypes/Serializations/SerializationAggregateFunction.cpp @@ -1,17 +1,15 @@ -#include - -#include - +#include #include - -#include -#include -#include -#include - +#include #include #include +#include #include +#include +#include +#include +#include +#include namespace DB { diff --git a/src/DataTypes/Serializations/SerializationAggregateFunction.h b/src/DataTypes/Serializations/SerializationAggregateFunction.h index 4212298bbc14..c45fc79f7143 100644 --- a/src/DataTypes/Serializations/SerializationAggregateFunction.h +++ b/src/DataTypes/Serializations/SerializationAggregateFunction.h @@ -1,6 +1,6 @@ #pragma once -#include +#include #include diff --git a/src/DataTypes/Serializations/SerializationBool.cpp b/src/DataTypes/Serializations/SerializationBool.cpp index f745fac4d307..d6a74e5cb8f3 100644 --- a/src/DataTypes/Serializations/SerializationBool.cpp +++ b/src/DataTypes/Serializations/SerializationBool.cpp @@ -194,12 +194,12 @@ ReturnType deserializeImpl( buf.dropCheckpoint(); if (buf.hasUnreadData()) { + restore_column_if_needed(); if constexpr (throw_exception) throw Exception( ErrorCodes::CANNOT_PARSE_BOOL, "Cannot continue parsing after parsed bool value because it will result in the loss of some data. 
It may happen if " "bool_true_representation or bool_false_representation contains some delimiters of input format"); - restore_column_if_needed(); return ReturnType(false); } return ReturnType(true); diff --git a/src/DataTypes/Serializations/tests/gtest_json_parser.cpp b/src/DataTypes/Serializations/tests/gtest_json_parser.cpp index 9b0c8e44d023..1b5b02d579cf 100644 --- a/src/DataTypes/Serializations/tests/gtest_json_parser.cpp +++ b/src/DataTypes/Serializations/tests/gtest_json_parser.cpp @@ -34,7 +34,7 @@ TEST(JSONDataParser, ReadJSON) JSONDataParser parser; ReadBufferFromString buf(json_bad); String res; - parser.readJSON(res, buf); + JSONDataParser::readJSON(res, buf); ASSERT_EQ(json1, res); } @@ -44,7 +44,7 @@ TEST(JSONDataParser, ReadJSON) JSONDataParser parser; ReadBufferFromString buf(json_bad); String res; - parser.readJSON(res, buf); + JSONDataParser::readJSON(res, buf); ASSERT_EQ(json2, res); } } diff --git a/src/DataTypes/getLeastSupertype.cpp b/src/DataTypes/getLeastSupertype.cpp index dec77119eed6..e69b0411aacb 100644 --- a/src/DataTypes/getLeastSupertype.cpp +++ b/src/DataTypes/getLeastSupertype.cpp @@ -463,6 +463,9 @@ DataTypePtr getLeastSupertype(const DataTypes & types) /// nested_type will be nullptr, we should return nullptr in this case. if (!nested_type) return nullptr; + /// Common type for Nullable(Nothing) and Variant(...) is Variant(...) + if (isVariant(nested_type)) + return nested_type; return std::make_shared(nested_type); } } diff --git a/src/Databases/DatabaseAtomic.cpp b/src/Databases/DatabaseAtomic.cpp index 6f3f1151153d..c2d0fbe1c00e 100644 --- a/src/Databases/DatabaseAtomic.cpp +++ b/src/Databases/DatabaseAtomic.cpp @@ -417,9 +417,9 @@ void DatabaseAtomic::assertCanBeDetached(bool cleanup) } DatabaseTablesIteratorPtr -DatabaseAtomic::getTablesIterator(ContextPtr local_context, const IDatabase::FilterByNameFunction & filter_by_table_name) const +DatabaseAtomic::getTablesIterator(ContextPtr local_context, const IDatabase::FilterByNameFunction & filter_by_table_name, bool skip_not_loaded) const { - auto base_iter = DatabaseOrdinary::getTablesIterator(local_context, filter_by_table_name); + auto base_iter = DatabaseOrdinary::getTablesIterator(local_context, filter_by_table_name, skip_not_loaded); return std::make_unique(std::move(typeid_cast(*base_iter))); } diff --git a/src/Databases/DatabaseAtomic.h b/src/Databases/DatabaseAtomic.h index 404478f7cd1e..b59edd479ba6 100644 --- a/src/Databases/DatabaseAtomic.h +++ b/src/Databases/DatabaseAtomic.h @@ -46,7 +46,7 @@ class DatabaseAtomic : public DatabaseOrdinary void drop(ContextPtr /*context*/) override; - DatabaseTablesIteratorPtr getTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_name) const override; + DatabaseTablesIteratorPtr getTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_name, bool skip_not_loaded) const override; void beforeLoadingMetadata(ContextMutablePtr context, LoadingStrictnessLevel mode) override; diff --git a/src/Databases/DatabaseDictionary.cpp b/src/Databases/DatabaseDictionary.cpp index 76fdb4fa961f..adb9a659fcd7 100644 --- a/src/Databases/DatabaseDictionary.cpp +++ b/src/Databases/DatabaseDictionary.cpp @@ -80,7 +80,7 @@ StoragePtr DatabaseDictionary::tryGetTable(const String & table_name, ContextPtr return createStorageDictionary(getDatabaseName(), load_result, getContext()); } -DatabaseTablesIteratorPtr DatabaseDictionary::getTablesIterator(ContextPtr, const FilterByNameFunction & filter_by_table_name) const 
+DatabaseTablesIteratorPtr DatabaseDictionary::getTablesIterator(ContextPtr, const FilterByNameFunction & filter_by_table_name, bool /* skip_not_loaded */) const { return std::make_unique(listTables(filter_by_table_name), getDatabaseName()); } diff --git a/src/Databases/DatabaseDictionary.h b/src/Databases/DatabaseDictionary.h index 469801d183e6..a18ea833710e 100644 --- a/src/Databases/DatabaseDictionary.h +++ b/src/Databases/DatabaseDictionary.h @@ -34,7 +34,7 @@ class DatabaseDictionary final : public IDatabase, WithContext StoragePtr tryGetTable(const String & table_name, ContextPtr context) const override; - DatabaseTablesIteratorPtr getTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_name) const override; + DatabaseTablesIteratorPtr getTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_name, bool skip_not_loaded) const override; bool empty() const override; diff --git a/src/Databases/DatabaseFilesystem.cpp b/src/Databases/DatabaseFilesystem.cpp index 05af0acf978e..b27a816a60d4 100644 --- a/src/Databases/DatabaseFilesystem.cpp +++ b/src/Databases/DatabaseFilesystem.cpp @@ -229,7 +229,7 @@ std::vector> DatabaseFilesystem::getTablesForBacku * Returns an empty iterator because the database does not have its own tables * But only caches them for quick access */ -DatabaseTablesIteratorPtr DatabaseFilesystem::getTablesIterator(ContextPtr, const FilterByNameFunction &) const +DatabaseTablesIteratorPtr DatabaseFilesystem::getTablesIterator(ContextPtr, const FilterByNameFunction &, bool) const { return std::make_unique(Tables{}, getDatabaseName()); } diff --git a/src/Databases/DatabaseFilesystem.h b/src/Databases/DatabaseFilesystem.h index 3338aa28c21a..4b9db5e574d8 100644 --- a/src/Databases/DatabaseFilesystem.h +++ b/src/Databases/DatabaseFilesystem.h @@ -45,7 +45,7 @@ class DatabaseFilesystem : public IDatabase, protected WithContext std::vector> getTablesForBackup(const FilterByNameFunction &, const ContextPtr &) const override; - DatabaseTablesIteratorPtr getTablesIterator(ContextPtr, const FilterByNameFunction &) const override; + DatabaseTablesIteratorPtr getTablesIterator(ContextPtr, const FilterByNameFunction &, bool) const override; protected: StoragePtr getTableImpl(const String & name, ContextPtr context, bool throw_on_error) const; diff --git a/src/Databases/DatabaseHDFS.cpp b/src/Databases/DatabaseHDFS.cpp index 2688ff2443ce..1de7f80f512a 100644 --- a/src/Databases/DatabaseHDFS.cpp +++ b/src/Databases/DatabaseHDFS.cpp @@ -225,7 +225,7 @@ std::vector> DatabaseHDFS::getTablesForBackup(cons * Returns an empty iterator because the database does not have its own tables * But only caches them for quick access */ -DatabaseTablesIteratorPtr DatabaseHDFS::getTablesIterator(ContextPtr, const FilterByNameFunction &) const +DatabaseTablesIteratorPtr DatabaseHDFS::getTablesIterator(ContextPtr, const FilterByNameFunction &, bool) const { return std::make_unique(Tables{}, getDatabaseName()); } diff --git a/src/Databases/DatabaseHDFS.h b/src/Databases/DatabaseHDFS.h index b586a912e163..d19918000cf4 100644 --- a/src/Databases/DatabaseHDFS.h +++ b/src/Databases/DatabaseHDFS.h @@ -45,7 +45,7 @@ class DatabaseHDFS : public IDatabase, protected WithContext void shutdown() override; std::vector> getTablesForBackup(const FilterByNameFunction &, const ContextPtr &) const override; - DatabaseTablesIteratorPtr getTablesIterator(ContextPtr, const FilterByNameFunction &) const override; + DatabaseTablesIteratorPtr getTablesIterator(ContextPtr, const 
FilterByNameFunction &, bool) const override; protected: StoragePtr getTableImpl(const String & name, ContextPtr context) const; diff --git a/src/Databases/DatabaseLazy.cpp b/src/Databases/DatabaseLazy.cpp index 623c7fbee980..fb1b3ee626b8 100644 --- a/src/Databases/DatabaseLazy.cpp +++ b/src/Databases/DatabaseLazy.cpp @@ -152,7 +152,7 @@ StoragePtr DatabaseLazy::tryGetTable(const String & table_name) const return loadTable(table_name); } -DatabaseTablesIteratorPtr DatabaseLazy::getTablesIterator(ContextPtr, const FilterByNameFunction & filter_by_table_name) const +DatabaseTablesIteratorPtr DatabaseLazy::getTablesIterator(ContextPtr, const FilterByNameFunction & filter_by_table_name, bool /* skip_not_loaded */) const { std::lock_guard lock(mutex); Strings filtered_tables; diff --git a/src/Databases/DatabaseLazy.h b/src/Databases/DatabaseLazy.h index 2b1b119754d6..4347649117d7 100644 --- a/src/Databases/DatabaseLazy.h +++ b/src/Databases/DatabaseLazy.h @@ -62,7 +62,7 @@ class DatabaseLazy final : public DatabaseOnDisk bool empty() const override; - DatabaseTablesIteratorPtr getTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_name) const override; + DatabaseTablesIteratorPtr getTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_name, bool skip_not_loaded) const override; void attachTable(ContextPtr context, const String & table_name, const StoragePtr & table, const String & relative_table_path) override; diff --git a/src/Databases/DatabaseOnDisk.cpp b/src/Databases/DatabaseOnDisk.cpp index d8acfb5fa01f..674e9afa8acc 100644 --- a/src/Databases/DatabaseOnDisk.cpp +++ b/src/Databases/DatabaseOnDisk.cpp @@ -68,7 +68,7 @@ std::pair createTableFromAST( ast_create_query.setDatabase(database_name); if (ast_create_query.select && ast_create_query.isView()) - ApplyWithSubqueryVisitor().visit(*ast_create_query.select); + ApplyWithSubqueryVisitor::visit(*ast_create_query.select); if (ast_create_query.as_table_function) { diff --git a/src/Databases/DatabaseOrdinary.cpp b/src/Databases/DatabaseOrdinary.cpp index 95bdcfc7dcef..32f7fdc5e522 100644 --- a/src/Databases/DatabaseOrdinary.cpp +++ b/src/Databases/DatabaseOrdinary.cpp @@ -171,7 +171,7 @@ void DatabaseOrdinary::loadTablesMetadata(ContextPtr local_context, ParsedTables auto ast = parseQueryFromMetadata(log, getContext(), full_path.string(), /*throw_on_error*/ true, /*remove_empty*/ false); if (ast) { - FunctionNameNormalizer().visit(ast.get()); + FunctionNameNormalizer::visit(ast.get()); auto * create_query = ast->as(); /// NOTE No concurrent writes are possible during database loading create_query->setDatabase(TSA_SUPPRESS_WARNING_FOR_READ(database_name)); @@ -438,24 +438,40 @@ void DatabaseOrdinary::stopLoading() stop_load_table.clear(); } -DatabaseTablesIteratorPtr DatabaseOrdinary::getTablesIterator(ContextPtr local_context, const DatabaseOnDisk::FilterByNameFunction & filter_by_table_name) const +DatabaseTablesIteratorPtr DatabaseOrdinary::getTablesIterator(ContextPtr local_context, const DatabaseOnDisk::FilterByNameFunction & filter_by_table_name, bool skip_not_loaded) const { - // Wait for every table (matching the filter) to be loaded and started up before we make the snapshot. - // It is important, because otherwise table might be: - // - not attached and thus will be missed in the snapshot; - // - not started, which is not good for DDL operations. 
- LoadTaskPtrs tasks_to_wait; + if (!skip_not_loaded) { - std::lock_guard lock(mutex); - if (!filter_by_table_name) - tasks_to_wait.reserve(startup_table.size()); - for (const auto & [table_name, task] : startup_table) - if (!filter_by_table_name || filter_by_table_name(table_name)) - tasks_to_wait.emplace_back(task); + // Wait for every table (matching the filter) to be loaded and started up before we make the snapshot. + // It is important, because otherwise table might be: + // - not attached and thus will be missed in the snapshot; + // - not started, which is not good for DDL operations. + LoadTaskPtrs tasks_to_wait; + { + std::lock_guard lock(mutex); + if (!filter_by_table_name) + tasks_to_wait.reserve(startup_table.size()); + for (const auto & [table_name, task] : startup_table) + if (!filter_by_table_name || filter_by_table_name(table_name)) + tasks_to_wait.emplace_back(task); + } + waitLoad(currentPoolOr(TablesLoaderForegroundPoolId), tasks_to_wait); } - waitLoad(currentPoolOr(TablesLoaderForegroundPoolId), tasks_to_wait); + return DatabaseWithOwnTablesBase::getTablesIterator(local_context, filter_by_table_name, skip_not_loaded); } - return DatabaseWithOwnTablesBase::getTablesIterator(local_context, filter_by_table_name); +Strings DatabaseOrdinary::getAllTableNames(ContextPtr) const +{ + std::set unique_names; + { + std::lock_guard lock(mutex); + for (const auto & [table_name, _] : tables) + unique_names.emplace(table_name); + // Tables that are not yet loaded are not listed in `tables`, so we also have to add table names from the startup tasks + for (const auto & [table_name, _] : startup_table) + unique_names.emplace(table_name); + } + return {unique_names.begin(), unique_names.end()}; } void DatabaseOrdinary::alterTable(ContextPtr local_context, const StorageID & table_id, const StorageInMemoryMetadata & metadata) diff --git a/src/Databases/DatabaseOrdinary.h b/src/Databases/DatabaseOrdinary.h index 7089540337a3..fa5827903cbc 100644 --- a/src/Databases/DatabaseOrdinary.h +++ b/src/Databases/DatabaseOrdinary.h @@ -56,7 +56,8 @@ class DatabaseOrdinary : public DatabaseOnDisk LoadTaskPtr startupDatabaseAsync(AsyncLoader & async_loader, LoadJobSet startup_after, LoadingStrictnessLevel mode) override; - DatabaseTablesIteratorPtr getTablesIterator(ContextPtr local_context, const DatabaseOnDisk::FilterByNameFunction & filter_by_table_name) const override; + DatabaseTablesIteratorPtr getTablesIterator(ContextPtr local_context, const DatabaseOnDisk::FilterByNameFunction & filter_by_table_name, bool skip_not_loaded) const override; + Strings getAllTableNames(ContextPtr context) const override; void alterTable( ContextPtr context, diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp index 59b3e52e139e..7b8f7468e81b 100644 --- a/src/Databases/DatabaseReplicated.cpp +++ b/src/Databases/DatabaseReplicated.cpp @@ -873,7 +873,7 @@ void DatabaseReplicated::recoverLostReplica(const ZooKeeperPtr & current_zookeep std::vector replicated_tables_to_rename; size_t total_tables = 0; std::vector replicated_ids; - for (auto existing_tables_it = getTablesIterator(getContext(), {}); existing_tables_it->isValid(); + for (auto existing_tables_it = getTablesIterator(getContext(), {}, /*skip_not_loaded=*/false); existing_tables_it->isValid(); + existing_tables_it->next(), ++total_tables) { String name = existing_tables_it->name(); @@ -1324,7 +1324,6 @@ void DatabaseReplicated::drop(ContextPtr context_) void DatabaseReplicated::stopReplication() { - stopLoading(); if (ddl_worker)
ddl_worker->shutdown(); } diff --git a/src/Databases/DatabaseS3.cpp b/src/Databases/DatabaseS3.cpp index 159a5242dbe0..1589cc1c75db 100644 --- a/src/Databases/DatabaseS3.cpp +++ b/src/Databases/DatabaseS3.cpp @@ -303,7 +303,7 @@ std::vector> DatabaseS3::getTablesForBackup(const * Returns an empty iterator because the database does not have its own tables * But only caches them for quick access */ -DatabaseTablesIteratorPtr DatabaseS3::getTablesIterator(ContextPtr, const FilterByNameFunction &) const +DatabaseTablesIteratorPtr DatabaseS3::getTablesIterator(ContextPtr, const FilterByNameFunction &, bool) const { return std::make_unique(Tables{}, getDatabaseName()); } diff --git a/src/Databases/DatabaseS3.h b/src/Databases/DatabaseS3.h index 5e7375dbd58e..7e38da0fe638 100644 --- a/src/Databases/DatabaseS3.h +++ b/src/Databases/DatabaseS3.h @@ -56,7 +56,7 @@ class DatabaseS3 : public IDatabase, protected WithContext void shutdown() override; std::vector> getTablesForBackup(const FilterByNameFunction &, const ContextPtr &) const override; - DatabaseTablesIteratorPtr getTablesIterator(ContextPtr, const FilterByNameFunction &) const override; + DatabaseTablesIteratorPtr getTablesIterator(ContextPtr, const FilterByNameFunction &, bool) const override; static Configuration parseArguments(ASTs engine_args, ContextPtr context); diff --git a/src/Databases/DatabasesCommon.cpp b/src/Databases/DatabasesCommon.cpp index 4dffb16e4866..fc75f8e44b99 100644 --- a/src/Databases/DatabasesCommon.cpp +++ b/src/Databases/DatabasesCommon.cpp @@ -226,7 +226,7 @@ StoragePtr DatabaseWithOwnTablesBase::tryGetTable(const String & table_name, Con return tryGetTableNoWait(table_name); } -DatabaseTablesIteratorPtr DatabaseWithOwnTablesBase::getTablesIterator(ContextPtr, const FilterByNameFunction & filter_by_table_name) const +DatabaseTablesIteratorPtr DatabaseWithOwnTablesBase::getTablesIterator(ContextPtr, const FilterByNameFunction & filter_by_table_name, bool /* skip_not_loaded */) const { std::lock_guard lock(mutex); if (!filter_by_table_name) @@ -363,7 +363,7 @@ std::vector> DatabaseWithOwnTablesBase::getTablesF { std::vector> res; - for (auto it = getTablesIterator(local_context, filter); it->isValid(); it->next()) + for (auto it = getTablesIterator(local_context, filter, /*skip_not_loaded=*/false); it->isValid(); it->next()) { auto storage = it->table(); if (!storage) diff --git a/src/Databases/DatabasesCommon.h b/src/Databases/DatabasesCommon.h index 81a3c55a435a..2eecf8a564ff 100644 --- a/src/Databases/DatabasesCommon.h +++ b/src/Databases/DatabasesCommon.h @@ -35,7 +35,7 @@ class DatabaseWithOwnTablesBase : public IDatabase, protected WithContext StoragePtr detachTable(ContextPtr context, const String & table_name) override; - DatabaseTablesIteratorPtr getTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_name) const override; + DatabaseTablesIteratorPtr getTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_name, bool skip_not_loaded) const override; std::vector> getTablesForBackup(const FilterByNameFunction & filter, const ContextPtr & local_context) const override; void createTableRestoredFromBackup(const ASTPtr & create_table_query, ContextMutablePtr local_context, std::shared_ptr restore_coordination, UInt64 timeout_ms) override; diff --git a/src/Databases/DatabasesOverlay.cpp b/src/Databases/DatabasesOverlay.cpp index c8705254e735..2772db5e0662 100644 --- a/src/Databases/DatabasesOverlay.cpp +++ b/src/Databases/DatabasesOverlay.cpp @@ -254,7 +254,7 
@@ void DatabasesOverlay::shutdown() db->shutdown(); } -DatabaseTablesIteratorPtr DatabasesOverlay::getTablesIterator(ContextPtr context_, const FilterByNameFunction & filter_by_table_name) const +DatabaseTablesIteratorPtr DatabasesOverlay::getTablesIterator(ContextPtr context_, const FilterByNameFunction & filter_by_table_name, bool /*skip_not_loaded*/) const { Tables tables; for (const auto & db : databases) diff --git a/src/Databases/DatabasesOverlay.h b/src/Databases/DatabasesOverlay.h index 859063a4ac44..b0c7e7e40326 100644 --- a/src/Databases/DatabasesOverlay.h +++ b/src/Databases/DatabasesOverlay.h @@ -51,7 +51,7 @@ class DatabasesOverlay : public IDatabase, protected WithContext void createTableRestoredFromBackup(const ASTPtr & create_table_query, ContextMutablePtr local_context, std::shared_ptr restore_coordination, UInt64 timeout_ms) override; - DatabaseTablesIteratorPtr getTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_name) const override; + DatabaseTablesIteratorPtr getTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_name, bool skip_not_loaded) const override; bool empty() const override; diff --git a/src/Databases/IDatabase.h b/src/Databases/IDatabase.h index 75662bfebe39..b00f2fe4baf0 100644 --- a/src/Databases/IDatabase.h +++ b/src/Databases/IDatabase.h @@ -229,7 +229,18 @@ class IDatabase : public std::enable_shared_from_this /// Get an iterator that allows you to pass through all the tables. /// It is possible to have "hidden" tables that are not visible when passing through, but are visible if you get them by name using the functions above. - virtual DatabaseTablesIteratorPtr getTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_name = {}) const = 0; /// NOLINT + /// Waits for all tables to be loaded and started up. If `skip_not_loaded` is true, tables that are not yet loaded or not yet started up (at the moment of iterator creation) are excluded. + virtual DatabaseTablesIteratorPtr getTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_name = {}, bool skip_not_loaded = false) const = 0; /// NOLINT + + /// Returns a list of table names. + virtual Strings getAllTableNames(ContextPtr context) const + { + // NOTE: This default implementation waits for all tables to be loaded and started up. It should be reimplemented for databases that support async loading. + Strings result; + for (auto table_it = getTablesIterator(context); table_it->isValid(); table_it->next()) + result.emplace_back(table_it->name()); + return result; + } /// Is the database empty.
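A hedged sketch of how a caller might combine the new skip_not_loaded flag with getAllTableNames(); the helper below is illustrative only (its name is hypothetical), but it relies solely on the IDatabase declarations shown above:

#include <Databases/IDatabase.h>

namespace DB
{

/// Illustrative helper: collect the names of tables that are already started up,
/// without blocking on tables that are still being loaded asynchronously.
Strings collectStartedTableNames(const DatabasePtr & database, ContextPtr context)
{
    Strings names;
    for (auto it = database->getTablesIterator(context, /* filter_by_table_name = */ {}, /* skip_not_loaded = */ true); it->isValid(); it->next())
        names.push_back(it->name());
    /// In contrast, database->getAllTableNames(context) also lists tables that are not loaded yet.
    return names;
}

}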
virtual bool empty() const = 0; diff --git a/src/Databases/MySQL/DatabaseMaterializedMySQL.cpp b/src/Databases/MySQL/DatabaseMaterializedMySQL.cpp index 0f0d73ae16f8..d8360a24bcb5 100644 --- a/src/Databases/MySQL/DatabaseMaterializedMySQL.cpp +++ b/src/Databases/MySQL/DatabaseMaterializedMySQL.cpp @@ -185,9 +185,9 @@ StoragePtr DatabaseMaterializedMySQL::tryGetTable(const String & name, ContextPt } DatabaseTablesIteratorPtr -DatabaseMaterializedMySQL::getTablesIterator(ContextPtr context_, const DatabaseOnDisk::FilterByNameFunction & filter_by_table_name) const +DatabaseMaterializedMySQL::getTablesIterator(ContextPtr context_, const DatabaseOnDisk::FilterByNameFunction & filter_by_table_name, bool skip_not_loaded) const { - DatabaseTablesIteratorPtr iterator = DatabaseAtomic::getTablesIterator(context_, filter_by_table_name); + DatabaseTablesIteratorPtr iterator = DatabaseAtomic::getTablesIterator(context_, filter_by_table_name, skip_not_loaded); if (context_->isInternalQuery()) return iterator; return std::make_unique(std::move(iterator), this); @@ -201,7 +201,6 @@ void DatabaseMaterializedMySQL::checkIsInternalQuery(ContextPtr context_, const void DatabaseMaterializedMySQL::stopReplication() { - stopLoading(); materialize_thread.stopSynchronization(); started_up = false; } diff --git a/src/Databases/MySQL/DatabaseMaterializedMySQL.h b/src/Databases/MySQL/DatabaseMaterializedMySQL.h index d2976adcadb8..a6418e6fc5cb 100644 --- a/src/Databases/MySQL/DatabaseMaterializedMySQL.h +++ b/src/Databases/MySQL/DatabaseMaterializedMySQL.h @@ -73,7 +73,7 @@ class DatabaseMaterializedMySQL : public DatabaseAtomic StoragePtr tryGetTable(const String & name, ContextPtr context_) const override; - DatabaseTablesIteratorPtr getTablesIterator(ContextPtr context_, const DatabaseOnDisk::FilterByNameFunction & filter_by_table_name) const override; + DatabaseTablesIteratorPtr getTablesIterator(ContextPtr context_, const DatabaseOnDisk::FilterByNameFunction & filter_by_table_name, bool skip_not_loaded) const override; void checkIsInternalQuery(ContextPtr context_, const char * method) const; diff --git a/src/Databases/MySQL/DatabaseMySQL.cpp b/src/Databases/MySQL/DatabaseMySQL.cpp index d9b0f7f9ac7d..b2e199735db6 100644 --- a/src/Databases/MySQL/DatabaseMySQL.cpp +++ b/src/Databases/MySQL/DatabaseMySQL.cpp @@ -105,7 +105,7 @@ bool DatabaseMySQL::empty() const return true; } -DatabaseTablesIteratorPtr DatabaseMySQL::getTablesIterator(ContextPtr local_context, const FilterByNameFunction & filter_by_table_name) const +DatabaseTablesIteratorPtr DatabaseMySQL::getTablesIterator(ContextPtr local_context, const FilterByNameFunction & filter_by_table_name, bool /* skip_not_loaded */) const { Tables tables; std::lock_guard lock(mutex); diff --git a/src/Databases/MySQL/DatabaseMySQL.h b/src/Databases/MySQL/DatabaseMySQL.h index e5b1f434d2f2..084a8339be3f 100644 --- a/src/Databases/MySQL/DatabaseMySQL.h +++ b/src/Databases/MySQL/DatabaseMySQL.h @@ -58,7 +58,7 @@ class DatabaseMySQL final : public IDatabase, WithContext bool empty() const override; - DatabaseTablesIteratorPtr getTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_name) const override; + DatabaseTablesIteratorPtr getTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_name, bool skip_not_loaded) const override; ASTPtr getCreateDatabaseQuery() const override; diff --git a/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp b/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp index
1c0d5fe3de1b..7ce03c74c581 100644 --- a/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp +++ b/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp @@ -456,8 +456,6 @@ void DatabaseMaterializedPostgreSQL::shutdown() void DatabaseMaterializedPostgreSQL::stopReplication() { - stopLoading(); - std::lock_guard lock(handler_mutex); if (replication_handler) replication_handler->shutdown(); @@ -485,10 +483,10 @@ void DatabaseMaterializedPostgreSQL::drop(ContextPtr local_context) DatabaseTablesIteratorPtr DatabaseMaterializedPostgreSQL::getTablesIterator( - ContextPtr local_context, const DatabaseOnDisk::FilterByNameFunction & filter_by_table_name) const + ContextPtr local_context, const DatabaseOnDisk::FilterByNameFunction & filter_by_table_name, bool skip_not_loaded) const { /// Modify context into nested_context and pass query to Atomic database. - return DatabaseAtomic::getTablesIterator(StorageMaterializedPostgreSQL::makeNestedTableContext(local_context), filter_by_table_name); + return DatabaseAtomic::getTablesIterator(StorageMaterializedPostgreSQL::makeNestedTableContext(local_context), filter_by_table_name, skip_not_loaded); } void registerDatabaseMaterializedPostgreSQL(DatabaseFactory & factory) diff --git a/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.h b/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.h index dfa53fa61d7b..cf1333d03c85 100644 --- a/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.h +++ b/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.h @@ -45,7 +45,7 @@ class DatabaseMaterializedPostgreSQL : public DatabaseAtomic void stopLoading() override; DatabaseTablesIteratorPtr - getTablesIterator(ContextPtr context, const DatabaseOnDisk::FilterByNameFunction & filter_by_table_name) const override; + getTablesIterator(ContextPtr context, const DatabaseOnDisk::FilterByNameFunction & filter_by_table_name, bool skip_not_loaded) const override; StoragePtr tryGetTable(const String & name, ContextPtr context) const override; diff --git a/src/Databases/PostgreSQL/DatabasePostgreSQL.cpp b/src/Databases/PostgreSQL/DatabasePostgreSQL.cpp index b07b203f7862..3f62b9719d24 100644 --- a/src/Databases/PostgreSQL/DatabasePostgreSQL.cpp +++ b/src/Databases/PostgreSQL/DatabasePostgreSQL.cpp @@ -97,7 +97,7 @@ bool DatabasePostgreSQL::empty() const } -DatabaseTablesIteratorPtr DatabasePostgreSQL::getTablesIterator(ContextPtr local_context, const FilterByNameFunction & /* filter_by_table_name */) const +DatabaseTablesIteratorPtr DatabasePostgreSQL::getTablesIterator(ContextPtr local_context, const FilterByNameFunction & /* filter_by_table_name */, bool /* skip_not_loaded */) const { std::lock_guard lock(mutex); Tables tables; diff --git a/src/Databases/PostgreSQL/DatabasePostgreSQL.h b/src/Databases/PostgreSQL/DatabasePostgreSQL.h index 3ba7333c98ec..137b9d5cef9e 100644 --- a/src/Databases/PostgreSQL/DatabasePostgreSQL.h +++ b/src/Databases/PostgreSQL/DatabasePostgreSQL.h @@ -46,7 +46,7 @@ class DatabasePostgreSQL final : public IDatabase, WithContext void loadStoredObjects(ContextMutablePtr, LoadingStrictnessLevel /*mode*/) override; - DatabaseTablesIteratorPtr getTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_name) const override; + DatabaseTablesIteratorPtr getTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_name, bool skip_not_loaded) const override; bool isTableExist(const String & name, ContextPtr context) const override; StoragePtr tryGetTable(const String & name, ContextPtr 
context) const override; diff --git a/src/Databases/SQLite/DatabaseSQLite.cpp b/src/Databases/SQLite/DatabaseSQLite.cpp index b7a82fd9d0ff..e758ea35de56 100644 --- a/src/Databases/SQLite/DatabaseSQLite.cpp +++ b/src/Databases/SQLite/DatabaseSQLite.cpp @@ -46,7 +46,7 @@ bool DatabaseSQLite::empty() const } -DatabaseTablesIteratorPtr DatabaseSQLite::getTablesIterator(ContextPtr local_context, const IDatabase::FilterByNameFunction &) const +DatabaseTablesIteratorPtr DatabaseSQLite::getTablesIterator(ContextPtr local_context, const IDatabase::FilterByNameFunction &, bool) const { std::lock_guard lock(mutex); diff --git a/src/Databases/SQLite/DatabaseSQLite.h b/src/Databases/SQLite/DatabaseSQLite.h index e5e93bbc8ce3..6bd84a4d297b 100644 --- a/src/Databases/SQLite/DatabaseSQLite.h +++ b/src/Databases/SQLite/DatabaseSQLite.h @@ -32,7 +32,7 @@ class DatabaseSQLite final : public IDatabase, WithContext StoragePtr tryGetTable(const String & name, ContextPtr context) const override; - DatabaseTablesIteratorPtr getTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_name) const override; + DatabaseTablesIteratorPtr getTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_name, bool skip_not_loaded) const override; bool empty() const override; diff --git a/src/Databases/TablesDependencyGraph.cpp b/src/Databases/TablesDependencyGraph.cpp index 4b05f19fe911..d227a3ac76b0 100644 --- a/src/Databases/TablesDependencyGraph.cpp +++ b/src/Databases/TablesDependencyGraph.cpp @@ -448,7 +448,7 @@ std::vector TablesDependencyGraph::getTables() const void TablesDependencyGraph::mergeWith(const TablesDependencyGraph & other) { for (const auto & other_node : other.nodes) - addDependencies(other_node->storage_id, other.getDependencies(*other_node)); + addDependencies(other_node->storage_id, TablesDependencyGraph::getDependencies(*other_node)); } diff --git a/src/Dictionaries/FlatDictionary.cpp b/src/Dictionaries/FlatDictionary.cpp index fc58ff525bda..7509af31face 100644 --- a/src/Dictionaries/FlatDictionary.cpp +++ b/src/Dictionaries/FlatDictionary.cpp @@ -413,7 +413,7 @@ void FlatDictionary::blockToAttributes(const Block & block) const auto keys_column = block.safeGetByPosition(0).column; DictionaryKeysArenaHolder arena_holder; - DictionaryKeysExtractor keys_extractor({ keys_column }, arena_holder.getComplexKeyArena()); + DictionaryKeysExtractor keys_extractor({ keys_column }, arena_holder.getComplexKeyArena()); /// NOLINT(readability-static-accessed-through-instance) size_t keys_size = keys_extractor.getKeysSize(); static constexpr size_t key_offset = 1; diff --git a/src/Dictionaries/PolygonDictionaryUtils.h b/src/Dictionaries/PolygonDictionaryUtils.h index 0acf0d23e5ee..0fd1fead456b 100644 --- a/src/Dictionaries/PolygonDictionaryUtils.h +++ b/src/Dictionaries/PolygonDictionaryUtils.h @@ -214,7 +214,7 @@ class GridRoot : public ICell static constexpr Coord kEps = 1e-4f; private: - std::unique_ptr> root = nullptr; + std::unique_ptr> root; Coord min_x = 0, min_y = 0; Coord max_x = 0, max_y = 0; const size_t k_min_intersections; diff --git a/src/Disks/DiskLocal.cpp b/src/Disks/DiskLocal.cpp index 1a8d46668e07..33f7ca1ec19f 100644 --- a/src/Disks/DiskLocal.cpp +++ b/src/Disks/DiskLocal.cpp @@ -581,7 +581,7 @@ try auto disk_ptr = std::static_pointer_cast(shared_from_this()); auto tmp_file = std::make_unique(disk_ptr); auto buf = std::make_unique(std::move(tmp_file)); - buf->write(data.data, data.PAGE_SIZE_IN_BYTES); + buf->write(data.data, 
DiskWriteCheckData::PAGE_SIZE_IN_BYTES); buf->finalize(); buf->sync(); } diff --git a/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp b/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp index 1e108b481ee8..1fe369832acb 100644 --- a/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp +++ b/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp @@ -346,7 +346,7 @@ CachedOnDiskReadBufferFromFile::getReadBufferForFileSegment(FileSegment & file_s } auto downloader_id = file_segment.getOrSetDownloader(); - if (downloader_id == file_segment.getCallerId()) + if (downloader_id == FileSegment::getCallerId()) { if (canStartFromCache(file_offset_of_buffer_end, file_segment)) { diff --git a/src/Disks/IO/ReadBufferFromAzureBlobStorage.cpp b/src/Disks/IO/ReadBufferFromAzureBlobStorage.cpp index 5947b742339e..e4d74b640506 100644 --- a/src/Disks/IO/ReadBufferFromAzureBlobStorage.cpp +++ b/src/Disks/IO/ReadBufferFromAzureBlobStorage.cpp @@ -3,6 +3,7 @@ #if USE_AZURE_BLOB_STORAGE #include +#include #include #include #include @@ -101,18 +102,6 @@ bool ReadBufferFromAzureBlobStorage::nextImpl() size_t sleep_time_with_backoff_milliseconds = 100; - auto handle_exception = [&, this](const auto & e, size_t i) - { - LOG_DEBUG(log, "Exception caught during Azure Read for file {} at attempt {}/{}: {}", path, i + 1, max_single_read_retries, e.Message); - if (i + 1 == max_single_read_retries) - throw; - - sleepForMilliseconds(sleep_time_with_backoff_milliseconds); - sleep_time_with_backoff_milliseconds *= 2; - initialized = false; - initialize(); - }; - for (size_t i = 0; i < max_single_read_retries; ++i) { try @@ -124,7 +113,14 @@ bool ReadBufferFromAzureBlobStorage::nextImpl() } catch (const Azure::Core::RequestFailedException & e) { - handle_exception(e, i); + LOG_DEBUG(log, "Exception caught during Azure Read for file {} at attempt {}/{}: {}", path, i + 1, max_single_read_retries, e.Message); + if (i + 1 == max_single_read_retries || !isRetryableAzureException(e)) + throw; + + sleepForMilliseconds(sleep_time_with_backoff_milliseconds); + sleep_time_with_backoff_milliseconds *= 2; + initialized = false; + initialize(); } } @@ -213,16 +209,6 @@ void ReadBufferFromAzureBlobStorage::initialize() size_t sleep_time_with_backoff_milliseconds = 100; - auto handle_exception = [&, this](const auto & e, size_t i) - { - LOG_DEBUG(log, "Exception caught during Azure Download for file {} at offset {} at attempt {}/{}: {}", path, offset, i + 1, max_single_download_retries, e.Message); - if (i + 1 == max_single_download_retries) - throw; - - sleepForMilliseconds(sleep_time_with_backoff_milliseconds); - sleep_time_with_backoff_milliseconds *= 2; - }; - for (size_t i = 0; i < max_single_download_retries; ++i) { try @@ -233,7 +219,12 @@ void ReadBufferFromAzureBlobStorage::initialize() } catch (const Azure::Core::RequestFailedException & e) { - handle_exception(e,i); + LOG_DEBUG(log, "Exception caught during Azure Download for file {} at offset {} at attempt {}/{}: {}", path, offset, i + 1, max_single_download_retries, e.Message); + if (i + 1 == max_single_download_retries || !isRetryableAzureException(e)) + throw; + + sleepForMilliseconds(sleep_time_with_backoff_milliseconds); + sleep_time_with_backoff_milliseconds *= 2; } } @@ -283,7 +274,7 @@ size_t ReadBufferFromAzureBlobStorage::readBigAt(char * to, size_t n, size_t ran catch (const Azure::Core::RequestFailedException & e) { LOG_DEBUG(log, "Exception caught during Azure Download for file {} at offset {} at attempt {}/{}: {}", path, offset, i + 1, max_single_download_retries, e.Message); - if 
(i + 1 == max_single_download_retries) + if (i + 1 == max_single_download_retries || !isRetryableAzureException(e)) throw; sleepForMilliseconds(sleep_time_with_backoff_milliseconds); diff --git a/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp b/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp index 05b93dd1fa34..37a189e4d19b 100644 --- a/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp +++ b/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp @@ -3,6 +3,7 @@ #if USE_AZURE_BLOB_STORAGE #include +#include #include #include #include @@ -65,22 +66,24 @@ WriteBufferFromAzureBlobStorage::WriteBufferFromAzureBlobStorage( WriteBufferFromAzureBlobStorage::~WriteBufferFromAzureBlobStorage() { - finalize(); -} + LOG_TRACE(limitedLog, "Close WriteBufferFromAzureBlobStorage. {}.", blob_path); -void WriteBufferFromAzureBlobStorage::execWithRetry(std::function func, size_t num_tries, size_t cost) -{ - auto handle_exception = [&, this](const auto & e, size_t i) + /// The destructor could be called with finalized=false in case of exceptions + if (!finalized) { - if (cost) - write_settings.resource_link.accumulate(cost); // Accumulate resource for later use, because we have failed to consume it - - if (i == num_tries - 1) - throw; + LOG_INFO( + log, + "WriteBufferFromAzureBlobStorage is not finalized in destructor. " + "The file might not be written to AzureBlobStorage. " + "{}.", + blob_path); + } - LOG_DEBUG(log, "Write at attempt {} for blob `{}` failed: {} {}", i + 1, blob_path, e.what(), e.Message); - }; + task_tracker->safeWaitAll(); +} +void WriteBufferFromAzureBlobStorage::execWithRetry(std::function func, size_t num_tries, size_t cost) +{ for (size_t i = 0; i < num_tries; ++i) { try @@ -91,7 +94,13 @@ void WriteBufferFromAzureBlobStorage::execWithRetry(std::function func, } catch (const Azure::Core::RequestFailedException & e) { - handle_exception(e, i); + if (cost) + write_settings.resource_link.accumulate(cost); // Accumulate resource for later use, because we have failed to consume it + + if (i == num_tries - 1 || !isRetryableAzureException(e)) + throw; + + LOG_DEBUG(log, "Write at attempt {} for blob `{}` failed: {} {}", i + 1, blob_path, e.what(), e.Message); } catch (...)
{ @@ -102,9 +111,13 @@ void WriteBufferFromAzureBlobStorage::execWithRetry(std::function func, } } -void WriteBufferFromAzureBlobStorage::finalizeImpl() +void WriteBufferFromAzureBlobStorage::preFinalize() { - auto block_blob_client = blob_container_client->GetBlockBlobClient(blob_path); + if (is_prefinalized) + return; + + // This function should not be run again + is_prefinalized = true; /// If there is only one block and size is less than or equal to max_single_part_upload_size /// then we use single part upload instead of multi part upload @@ -113,6 +126,7 @@ void WriteBufferFromAzureBlobStorage::finalizeImpl() size_t data_size = size_t(position() - memory.data()); if (data_size <= max_single_part_upload_size) { + auto block_blob_client = blob_container_client->GetBlockBlobClient(blob_path); Azure::Core::IO::MemoryBodyStream memory_stream(reinterpret_cast(memory.data()), data_size); execWithRetry([&](){ block_blob_client.Upload(memory_stream); }, max_unexpected_write_error_retries, data_size); LOG_TRACE(log, "Committed single block for blob `{}`", blob_path); @@ -120,14 +134,23 @@ void WriteBufferFromAzureBlobStorage::finalizeImpl() } } + writePart(); +} - execWithRetry([this](){ next(); }, max_unexpected_write_error_retries); - - task_tracker->waitAll(); +void WriteBufferFromAzureBlobStorage::finalizeImpl() +{ + LOG_TRACE(log, "finalizeImpl WriteBufferFromAzureBlobStorage {}", blob_path); - execWithRetry([&](){ block_blob_client.CommitBlockList(block_ids); }, max_unexpected_write_error_retries); + if (!is_prefinalized) + preFinalize(); - LOG_TRACE(log, "Committed {} blocks for blob `{}`", block_ids.size(), blob_path); + if (!block_ids.empty()) + { + task_tracker->waitAll(); + auto block_blob_client = blob_container_client->GetBlockBlobClient(blob_path); + execWithRetry([&](){ block_blob_client.CommitBlockList(block_ids); }, max_unexpected_write_error_retries); + LOG_TRACE(log, "Committed {} blocks for blob `{}`", block_ids.size(), blob_path); + } } void WriteBufferFromAzureBlobStorage::nextImpl() diff --git a/src/Disks/IO/WriteBufferFromAzureBlobStorage.h b/src/Disks/IO/WriteBufferFromAzureBlobStorage.h index 6e10c07b255b..7d4081ad792b 100644 --- a/src/Disks/IO/WriteBufferFromAzureBlobStorage.h +++ b/src/Disks/IO/WriteBufferFromAzureBlobStorage.h @@ -41,7 +41,7 @@ class WriteBufferFromAzureBlobStorage : public WriteBufferFromFileBase ~WriteBufferFromAzureBlobStorage() override; void nextImpl() override; - + void preFinalize() override; std::string getFileName() const override { return blob_path; } void sync() override { next(); } @@ -65,6 +65,9 @@ class WriteBufferFromAzureBlobStorage : public WriteBufferFromFileBase const std::string blob_path; const WriteSettings write_settings; + /// Track that prefinalize() is called only once + bool is_prefinalized = false; + AzureClientPtr blob_container_client; std::vector block_ids; diff --git a/src/Disks/ObjectStorages/S3/diskSettings.cpp b/src/Disks/ObjectStorages/S3/diskSettings.cpp index df1ccbb32d92..c3114eb0b6ff 100644 --- a/src/Disks/ObjectStorages/S3/diskSettings.cpp +++ b/src/Disks/ObjectStorages/S3/diskSettings.cpp @@ -76,6 +76,9 @@ std::unique_ptr getClient( client_configuration.connectTimeoutMs = config.getUInt(config_prefix + ".connect_timeout_ms", S3::DEFAULT_CONNECT_TIMEOUT_MS); client_configuration.requestTimeoutMs = config.getUInt(config_prefix + ".request_timeout_ms", S3::DEFAULT_REQUEST_TIMEOUT_MS); client_configuration.maxConnections = config.getUInt(config_prefix + ".max_connections", S3::DEFAULT_MAX_CONNECTIONS); + 
client_configuration.http_keep_alive_timeout = config.getUInt(config_prefix + ".http_keep_alive_timeout", S3::DEFAULT_KEEP_ALIVE_TIMEOUT); + client_configuration.http_keep_alive_max_requests = config.getUInt(config_prefix + ".http_keep_alive_max_requests", S3::DEFAULT_KEEP_ALIVE_MAX_REQUESTS); + client_configuration.endpointOverride = uri.endpoint; client_configuration.s3_use_adaptive_timeouts = config.getBool( config_prefix + ".use_adaptive_timeouts", client_configuration.s3_use_adaptive_timeouts); diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 8cbb1b9e5639..bd41dc12fa7b 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -96,6 +96,7 @@ FormatSettings getFormatSettings(const ContextPtr & context, const Settings & se format_settings.hive_text.fields_delimiter = settings.input_format_hive_text_fields_delimiter; format_settings.hive_text.collection_items_delimiter = settings.input_format_hive_text_collection_items_delimiter; format_settings.hive_text.map_keys_delimiter = settings.input_format_hive_text_map_keys_delimiter; + format_settings.hive_text.allow_variable_number_of_columns = settings.input_format_hive_text_allow_variable_number_of_columns; format_settings.custom.escaping_rule = settings.format_custom_escaping_rule; format_settings.custom.field_delimiter = settings.format_custom_field_delimiter; format_settings.custom.result_after_delimiter = settings.format_custom_result_after_delimiter; diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index 5b7995e0da27..a239941469ff 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -176,6 +176,7 @@ struct FormatSettings char fields_delimiter = '\x01'; char collection_items_delimiter = '\x02'; char map_keys_delimiter = '\x03'; + bool allow_variable_number_of_columns = true; Names input_field_names; } hive_text{}; diff --git a/src/Formats/ProtobufSerializer.cpp b/src/Formats/ProtobufSerializer.cpp index c0d0713e2542..f2f1d985cc9c 100644 --- a/src/Formats/ProtobufSerializer.cpp +++ b/src/Formats/ProtobufSerializer.cpp @@ -1,51 +1,54 @@ #include #if USE_PROTOBUF -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include + +# include +# include +# include +# include +# include +# include +# include +# include + namespace DB { diff --git a/src/Functions/CastOverloadResolver.cpp b/src/Functions/CastOverloadResolver.cpp index 5ca4b0bc5798..0f54ff52ba2c 100644 --- a/src/Functions/CastOverloadResolver.cpp +++ b/src/Functions/CastOverloadResolver.cpp @@ -100,7 +100,11 @@ class CastOverloadResolverImpl : public IFunctionOverloadResolver validateDataType(type, data_type_validation_settings); if (cast_type 
== CastType::accurateOrNull) - return makeNullable(type); + { + /// Variant handles NULLs by itself during conversions. + if (!isVariant(type)) + return makeNullable(type); + } if (internal) return type; diff --git a/src/Functions/FunctionBinaryArithmetic.h b/src/Functions/FunctionBinaryArithmetic.h index 79e5ee442c20..89ff63995b18 100644 --- a/src/Functions/FunctionBinaryArithmetic.h +++ b/src/Functions/FunctionBinaryArithmetic.h @@ -5,54 +5,55 @@ // sanitizer/asan_interface.h #include #include -#include +#include #include +#include #include #include #include #include #include #include +#include +#include +#include #include #include +#include #include #include #include #include #include +#include #include -#include +#include #include -#include +#include #include #include +#include #include #include +#include #include #include #include #include #include #include +#include #include #include +#include #include +#include +#include +#include #include #include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include #if USE_EMBEDDED_COMPILER # include diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp index 0f624a2fa2e5..5e072d406adf 100644 --- a/src/Functions/FunctionsConversion.cpp +++ b/src/Functions/FunctionsConversion.cpp @@ -1,5 +1,6 @@ #include +#include #include #include #include @@ -90,7 +91,6 @@ namespace ErrorCodes extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int NOT_IMPLEMENTED; extern const int CANNOT_INSERT_NULL_IN_ORDINARY_COLUMN; - extern const int CANNOT_PARSE_BOOL; extern const int VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE; } @@ -279,7 +279,7 @@ struct ToDate32Transform32Or64Signed static NO_SANITIZE_UNDEFINED Int32 execute(const FromType & from, const DateLUTImpl & time_zone) { - static const Int32 daynum_min_offset = -static_cast(time_zone.getDayNumOffsetEpoch()); + static const Int32 daynum_min_offset = -static_cast(DateLUTImpl::getDayNumOffsetEpoch()); if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Throw) { @@ -1092,7 +1092,7 @@ struct ConvertThroughParsing { if constexpr (std::is_same_v) { - vec_to[i] = -static_cast(DateLUT::instance().getDayNumOffsetEpoch()); + vec_to[i] = -static_cast(DateLUT::instance().getDayNumOffsetEpoch()); /// NOLINT(readability-static-accessed-through-instance) } else { @@ -1816,6 +1816,7 @@ struct ConvertImpl /// Generic conversion of any type from String. Used for complex types: Array and Tuple or types with custom serialization. 
+template struct ConvertImplGenericFromString { static ColumnPtr execute(ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable * column_nullable, size_t input_rows_count) @@ -1855,29 +1856,34 @@ struct ConvertImplGenericFromString { serialization_from.deserializeWholeText(column_to, read_buffer, format_settings); } - catch (const Exception & e) + catch (const Exception &) { - auto * nullable_column = typeid_cast(&column_to); - if (e.code() == ErrorCodes::CANNOT_PARSE_BOOL && nullable_column) - { - auto & col_nullmap = nullable_column->getNullMapData(); - if (col_nullmap.size() != nullable_column->size()) - col_nullmap.resize_fill(nullable_column->size()); - if (nullable_column->size() == (i + 1)) - nullable_column->popBack(1); - nullable_column->insertDefault(); - continue; - } - throw; + if constexpr (throw_on_error) + throw; + /// Check if exception happened after we inserted the value + /// (deserializeWholeText should not do it, but let's check anyway). + if (column_to.size() > i) + column_to.popBack(column_to.size() - i); + column_to.insertDefault(); } + /// Usually deserializeWholeText checks for eof after parsing, but let's check one more time just in case. if (!read_buffer.eof()) { - if (result_type) - throwExceptionForIncompletelyParsedValue(read_buffer, *result_type); + if constexpr (throw_on_error) + { + if (result_type) + throwExceptionForIncompletelyParsedValue(read_buffer, *result_type); + else + throw Exception( + ErrorCodes::CANNOT_PARSE_TEXT, "Cannot parse string to column {}. Expected eof", column_to.getName()); + } else - throw Exception(ErrorCodes::CANNOT_PARSE_TEXT, - "Cannot parse string to column {}. Expected eof", column_to.getName()); + { + if (column_to.size() > i) + column_to.popBack(column_to.size() - i); + column_to.insertDefault(); + } } } } @@ -3280,7 +3286,9 @@ class FunctionCast final : public IFunctionBase { if (checkAndGetDataType(from_type.get())) { - return &ConvertImplGenericFromString::execute; + if (cast_type == CastType::accurateOrNull) + return &ConvertImplGenericFromString::execute; + return &ConvertImplGenericFromString::execute; } return createWrapper(from_type, to_type, requested_result_is_nullable); @@ -3443,7 +3451,7 @@ class FunctionCast final : public IFunctionBase /// Conversion from String through parsing. if (checkAndGetDataType(from_type_untyped.get())) { - return &ConvertImplGenericFromString::execute; + return &ConvertImplGenericFromString::execute; } else if (const auto * agg_type = checkAndGetDataType(from_type_untyped.get())) { @@ -3486,7 +3494,7 @@ class FunctionCast final : public IFunctionBase /// Conversion from String through parsing. if (checkAndGetDataType(from_type_untyped.get())) { - return &ConvertImplGenericFromString::execute; + return &ConvertImplGenericFromString::execute; } DataTypePtr from_type_holder; @@ -3577,7 +3585,7 @@ class FunctionCast final : public IFunctionBase /// Conversion from String through parsing. 
if (checkAndGetDataType(from_type_untyped.get())) { - return &ConvertImplGenericFromString::execute; + return &ConvertImplGenericFromString::execute; } const auto * from_type = checkAndGetDataType(from_type_untyped.get()); @@ -3922,7 +3930,7 @@ class FunctionCast final : public IFunctionBase { return [] (ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable * nullable_source, size_t input_rows_count) { - auto res = ConvertImplGenericFromString::execute(arguments, result_type, nullable_source, input_rows_count)->assumeMutable(); + auto res = ConvertImplGenericFromString::execute(arguments, result_type, nullable_source, input_rows_count)->assumeMutable(); res->finalize(); return res; }; @@ -4077,6 +4085,29 @@ class FunctionCast final : public IFunctionBase return ColumnVariant::create(discriminators, variants); } + WrapperType createStringToVariantWrapper() const + { + return [&](ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable *, size_t input_rows_count) -> ColumnPtr + { + auto column = arguments[0].column->convertToFullColumnIfLowCardinality(); + auto args = arguments; + args[0].column = column; + + const ColumnNullable * column_nullable = nullptr; + if (isColumnNullable(*args[0].column)) + { + column_nullable = assert_cast(args[0].column.get()); + args[0].column = column_nullable->getNestedColumnPtr(); + } + + args[0].type = removeNullable(removeLowCardinality(args[0].type)); + + if (cast_type == CastType::accurateOrNull) + return ConvertImplGenericFromString::execute(args, result_type, column_nullable, input_rows_count); + return ConvertImplGenericFromString::execute(args, result_type, column_nullable, input_rows_count); + }; + } + WrapperType createColumnToVariantWrapper(const DataTypePtr & from_type, const DataTypeVariant & to_variant) const { /// We allow converting NULL to Variant(...) as Variant can store NULLs. @@ -4091,6 +4122,10 @@ class FunctionCast final : public IFunctionBase } auto variant_discr_opt = to_variant.tryGetVariantDiscriminator(*removeNullableOrLowCardinalityNullable(from_type)); + /// Cast String to Variant through parsing if it's not Variant(String). + if (isStringOrFixedString(removeNullable(removeLowCardinality(from_type))) && (!variant_discr_opt || to_variant.getVariants().size() > 1)) + return createStringToVariantWrapper(); + if (!variant_discr_opt) throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "Cannot convert type {} to {}. 
Conversion to Variant allowed only for types from this Variant", from_type->getName(), to_variant.getName()); @@ -4692,7 +4727,7 @@ class FunctionCast final : public IFunctionBase if (to_type->getCustomSerialization() && to_type->getCustomName()) { - ret = [requested_result_is_nullable]( + ret = [this, requested_result_is_nullable]( ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable * column_nullable, @@ -4701,7 +4736,10 @@ class FunctionCast final : public IFunctionBase auto wrapped_result_type = result_type; if (requested_result_is_nullable) wrapped_result_type = makeNullable(result_type); - return ConvertImplGenericFromString::execute( + if (this->cast_type == CastType::accurateOrNull) + return ConvertImplGenericFromString::execute( + arguments, wrapped_result_type, column_nullable, input_rows_count); + return ConvertImplGenericFromString::execute( arguments, wrapped_result_type, column_nullable, input_rows_count); }; return true; diff --git a/src/Functions/FunctionsDecimalArithmetics.h b/src/Functions/FunctionsDecimalArithmetics.h index 79e10d215a97..e26ad7362b37 100644 --- a/src/Functions/FunctionsDecimalArithmetics.h +++ b/src/Functions/FunctionsDecimalArithmetics.h @@ -280,7 +280,7 @@ class FunctionsDecimalArithmetics : public IFunction /** At compile time, result is unknown. We only know the Scale (number of fractional digits) at runtime. Also nothing is known about size of whole part. - As in simple division/multiplication for decimals, we scale the result up, but is is explicit here and no downscale is performed. + As in simple division/multiplication for decimals, we scale the result up, but it is explicit here and no downscale is performed. It guarantees that result will have given scale and it can also be MANUALLY converted to other decimal types later. 
**/ if (scale > DecimalUtils::max_precision) diff --git a/src/Functions/FunctionsExternalDictionaries.h b/src/Functions/FunctionsExternalDictionaries.h index d3317e2dfcfc..4460a8bd7bd3 100644 --- a/src/Functions/FunctionsExternalDictionaries.h +++ b/src/Functions/FunctionsExternalDictionaries.h @@ -1139,7 +1139,7 @@ class FunctionDictGetHierarchy final : public IFunction getName()); auto dictionary = helper.getDictionary(arguments[0].column); - const auto & hierarchical_attribute = helper.getDictionaryHierarchicalAttribute(dictionary); + const auto & hierarchical_attribute = FunctionDictHelper::getDictionaryHierarchicalAttribute(dictionary); return std::make_shared(removeNullable(hierarchical_attribute.type)); } @@ -1150,7 +1150,7 @@ class FunctionDictGetHierarchy final : public IFunction return result_type->createColumn(); auto dictionary = helper.getDictionary(arguments[0].column); - const auto & hierarchical_attribute = helper.getDictionaryHierarchicalAttribute(dictionary); + const auto & hierarchical_attribute = FunctionDictHelper::getDictionaryHierarchicalAttribute(dictionary); auto key_column = ColumnWithTypeAndName{arguments[1].column, arguments[1].type, arguments[1].name}; auto key_column_casted = castColumnAccurate(key_column, removeNullable(hierarchical_attribute.type)); @@ -1205,7 +1205,7 @@ class FunctionDictIsIn final : public IFunction return result_type->createColumn(); auto dictionary = helper.getDictionary(arguments[0].column); - const auto & hierarchical_attribute = helper.getDictionaryHierarchicalAttribute(dictionary); + const auto & hierarchical_attribute = FunctionDictHelper::getDictionaryHierarchicalAttribute(dictionary); auto key_column = ColumnWithTypeAndName{arguments[1].column->convertToFullColumnIfConst(), arguments[1].type, arguments[2].name}; auto in_key_column = ColumnWithTypeAndName{arguments[2].column->convertToFullColumnIfConst(), arguments[2].type, arguments[2].name}; diff --git a/src/Functions/FunctionsJSON.h b/src/Functions/FunctionsJSON.h index 2539fa1aeb43..75c274a365ff 100644 --- a/src/Functions/FunctionsJSON.h +++ b/src/Functions/FunctionsJSON.h @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -35,6 +36,8 @@ #include #include #include +#include +#include #include #include @@ -257,7 +260,7 @@ class FunctionJSONHelpers } case MoveType::Key: { - key = (*arguments[j + 1].column).getDataAt(row).toView(); + key = arguments[j + 1].column->getDataAt(row).toView(); if (!moveToElementByKey(res_element, key)) return false; break; @@ -334,6 +337,26 @@ class FunctionJSONHelpers }; +template +class JSONExtractImpl; + +template +class JSONExtractKeysAndValuesImpl; + +/** +* Functions JSONExtract and JSONExtractKeysAndValues force the return type - it is specified in the last argument. +* For example - `SELECT JSONExtract(materialize('{"a": 131231, "b": 1234}'), 'b', 'LowCardinality(FixedString(4))')` +* But by default ClickHouse decides on its own whether the return type will be LowCardinality based on the types of +* input arguments. +* And for these specific functions we cannot rely on this mechanism, so these functions have their own implementation - +* just convert all of the LowCardinality input columns to full ones, execute and wrap the resulting column in LowCardinality +* if needed. 
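* An illustrative sketch of the intended effect (json_lc below is a hypothetical LowCardinality(String)
* column, not something taken from this patch):
*
*     SELECT toTypeName(JSONExtract(json_lc, 'b', 'LowCardinality(FixedString(4))'))
*     -- expected to be LowCardinality(FixedString(4)): the LowCardinality input is converted to a full
*     -- column, the extraction runs on it, and the result is wrapped back into LowCardinality.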
+*/ +template typename Impl> +constexpr bool functionForcesTheReturnType() +{ + return std::is_same_v, JSONExtractImpl> || std::is_same_v, JSONExtractKeysAndValuesImpl>; +} template typename Impl> class ExecutableFunctionJSON : public IExecutableFunction @@ -348,17 +371,50 @@ class ExecutableFunctionJSON : public IExecutableFunction String getName() const override { return Name::name; } bool useDefaultImplementationForNulls() const override { return false; } bool useDefaultImplementationForConstants() const override { return true; } + bool useDefaultImplementationForLowCardinalityColumns() const override + { + return !functionForcesTheReturnType(); + } ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override { if (null_presence.has_null_constant) return result_type->createColumnConstWithDefaultValue(input_rows_count); - ColumnsWithTypeAndName temporary_columns = null_presence.has_nullable ? createBlockWithNestedColumns(arguments) : arguments; - ColumnPtr temporary_result = chooseAndRunJSONParser(temporary_columns, json_return_type, input_rows_count); - if (null_presence.has_nullable) - return wrapInNullable(temporary_result, arguments, result_type, input_rows_count); - return temporary_result; + if constexpr (functionForcesTheReturnType()) + { + ColumnsWithTypeAndName columns_without_low_cardinality = arguments; + + for (auto & column : columns_without_low_cardinality) + { + column.column = recursiveRemoveLowCardinality(column.column); + column.type = recursiveRemoveLowCardinality(column.type); + } + + ColumnsWithTypeAndName temporary_columns = null_presence.has_nullable ? createBlockWithNestedColumns(columns_without_low_cardinality) : columns_without_low_cardinality; + ColumnPtr temporary_result = chooseAndRunJSONParser(temporary_columns, json_return_type, input_rows_count); + + if (null_presence.has_nullable) + temporary_result = wrapInNullable(temporary_result, columns_without_low_cardinality, result_type, input_rows_count); + + if (result_type->lowCardinality()) + temporary_result = recursiveLowCardinalityTypeConversion(temporary_result, json_return_type, result_type); + + return temporary_result; + } + else + { + ColumnsWithTypeAndName temporary_columns = null_presence.has_nullable ? createBlockWithNestedColumns(arguments) : arguments; + ColumnPtr temporary_result = chooseAndRunJSONParser(temporary_columns, json_return_type, input_rows_count); + + if (null_presence.has_nullable) + temporary_result = wrapInNullable(temporary_result, arguments, result_type, input_rows_count); + + if (result_type->lowCardinality()) + temporary_result = recursiveLowCardinalityTypeConversion(temporary_result, json_return_type, result_type); + + return temporary_result; + } } private: @@ -429,7 +485,6 @@ class FunctionBaseFunctionJSON : public IFunctionBase DataTypePtr json_return_type; }; - /// We use IFunctionOverloadResolver instead of IFunction to handle non-default NULL processing. /// Both NULL and JSON NULL should generate NULL value. If any argument is NULL, return NULL. 
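/// For example (an illustrative expectation, not text from this patch):
///     SELECT JSONExtract(NULL, 'a', 'Nullable(String)')          -- NULL: an argument is NULL
///     SELECT JSONExtract('{"a": null}', 'a', 'Nullable(String)') -- NULL: the JSON value itself is null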
template typename Impl> @@ -450,6 +505,10 @@ class JSONOverloadResolver : public IFunctionOverloadResolver, WithContext bool isVariadic() const override { return true; } size_t getNumberOfArguments() const override { return 0; } bool useDefaultImplementationForNulls() const override { return false; } + bool useDefaultImplementationForLowCardinalityColumns() const override + { + return !functionForcesTheReturnType(); + } FunctionBasePtr build(const ColumnsWithTypeAndName & arguments) const override { @@ -481,7 +540,6 @@ class JSONOverloadResolver : public IFunctionOverloadResolver, WithContext } }; - struct NameJSONHas { static constexpr auto name{"JSONHas"}; }; struct NameIsValidJSON { static constexpr auto name{"isValidJSON"}; }; struct NameJSONLength { static constexpr auto name{"JSONLength"}; }; @@ -1238,6 +1296,35 @@ struct JSONExtractTree std::unique_ptr value; }; + class VariantNode : public Node + { + public: + VariantNode(std::vector> variant_nodes_, std::vector order_) : variant_nodes(std::move(variant_nodes_)), order(std::move(order_)) { } + + bool insertResultToColumn(IColumn & dest, const Element & element) override + { + auto & column_variant = assert_cast(dest); + for (size_t i : order) + { + auto & variant = column_variant.getVariantByGlobalDiscriminator(i); + if (variant_nodes[i]->insertResultToColumn(variant, element)) + { + column_variant.getLocalDiscriminators().push_back(column_variant.localDiscriminatorByGlobal(i)); + column_variant.getOffsets().push_back(variant.size() - 1); + return true; + } + } + + return false; + } + + private: + std::vector> variant_nodes; + /// Order in which we should try variants nodes. + /// For example, String should be always the last one. + std::vector order; + }; + static std::unique_ptr build(const char * function_name, const DataTypePtr & type) { switch (type->getTypeId()) @@ -1314,6 +1401,16 @@ struct JSONExtractTree const auto & value_type = map_type.getValueType(); return std::make_unique(build(function_name, key_type), build(function_name, value_type)); } + case TypeIndex::Variant: + { + const auto & variant_type = static_cast(*type); + const auto & variants = variant_type.getVariants(); + std::vector> variant_nodes; + variant_nodes.reserve(variants.size()); + for (const auto & variant : variants) + variant_nodes.push_back(build(function_name, variant)); + return std::make_unique(std::move(variant_nodes), SerializationVariant::getVariantsDeserializeTextOrder(variants)); + } default: throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Function {} doesn't support the return type schema: {}", diff --git a/src/Functions/FunctionsTimeWindow.h b/src/Functions/FunctionsTimeWindow.h index 4532286830de..6183d25c8bd4 100644 --- a/src/Functions/FunctionsTimeWindow.h +++ b/src/Functions/FunctionsTimeWindow.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include diff --git a/src/Functions/GatherUtils/Sources.h b/src/Functions/GatherUtils/Sources.h index 01b633385596..4e3009a695da 100644 --- a/src/Functions/GatherUtils/Sources.h +++ b/src/Functions/GatherUtils/Sources.h @@ -144,7 +144,7 @@ struct NumericArraySource : public ArraySourceImpl> #pragma clang diagnostic ignored "-Wsuggest-override" #pragma clang diagnostic ignored "-Wsuggest-destructor-override" -/// NOLINTBEGIN(hicpp-use-override) +/// NOLINTBEGIN(hicpp-use-override, modernize-use-override) template struct ConstSource : public Base @@ -233,7 +233,7 @@ struct ConstSource : public Base } }; -/// NOLINTEND(hicpp-use-override) +/// NOLINTEND(hicpp-use-override, 
modernize-use-override) #pragma clang diagnostic pop diff --git a/src/Functions/UserDefined/UserDefinedSQLFunctionFactory.cpp b/src/Functions/UserDefined/UserDefinedSQLFunctionFactory.cpp index e37e4a23b63c..e6796874e506 100644 --- a/src/Functions/UserDefined/UserDefinedSQLFunctionFactory.cpp +++ b/src/Functions/UserDefined/UserDefinedSQLFunctionFactory.cpp @@ -86,7 +86,7 @@ namespace auto & res = typeid_cast(*ptr); res.if_not_exists = false; res.or_replace = false; - FunctionNameNormalizer().visit(res.function_core.get()); + FunctionNameNormalizer::visit(res.function_core.get()); return ptr; } } @@ -106,7 +106,7 @@ void UserDefinedSQLFunctionFactory::checkCanBeRegistered(const ContextPtr & cont if (AggregateFunctionFactory::instance().hasNameOrAlias(function_name)) throw Exception(ErrorCodes::FUNCTION_ALREADY_EXISTS, "The aggregate function '{}' already exists", function_name); - if (UserDefinedExecutableFunctionFactory::instance().has(function_name, context)) + if (UserDefinedExecutableFunctionFactory::instance().has(function_name, context)) /// NOLINT(readability-static-accessed-through-instance) throw Exception(ErrorCodes::FUNCTION_ALREADY_EXISTS, "User defined executable function '{}' already exists", function_name); validateFunction(assert_cast(create_function_query).function_core, function_name); @@ -118,7 +118,7 @@ void UserDefinedSQLFunctionFactory::checkCanBeUnregistered(const ContextPtr & co AggregateFunctionFactory::instance().hasNameOrAlias(function_name)) throw Exception(ErrorCodes::CANNOT_DROP_FUNCTION, "Cannot drop system function '{}'", function_name); - if (UserDefinedExecutableFunctionFactory::instance().has(function_name, context)) + if (UserDefinedExecutableFunctionFactory::instance().has(function_name, context)) /// NOLINT(readability-static-accessed-through-instance) throw Exception(ErrorCodes::CANNOT_DROP_FUNCTION, "Cannot drop user defined executable function '{}'", function_name); } diff --git a/src/Functions/UserDefined/UserDefinedSQLObjectsDiskStorage.cpp b/src/Functions/UserDefined/UserDefinedSQLObjectsDiskStorage.cpp index b083c5400839..d874612ad04a 100644 --- a/src/Functions/UserDefined/UserDefinedSQLObjectsDiskStorage.cpp +++ b/src/Functions/UserDefined/UserDefinedSQLObjectsDiskStorage.cpp @@ -56,7 +56,6 @@ UserDefinedSQLObjectsDiskStorage::UserDefinedSQLObjectsDiskStorage(const Context , dir_path{makeDirectoryPathCanonical(dir_path_)} , log{getLogger("UserDefinedSQLObjectsLoaderFromDisk")} { - createDirectory(); } @@ -122,7 +121,12 @@ void UserDefinedSQLObjectsDiskStorage::reloadObjects() void UserDefinedSQLObjectsDiskStorage::loadObjectsImpl() { LOG_INFO(log, "Loading user defined objects from {}", dir_path); - createDirectory(); + + if (!std::filesystem::exists(dir_path)) + { + LOG_DEBUG(log, "The directory for user defined objects ({}) does not exist: nothing to load", dir_path); + return; + } std::vector> function_names_and_queries; @@ -157,7 +161,6 @@ void UserDefinedSQLObjectsDiskStorage::loadObjectsImpl() void UserDefinedSQLObjectsDiskStorage::reloadObject(UserDefinedSQLObjectType object_type, const String & object_name) { - createDirectory(); auto ast = tryLoadObject(object_type, object_name); if (ast) setObject(object_name, *ast); @@ -185,6 +188,7 @@ bool UserDefinedSQLObjectsDiskStorage::storeObjectImpl( bool replace_if_exists, const Settings & settings) { + createDirectory(); String file_path = getFilePath(object_type, object_name); LOG_DEBUG(log, "Storing user-defined object {} to file {}", backQuote(object_name), file_path); diff --git 
a/src/Functions/UserDefined/UserDefinedSQLObjectsStorageBase.cpp b/src/Functions/UserDefined/UserDefinedSQLObjectsStorageBase.cpp index 4f47a46b10da..f251d11789fa 100644 --- a/src/Functions/UserDefined/UserDefinedSQLObjectsStorageBase.cpp +++ b/src/Functions/UserDefined/UserDefinedSQLObjectsStorageBase.cpp @@ -23,7 +23,7 @@ ASTPtr normalizeCreateFunctionQuery(const IAST & create_function_query) auto & res = typeid_cast(*ptr); res.if_not_exists = false; res.or_replace = false; - FunctionNameNormalizer().visit(res.function_core.get()); + FunctionNameNormalizer::visit(res.function_core.get()); return ptr; } diff --git a/src/Functions/array/arrayDotProduct.cpp b/src/Functions/array/arrayDotProduct.cpp index 783843a89d51..4551140acc3e 100644 --- a/src/Functions/array/arrayDotProduct.cpp +++ b/src/Functions/array/arrayDotProduct.cpp @@ -66,13 +66,13 @@ struct DotProduct }; template - static void accumulate(State & state, Type x, Type y) + static NO_SANITIZE_UNDEFINED void accumulate(State & state, Type x, Type y) { state.sum += x * y; } template - static void combine(State & state, const State & other_state) + static NO_SANITIZE_UNDEFINED void combine(State & state, const State & other_state) { state.sum += other_state.sum; } diff --git a/src/Functions/array/mapOp.cpp b/src/Functions/array/mapOp.cpp index 613fd934c41b..50b64cf9809f 100644 --- a/src/Functions/array/mapOp.cpp +++ b/src/Functions/array/mapOp.cpp @@ -1,18 +1,19 @@ -#include #include +#include #include #include #include #include #include #include +#include #include #include #include #include -#include "Columns/ColumnMap.h" -#include "DataTypes/DataTypeMap.h" +#include +#include namespace DB { diff --git a/src/Functions/decodeHTMLComponent.cpp b/src/Functions/decodeHTMLComponent.cpp index 2cd95127266c..4db3c43f9461 100644 --- a/src/Functions/decodeHTMLComponent.cpp +++ b/src/Functions/decodeHTMLComponent.cpp @@ -70,8 +70,7 @@ namespace const char * src_pos = src; const char * src_end = src + src_size; char * dst_pos = dst; - // perfect hashmap to lookup html character references - HTMLCharacterHash hash; + // to hold char seq for lookup, reuse it std::vector seq; while (true) @@ -108,7 +107,7 @@ namespace // null terminate the sequence seq.push_back('\0'); // lookup the html sequence in the perfect hashmap. 
- const auto * res = hash.Lookup(seq.data(), strlen(seq.data())); + const auto * res = HTMLCharacterHash::Lookup(seq.data(), strlen(seq.data())); // reset so that it's reused in the next iteration seq.clear(); if (res) diff --git a/src/Functions/getScalar.cpp b/src/Functions/getScalar.cpp index d72c84b8528e..7196cbc0a361 100644 --- a/src/Functions/getScalar.cpp +++ b/src/Functions/getScalar.cpp @@ -83,7 +83,7 @@ class FunctionGetSpecialScalar : public IFunction static ColumnWithTypeAndName createScalar(ContextPtr context_) { - if (const auto * block = context_->tryGetSpecialScalar(Scalar::scalar_name)) + if (auto block = context_->tryGetSpecialScalar(Scalar::scalar_name)) return block->getByPosition(0); else if (context_->hasQueryContext()) { diff --git a/src/Functions/mortonEncode.cpp b/src/Functions/mortonEncode.cpp index fee14c7784b5..3b95c114b145 100644 --- a/src/Functions/mortonEncode.cpp +++ b/src/Functions/mortonEncode.cpp @@ -321,6 +321,9 @@ class FunctionMortonEncode: public TargetSpecific::Default::FunctionMortonEncode ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override { + if (input_rows_count == 0) + return ColumnUInt64::create(); + return selector.selectAndExecute(arguments, result_type, input_rows_count); } diff --git a/src/Functions/runningAccumulate.cpp b/src/Functions/runningAccumulate.cpp index b0ba10c40492..793e79cdf461 100644 --- a/src/Functions/runningAccumulate.cpp +++ b/src/Functions/runningAccumulate.cpp @@ -1,8 +1,9 @@ -#include -#include -#include +#include #include #include +#include +#include +#include #include #include #include diff --git a/src/Functions/serverConstants.cpp b/src/Functions/serverConstants.cpp index fd8fb22455bf..e7e423058f11 100644 --- a/src/Functions/serverConstants.cpp +++ b/src/Functions/serverConstants.cpp @@ -32,7 +32,7 @@ namespace #endif - /// Get the host name. Is is constant on single server, but is not constant in distributed queries. + /// Get the host name. It is constant on single server, but is not constant in distributed queries. class FunctionHostName : public FunctionConstantBase { public: diff --git a/src/IO/Archives/LibArchiveWriter.h b/src/IO/Archives/LibArchiveWriter.h index f54a8ce23674..da566c82ff67 100644 --- a/src/IO/Archives/LibArchiveWriter.h +++ b/src/IO/Archives/LibArchiveWriter.h @@ -68,7 +68,7 @@ class LibArchiveWriter : public IArchiveWriter void startWritingFile(); void endWritingFile(); - std::unique_ptr stream_info TSA_GUARDED_BY(mutex) = nullptr; + std::unique_ptr stream_info TSA_GUARDED_BY(mutex); bool is_writing_file TSA_GUARDED_BY(mutex) = false; bool finalized TSA_GUARDED_BY(mutex) = false; mutable std::mutex mutex; diff --git a/src/IO/AzureBlobStorage/isRetryableAzureException.cpp b/src/IO/AzureBlobStorage/isRetryableAzureException.cpp new file mode 100644 index 000000000000..785d89857092 --- /dev/null +++ b/src/IO/AzureBlobStorage/isRetryableAzureException.cpp @@ -0,0 +1,21 @@ +#include "config.h" + +#if USE_AZURE_BLOB_STORAGE +#include + +namespace DB +{ + +bool isRetryableAzureException(const Azure::Core::RequestFailedException & e) +{ + /// Always retry transport errors. + if (dynamic_cast(&e)) + return true; + + /// Retry other 5xx errors just in case. 
+ return e.StatusCode >= Azure::Core::Http::HttpStatusCode::InternalServerError; +} + +} + +#endif diff --git a/src/IO/AzureBlobStorage/isRetryableAzureException.h b/src/IO/AzureBlobStorage/isRetryableAzureException.h new file mode 100644 index 000000000000..dfd13e4c98a0 --- /dev/null +++ b/src/IO/AzureBlobStorage/isRetryableAzureException.h @@ -0,0 +1,14 @@ +#pragma once +#include "config.h" + +#if USE_AZURE_BLOB_STORAGE +#include + +namespace DB +{ + +bool isRetryableAzureException(const Azure::Core::RequestFailedException & e); + +} + +#endif diff --git a/src/IO/ConnectionTimeouts.cpp b/src/IO/ConnectionTimeouts.cpp index c4b636103fe2..da6214ae4771 100644 --- a/src/IO/ConnectionTimeouts.cpp +++ b/src/IO/ConnectionTimeouts.cpp @@ -144,7 +144,12 @@ ConnectionTimeouts ConnectionTimeouts::getAdaptiveTimeouts(const String & method void setTimeouts(Poco::Net::HTTPClientSession & session, const ConnectionTimeouts & timeouts) { session.setTimeout(timeouts.connection_timeout, timeouts.send_timeout, timeouts.receive_timeout); - session.setKeepAliveTimeout(timeouts.http_keep_alive_timeout); + /// we can not change keep alive timeout for already initiated connections + if (!session.connected()) + { + session.setKeepAliveTimeout(timeouts.http_keep_alive_timeout); + session.setKeepAliveMaxRequests(int(timeouts.http_keep_alive_max_requests)); + } } ConnectionTimeouts getTimeouts(const Poco::Net::HTTPClientSession & session) diff --git a/src/IO/ConnectionTimeouts.h b/src/IO/ConnectionTimeouts.h index 49305f42d85b..b86ec44d21c0 100644 --- a/src/IO/ConnectionTimeouts.h +++ b/src/IO/ConnectionTimeouts.h @@ -35,6 +35,8 @@ struct ConnectionTimeouts Poco::Timespan tcp_keep_alive_timeout = Poco::Timespan(DEFAULT_TCP_KEEP_ALIVE_TIMEOUT, 0); Poco::Timespan http_keep_alive_timeout = Poco::Timespan(DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT, 0); + size_t http_keep_alive_max_requests = DEFAULT_HTTP_KEEP_ALIVE_MAX_REQUEST; + /// Timeouts for HedgedConnections Poco::Timespan hedged_connection_timeout = Poco::Timespan(DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC, 0); @@ -69,6 +71,7 @@ APPLY_FOR_ALL_CONNECTION_TIMEOUT_MEMBERS(DECLARE_BUILDER_FOR_MEMBER) ConnectionTimeouts & withConnectionTimeout(size_t seconds); ConnectionTimeouts & withConnectionTimeout(Poco::Timespan span); + ConnectionTimeouts & withHTTPKeepAliveMaxRequests(size_t requests); }; /// NOLINTBEGIN(bugprone-macro-parentheses) @@ -114,6 +117,12 @@ inline ConnectionTimeouts & ConnectionTimeouts::withConnectionTimeout(Poco::Time return *this; } +inline ConnectionTimeouts & ConnectionTimeouts::withHTTPKeepAliveMaxRequests(size_t requests) +{ + http_keep_alive_max_requests = requests; + return *this; +} + void setTimeouts(Poco::Net::HTTPClientSession & session, const ConnectionTimeouts & timeouts); ConnectionTimeouts getTimeouts(const Poco::Net::HTTPClientSession & session); diff --git a/src/IO/HTTPCommon.cpp b/src/IO/HTTPCommon.cpp index 09f7724d6136..6e1c886b9b04 100644 --- a/src/IO/HTTPCommon.cpp +++ b/src/IO/HTTPCommon.cpp @@ -20,6 +20,7 @@ #include #include +#include #include #include diff --git a/src/IO/MMapReadBufferFromFileWithCache.cpp b/src/IO/MMapReadBufferFromFileWithCache.cpp index d53f3bc325db..0cfb60d6527e 100644 --- a/src/IO/MMapReadBufferFromFileWithCache.cpp +++ b/src/IO/MMapReadBufferFromFileWithCache.cpp @@ -26,7 +26,7 @@ void MMapReadBufferFromFileWithCache::init() MMapReadBufferFromFileWithCache::MMapReadBufferFromFileWithCache( MMappedFileCache & cache, const std::string & file_name, size_t offset, size_t length) { - mapped = 
cache.getOrSet(cache.hash(file_name, offset, length), [&] + mapped = cache.getOrSet(MMappedFileCache::hash(file_name, offset, length), [&] { return std::make_shared(file_name, offset, length); }); @@ -37,7 +37,7 @@ MMapReadBufferFromFileWithCache::MMapReadBufferFromFileWithCache( MMapReadBufferFromFileWithCache::MMapReadBufferFromFileWithCache( MMappedFileCache & cache, const std::string & file_name, size_t offset) { - mapped = cache.getOrSet(cache.hash(file_name, offset, -1), [&] + mapped = cache.getOrSet(MMappedFileCache::hash(file_name, offset, -1), [&] { return std::make_shared(file_name, offset); }); diff --git a/src/IO/ReadBufferFromIStream.cpp b/src/IO/ReadBufferFromIStream.cpp index bc90ec7ed15e..325beabaf81e 100644 --- a/src/IO/ReadBufferFromIStream.cpp +++ b/src/IO/ReadBufferFromIStream.cpp @@ -1,6 +1,8 @@ #include #include +#include + namespace DB { diff --git a/src/IO/ReadHelpers.h b/src/IO/ReadHelpers.h index fc1055390618..a9c861be13c1 100644 --- a/src/IO/ReadHelpers.h +++ b/src/IO/ReadHelpers.h @@ -822,7 +822,7 @@ inline ReturnType readDateTextImpl(ExtendedDayNum & date, ReadBuffer & buf, cons return false; /// When the parameter is out of rule or out of range, Date32 uses 1925-01-01 as the default value (-DateLUT::instance().getDayNumOffsetEpoch(), -16436) and Date uses 1970-01-01. - date = date_lut.makeDayNum(local_date.year(), local_date.month(), local_date.day(), -static_cast(date_lut.getDayNumOffsetEpoch())); + date = date_lut.makeDayNum(local_date.year(), local_date.month(), local_date.day(), -static_cast(DateLUTImpl::getDayNumOffsetEpoch())); return ReturnType(true); } @@ -1880,10 +1880,10 @@ struct PcgDeserializer assertChar(' ', buf); readText(state, buf); - if (multiplier != rng.multiplier()) - throw Exception(ErrorCodes::INCORRECT_DATA, "Incorrect multiplier in pcg32: expected {}, got {}", rng.multiplier(), multiplier); - if (increment != rng.increment()) - throw Exception(ErrorCodes::INCORRECT_DATA, "Incorrect increment in pcg32: expected {}, got {}", rng.increment(), increment); + if (multiplier != pcg32_fast::multiplier()) + throw Exception(ErrorCodes::INCORRECT_DATA, "Incorrect multiplier in pcg32: expected {}, got {}", pcg32_fast::multiplier(), multiplier); + if (increment != pcg32_fast::increment()) + throw Exception(ErrorCodes::INCORRECT_DATA, "Incorrect increment in pcg32: expected {}, got {}", pcg32_fast::increment(), increment); rng.state_ = state; } diff --git a/src/IO/ReadWriteBufferFromHTTP.cpp b/src/IO/ReadWriteBufferFromHTTP.cpp index c99b08d0c9dd..303ffb744b55 100644 --- a/src/IO/ReadWriteBufferFromHTTP.cpp +++ b/src/IO/ReadWriteBufferFromHTTP.cpp @@ -345,7 +345,7 @@ void ReadWriteBufferFromHTTP::doWithRetries(std::function && callable, if (last_attempt || !is_retriable) { if (!mute_logging) - LOG_ERROR(log, + LOG_DEBUG(log, "Failed to make request to '{}'{}. " "Error: '{}'. " "Failed at try {}/{}.", @@ -361,7 +361,7 @@ void ReadWriteBufferFromHTTP::doWithRetries(std::function && callable, on_retry(); if (!mute_logging) - LOG_INFO(log, + LOG_TRACE(log, "Failed to make request to '{}'{}. " "Error: {}. " "Failed at try {}/{}. 
" diff --git a/src/IO/S3/Client.h b/src/IO/S3/Client.h index c7bc727bf32e..c79ec05c8c62 100644 --- a/src/IO/S3/Client.h +++ b/src/IO/S3/Client.h @@ -96,9 +96,9 @@ bool isS3ExpressEndpoint(const std::string & endpoint); struct ClientSettings { - bool use_virtual_addressing; + bool use_virtual_addressing = false; /// Disable checksum to avoid extra read of the input stream - bool disable_checksum; + bool disable_checksum = false; /// Should client send ComposeObject request after upload to GCS. /// /// Previously ComposeObject request was required to make Copy possible, @@ -108,8 +108,8 @@ struct ClientSettings /// /// Ability to enable it preserved since likely it is required for old /// files. - bool gcs_issue_compose_request; - bool is_s3express_bucket; + bool gcs_issue_compose_request = false; + bool is_s3express_bucket = false; }; /// Client that improves the client from the AWS SDK diff --git a/src/IO/S3/Credentials.h b/src/IO/S3/Credentials.h index 34dc0c1d2bd0..8d5862230350 100644 --- a/src/IO/S3/Credentials.h +++ b/src/IO/S3/Credentials.h @@ -22,6 +22,8 @@ inline static constexpr uint64_t DEFAULT_EXPIRATION_WINDOW_SECONDS = 120; inline static constexpr uint64_t DEFAULT_CONNECT_TIMEOUT_MS = 1000; inline static constexpr uint64_t DEFAULT_REQUEST_TIMEOUT_MS = 30000; inline static constexpr uint64_t DEFAULT_MAX_CONNECTIONS = 100; +inline static constexpr uint64_t DEFAULT_KEEP_ALIVE_TIMEOUT = 5; +inline static constexpr uint64_t DEFAULT_KEEP_ALIVE_MAX_REQUESTS = 100; /// In GCP metadata service can be accessed via DNS regardless of IPv4 or IPv6. static inline constexpr char GCP_METADATA_SERVICE_ENDPOINT[] = "http://metadata.google.internal"; diff --git a/src/IO/S3/PocoHTTPClient.cpp b/src/IO/S3/PocoHTTPClient.cpp index a29a4b0b8ee5..de20a712d4c2 100644 --- a/src/IO/S3/PocoHTTPClient.cpp +++ b/src/IO/S3/PocoHTTPClient.cpp @@ -146,7 +146,9 @@ ConnectionTimeouts getTimeoutsFromConfiguration(const PocoHTTPClientConfiguratio .withSendTimeout(Poco::Timespan(client_configuration.requestTimeoutMs * 1000)) .withReceiveTimeout(Poco::Timespan(client_configuration.requestTimeoutMs * 1000)) .withTCPKeepAliveTimeout(Poco::Timespan( - client_configuration.enableTcpKeepAlive ? client_configuration.tcpKeepAliveIntervalMs * 1000 : 0)); + client_configuration.enableTcpKeepAlive ? client_configuration.tcpKeepAliveIntervalMs * 1000 : 0)) + .withHTTPKeepAliveTimeout(Poco::Timespan(client_configuration.http_keep_alive_timeout, 0)) + .withHTTPKeepAliveMaxRequests(client_configuration.http_keep_alive_max_requests); } PocoHTTPClient::PocoHTTPClient(const PocoHTTPClientConfiguration & client_configuration) diff --git a/src/IO/S3/PocoHTTPClient.h b/src/IO/S3/PocoHTTPClient.h index ebbddbb2c7ee..a0b35e9b4a9a 100644 --- a/src/IO/S3/PocoHTTPClient.h +++ b/src/IO/S3/PocoHTTPClient.h @@ -51,6 +51,8 @@ struct PocoHTTPClientConfiguration : public Aws::Client::ClientConfiguration /// See PoolBase::BehaviourOnLimit bool s3_use_adaptive_timeouts = true; + size_t http_keep_alive_timeout = DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT; + size_t http_keep_alive_max_requests = DEFAULT_HTTP_KEEP_ALIVE_MAX_REQUEST; std::function error_report; diff --git a/src/IO/S3/URI.cpp b/src/IO/S3/URI.cpp index 027cb624ed5c..4e679e6c4773 100644 --- a/src/IO/S3/URI.cpp +++ b/src/IO/S3/URI.cpp @@ -33,12 +33,17 @@ namespace S3 URI::URI(const std::string & uri_) { /// Case when bucket name represented in domain name of S3 URL. - /// E.g. (https://bucket-name.s3.Region.amazonaws.com/key) + /// E.g. 
(https://bucket-name.s3.region.amazonaws.com/key) /// https://docs.aws.amazon.com/AmazonS3/latest/dev/VirtualHosting.html#virtual-hosted-style-access static const RE2 virtual_hosted_style_pattern(R"((.+)\.(s3express[\-a-z0-9]+|s3|cos|obs|oss|eos)([.\-][a-z0-9\-.:]+))"); + /// Case when AWS Private Link Interface is being used + /// E.g. (bucket.vpce-07a1cd78f1bd55c5f-j3a3vg6w.s3.us-east-1.vpce.amazonaws.com/bucket-name/key) + /// https://docs.aws.amazon.com/AmazonS3/latest/userguide/privatelink-interface-endpoints.html + static const RE2 aws_private_link_style_pattern(R"(bucket\.vpce\-([a-z0-9\-.]+)\.vpce.amazonaws.com(:\d{1,5})?)"); + /// Case when bucket name and key represented in path of S3 URL. - /// E.g. (https://s3.Region.amazonaws.com/bucket-name/key) + /// E.g. (https://s3.region.amazonaws.com/bucket-name/key) /// https://docs.aws.amazon.com/AmazonS3/latest/dev/VirtualHosting.html#path-style-access static const RE2 path_style_pattern("^/([^/]*)/(.*)"); @@ -67,7 +72,7 @@ URI::URI(const std::string & uri_) else { mapper["s3"] = "https://{bucket}.s3.amazonaws.com"; - mapper["gs"] = "https://{bucket}.storage.googleapis.com"; + mapper["gs"] = "https://storage.googleapis.com/{bucket}"; mapper["oss"] = "https://{bucket}.oss.aliyuncs.com"; } @@ -103,7 +108,10 @@ URI::URI(const std::string & uri_) String name; String endpoint_authority_from_uri; - if (re2::RE2::FullMatch(uri.getAuthority(), virtual_hosted_style_pattern, &bucket, &name, &endpoint_authority_from_uri)) + bool is_using_aws_private_link_interface = re2::RE2::FullMatch(uri.getAuthority(), aws_private_link_style_pattern); + + if (!is_using_aws_private_link_interface + && re2::RE2::FullMatch(uri.getAuthority(), virtual_hosted_style_pattern, &bucket, &name, &endpoint_authority_from_uri)) { is_virtual_hosted_style = true; endpoint = uri.getScheme() + "://" + name + endpoint_authority_from_uri; diff --git a/src/IO/S3/URI.h b/src/IO/S3/URI.h index 2873728bc781..06b7d03aa8ce 100644 --- a/src/IO/S3/URI.h +++ b/src/IO/S3/URI.h @@ -17,6 +17,7 @@ namespace DB::S3 * The following patterns are allowed: * s3://bucket/key * http(s)://endpoint/bucket/key + * http(s)://bucket..s3..vpce.amazonaws.com<:port_number>/bucket_name/key */ struct URI { diff --git a/src/IO/S3/tests/gtest_aws_s3_client.cpp b/src/IO/S3/tests/gtest_aws_s3_client.cpp index 25786619241f..0a28c578f69d 100644 --- a/src/IO/S3/tests/gtest_aws_s3_client.cpp +++ b/src/IO/S3/tests/gtest_aws_s3_client.cpp @@ -159,7 +159,7 @@ void testServerSideEncryption( DB::S3::CredentialsConfiguration { .use_environment_credentials = use_environment_credentials, - .use_insecure_imds_request = use_insecure_imds_request + .use_insecure_imds_request = use_insecure_imds_request, } ); diff --git a/src/IO/SeekableReadBuffer.cpp b/src/IO/SeekableReadBuffer.cpp index 5d83f4e1b4a9..f2a114a5389a 100644 --- a/src/IO/SeekableReadBuffer.cpp +++ b/src/IO/SeekableReadBuffer.cpp @@ -1,5 +1,6 @@ #include +#include namespace DB { diff --git a/src/IO/VarInt.h b/src/IO/VarInt.h index 9e72705341df..6dce80081708 100644 --- a/src/IO/VarInt.h +++ b/src/IO/VarInt.h @@ -5,6 +5,9 @@ #include #include +#include +#include + namespace DB { diff --git a/src/IO/WriteBufferFromOStream.cpp b/src/IO/WriteBufferFromOStream.cpp index ffc3e62e9a66..e77ec079d1f9 100644 --- a/src/IO/WriteBufferFromOStream.cpp +++ b/src/IO/WriteBufferFromOStream.cpp @@ -1,6 +1,7 @@ #include #include +#include namespace DB { diff --git a/src/IO/WriteHelpers.h b/src/IO/WriteHelpers.h index b42b4e6e9789..a30e2feb4397 100644 --- a/src/IO/WriteHelpers.h +++ 
b/src/IO/WriteHelpers.h @@ -1390,9 +1390,9 @@ struct PcgSerializer { static void serializePcg32(const pcg32_fast & rng, WriteBuffer & buf) { - writeText(rng.multiplier(), buf); + writeText(pcg32_fast::multiplier(), buf); writeChar(' ', buf); - writeText(rng.increment(), buf); + writeText(pcg32_fast::increment(), buf); writeChar(' ', buf); writeText(rng.state_, buf); } diff --git a/src/IO/tests/gtest_s3_uri.cpp b/src/IO/tests/gtest_s3_uri.cpp index 5bf0dfb962df..0ec28f800727 100644 --- a/src/IO/tests/gtest_s3_uri.cpp +++ b/src/IO/tests/gtest_s3_uri.cpp @@ -74,6 +74,40 @@ const TestCase TestCases[] = { "data", "", true}, + {S3::URI("https://bucket.vpce-07a1cd78f1bd55c5f-j3a3vg6w.s3.us-east-1.vpce.amazonaws.com/root/nested/file.txt"), + "https://bucket.vpce-07a1cd78f1bd55c5f-j3a3vg6w.s3.us-east-1.vpce.amazonaws.com", + "root", + "nested/file.txt", + "", + false}, + // Test with a file with no extension + {S3::URI("https://bucket.vpce-03b2c987f1bd55c5f-j3b4vg7w.s3.ap-southeast-2.vpce.amazonaws.com/some_bucket/document"), + "https://bucket.vpce-03b2c987f1bd55c5f-j3b4vg7w.s3.ap-southeast-2.vpce.amazonaws.com", + "some_bucket", + "document", + "", + false}, + // Test with a deeply nested file path + {S3::URI("https://bucket.vpce-0242cd56f1bd55c5f-l5b7vg8x.s3.sa-east-1.vpce.amazonaws.com/some_bucket/b/c/d/e/f/g/h/i/j/data.json"), + "https://bucket.vpce-0242cd56f1bd55c5f-l5b7vg8x.s3.sa-east-1.vpce.amazonaws.com", + "some_bucket", + "b/c/d/e/f/g/h/i/j/data.json", + "", + false}, + // Zonal + {S3::URI("https://bucket.vpce-07a1cd78f1bd55c5f-j3a3vg6w-us-east-1a.s3.us-east-1.vpce.amazonaws.com/root/nested/file.txt"), + "https://bucket.vpce-07a1cd78f1bd55c5f-j3a3vg6w-us-east-1a.s3.us-east-1.vpce.amazonaws.com", + "root", + "nested/file.txt", + "", + false}, + // Non standard port + {S3::URI("https://bucket.vpce-07a1cd78f1bd55c5f-j3a3vg6w-us-east-1a.s3.us-east-1.vpce.amazonaws.com:65535/root/nested/file.txt"), + "https://bucket.vpce-07a1cd78f1bd55c5f-j3a3vg6w-us-east-1a.s3.us-east-1.vpce.amazonaws.com:65535", + "root", + "nested/file.txt", + "", + false}, }; class S3UriTest : public testing::TestWithParam diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp index 09e9364a3f1d..06e6e1f8fc83 100644 --- a/src/Interpreters/ActionsDAG.cpp +++ b/src/Interpreters/ActionsDAG.cpp @@ -2135,13 +2135,6 @@ ConjunctionNodes getConjunctionNodes(ActionsDAG::Node * predicate, std::unordere } } - // std::cerr << "Allowed " << conjunction.allowed.size() << std::endl; - // for (const auto & node : conjunction.allowed) - // std::cerr << node->result_name << std::endl; - // std::cerr << "Rejected " << conjunction.rejected.size() << std::endl; - // for (const auto & node : conjunction.rejected) - // std::cerr << node->result_name << std::endl; - return conjunction; } @@ -2170,7 +2163,7 @@ ColumnsWithTypeAndName prepareFunctionArguments(const ActionsDAG::NodeRawConstPt /// /// Result actions add single column with conjunction result (it is always first in outputs). /// No other columns are added or removed. 
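/// An illustrative sketch (not part of this patch): for a filter `x > 0 AND y < 5` where only `x` is listed
/// in `available_inputs`, getConjunctionNodes() classifies `x > 0` as allowed and `y < 5` as rejected.
/// Building actions for the allowed conjunction with all_inputs = {x, y, z} yields outputs `x > 0, x, y, z`,
/// while the rejected part `y < 5` stays in the original DAG as the remaining filter.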
-ActionsDAGPtr ActionsDAG::cloneActionsForConjunction(NodeRawConstPtrs conjunction, const ColumnsWithTypeAndName & all_inputs) +ActionsDAGPtr ActionsDAG::createActionsForConjunction(NodeRawConstPtrs conjunction, const ColumnsWithTypeAndName & all_inputs) { if (conjunction.empty()) return nullptr; @@ -2265,9 +2258,9 @@ ActionsDAGPtr ActionsDAG::cloneActionsForConjunction(NodeRawConstPtrs conjunctio return actions; } -ActionsDAGPtr ActionsDAG::cloneActionsForFilterPushDown( +ActionsDAGPtr ActionsDAG::splitActionsForFilterPushDown( const std::string & filter_name, - bool can_remove_filter, + bool removes_filter, const Names & available_inputs, const ColumnsWithTypeAndName & all_inputs) { @@ -2321,16 +2314,232 @@ ActionsDAGPtr ActionsDAG::cloneActionsForFilterPushDown( } } - auto actions = cloneActionsForConjunction(conjunction.allowed, all_inputs); + auto actions = createActionsForConjunction(conjunction.allowed, all_inputs); if (!actions) return nullptr; /// Now, when actions are created, update the current DAG. + removeUnusedConjunctions(std::move(conjunction.rejected), predicate, removes_filter); + + return actions; +} + +ActionsDAG::ActionsForJOINFilterPushDown ActionsDAG::splitActionsForJOINFilterPushDown( + const std::string & filter_name, + bool removes_filter, + const Names & left_stream_available_columns_to_push_down, + const Block & left_stream_header, + const Names & right_stream_available_columns_to_push_down, + const Block & right_stream_header, + const Names & equivalent_columns_to_push_down, + const std::unordered_map & equivalent_left_stream_column_to_right_stream_column, + const std::unordered_map & equivalent_right_stream_column_to_left_stream_column) +{ + Node * predicate = const_cast(tryFindInOutputs(filter_name)); + if (!predicate) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Output nodes for ActionsDAG do not contain filter column name {}. DAG:\n{}", + filter_name, + dumpDAG()); + + /// If condition is constant let's do nothing. + /// It means there is nothing to push down or optimization was already applied. 
+ if (predicate->type == ActionType::COLUMN) + return {}; + + auto get_input_nodes = [this](const Names & inputs_names) + { + std::unordered_set allowed_nodes; + + std::unordered_map> inputs_map; + for (const auto & input_node : inputs) + inputs_map[input_node->result_name].emplace_back(input_node); + + for (const auto & name : inputs_names) + { + auto & inputs_list = inputs_map[name]; + if (inputs_list.empty()) + continue; + + allowed_nodes.emplace(inputs_list.front()); + inputs_list.pop_front(); + } + + return allowed_nodes; + }; + + auto left_stream_allowed_nodes = get_input_nodes(left_stream_available_columns_to_push_down); + auto right_stream_allowed_nodes = get_input_nodes(right_stream_available_columns_to_push_down); + auto both_streams_allowed_nodes = get_input_nodes(equivalent_columns_to_push_down); + + auto left_stream_push_down_conjunctions = getConjunctionNodes(predicate, left_stream_allowed_nodes); + auto right_stream_push_down_conjunctions = getConjunctionNodes(predicate, right_stream_allowed_nodes); + auto both_streams_push_down_conjunctions = getConjunctionNodes(predicate, both_streams_allowed_nodes); + + NodeRawConstPtrs left_stream_allowed_conjunctions = std::move(left_stream_push_down_conjunctions.allowed); + NodeRawConstPtrs right_stream_allowed_conjunctions = std::move(right_stream_push_down_conjunctions.allowed); + + std::unordered_set left_stream_allowed_conjunctions_set(left_stream_allowed_conjunctions.begin(), left_stream_allowed_conjunctions.end()); + std::unordered_set right_stream_allowed_conjunctions_set(right_stream_allowed_conjunctions.begin(), right_stream_allowed_conjunctions.end()); + + for (const auto * both_streams_push_down_allowed_conjunction_node : both_streams_push_down_conjunctions.allowed) + { + if (!left_stream_allowed_conjunctions_set.contains(both_streams_push_down_allowed_conjunction_node)) + left_stream_allowed_conjunctions.push_back(both_streams_push_down_allowed_conjunction_node); + + if (!right_stream_allowed_conjunctions_set.contains(both_streams_push_down_allowed_conjunction_node)) + right_stream_allowed_conjunctions.push_back(both_streams_push_down_allowed_conjunction_node); + } + + std::unordered_set rejected_conjunctions_set; + rejected_conjunctions_set.insert(left_stream_push_down_conjunctions.rejected.begin(), left_stream_push_down_conjunctions.rejected.end()); + rejected_conjunctions_set.insert(right_stream_push_down_conjunctions.rejected.begin(), right_stream_push_down_conjunctions.rejected.end()); + rejected_conjunctions_set.insert(both_streams_push_down_conjunctions.rejected.begin(), both_streams_push_down_conjunctions.rejected.end()); + + for (const auto & left_stream_allowed_conjunction : left_stream_allowed_conjunctions) + rejected_conjunctions_set.erase(left_stream_allowed_conjunction); + + for (const auto & right_stream_allowed_conjunction : right_stream_allowed_conjunctions) + rejected_conjunctions_set.erase(right_stream_allowed_conjunction); + + NodeRawConstPtrs rejected_conjunctions(rejected_conjunctions_set.begin(), rejected_conjunctions_set.end()); + + if (rejected_conjunctions.size() == 1) + { + chassert(rejected_conjunctions.front()->result_type); + + bool left_stream_push_constant = !left_stream_allowed_conjunctions.empty() && left_stream_allowed_conjunctions[0]->type == ActionType::COLUMN; + bool right_stream_push_constant = !right_stream_allowed_conjunctions.empty() && right_stream_allowed_conjunctions[0]->type == ActionType::COLUMN; + + if ((left_stream_push_constant || right_stream_push_constant) && 
!rejected_conjunctions.front()->result_type->equals(*predicate->result_type)) + { + /// No further optimization can be done + return {}; + } + } + + auto left_stream_filter_to_push_down = createActionsForConjunction(left_stream_allowed_conjunctions, left_stream_header.getColumnsWithTypeAndName()); + auto right_stream_filter_to_push_down = createActionsForConjunction(right_stream_allowed_conjunctions, right_stream_header.getColumnsWithTypeAndName()); + + auto replace_equivalent_columns_in_filter = [](const ActionsDAGPtr & filter, + const Block & stream_header, + const std::unordered_map & columns_to_replace) + { + auto updated_filter = ActionsDAG::buildFilterActionsDAG({filter->getOutputs()[0]}, columns_to_replace); + chassert(updated_filter->getOutputs().size() == 1); + + /** If result filter to left or right stream has column that is one of the stream inputs, we need distinguish filter column from + * actual input column. It is necessary because after filter step, filter column became constant column with value 1, and + * not all JOIN algorithms properly work with constants. + * + * Example: SELECT key FROM ( SELECT key FROM t1 ) AS t1 JOIN ( SELECT key FROM t1 ) AS t2 ON t1.key = t2.key WHERE key; + */ + const auto * stream_filter_node = updated_filter->getOutputs()[0]; + if (stream_header.has(stream_filter_node->result_name)) + { + const auto & alias_node = updated_filter->addAlias(*stream_filter_node, "__filter" + stream_filter_node->result_name); + updated_filter->getOutputs()[0] = &alias_node; + } + + std::unordered_map> updated_filter_inputs; + + for (const auto & input : updated_filter->getInputs()) + updated_filter_inputs[input->result_name].push_back(input); + + for (const auto & input : filter->getInputs()) + { + if (updated_filter_inputs.contains(input->result_name)) + continue; + + const Node * updated_filter_input_node = nullptr; + + auto it = columns_to_replace.find(input->result_name); + if (it != columns_to_replace.end()) + updated_filter_input_node = &updated_filter->addInput(it->second); + else + updated_filter_input_node = &updated_filter->addInput({input->column, input->result_type, input->result_name}); + + updated_filter_inputs[input->result_name].push_back(updated_filter_input_node); + } + + for (const auto & input_column : stream_header.getColumnsWithTypeAndName()) + { + const Node * input; + auto & list = updated_filter_inputs[input_column.name]; + if (list.empty()) + { + input = &updated_filter->addInput(input_column); + } + else + { + input = list.front(); + list.pop_front(); + } + + if (input != updated_filter->getOutputs()[0]) + updated_filter->outputs.push_back(input); + } + + return updated_filter; + }; + + if (left_stream_filter_to_push_down) + left_stream_filter_to_push_down = replace_equivalent_columns_in_filter(left_stream_filter_to_push_down, + left_stream_header, + equivalent_right_stream_column_to_left_stream_column); + + if (right_stream_filter_to_push_down) + right_stream_filter_to_push_down = replace_equivalent_columns_in_filter(right_stream_filter_to_push_down, + right_stream_header, + equivalent_left_stream_column_to_right_stream_column); + + /* + * We should check the presence of a split filter column name in stream columns to avoid removing the required column. + * + * Example: + * A filter expression is `a AND b = c`, but `b` and `c` belong to another side of the join and not in allowed columns to push down, + * so the final split filter is just `a`. + * In this case `a` can be in stream columns but not `and(a, equals(b, c))`. 
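+     *
+     * Another illustrative case (an assumption sketched here, not text from this patch): for
+     *     SELECT ... FROM t1 JOIN t2 ON t1.key = t2.key WHERE t1.x > 0 AND t2.y < 5
+     * `t1.x > 0` goes to the left-stream filter and `t2.y < 5` to the right-stream filter; each pushed-down
+     * filter is then marked as removable only when the corresponding stream header does not already contain
+     * a column with the filter column's name (see the checks below).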
+ */ + + bool left_stream_filter_removes_filter = true; + bool right_stream_filter_removes_filter = true; + + if (left_stream_filter_to_push_down) + { + const auto & left_stream_filter_column_name = left_stream_filter_to_push_down->getOutputs()[0]->result_name; + left_stream_filter_removes_filter = !left_stream_header.has(left_stream_filter_column_name); + } + + if (right_stream_filter_to_push_down) + { + const auto & right_stream_filter_column_name = right_stream_filter_to_push_down->getOutputs()[0]->result_name; + right_stream_filter_removes_filter = !right_stream_header.has(right_stream_filter_column_name); + } + + ActionsDAG::ActionsForJOINFilterPushDown result + { + .left_stream_filter_to_push_down = std::move(left_stream_filter_to_push_down), + .left_stream_filter_removes_filter = left_stream_filter_removes_filter, + .right_stream_filter_to_push_down = std::move(right_stream_filter_to_push_down), + .right_stream_filter_removes_filter = right_stream_filter_removes_filter + }; + + if (!result.left_stream_filter_to_push_down && !result.right_stream_filter_to_push_down) + return result; + + /// Now, when actions are created, update the current DAG. + removeUnusedConjunctions(std::move(rejected_conjunctions), predicate, removes_filter); - if (conjunction.rejected.empty()) + return result; +} + +void ActionsDAG::removeUnusedConjunctions(NodeRawConstPtrs rejected_conjunctions, Node * predicate, bool removes_filter) +{ + if (rejected_conjunctions.empty()) { /// The whole predicate was split. - if (can_remove_filter) + if (removes_filter) { /// If filter column is not needed, remove it from output nodes. std::erase_if(outputs, [&](const Node * node) { return node == predicate; }); @@ -2362,7 +2571,7 @@ ActionsDAGPtr ActionsDAG::cloneActionsForFilterPushDown( { /// Predicate is conjunction, where both allowed and rejected sets are not empty. - NodeRawConstPtrs new_children = std::move(conjunction.rejected); + NodeRawConstPtrs new_children = std::move(rejected_conjunctions); if (new_children.size() == 1 && new_children.front()->result_type->equals(*predicate->result_type)) { @@ -2403,13 +2612,12 @@ ActionsDAGPtr ActionsDAG::cloneActionsForFilterPushDown( std::unordered_set used_inputs; for (const auto * input : inputs) { - if (can_remove_filter && input == predicate) + if (removes_filter && input == predicate) continue; used_inputs.insert(input); } removeUnusedActions(used_inputs); - return actions; } static bool isColumnSortingPreserved(const ActionsDAG::Node * start_node, const String & sorted_column) @@ -2557,8 +2765,11 @@ ActionsDAGPtr ActionsDAG::buildFilterActionsDAG( auto input_node_it = node_name_to_input_node_column.find(node->result_name); if (input_node_it != node_name_to_input_node_column.end()) { - result_node = &result_dag->addInput(input_node_it->second); - node_to_result_node.emplace(node, result_node); + auto & result_input = result_inputs[input_node_it->second.name]; + if (!result_input) + result_input = &result_dag->addInput(input_node_it->second); + + node_to_result_node.emplace(node, result_input); nodes_to_process.pop_back(); continue; } diff --git a/src/Interpreters/ActionsDAG.h b/src/Interpreters/ActionsDAG.h index 469fe9ea7f17..a8a377866d3a 100644 --- a/src/Interpreters/ActionsDAG.h +++ b/src/Interpreters/ActionsDAG.h @@ -372,12 +372,46 @@ class ActionsDAG /// columns will be transformed like `x, y, z` -> `z > 0, z, x, y` -(remove filter)-> `z, x, y`. 
/// To avoid it, add inputs from `all_inputs` list, /// so actions `x, y, z -> z > 0, x, y, z` -(remove filter)-> `x, y, z` will not change columns order. - ActionsDAGPtr cloneActionsForFilterPushDown( + ActionsDAGPtr splitActionsForFilterPushDown( const std::string & filter_name, - bool can_remove_filter, + bool removes_filter, const Names & available_inputs, const ColumnsWithTypeAndName & all_inputs); + struct ActionsForJOINFilterPushDown + { + ActionsDAGPtr left_stream_filter_to_push_down; + bool left_stream_filter_removes_filter; + ActionsDAGPtr right_stream_filter_to_push_down; + bool right_stream_filter_removes_filter; + }; + + /** Split actions for JOIN filter push down. + * + * @param filter_name - name of filter node in current DAG. + * @param removes_filter - if filter is removed after it is applied. + * @param left_stream_available_columns_to_push_down - columns from left stream that are safe to use in push down conditions + * to left stream. + * @param left_stream_header - left stream header. + * @param right_stream_available_columns_to_push_down - columns from right stream that are safe to use in push down conditions + * to right stream. + * @param right_stream_header - right stream header. + * @param equivalent_columns_to_push_down - columns from left and right streams that are safe to use in push down conditions + * to left and right streams. + * @param equivalent_left_stream_column_to_right_stream_column - equivalent left stream column name to right stream column map. + * @param equivalent_right_stream_column_to_left_stream_column - equivalent right stream column name to left stream column map. + */ + ActionsForJOINFilterPushDown splitActionsForJOINFilterPushDown( + const std::string & filter_name, + bool removes_filter, + const Names & left_stream_available_columns_to_push_down, + const Block & left_stream_header, + const Names & right_stream_available_columns_to_push_down, + const Block & right_stream_header, + const Names & equivalent_columns_to_push_down, + const std::unordered_map & equivalent_left_stream_column_to_right_stream_column, + const std::unordered_map & equivalent_right_stream_column_to_left_stream_column); + bool isSortingPreserved(const Block & input_header, const SortDescription & sort_description, const String & ignore_output_column = "") const; @@ -429,7 +463,9 @@ class ActionsDAG void compileFunctions(size_t min_count_to_compile_expression, const std::unordered_set & lazy_executed_nodes = {}); #endif - static ActionsDAGPtr cloneActionsForConjunction(NodeRawConstPtrs conjunction, const ColumnsWithTypeAndName & all_inputs); + static ActionsDAGPtr createActionsForConjunction(NodeRawConstPtrs conjunction, const ColumnsWithTypeAndName & all_inputs); + + void removeUnusedConjunctions(NodeRawConstPtrs rejected_conjunctions, Node * predicate, bool removes_filter); }; class FindOriginalNodeForOutputName diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index 16e2449206d0..504b72575630 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -1071,7 +1071,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & auto current_context = data.getContext(); - if (UserDefinedExecutableFunctionFactory::instance().has(node.name, current_context)) + if (UserDefinedExecutableFunctionFactory::instance().has(node.name, current_context)) /// NOLINT(readability-static-accessed-through-instance) { Array parameters; if (node.parameters) @@ -1087,7 +1087,7 @@ void 
ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & } } - function_builder = UserDefinedExecutableFunctionFactory::instance().tryGet(node.name, current_context, parameters); + function_builder = UserDefinedExecutableFunctionFactory::instance().tryGet(node.name, current_context, parameters); /// NOLINT(readability-static-accessed-through-instance) } if (!function_builder) @@ -1130,12 +1130,11 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & const auto * query_parameter = child->as(); if (function && function->name == "lambda") { - /// If the argument is a lambda expression, just remember its approximate type. - if (function->arguments->children.size() != 2) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "lambda requires two arguments"); + if (!isASTLambdaFunction(*function)) + throw Exception(ErrorCodes::SYNTAX_ERROR, "Lambda function definition expects two arguments, first argument must be a tuple of arguments"); + /// If the argument is a lambda expression, just remember its approximate type. const auto * lambda_args_tuple = function->arguments->children.at(0)->as(); - if (!lambda_args_tuple || lambda_args_tuple->name != "tuple") throw Exception(ErrorCodes::TYPE_MISMATCH, "First argument of lambda must be a tuple"); diff --git a/src/Interpreters/AggregateDescription.cpp b/src/Interpreters/AggregateDescription.cpp index 787e0a503f85..d4c09995b56e 100644 --- a/src/Interpreters/AggregateDescription.cpp +++ b/src/Interpreters/AggregateDescription.cpp @@ -1,7 +1,7 @@ +#include +#include #include #include -#include - #include diff --git a/src/Interpreters/AggregateDescription.h b/src/Interpreters/AggregateDescription.h index 8c3302a8b0b3..0f1c0ce67ae6 100644 --- a/src/Interpreters/AggregateDescription.h +++ b/src/Interpreters/AggregateDescription.h @@ -1,13 +1,16 @@ #pragma once -#include +#include #include +#include #include #include namespace DB { +class WriteBuffer; + namespace JSONBuilder { class JSONMap; } struct AggregateDescription diff --git a/src/Interpreters/AggregatedData.h b/src/Interpreters/AggregatedData.h index 6cd6b190801e..4b581c682cab 100644 --- a/src/Interpreters/AggregatedData.h +++ b/src/Interpreters/AggregatedData.h @@ -1,5 +1,5 @@ #pragma once -#include +#include #include #include diff --git a/src/Interpreters/AggregatedDataVariants.h b/src/Interpreters/AggregatedDataVariants.h index 8b82c5d9842b..9f7185db9fca 100644 --- a/src/Interpreters/AggregatedDataVariants.h +++ b/src/Interpreters/AggregatedDataVariants.h @@ -1,11 +1,12 @@ #pragma once -#include -#include -#include #include #include #include +#include +#include + + namespace DB { class Arena; diff --git a/src/Interpreters/AggregationUtils.cpp b/src/Interpreters/AggregationUtils.cpp index 125a9e4f6b83..132ce93786aa 100644 --- a/src/Interpreters/AggregationUtils.cpp +++ b/src/Interpreters/AggregationUtils.cpp @@ -1,3 +1,4 @@ +#include #include namespace DB diff --git a/src/Interpreters/Aggregator.cpp b/src/Interpreters/Aggregator.cpp index ab8cec864ae5..2db07bb77f66 100644 --- a/src/Interpreters/Aggregator.cpp +++ b/src/Interpreters/Aggregator.cpp @@ -7,38 +7,37 @@ # include #endif -#include +#include +#include +#include +#include +#include #include -#include #include -#include -#include +#include +#include #include -#include -#include -#include -#include +#include #include -#include +#include +#include #include -#include +#include #include -#include -#include -#include -#include +#include +#include #include -#include -#include #include 
-#include -#include +#include #include +#include +#include +#include +#include +#include #include - -#include - -#include +#include +#include namespace ProfileEvents @@ -1057,7 +1056,7 @@ void NO_INLINE Aggregator::executeImplBatch( /// During processing of row #i we will prefetch HashTable cell for row #(i + prefetch_look_ahead). PrefetchingHelper prefetching; - size_t prefetch_look_ahead = prefetching.getInitialLookAheadValue(); + size_t prefetch_look_ahead = PrefetchingHelper::getInitialLookAheadValue(); /// Optimization for special case when there are no aggregate functions. if (params.aggregates_size == 0) @@ -1078,7 +1077,7 @@ void NO_INLINE Aggregator::executeImplBatch( { if constexpr (prefetch && HasPrefetchMemberFunc) { - if (i == row_begin + prefetching.iterationsToMeasure()) + if (i == row_begin + PrefetchingHelper::iterationsToMeasure()) prefetch_look_ahead = prefetching.calcPrefetchLookAhead(); if (i + prefetch_look_ahead < row_end) @@ -1164,7 +1163,7 @@ void NO_INLINE Aggregator::executeImplBatch( if constexpr (prefetch && HasPrefetchMemberFunc) { - if (i == key_start + prefetching.iterationsToMeasure()) + if (i == key_start + PrefetchingHelper::iterationsToMeasure()) prefetch_look_ahead = prefetching.calcPrefetchLookAhead(); if (i + prefetch_look_ahead < row_end) diff --git a/src/Interpreters/AsynchronousInsertQueue.cpp b/src/Interpreters/AsynchronousInsertQueue.cpp index c05d1b8f979b..6b9ca34c2d7f 100644 --- a/src/Interpreters/AsynchronousInsertQueue.cpp +++ b/src/Interpreters/AsynchronousInsertQueue.cpp @@ -294,7 +294,7 @@ void AsynchronousInsertQueue::preprocessInsertQuery(const ASTPtr & query, const InterpreterInsertQuery interpreter(query, query_context, query_context->getSettingsRef().insert_allow_materialized_columns); auto table = interpreter.getTable(insert_query); - auto sample_block = interpreter.getSampleBlock(insert_query, table, table->getInMemoryMetadataPtr(), query_context); + auto sample_block = InterpreterInsertQuery::getSampleBlock(insert_query, table, table->getInMemoryMetadataPtr(), query_context); if (!FormatFactory::instance().isInputFormat(insert_query.format)) throw Exception(ErrorCodes::UNKNOWN_FORMAT, "Unknown input format {}", insert_query.format); diff --git a/src/Interpreters/BlobStorageLog.cpp b/src/Interpreters/BlobStorageLog.cpp index f9d5b0d6790b..0324ef8713cf 100644 --- a/src/Interpreters/BlobStorageLog.cpp +++ b/src/Interpreters/BlobStorageLog.cpp @@ -1,4 +1,5 @@ #include +#include #include #include @@ -26,6 +27,7 @@ ColumnsDescription BlobStorageLogElement::getColumnsDescription() return ColumnsDescription { + {"hostname", std::make_shared(std::make_shared()), "Hostname of the server executing the query."}, {"event_date", std::make_shared(), "Date of the event."}, {"event_time", std::make_shared(), "Time of the event."}, {"event_time_microseconds", std::make_shared(6), "Time of the event with microseconds precision."}, @@ -51,6 +53,7 @@ void BlobStorageLogElement::appendToBlock(MutableColumns & columns) const size_t i = 0; auto event_time_seconds = timeInSeconds(event_time); + columns[i++]->insert(getFQDNOrHostName()); columns[i++]->insert(DateLUT::instance().toDayNum(event_time_seconds).toUnderType()); columns[i++]->insert(event_time_seconds); columns[i++]->insert(Decimal64(timeInMicroseconds(event_time))); diff --git a/src/Interpreters/Cache/EvictionCandidates.cpp b/src/Interpreters/Cache/EvictionCandidates.cpp index f21c5f3a5085..d20ae77d7206 100644 --- a/src/Interpreters/Cache/EvictionCandidates.cpp +++ 
b/src/Interpreters/Cache/EvictionCandidates.cpp @@ -17,6 +17,11 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } +EvictionCandidates::EvictionCandidates() + : log(getLogger("EvictionCandidates")) +{ +} + EvictionCandidates::~EvictionCandidates() { /// Here `queue_entries_to_invalidate` contains queue entries @@ -31,6 +36,10 @@ EvictionCandidates::~EvictionCandidates() iterator->invalidate(); } + /// We cannot reset evicting flag if we already removed queue entries. + if (removed_queue_entries) + return; + /// Here `candidates` contain only those file segments /// which failed to be removed during evict() /// because there was some exception before evict() @@ -58,13 +67,37 @@ void EvictionCandidates::add( ++candidates_size; } +void EvictionCandidates::removeQueueEntries(const CachePriorityGuard::Lock & lock) +{ + /// Remove queue entries of eviction candidates. + /// This will release space we consider to be hold for them. + + LOG_TEST(log, "Will remove {} eviction candidates", size()); + + for (const auto & [key, key_candidates] : candidates) + { + for (const auto & candidate : key_candidates.candidates) + { + auto queue_iterator = candidate->getQueueIterator(); + queue_iterator->invalidate(); + + candidate->file_segment->resetQueueIterator(); + queue_iterator->remove(lock); + } + } + removed_queue_entries = true; +} + void EvictionCandidates::evict() { if (candidates.empty()) return; auto timer = DB::CurrentThread::getProfileEvents().timer(ProfileEvents::FilesystemCacheEvictMicroseconds); - queue_entries_to_invalidate.reserve(candidates_size); + + /// If queue entries are already removed, then nothing to invalidate. + if (!removed_queue_entries) + queue_entries_to_invalidate.reserve(candidates_size); for (auto & [key, key_candidates] : candidates) { @@ -80,10 +113,14 @@ void EvictionCandidates::evict() { auto & candidate = key_candidates.candidates.back(); chassert(candidate->releasable()); - const auto segment = candidate->file_segment; - auto iterator = segment->getQueueIterator(); - chassert(iterator); + + IFileCachePriority::IteratorPtr iterator; + if (!removed_queue_entries) + { + iterator = segment->getQueueIterator(); + chassert(iterator); + } ProfileEvents::increment(ProfileEvents::FilesystemCacheEvictedFileSegments); ProfileEvents::increment(ProfileEvents::FilesystemCacheEvictedBytes, segment->range().size()); @@ -112,7 +149,9 @@ void EvictionCandidates::evict() /// it was freed in favour of some reserver, so we can make it visibly /// free only for that particular reserver. - queue_entries_to_invalidate.push_back(iterator); + if (iterator) + queue_entries_to_invalidate.push_back(iterator); + key_candidates.candidates.pop_back(); } } @@ -154,6 +193,12 @@ void EvictionCandidates::finalize( on_finalize.clear(); } +bool EvictionCandidates::needFinalize() const +{ + /// Do we need to call finalize()? 
+ return !on_finalize.empty() || !queue_entries_to_invalidate.empty(); +} + void EvictionCandidates::setSpaceHolder( size_t size, size_t elements, diff --git a/src/Interpreters/Cache/EvictionCandidates.h b/src/Interpreters/Cache/EvictionCandidates.h index 2745d508a5d0..0dcc6bc0dda5 100644 --- a/src/Interpreters/Cache/EvictionCandidates.h +++ b/src/Interpreters/Cache/EvictionCandidates.h @@ -4,11 +4,12 @@ namespace DB { -class EvictionCandidates +class EvictionCandidates : private boost::noncopyable { public: using FinalizeEvictionFunc = std::function; + EvictionCandidates(); ~EvictionCandidates(); void add( @@ -18,12 +19,16 @@ class EvictionCandidates void evict(); + void removeQueueEntries(const CachePriorityGuard::Lock &); + void onFinalize(FinalizeEvictionFunc && func) { on_finalize.emplace_back(std::move(func)); } void finalize( FileCacheQueryLimit::QueryContext * query_context, const CachePriorityGuard::Lock &); + bool needFinalize() const; + size_t size() const { return candidates_size; } auto begin() const { return candidates.begin(); } @@ -47,8 +52,13 @@ class EvictionCandidates size_t candidates_size = 0; std::vector on_finalize; + std::vector queue_entries_to_invalidate; + bool removed_queue_entries = false; + IFileCachePriority::HoldSpacePtr hold_space; + + LoggerPtr log; }; using EvictionCandidatesPtr = std::unique_ptr; diff --git a/src/Interpreters/Cache/FileCache.cpp b/src/Interpreters/Cache/FileCache.cpp index 71dc0cca3a74..be452e43bedf 100644 --- a/src/Interpreters/Cache/FileCache.cpp +++ b/src/Interpreters/Cache/FileCache.cpp @@ -90,9 +90,15 @@ FileCache::FileCache(const std::string & cache_name, const FileCacheSettings & s , metadata(settings.base_path, settings.background_download_queue_size_limit, settings.background_download_threads, write_cache_per_user_directory) { if (settings.cache_policy == "LRU") - main_priority = std::make_unique(settings.max_size, settings.max_elements); + { + main_priority = std::make_unique( + settings.max_size, settings.max_elements, nullptr, cache_name); + } else if (settings.cache_policy == "SLRU") - main_priority = std::make_unique(settings.max_size, settings.max_elements, settings.slru_size_ratio); + { + main_priority = std::make_unique( + settings.max_size, settings.max_elements, settings.slru_size_ratio, nullptr, nullptr, cache_name); + } else throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown cache policy: {}", settings.cache_policy); @@ -180,6 +186,7 @@ void FileCache::initialize() } metadata.startup(); + is_initialized = true; } @@ -1378,37 +1385,86 @@ void FileCache::applySettingsIfPossible(const FileCacheSettings & new_settings, } } - if (new_settings.max_size != actual_settings.max_size || new_settings.max_elements != actual_settings.max_elements) { - cache_is_being_resized.store(true, std::memory_order_relaxed); - SCOPE_EXIT({ - cache_is_being_resized.store(false, std::memory_order_relaxed); - }); - - auto cache_lock = lockCache(); - bool updated = false; - try + EvictionCandidates eviction_candidates; + bool modified_size_limit = false; + + /// In order to not block cache for the duration of cache resize, + /// we do: + /// a. Take a cache lock. + /// 1. Collect eviction candidates, + /// 2. Remove queue entries of eviction candidates. + /// This will release space we consider to be hold for them, + /// so that we can safely modify size limits. + /// 3. Modify size limits of cache. + /// b. Release a cache lock. + /// 1. Do actual eviction from filesystem. 
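The comment block above describes the new two-phase, non-blocking resize protocol for the cache. A condensed sketch of that sequence follows (simplified from the hunk below, not the literal patch code: `new_max_size`, `new_max_elements` and `slru_size_ratio` stand in for the corresponding fields of `new_settings`, and the `cache_is_being_resized` flag, SCOPE_EXIT, exception handling and logging are omitted):

    EvictionCandidates eviction_candidates;
    bool modified_size_limit = false;

    /// a. Everything inside this scope holds the cache lock.
    {
        auto cache_lock = lockCache();

        FileCacheReserveStat stat;
        if (main_priority->collectCandidatesForEviction(
                new_max_size, new_max_elements, /* max_candidates_to_evict */ 0,
                stat, eviction_candidates, cache_lock))
        {
            /// Drop the queue entries of the candidates first: this releases the
            /// space they were holding, so the smaller limits can be applied now.
            eviction_candidates.removeQueueEntries(cache_lock);
            main_priority->modifySizeLimits(new_max_size, new_max_elements, slru_size_ratio, cache_lock);
            modified_size_limit = true;
        }
    }

    /// b. Actual eviction from the filesystem runs without the cache lock.
    if (modified_size_limit)
    {
        eviction_candidates.evict();
        if (eviction_candidates.needFinalize())
            eviction_candidates.finalize(nullptr, lockCache());
    }

Removing the queue entries while still under the cache lock is what makes it safe to shrink the limits before the filesystem eviction has actually happened; the in-memory file segment metadata is only cleaned up afterwards, as the comments in the hunk note.
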
{ - updated = main_priority->modifySizeLimits( - new_settings.max_size, new_settings.max_elements, new_settings.slru_size_ratio, cache_lock); - } - catch (...) - { - actual_settings.max_size = main_priority->getSizeLimit(cache_lock); - actual_settings.max_elements = main_priority->getElementsLimit(cache_lock); - throw; + cache_is_being_resized.store(true, std::memory_order_relaxed); + SCOPE_EXIT({ + cache_is_being_resized.store(false, std::memory_order_relaxed); + }); + + auto cache_lock = lockCache(); + + FileCacheReserveStat stat; + if (main_priority->collectCandidatesForEviction( + new_settings.max_size, new_settings.max_elements, 0/* max_candidates_to_evict */, + stat, eviction_candidates, cache_lock)) + { + /// Remove only queue entries of eviction candidates. + eviction_candidates.removeQueueEntries(cache_lock); + /// Note that (in-memory) metadata about corresponding file segments + /// (e.g. file segment info in CacheMetadata) will be removed + /// only after eviction from filesystem. This is needed to avoid + /// a race on removal of file from filesystsem and + /// addition of the same file as part of a newly cached file segment. + + /// Modify cache size limits. + /// From this point cache eviction will follow them. + main_priority->modifySizeLimits( + new_settings.max_size, new_settings.max_elements, + new_settings.slru_size_ratio, cache_lock); + + modified_size_limit = true; + } } - if (updated) + if (modified_size_limit) { + try + { + /// Do actual eviction from filesystem. + eviction_candidates.evict(); + } + catch (...) + { + if (eviction_candidates.needFinalize()) + eviction_candidates.finalize(nullptr, lockCache()); + throw; + } + + if (eviction_candidates.needFinalize()) + eviction_candidates.finalize(nullptr, lockCache()); + LOG_INFO(log, "Changed max_size from {} to {}, max_elements from {} to {}", actual_settings.max_size, new_settings.max_size, actual_settings.max_elements, new_settings.max_elements); - actual_settings.max_size = main_priority->getSizeLimit(cache_lock); - actual_settings.max_elements = main_priority->getElementsLimit(cache_lock); + actual_settings.max_size = new_settings.max_size; + actual_settings.max_elements = new_settings.max_elements; + } + else + { + LOG_WARNING( + log, "Unable to modify size limit from {} to {}, elements limit from {} to {}. " + "`max_size` and `max_elements` settings will remain inconsistent with config.xml. " + "Next attempt to update them will happen on the next config reload. " + "You can trigger it with SYSTEM RELOAD CONFIG.", + actual_settings.max_size, new_settings.max_size, + actual_settings.max_elements, new_settings.max_elements); } } diff --git a/src/Interpreters/Cache/FileCache.h b/src/Interpreters/Cache/FileCache.h index 4ee456cce728..684ca83fff89 100644 --- a/src/Interpreters/Cache/FileCache.h +++ b/src/Interpreters/Cache/FileCache.h @@ -18,6 +18,7 @@ #include #include #include +#include #include diff --git a/src/Interpreters/Cache/FileCacheFactory.cpp b/src/Interpreters/Cache/FileCacheFactory.cpp index e05e1935d952..a7a5834f03d7 100644 --- a/src/Interpreters/Cache/FileCacheFactory.cpp +++ b/src/Interpreters/Cache/FileCacheFactory.cpp @@ -155,7 +155,17 @@ void FileCacheFactory::updateSettingsFromConfig(const Poco::Util::AbstractConfig FileCacheSettings old_settings = cache_info->getSettings(); if (old_settings == new_settings) + { continue; + } + + /// FIXME: registerDiskCache modifies `path` setting of FileCacheSettings if path is relative. 
+ /// This can lead to calling applySettingsIfPossible even though nothing changed, which is avoidable. + + // LOG_TRACE(log, "Will apply settings changes for cache {}. " + // "Settings changes: {} (new settings: {}, old_settings: {})", + // cache_name, fmt::join(new_settings.getSettingsDiff(old_settings), ", "), + // new_settings.toString(), old_settings.toString()); try { @@ -166,6 +176,7 @@ void FileCacheFactory::updateSettingsFromConfig(const Poco::Util::AbstractConfig /// Settings changes could be partially applied in case of exception, /// make sure cache_info->settings show correct state of applied settings. cache_info->setSettings(old_settings); + tryLogCurrentException(__PRETTY_FUNCTION__); throw; } diff --git a/src/Interpreters/Cache/FileCacheSettings.cpp b/src/Interpreters/Cache/FileCacheSettings.cpp index 8a48a2de68fa..ff5f48503b7e 100644 --- a/src/Interpreters/Cache/FileCacheSettings.cpp +++ b/src/Interpreters/Cache/FileCacheSettings.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include namespace DB @@ -98,4 +99,61 @@ void FileCacheSettings::loadFromCollection(const NamedCollection & collection) loadImpl(std::move(collection_has), std::move(collection_get_uint), std::move(collection_get_string), std::move(collection_get_double)); } +std::string FileCacheSettings::toString() const +{ + WriteBufferFromOwnString res; + res << "base_path: " << base_path << ", "; + res << "max_size: " << max_size << ", "; + res << "max_elements: " << max_elements << ", "; + res << "max_file_segment_size: " << max_file_segment_size << ", "; + res << "cache_on_write_operations: " << cache_on_write_operations << ", "; + res << "cache_hits_threshold: " << cache_hits_threshold << ", "; + res << "enable_filesystem_query_cache_limit: " << enable_filesystem_query_cache_limit << ", "; + res << "bypass_cache_threshold: " << bypass_cache_threshold << ", "; + res << "boundary_alignment: " << boundary_alignment << ", "; + res << "background_download_threads: " << background_download_threads << ", "; + res << "background_download_queue_size_limit: " << background_download_queue_size_limit << ", "; + res << "load_metadata_threads: " << load_metadata_threads << ", "; + res << "write_cache_per_user_id_directory: " << write_cache_per_user_id_directory << ", "; + res << "cache_policy: " << cache_policy << ", "; + res << "slru_size_ratio: " << slru_size_ratio << ", "; + return res.str(); +} + +std::vector FileCacheSettings::getSettingsDiff(const FileCacheSettings & other) const +{ + std::vector res; + if (base_path != other.base_path) + res.push_back("base_path"); + if (max_size != other.max_size) + res.push_back("max_size"); + if (max_elements != other.max_elements) + res.push_back("max_elements"); + if (max_file_segment_size != other.max_file_segment_size) + res.push_back("max_file_segment_size"); + if (cache_on_write_operations != other.cache_on_write_operations) + res.push_back("cache_on_write_operations"); + if (cache_hits_threshold != other.cache_hits_threshold) + res.push_back("cache_hits_threshold"); + if (enable_filesystem_query_cache_limit != other.enable_filesystem_query_cache_limit) + res.push_back("enable_filesystem_query_cache_limit"); + if (bypass_cache_threshold != other.bypass_cache_threshold) + res.push_back("bypass_cache_threshold"); + if (boundary_alignment != other.boundary_alignment) + res.push_back("boundary_alignment"); + if (background_download_threads != other.background_download_threads) + res.push_back("background_download_threads"); + if (background_download_queue_size_limit != 
other.background_download_queue_size_limit) + res.push_back("background_download_queue_size_limit"); + if (load_metadata_threads != other.load_metadata_threads) + res.push_back("load_metadata_threads"); + if (write_cache_per_user_id_directory != other.write_cache_per_user_id_directory) + res.push_back("write_cache_per_user_directory"); + if (cache_policy != other.cache_policy) + res.push_back("cache_policy"); + if (slru_size_ratio != other.slru_size_ratio) + res.push_back("slru_size_ratio"); + return res; +} + } diff --git a/src/Interpreters/Cache/FileCacheSettings.h b/src/Interpreters/Cache/FileCacheSettings.h index 14770b3f0054..7dab14ac8960 100644 --- a/src/Interpreters/Cache/FileCacheSettings.h +++ b/src/Interpreters/Cache/FileCacheSettings.h @@ -41,6 +41,9 @@ struct FileCacheSettings void loadFromConfig(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix); void loadFromCollection(const NamedCollection & collection); + std::string toString() const; + std::vector getSettingsDiff(const FileCacheSettings & other) const; + bool operator ==(const FileCacheSettings &) const = default; private: diff --git a/src/Interpreters/Cache/FileSegment.cpp b/src/Interpreters/Cache/FileSegment.cpp index e474e24c6f1f..20a3af60c8cf 100644 --- a/src/Interpreters/Cache/FileSegment.cpp +++ b/src/Interpreters/Cache/FileSegment.cpp @@ -114,7 +114,7 @@ FileSegment::Range::Range(size_t left_, size_t right_) : left(left_), right(righ FileSegment::State FileSegment::state() const { - auto lock = lockFileSegment(); + auto lk = lock(); return download_state; } @@ -131,7 +131,7 @@ String FileSegment::tryGetPath() const return metadata->getFileSegmentPath(*this); } -FileSegmentGuard::Lock FileSegment::lockFileSegment() const +FileSegmentGuard::Lock FileSegment::lock() const { ProfileEventTimeIncrement watch(ProfileEvents::FileSegmentLockMicroseconds); return segment_guard.lock(); @@ -153,24 +153,30 @@ void FileSegment::setDownloadState(State state, const FileSegmentGuard::Lock & l size_t FileSegment::getReservedSize() const { - auto lock = lockFileSegment(); + auto lk = lock(); return reserved_size; } FileSegment::Priority::IteratorPtr FileSegment::getQueueIterator() const { - auto lock = lockFileSegment(); + auto lk = lock(); return queue_iterator; } void FileSegment::setQueueIterator(Priority::IteratorPtr iterator) { - auto lock = lockFileSegment(); + auto lk = lock(); if (queue_iterator) throw Exception(ErrorCodes::LOGICAL_ERROR, "Queue iterator cannot be set twice"); queue_iterator = iterator; } +void FileSegment::resetQueueIterator() +{ + auto lk = lock(); + queue_iterator.reset(); +} + size_t FileSegment::getCurrentWriteOffset() const { return range().left + downloaded_size; @@ -183,14 +189,14 @@ size_t FileSegment::getDownloadedSize() const void FileSegment::setDownloadedSize(size_t delta) { - auto lock = lockFileSegment(); + auto lk = lock(); downloaded_size += delta; assert(downloaded_size == std::filesystem::file_size(getPath())); } bool FileSegment::isDownloaded() const { - auto lock = lockFileSegment(); + auto lk = lock(); return download_state == State::DOWNLOADED; } @@ -204,8 +210,7 @@ String FileSegment::getCallerId() String FileSegment::getDownloader() const { - auto lock = lockFileSegment(); - return getDownloaderUnlocked(lock); + return getDownloaderUnlocked(lock()); } String FileSegment::getDownloaderUnlocked(const FileSegmentGuard::Lock &) const @@ -215,11 +220,11 @@ String FileSegment::getDownloaderUnlocked(const FileSegmentGuard::Lock &) const String 
FileSegment::getOrSetDownloader() { - auto lock = lockFileSegment(); + auto lk = lock(); - assertNotDetachedUnlocked(lock); + assertNotDetachedUnlocked(lk); - auto current_downloader = getDownloaderUnlocked(lock); + auto current_downloader = getDownloaderUnlocked(lk); if (current_downloader.empty()) { @@ -229,7 +234,7 @@ String FileSegment::getOrSetDownloader() return "notAllowed:" + stateToString(download_state); current_downloader = downloader_id = caller_id; - setDownloadState(State::DOWNLOADING, lock); + setDownloadState(State::DOWNLOADING, lk); chassert(key_metadata.lock()); } @@ -253,15 +258,15 @@ void FileSegment::resetDownloadingStateUnlocked(const FileSegmentGuard::Lock & l void FileSegment::resetDownloader() { - auto lock = lockFileSegment(); + auto lk = lock(); SCOPE_EXIT({ cv.notify_all(); }); - assertNotDetachedUnlocked(lock); - assertIsDownloaderUnlocked("resetDownloader", lock); + assertNotDetachedUnlocked(lk); + assertIsDownloaderUnlocked("resetDownloader", lk); - resetDownloadingStateUnlocked(lock); - resetDownloaderUnlocked(lock); + resetDownloadingStateUnlocked(lk); + resetDownloaderUnlocked(lk); } void FileSegment::resetDownloaderUnlocked(const FileSegmentGuard::Lock &) @@ -290,8 +295,8 @@ void FileSegment::assertIsDownloaderUnlocked(const std::string & operation, cons bool FileSegment::isDownloader() const { - auto lock = lockFileSegment(); - return isDownloaderUnlocked(lock); + auto lk = lock(); + return isDownloaderUnlocked(lk); } bool FileSegment::isDownloaderUnlocked(const FileSegmentGuard::Lock & lock) const @@ -301,21 +306,21 @@ bool FileSegment::isDownloaderUnlocked(const FileSegmentGuard::Lock & lock) cons FileSegment::RemoteFileReaderPtr FileSegment::getRemoteFileReader() { - auto lock = lockFileSegment(); - assertIsDownloaderUnlocked("getRemoteFileReader", lock); + auto lk = lock(); + assertIsDownloaderUnlocked("getRemoteFileReader", lk); return remote_file_reader; } void FileSegment::resetRemoteFileReader() { - auto lock = lockFileSegment(); - assertIsDownloaderUnlocked("resetRemoteFileReader", lock); + auto lk = lock(); + assertIsDownloaderUnlocked("resetRemoteFileReader", lk); remote_file_reader.reset(); } FileSegment::RemoteFileReaderPtr FileSegment::extractRemoteFileReader() { - auto lock = lockFileSegment(); + auto lk = lock(); if (remote_file_reader && (download_state == State::DOWNLOADED || download_state == State::PARTIALLY_DOWNLOADED_NO_CONTINUATION)) { @@ -326,8 +331,8 @@ FileSegment::RemoteFileReaderPtr FileSegment::extractRemoteFileReader() void FileSegment::setRemoteFileReader(RemoteFileReaderPtr remote_file_reader_) { - auto lock = lockFileSegment(); - assertIsDownloaderUnlocked("setRemoteFileReader", lock); + auto lk = lock(); + assertIsDownloaderUnlocked("setRemoteFileReader", lk); if (remote_file_reader) throw Exception(ErrorCodes::LOGICAL_ERROR, "Remote file reader already exists"); @@ -343,9 +348,9 @@ void FileSegment::write(const char * from, size_t size, size_t offset) throw Exception(ErrorCodes::LOGICAL_ERROR, "Writing zero size is not allowed"); { - auto lock = lockFileSegment(); - assertIsDownloaderUnlocked("write", lock); - assertNotDetachedUnlocked(lock); + auto lk = lock(); + assertIsDownloaderUnlocked("write", lk); + assertNotDetachedUnlocked(lk); } const auto file_segment_path = getPath(); @@ -404,10 +409,10 @@ void FileSegment::write(const char * from, size_t size, size_t offset) const int code = e.getErrno(); const bool is_no_space_left_error = code == /* No space left on device */28 || code == /* Quota exceeded */122; - auto 
lock = lockFileSegment(); + auto lk = lock(); - e.addMessage(fmt::format("{}, current cache state: {}", e.what(), getInfoForLogUnlocked(lock))); - setDownloadFailedUnlocked(lock); + e.addMessage(fmt::format("{}, current cache state: {}", e.what(), getInfoForLogUnlocked(lk))); + setDownloadFailedUnlocked(lk); if (downloaded_size == 0 && fs::exists(file_segment_path)) { @@ -430,9 +435,9 @@ void FileSegment::write(const char * from, size_t size, size_t offset) } catch (Exception & e) { - auto lock = lockFileSegment(); - e.addMessage(fmt::format("{}, current cache state: {}", e.what(), getInfoForLogUnlocked(lock))); - setDownloadFailedUnlocked(lock); + auto lk = lock(); + e.addMessage(fmt::format("{}, current cache state: {}", e.what(), getInfoForLogUnlocked(lk))); + setDownloadFailedUnlocked(lk); throw; } @@ -445,7 +450,7 @@ FileSegment::State FileSegment::wait(size_t offset) span.addAttribute("clickhouse.key", key().toString()); span.addAttribute("clickhouse.offset", offset); - auto lock = lockFileSegment(); + auto lk = lock(); if (downloader_id.empty() || offset < getCurrentWriteOffset()) return download_state; @@ -458,10 +463,10 @@ FileSegment::State FileSegment::wait(size_t offset) LOG_TEST(log, "{} waiting on: {}, current downloader: {}", getCallerId(), range().toString(), downloader_id); ProfileEventTimeIncrement watch(ProfileEvents::FileSegmentWaitMicroseconds); - chassert(!getDownloaderUnlocked(lock).empty()); - chassert(!isDownloaderUnlocked(lock)); + chassert(!getDownloaderUnlocked(lk).empty()); + chassert(!isDownloaderUnlocked(lk)); - [[maybe_unused]] const auto ok = cv.wait_for(lock, std::chrono::seconds(60), [&, this]() + [[maybe_unused]] const auto ok = cv.wait_for(lk, std::chrono::seconds(60), [&, this]() { return download_state != State::DOWNLOADING || offset < getCurrentWriteOffset(); }); @@ -507,10 +512,10 @@ bool FileSegment::reserve(size_t size_to_reserve, size_t lock_wait_timeout_milli bool is_file_segment_size_exceeded; { - auto lock = lockFileSegment(); + auto lk = lock(); - assertNotDetachedUnlocked(lock); - assertIsDownloaderUnlocked("reserve", lock); + assertNotDetachedUnlocked(lk); + assertIsDownloaderUnlocked("reserve", lk); expected_downloaded_size = getDownloadedSize(); @@ -553,7 +558,7 @@ bool FileSegment::reserve(size_t size_to_reserve, size_t lock_wait_timeout_milli bool reserved = cache->tryReserve(*this, size_to_reserve, *reserve_stat, getKeyMetadata()->user, lock_wait_timeout_milliseconds); if (!reserved) - setDownloadFailedUnlocked(lockFileSegment()); + setDownloadFailedUnlocked(lock()); return reserved; } @@ -578,8 +583,8 @@ void FileSegment::setDownloadedUnlocked(const FileSegmentGuard::Lock &) void FileSegment::setDownloadFailed() { - auto lock = lockFileSegment(); - setDownloadFailedUnlocked(lock); + auto lk = lock(); + setDownloadFailedUnlocked(lk); } void FileSegment::setDownloadFailedUnlocked(const FileSegmentGuard::Lock & lock) @@ -601,22 +606,22 @@ void FileSegment::setDownloadFailedUnlocked(const FileSegmentGuard::Lock & lock) void FileSegment::completePartAndResetDownloader() { - auto lock = lockFileSegment(); + auto lk = lock(); SCOPE_EXIT({ cv.notify_all(); }); - assertNotDetachedUnlocked(lock); - assertIsDownloaderUnlocked("completePartAndResetDownloader", lock); + assertNotDetachedUnlocked(lk); + assertIsDownloaderUnlocked("completePartAndResetDownloader", lk); chassert(download_state == State::DOWNLOADING || download_state == State::PARTIALLY_DOWNLOADED_NO_CONTINUATION); if (download_state == State::DOWNLOADING) - 
resetDownloadingStateUnlocked(lock); + resetDownloadingStateUnlocked(lk); - resetDownloaderUnlocked(lock); + resetDownloaderUnlocked(lk); - LOG_TEST(log, "Complete batch. ({})", getInfoForLogUnlocked(lock)); + LOG_TEST(log, "Complete batch. ({})", getInfoForLogUnlocked(lk)); } void FileSegment::complete() @@ -636,7 +641,7 @@ void FileSegment::complete() throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot complete file segment: {}", getInfoForLog()); } - auto segment_lock = lockFileSegment(); + auto segment_lock = lock(); if (isCompleted(false)) return; @@ -752,8 +757,8 @@ void FileSegment::complete() String FileSegment::getInfoForLog() const { - auto lock = lockFileSegment(); - return getInfoForLogUnlocked(lock); + auto lk = lock(); + return getInfoForLogUnlocked(lk); } String FileSegment::getInfoForLogUnlocked(const FileSegmentGuard::Lock &) const @@ -795,7 +800,7 @@ String FileSegment::stateToString(FileSegment::State state) bool FileSegment::assertCorrectness() const { - return assertCorrectnessUnlocked(lockFileSegment()); + return assertCorrectnessUnlocked(lock()); } bool FileSegment::assertCorrectnessUnlocked(const FileSegmentGuard::Lock & lock) const @@ -841,7 +846,6 @@ bool FileSegment::assertCorrectnessUnlocked(const FileSegmentGuard::Lock & lock) chassert(downloaded_size == range().size()); chassert(downloaded_size > 0); chassert(std::filesystem::file_size(getPath()) > 0); - chassert(queue_iterator); check_iterator(queue_iterator); } else @@ -865,8 +869,8 @@ bool FileSegment::assertCorrectnessUnlocked(const FileSegmentGuard::Lock & lock) void FileSegment::assertNotDetached() const { - auto lock = lockFileSegment(); - assertNotDetachedUnlocked(lock); + auto lk = lock(); + assertNotDetachedUnlocked(lk); } void FileSegment::assertNotDetachedUnlocked(const FileSegmentGuard::Lock & lock) const @@ -883,7 +887,7 @@ void FileSegment::assertNotDetachedUnlocked(const FileSegmentGuard::Lock & lock) FileSegment::Info FileSegment::getInfo(const FileSegmentPtr & file_segment) { - auto lock = file_segment->lockFileSegment(); + auto lock = file_segment->lock(); auto key_metadata = file_segment->tryGetKeyMetadata(); return Info{ .key = file_segment->key(), @@ -906,7 +910,7 @@ FileSegment::Info FileSegment::getInfo(const FileSegmentPtr & file_segment) bool FileSegment::isDetached() const { - auto lock = lockFileSegment(); + auto lk = lock(); return download_state == State::DETACHED; } @@ -922,7 +926,7 @@ bool FileSegment::isCompleted(bool sync) const if (is_completed_state()) return true; - auto lock = lockFileSegment(); + auto lk = lock(); return is_completed_state(); } diff --git a/src/Interpreters/Cache/FileSegment.h b/src/Interpreters/Cache/FileSegment.h index c34ee064345a..7793c50d2d5d 100644 --- a/src/Interpreters/Cache/FileSegment.h +++ b/src/Interpreters/Cache/FileSegment.h @@ -171,12 +171,14 @@ friend class FileCache; /// Because of reserved_size in tryReserve(). * ========== Methods used by `cache` ======================== */ - FileSegmentGuard::Lock lock() const { return segment_guard.lock(); } + FileSegmentGuard::Lock lock() const; Priority::IteratorPtr getQueueIterator() const; void setQueueIterator(Priority::IteratorPtr iterator); + void resetQueueIterator(); + KeyMetadataPtr tryGetKeyMetadata() const; KeyMetadataPtr getKeyMetadata() const; @@ -241,7 +243,6 @@ friend class FileCache; /// Because of reserved_size in tryReserve(). 
bool assertCorrectnessUnlocked(const FileSegmentGuard::Lock &) const; LockedKeyPtr lockKeyMetadata(bool assert_exists = true) const; - FileSegmentGuard::Lock lockFileSegment() const; String tryGetPath() const; diff --git a/src/Interpreters/Cache/IFileCachePriority.h b/src/Interpreters/Cache/IFileCachePriority.h index ff06f17ce36d..a727aab68eea 100644 --- a/src/Interpreters/Cache/IFileCachePriority.h +++ b/src/Interpreters/Cache/IFileCachePriority.h @@ -146,7 +146,20 @@ class IFileCachePriority : private boost::noncopyable const UserID & user_id, const CachePriorityGuard::Lock &) = 0; - virtual bool modifySizeLimits(size_t max_size_, size_t max_elements_, double size_ratio_, const CachePriorityGuard::Lock &) = 0; + /// Collect eviction `candidates_num` candidates for eviction. + virtual bool collectCandidatesForEviction( + size_t desired_size, + size_t desired_elements_count, + size_t max_candidates_to_evict, + FileCacheReserveStat & stat, + EvictionCandidates & candidates, + const CachePriorityGuard::Lock &) = 0; + + virtual bool modifySizeLimits( + size_t max_size_, + size_t max_elements_, + double size_ratio_, + const CachePriorityGuard::Lock &) = 0; /// A space holder implementation, which allows to take hold of /// some space in cache given that this space was freed. @@ -192,8 +205,8 @@ class IFileCachePriority : private boost::noncopyable virtual void releaseImpl(size_t /* size */, size_t /* elements */) {} - size_t max_size = 0; - size_t max_elements = 0; + std::atomic max_size = 0; + std::atomic max_elements = 0; }; } diff --git a/src/Interpreters/Cache/LRUFileCachePriority.cpp b/src/Interpreters/Cache/LRUFileCachePriority.cpp index ddc307554092..1a2040f9ed2b 100644 --- a/src/Interpreters/Cache/LRUFileCachePriority.cpp +++ b/src/Interpreters/Cache/LRUFileCachePriority.cpp @@ -17,9 +17,6 @@ namespace ProfileEvents { extern const Event FilesystemCacheEvictionSkippedFileSegments; extern const Event FilesystemCacheEvictionTries; - extern const Event FilesystemCacheEvictMicroseconds; - extern const Event FilesystemCacheEvictedBytes; - extern const Event FilesystemCacheEvictedFileSegments; extern const Event FilesystemCacheEvictionSkippedEvictingFileSegments; } @@ -126,6 +123,9 @@ void LRUFileCachePriority::updateSize(int64_t size) chassert(size != 0); chassert(size > 0 || state->current_size >= size_t(-size)); + LOG_TEST(log, "Updating size with {}, current is {}", + size, state->current_size); + state->current_size += size; CurrentMetrics::add(CurrentMetrics::FilesystemCacheSize, size); } @@ -254,10 +254,14 @@ bool LRUFileCachePriority::canFit( size_t elements, size_t released_size_assumption, size_t released_elements_assumption, - const CachePriorityGuard::Lock &) const + const CachePriorityGuard::Lock &, + const size_t * max_size_, + const size_t * max_elements_) const { - return (max_size == 0 || state->current_size + size - released_size_assumption <= max_size) - && (max_elements == 0 || state->current_elements_num + elements - released_elements_assumption <= max_elements); + return (max_size == 0 + || (state->current_size + size - released_size_assumption <= (max_size_ ? *max_size_ : max_size.load()))) + && (max_elements == 0 + || state->current_elements_num + elements - released_elements_assumption <= (max_elements_ ? 
*max_elements_ : max_elements.load())); } bool LRUFileCachePriority::collectCandidatesForEviction( @@ -274,6 +278,74 @@ bool LRUFileCachePriority::collectCandidatesForEviction( return true; } + auto can_fit = [&] + { + return canFit(size, elements, stat.total_stat.releasable_size, stat.total_stat.releasable_count, lock); + }; + + iterateForEviction(res, stat, can_fit, lock); + if (can_fit()) + { + /// `res` contains eviction candidates. Do we have any? + if (res.size() > 0) + { + /// As eviction is done without a cache priority lock, + /// then if some space was partially available and some needed + /// to be freed via eviction, we need to make sure that this + /// partially available space is still available + /// after we finish with eviction for non-available space. + /// So we create a space holder for the currently available part + /// of the required space for the duration of eviction of the other + /// currently non-available part of the space. + + const size_t hold_size = size > stat.total_stat.releasable_size + ? size - stat.total_stat.releasable_size + : 0; + + const size_t hold_elements = elements > stat.total_stat.releasable_count + ? elements - stat.total_stat.releasable_count + : 0; + + if (hold_size || hold_elements) + res.setSpaceHolder(hold_size, hold_elements, *this, lock); + } + + // LOG_TEST(log, "Collected {} candidates for eviction (total size: {}). " + // "Took hold of size {} and elements {}", + // res.size(), stat.total_stat.releasable_size, hold_size, hold_elements); + + return true; + } + else + { + return false; + } +} + +bool LRUFileCachePriority::collectCandidatesForEviction( + size_t desired_size, + size_t desired_elements_count, + size_t max_candidates_to_evict, + FileCacheReserveStat & stat, + EvictionCandidates & res, + const CachePriorityGuard::Lock & lock) +{ + auto stop_condition = [&, this]() + { + return canFit(0, 0, stat.total_stat.releasable_size, stat.total_stat.releasable_count, + lock, &desired_size, &desired_elements_count) + || (max_candidates_to_evict && res.size() >= max_candidates_to_evict); + }; + iterateForEviction(res, stat, stop_condition, lock); + return stop_condition(); +} + +void LRUFileCachePriority::iterateForEviction( + EvictionCandidates & res, + FileCacheReserveStat & stat, + StopConditionFunc stop_condition, + const CachePriorityGuard::Lock & lock) +{ ProfileEvents::increment(ProfileEvents::FilesystemCacheEvictionTries); IterateFunc iterate_func = [&](LockedKey & locked_key, const FileSegmentMetadataPtr & segment_metadata) @@ -288,59 +360,23 @@ bool LRUFileCachePriority::collectCandidatesForEviction( } else { - stat.update(segment_metadata->size(), file_segment->getKind(), false); ProfileEvents::increment(ProfileEvents::FilesystemCacheEvictionSkippedFileSegments); + stat.update(segment_metadata->size(), file_segment->getKind(), false); } return IterationResult::CONTINUE; }; - auto can_fit = [&] - { - return canFit(size, elements, stat.total_stat.releasable_size, stat.total_stat.releasable_count, lock); - }; - iterate([&](LockedKey & locked_key, const FileSegmentMetadataPtr & segment_metadata) { - return can_fit() ? IterationResult::BREAK : iterate_func(locked_key, segment_metadata); + return stop_condition() ? 
IterationResult::BREAK : iterate_func(locked_key, segment_metadata); }, lock); - - if (can_fit()) - { - /// As eviction is done without a cache priority lock, - /// then if some space was partially available and some needed - /// to be freed via eviction, we need to make sure that this - /// partially available space is still available - /// after we finish with eviction for non-available space. - /// So we create a space holder for the currently available part - /// of the required space for the duration of eviction of the other - /// currently non-available part of the space. - - const size_t hold_size = size > stat.total_stat.releasable_size - ? size - stat.total_stat.releasable_size - : 0; - - const size_t hold_elements = elements > stat.total_stat.releasable_count - ? elements - stat.total_stat.releasable_count - : 0; - - if (hold_size || hold_elements) - res.setSpaceHolder(hold_size, hold_elements, *this, lock); - - // LOG_TEST(log, "Collected {} candidates for eviction (total size: {}). " - // "Took hold of size {} and elements {}", - // res.size(), stat.total_stat.releasable_size, hold_size, hold_elements); - - return true; - } - else - { - return false; - } } -LRUFileCachePriority::LRUIterator -LRUFileCachePriority::move(LRUIterator & it, LRUFileCachePriority & other, const CachePriorityGuard::Lock &) +LRUFileCachePriority::LRUIterator LRUFileCachePriority::move( + LRUIterator & it, + LRUFileCachePriority & other, + const CachePriorityGuard::Lock &) { const auto & entry = *it.getEntry(); if (entry.size == 0) @@ -384,50 +420,31 @@ IFileCachePriority::PriorityDumpPtr LRUFileCachePriority::dump(const CachePriori } bool LRUFileCachePriority::modifySizeLimits( - size_t max_size_, size_t max_elements_, double /* size_ratio_ */, const CachePriorityGuard::Lock & lock) + size_t max_size_, size_t max_elements_, double /* size_ratio_ */, const CachePriorityGuard::Lock &) { if (max_size == max_size_ && max_elements == max_elements_) return false; /// Nothing to change. - auto check_limits_satisfied = [&]() + if (state->current_size > max_size_ || state->current_elements_num > max_elements_) { - return (max_size_ == 0 || state->current_size <= max_size_) - && (max_elements_ == 0 || state->current_elements_num <= max_elements_); - }; - - if (check_limits_satisfied()) - { - max_size = max_size_; - max_elements = max_elements_; - return true; + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Cannot modify size limits to {} in size and {} in elements: " + "not enough space freed. Current size: {}/{}, elements: {}/{}", + max_size_, max_elements_, + state->current_size, max_size, state->current_elements_num, max_elements); } - auto iterate_func = [&](LockedKey & locked_key, const FileSegmentMetadataPtr & segment_metadata) - { - chassert(segment_metadata->file_segment->assertCorrectness()); - - if (!segment_metadata->releasable()) - return IterationResult::CONTINUE; - - auto segment = segment_metadata->file_segment; - locked_key.removeFileSegment(segment->offset(), segment->lock()); - - ProfileEvents::increment(ProfileEvents::FilesystemCacheEvictedFileSegments); - ProfileEvents::increment(ProfileEvents::FilesystemCacheEvictedBytes, segment->getDownloadedSize()); - return IterationResult::REMOVE_AND_CONTINUE; - }; - - auto timer = DB::CurrentThread::getProfileEvents().timer(ProfileEvents::FilesystemCacheEvictMicroseconds); - iterate( - [&](LockedKey & locked_key, const FileSegmentMetadataPtr & segment_metadata) - { return check_limits_satisfied() ? 
IterationResult::BREAK : iterate_func(locked_key, segment_metadata); }, - lock); - max_size = max_size_; max_elements = max_elements_; return true; } +IFileCachePriority::EntryPtr LRUFileCachePriority::LRUIterator::getEntry() const +{ + assertValid(); + return *iterator; +} + void LRUFileCachePriority::LRUIterator::remove(const CachePriorityGuard::Lock & lock) { assertValid(); @@ -440,12 +457,15 @@ void LRUFileCachePriority::LRUIterator::invalidate() assertValid(); const auto & entry = *iterator; - LOG_TEST(cache_priority->log, - "Invalidating entry in LRU queue entry {}", entry->toString()); chassert(entry->size != 0); cache_priority->updateSize(-entry->size); cache_priority->updateElementsCount(-1); + + LOG_TEST(cache_priority->log, + "Invalidated entry in LRU queue {}: {}", + entry->toString(), cache_priority->getApproxStateInfoForLog()); + entry->size = 0; } @@ -522,6 +542,12 @@ std::string LRUFileCachePriority::getStateInfoForLog(const CachePriorityGuard::L getSize(lock), max_size, getElementsCount(lock), max_elements, description); } +std::string LRUFileCachePriority::getApproxStateInfoForLog() const +{ + return fmt::format("size: {}/{}, elements: {}/{} (description: {})", + getSizeApprox(), max_size, getElementsCountApprox(), max_elements, description); +} + void LRUFileCachePriority::holdImpl( size_t size, size_t elements, diff --git a/src/Interpreters/Cache/LRUFileCachePriority.h b/src/Interpreters/Cache/LRUFileCachePriority.h index 31968d611966..d31a3fb0f109 100644 --- a/src/Interpreters/Cache/LRUFileCachePriority.h +++ b/src/Interpreters/Cache/LRUFileCachePriority.h @@ -62,6 +62,14 @@ class LRUFileCachePriority final : public IFileCachePriority const UserID & user_id, const CachePriorityGuard::Lock &) override; + bool collectCandidatesForEviction( + size_t desired_size, + size_t desired_elements_count, + size_t max_candidates_to_evict, + FileCacheReserveStat & stat, + EvictionCandidates & res, + const CachePriorityGuard::Lock &) override; + void shuffle(const CachePriorityGuard::Lock &) override; struct LRUPriorityDump : public IPriorityDump @@ -94,7 +102,9 @@ class LRUFileCachePriority final : public IFileCachePriority size_t elements, size_t released_size_assumption, size_t released_elements_assumption, - const CachePriorityGuard::Lock &) const; + const CachePriorityGuard::Lock &, + const size_t * max_size_ = nullptr, + const size_t * max_elements_ = nullptr) const; LRUQueue::iterator remove(LRUQueue::iterator it, const CachePriorityGuard::Lock &); @@ -110,12 +120,20 @@ class LRUFileCachePriority final : public IFileCachePriority LRUIterator move(LRUIterator & it, LRUFileCachePriority & other, const CachePriorityGuard::Lock &); LRUIterator add(EntryPtr entry, const CachePriorityGuard::Lock &); + using StopConditionFunc = std::function; + void iterateForEviction( + EvictionCandidates & res, + FileCacheReserveStat & stat, + StopConditionFunc stop_condition, + const CachePriorityGuard::Lock &); + void holdImpl( size_t size, size_t elements, const CachePriorityGuard::Lock & lock) override; void releaseImpl(size_t size, size_t elements) override; + std::string getApproxStateInfoForLog() const; }; class LRUFileCachePriority::LRUIterator : public IFileCachePriority::Iterator @@ -130,7 +148,7 @@ class LRUFileCachePriority::LRUIterator : public IFileCachePriority::Iterator LRUIterator & operator =(const LRUIterator & other); bool operator ==(const LRUIterator & other) const; - EntryPtr getEntry() const override { return *iterator; } + EntryPtr getEntry() const override; size_t 
increasePriority(const CachePriorityGuard::Lock &) override; diff --git a/src/Interpreters/Cache/QueryCache.cpp b/src/Interpreters/Cache/QueryCache.cpp index 151f2ea06cc9..7b1f24e93fca 100644 --- a/src/Interpreters/Cache/QueryCache.cpp +++ b/src/Interpreters/Cache/QueryCache.cpp @@ -2,11 +2,17 @@ #include #include +#include #include #include +#include +#include #include #include +#include +#include #include +#include #include #include #include @@ -52,7 +58,54 @@ struct HasNonDeterministicFunctionsMatcher } }; +struct HasSystemTablesMatcher +{ + struct Data + { + const ContextPtr context; + bool has_system_tables = false; + }; + + static bool needChildVisit(const ASTPtr &, const ASTPtr &) { return true; } + + static void visit(const ASTPtr & node, Data & data) + { + if (data.has_system_tables) + return; + + String database_table; /// or whatever else we get, e.g. just a table + + /// SELECT [...] FROM + if (const auto * table_identifier = node->as()) + { + database_table = table_identifier->name(); + } + /// SELECT [...] FROM clusterAllReplicas(,
) + else if (const auto * identifier = node->as()) + { + database_table = identifier->name(); + } + /// Handle SELECT [...] FROM clusterAllReplicas(, '
') + else if (const auto * literal = node->as()) + { + const auto & value = literal->value; + database_table = toString(value); + } + + Tokens tokens(database_table.c_str(), database_table.c_str() + database_table.size(), /*max_query_size*/ 2048, /*skip_insignificant*/ true); + IParser::Pos pos(tokens, /*max_depth*/ 42, /*max_backtracks*/ 42); + Expected expected; + String database; + String table; + bool successfully_parsed = parseDatabaseAndTableName(pos, expected, database, table); + if (successfully_parsed) + if (DatabaseCatalog::isPredefinedDatabase(database)) + data.has_system_tables = true; + } +}; + using HasNonDeterministicFunctionsVisitor = InDepthNodeVisitor; +using HasSystemTablesVisitor = InDepthNodeVisitor; } @@ -63,6 +116,13 @@ bool astContainsNonDeterministicFunctions(ASTPtr ast, ContextPtr context) return finder_data.has_non_deterministic_functions; } +bool astContainsSystemTables(ASTPtr ast, ContextPtr context) +{ + HasSystemTablesMatcher::Data finder_data{context}; + HasSystemTablesVisitor(finder_data).visit(ast); + return finder_data.has_system_tables; +} + namespace { diff --git a/src/Interpreters/Cache/QueryCache.h b/src/Interpreters/Cache/QueryCache.h index c574f3085e30..a06f504de65b 100644 --- a/src/Interpreters/Cache/QueryCache.h +++ b/src/Interpreters/Cache/QueryCache.h @@ -17,6 +17,9 @@ namespace DB /// Does AST contain non-deterministic functions like rand() and now()? bool astContainsNonDeterministicFunctions(ASTPtr ast, ContextPtr context); +/// Does AST contain system tables like "system.processes"? +bool astContainsSystemTables(ASTPtr ast, ContextPtr context); + /// Maps queries to query results. Useful to avoid repeated query calculation. /// /// The cache does not aim to be transactionally consistent (which is difficult to get right). For example, the cache is not invalidated diff --git a/src/Interpreters/Cache/SLRUFileCachePriority.cpp b/src/Interpreters/Cache/SLRUFileCachePriority.cpp index 1400d3219c64..68bf182dd2e0 100644 --- a/src/Interpreters/Cache/SLRUFileCachePriority.cpp +++ b/src/Interpreters/Cache/SLRUFileCachePriority.cpp @@ -28,17 +28,19 @@ SLRUFileCachePriority::SLRUFileCachePriority( size_t max_elements_, double size_ratio_, LRUFileCachePriority::StatePtr probationary_state_, - LRUFileCachePriority::StatePtr protected_state_) + LRUFileCachePriority::StatePtr protected_state_, + const std::string & description_) : IFileCachePriority(max_size_, max_elements_) , size_ratio(size_ratio_) , protected_queue(LRUFileCachePriority(getRatio(max_size_, size_ratio), getRatio(max_elements_, size_ratio), protected_state_, - "protected")) + description_ + ", protected")) , probationary_queue(LRUFileCachePriority(getRatio(max_size_, 1 - size_ratio), getRatio(max_elements_, 1 - size_ratio), probationary_state_, - "probationary")) + description_ + ", probationary")) + , log(getLogger("SLRUFileCachePriority(" + description_ + ")")) { LOG_DEBUG( log, "Probationary queue {} in size and {} in elements. 
" @@ -249,6 +251,49 @@ bool SLRUFileCachePriority::collectCandidatesForEvictionInProtected( return true; } +bool SLRUFileCachePriority::collectCandidatesForEviction( + size_t desired_size, + size_t desired_elements_count, + size_t max_candidates_to_evict, + FileCacheReserveStat & stat, + EvictionCandidates & res, + const CachePriorityGuard::Lock & lock) +{ + const auto desired_probationary_size = getRatio(desired_size, 1 - size_ratio); + const auto desired_probationary_elements_num = getRatio(desired_elements_count, 1 - size_ratio); + + FileCacheReserveStat probationary_stat; + const bool probationary_limit_satisfied = probationary_queue.collectCandidatesForEviction( + desired_probationary_size, desired_probationary_elements_num, + max_candidates_to_evict, probationary_stat, res, lock); + + stat += probationary_stat; + + LOG_TEST(log, "Collected {} to evict from probationary queue. Total size: {}", + res.size(), probationary_stat.total_stat.releasable_size); + + chassert(!max_candidates_to_evict || res.size() <= max_candidates_to_evict); + chassert(res.size() == stat.total_stat.releasable_count); + + if (max_candidates_to_evict && res.size() >= max_candidates_to_evict) + return probationary_limit_satisfied; + + const auto desired_protected_size = getRatio(max_size, size_ratio); + const auto desired_protected_elements_num = getRatio(max_elements, size_ratio); + + FileCacheReserveStat protected_stat; + const bool protected_limit_satisfied = protected_queue.collectCandidatesForEviction( + desired_protected_size, desired_protected_elements_num, + max_candidates_to_evict - res.size(), protected_stat, res, lock); + + stat += protected_stat; + + LOG_TEST(log, "Collected {} to evict from protected queue. Total size: {}", + res.size(), protected_stat.total_stat.releasable_size); + + return probationary_limit_satisfied && protected_limit_satisfied; +} + void SLRUFileCachePriority::downgrade(IteratorPtr iterator, const CachePriorityGuard::Lock & lock) { auto * candidate_it = assert_cast(iterator.get()); diff --git a/src/Interpreters/Cache/SLRUFileCachePriority.h b/src/Interpreters/Cache/SLRUFileCachePriority.h index 4cf5bb0f1999..ee3cafe322da 100644 --- a/src/Interpreters/Cache/SLRUFileCachePriority.h +++ b/src/Interpreters/Cache/SLRUFileCachePriority.h @@ -19,7 +19,8 @@ class SLRUFileCachePriority : public IFileCachePriority size_t max_elements_, double size_ratio_, LRUFileCachePriority::StatePtr probationary_state_ = nullptr, - LRUFileCachePriority::StatePtr protected_state_ = nullptr); + LRUFileCachePriority::StatePtr protected_state_ = nullptr, + const std::string & description_ = "none"); size_t getSize(const CachePriorityGuard::Lock & lock) const override; @@ -57,6 +58,14 @@ class SLRUFileCachePriority : public IFileCachePriority const UserID & user_id, const CachePriorityGuard::Lock &) override; + bool collectCandidatesForEviction( + size_t desired_size, + size_t desired_elements_count, + size_t max_candidates_to_evict, + FileCacheReserveStat & stat, + EvictionCandidates & res, + const CachePriorityGuard::Lock &) override; + void shuffle(const CachePriorityGuard::Lock &) override; PriorityDumpPtr dump(const CachePriorityGuard::Lock &) override; @@ -67,7 +76,7 @@ class SLRUFileCachePriority : public IFileCachePriority double size_ratio; LRUFileCachePriority protected_queue; LRUFileCachePriority probationary_queue; - LoggerPtr log = getLogger("SLRUFileCachePriority"); + LoggerPtr log; void increasePriority(SLRUIterator & iterator, const CachePriorityGuard::Lock & lock); diff --git 
a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 65fcd51529bd..04b761e8b2be 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -971,7 +971,7 @@ Strings Context::getWarnings() const /// TODO: remove, use `getTempDataOnDisk` VolumePtr Context::getGlobalTemporaryVolume() const { - std::lock_guard lock(shared->mutex); + SharedLockGuard lock(shared->mutex); /// Calling this method we just bypass the `temp_data_on_disk` and write to the file on the volume directly. /// Volume is the same for `root_temp_data_on_disk` (always set) and `temp_data_on_disk` (if it's set). if (shared->root_temp_data_on_disk) @@ -1550,14 +1550,17 @@ ClassifierPtr Context::getWorkloadClassifier() const } -const Scalars & Context::getScalars() const +Scalars Context::getScalars() const { + std::lock_guard lock(mutex); return scalars; } -const Block & Context::getScalar(const String & name) const +Block Context::getScalar(const String & name) const { + std::lock_guard lock(mutex); + auto it = scalars.find(name); if (scalars.end() == it) { @@ -1568,12 +1571,13 @@ const Block & Context::getScalar(const String & name) const return it->second; } -const Block * Context::tryGetSpecialScalar(const String & name) const +std::optional Context::tryGetSpecialScalar(const String & name) const { + std::lock_guard lock(mutex); auto it = special_scalars.find(name); if (special_scalars.end() == it) - return nullptr; - return &it->second; + return std::nullopt; + return it->second; } Tables Context::getExternalTables() const @@ -1653,6 +1657,7 @@ void Context::addScalar(const String & name, const Block & block) if (isGlobalContext()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Global context cannot have scalars"); + std::lock_guard lock(mutex); scalars[name] = block; } @@ -1662,6 +1667,7 @@ void Context::addSpecialScalar(const String & name, const Block & block) if (isGlobalContext()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Global context cannot have local scalars"); + std::lock_guard lock(mutex); special_scalars[name] = block; } @@ -1671,6 +1677,7 @@ bool Context::hasScalar(const String & name) const if (isGlobalContext()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Global context cannot have scalars"); + std::lock_guard lock(mutex); return scalars.contains(name); } diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index 8601d09621f4..a0225cb2f9a5 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -680,12 +680,12 @@ class Context: public ContextData, public std::enable_shared_from_this std::shared_ptr findExternalTable(const String & table_name) const; std::shared_ptr removeExternalTable(const String & table_name); - const Scalars & getScalars() const; - const Block & getScalar(const String & name) const; + Scalars getScalars() const; + Block getScalar(const String & name) const; void addScalar(const String & name, const Block & block); bool hasScalar(const String & name) const; - const Block * tryGetSpecialScalar(const String & name) const; + std::optional tryGetSpecialScalar(const String & name) const; void addSpecialScalar(const String & name, const Block & block); const QueryAccessInfo & getQueryAccessInfo() const { return *getQueryAccessInfoPtr(); } diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index ec6c8b5924f3..0caca88c283c 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -27,6 +27,8 @@ #include #include +#include + #include "config.h" #if 
USE_MYSQL @@ -1602,6 +1604,9 @@ void DatabaseCatalog::reloadDisksTask() for (auto & database : getDatabases()) { + // WARNING: In case of `async_load_databases = true` getTablesIterator() call wait for all table in the database to be loaded. + // WARNING: It means that no database will be able to update configuration until all databases are fully loaded. + // TODO: We can split this task by table or by database to make loaded table operate as usual. auto it = database.second->getTablesIterator(getContext()); while (it->isValid()) { @@ -1739,10 +1744,9 @@ std::pair TableNameHints::getExtendedHintForTable(const String & Names TableNameHints::getAllRegisteredNames() const { - Names result; if (database) - for (auto table_it = database->getTablesIterator(context); table_it->isValid(); table_it->next()) - result.emplace_back(table_it->name()); - return result; + return database->getAllTableNames(context); + return {}; } + } diff --git a/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp b/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp index 0cf138c14f6d..a70ff3c6c53e 100644 --- a/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp +++ b/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp @@ -112,7 +112,7 @@ void ExecuteScalarSubqueriesMatcher::visit(const ASTSubquery & subquery, ASTPtr auto hash = subquery.getTreeHash(/*ignore_aliases=*/ true); const auto scalar_query_hash_str = toString(hash); - std::unique_ptr interpreter = nullptr; + std::unique_ptr interpreter; bool hit = false; bool is_local = false; diff --git a/src/Interpreters/ExternalLoader.cpp b/src/Interpreters/ExternalLoader.cpp index f9e24e2de706..a636e59fa1a5 100644 --- a/src/Interpreters/ExternalLoader.cpp +++ b/src/Interpreters/ExternalLoader.cpp @@ -1187,7 +1187,7 @@ class ExternalLoader::LoadingDispatcher : private boost::noncopyable else { auto result = std::chrono::system_clock::now() + std::chrono::seconds(calculateDurationWithBackoff(rnd_engine, error_count)); - LOG_TRACE(log, "Supposed update time for unspecified object is {} (backoff, {} errors.", to_string(result), error_count); + LOG_TRACE(log, "Supposed update time for unspecified object is {} (backoff, {} errors)", to_string(result), error_count); return result; } } diff --git a/src/Interpreters/HashJoin.cpp b/src/Interpreters/HashJoin.cpp index 12a906526f6c..73498b39ead3 100644 --- a/src/Interpreters/HashJoin.cpp +++ b/src/Interpreters/HashJoin.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Interpreters/InterpreterAlterQuery.cpp b/src/Interpreters/InterpreterAlterQuery.cpp index 7acaf95beccb..2115dc57126d 100644 --- a/src/Interpreters/InterpreterAlterQuery.cpp +++ b/src/Interpreters/InterpreterAlterQuery.cpp @@ -56,7 +56,7 @@ InterpreterAlterQuery::InterpreterAlterQuery(const ASTPtr & query_ptr_, ContextP BlockIO InterpreterAlterQuery::execute() { - FunctionNameNormalizer().visit(query_ptr.get()); + FunctionNameNormalizer::visit(query_ptr.get()); const auto & alter = query_ptr->as(); if (alter.alter_object == ASTAlterQuery::AlterObjectType::DATABASE) { @@ -131,7 +131,7 @@ BlockIO InterpreterAlterQuery::executeToTable(const ASTAlterQuery & alter) if (modify_query) { // Expand CTE before filling default database - ApplyWithSubqueryVisitor().visit(*modify_query); + ApplyWithSubqueryVisitor::visit(*modify_query); } /// Add default database to table identifiers that we can encounter in e.g. default expressions, mutation expression, etc. 
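The interpreter files above and below repeat one mechanical cleanup: calls to static members through a temporary instance (`FunctionNameNormalizer().visit(...)`, `ApplyWithSubqueryVisitor().visit(...)`) become direct static calls (`FunctionNameNormalizer::visit(...)`), matching the `PrefetchingHelper` and `InterpreterInsertQuery::getSampleBlock` changes earlier in this patch and the `readability-static-accessed-through-instance` check referenced by the NOLINT above. A minimal, self-contained illustration of the pattern (hypothetical `NormalizerLike` type, not ClickHouse code):

    /// Hypothetical stand-in with the same interface shape as FunctionNameNormalizer.
    struct NormalizerLike
    {
        static void visit(void * /*ast*/) { /* normalize function names in place */ }
    };

    void example(void * query_ast)
    {
        /// Old form: constructs a throwaway temporary only to reach a static member;
        /// clang-tidy flags this as readability-static-accessed-through-instance.
        NormalizerLike().visit(query_ast);

        /// Form used throughout this patch: call the static member directly.
        NormalizerLike::visit(query_ast);
    }

The behaviour is identical; the direct call simply avoids constructing an unused object and makes it explicit that no instance state is involved.
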
diff --git a/src/Interpreters/InterpreterCreateFunctionQuery.cpp b/src/Interpreters/InterpreterCreateFunctionQuery.cpp index 18e9ba4a64b2..3f4a03c34973 100644 --- a/src/Interpreters/InterpreterCreateFunctionQuery.cpp +++ b/src/Interpreters/InterpreterCreateFunctionQuery.cpp @@ -21,7 +21,7 @@ namespace ErrorCodes BlockIO InterpreterCreateFunctionQuery::execute() { - FunctionNameNormalizer().visit(query_ptr.get()); + FunctionNameNormalizer::visit(query_ptr.get()); const auto updated_query_ptr = removeOnClusterClauseIfNeeded(query_ptr, getContext()); ASTCreateFunctionQuery & create_function_query = updated_query_ptr->as(); diff --git a/src/Interpreters/InterpreterCreateIndexQuery.cpp b/src/Interpreters/InterpreterCreateIndexQuery.cpp index aed4b0587b40..a439cb672c82 100644 --- a/src/Interpreters/InterpreterCreateIndexQuery.cpp +++ b/src/Interpreters/InterpreterCreateIndexQuery.cpp @@ -25,7 +25,7 @@ namespace ErrorCodes BlockIO InterpreterCreateIndexQuery::execute() { - FunctionNameNormalizer().visit(query_ptr.get()); + FunctionNameNormalizer::visit(query_ptr.get()); auto current_context = getContext(); const auto & create_index = query_ptr->as(); @@ -39,12 +39,12 @@ BlockIO InterpreterCreateIndexQuery::execute() } // Noop if allow_create_index_without_type = true. throw otherwise - if (!create_index.index_decl->as()->type) + if (!create_index.index_decl->as()->getType()) { if (!current_context->getSettingsRef().allow_create_index_without_type) { throw Exception(ErrorCodes::INCORRECT_QUERY, "CREATE INDEX without TYPE is forbidden." - " SET allow_create_index_without_type=1 to ignore this statements."); + " SET allow_create_index_without_type=1 to ignore this statements"); } else { diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 7c3bed7388c8..c0da510a68b7 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -1114,7 +1114,7 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) // Table SQL definition is available even if the table is detached (even permanently) auto query = database->getCreateTableQuery(create.getTable(), getContext()); - FunctionNameNormalizer().visit(query.get()); + FunctionNameNormalizer::visit(query.get()); auto create_query = query->as(); if (!create.is_dictionary && create_query.is_dictionary) @@ -1184,7 +1184,7 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) if (create.select && create.isView()) { // Expand CTE before filling default database - ApplyWithSubqueryVisitor().visit(*create.select); + ApplyWithSubqueryVisitor::visit(*create.select); AddDefaultDatabaseVisitor visitor(getContext(), current_database); visitor.visit(*create.select); } @@ -1763,7 +1763,7 @@ BlockIO InterpreterCreateQuery::executeQueryOnCluster(ASTCreateQuery & create) BlockIO InterpreterCreateQuery::execute() { - FunctionNameNormalizer().visit(query_ptr.get()); + FunctionNameNormalizer::visit(query_ptr.get()); auto & create = query_ptr->as(); bool is_create_database = create.database && !create.table; diff --git a/src/Interpreters/InterpreterDeleteQuery.cpp b/src/Interpreters/InterpreterDeleteQuery.cpp index 07d23be78a7e..ee7749941453 100644 --- a/src/Interpreters/InterpreterDeleteQuery.cpp +++ b/src/Interpreters/InterpreterDeleteQuery.cpp @@ -35,7 +35,7 @@ InterpreterDeleteQuery::InterpreterDeleteQuery(const ASTPtr & query_ptr_, Contex BlockIO InterpreterDeleteQuery::execute() { - FunctionNameNormalizer().visit(query_ptr.get()); + 
FunctionNameNormalizer::visit(query_ptr.get()); const ASTDeleteQuery & delete_query = query_ptr->as(); auto table_id = getContext()->resolveStorageID(delete_query, Context::ResolveOrdinary); diff --git a/src/Interpreters/InterpreterDropFunctionQuery.cpp b/src/Interpreters/InterpreterDropFunctionQuery.cpp index 2661fd9058c2..7a273d4969b8 100644 --- a/src/Interpreters/InterpreterDropFunctionQuery.cpp +++ b/src/Interpreters/InterpreterDropFunctionQuery.cpp @@ -21,7 +21,7 @@ namespace ErrorCodes BlockIO InterpreterDropFunctionQuery::execute() { - FunctionNameNormalizer().visit(query_ptr.get()); + FunctionNameNormalizer::visit(query_ptr.get()); const auto updated_query_ptr = removeOnClusterClauseIfNeeded(query_ptr, getContext()); ASTDropFunctionQuery & drop_function_query = updated_query_ptr->as(); diff --git a/src/Interpreters/InterpreterDropQuery.cpp b/src/Interpreters/InterpreterDropQuery.cpp index e29e59ee4c34..ee13e3b628b4 100644 --- a/src/Interpreters/InterpreterDropQuery.cpp +++ b/src/Interpreters/InterpreterDropQuery.cpp @@ -116,7 +116,7 @@ BlockIO InterpreterDropQuery::executeToTable(ASTDropQuery & query) return res; } -BlockIO InterpreterDropQuery::executeToTableImpl(ContextPtr context_, ASTDropQuery & query, DatabasePtr & db, UUID & uuid_to_wait) +BlockIO InterpreterDropQuery::executeToTableImpl(const ContextPtr & context_, ASTDropQuery & query, DatabasePtr & db, UUID & uuid_to_wait) { /// NOTE: it does not contain UUID, we will resolve it with locked DDLGuard auto table_id = StorageID(query); @@ -163,6 +163,19 @@ BlockIO InterpreterDropQuery::executeToTableImpl(ContextPtr context_, ASTDropQue "Table {} is not a Dictionary", table_id.getNameForLogs()); + if (settings.ignore_drop_queries_probability != 0 && ast_drop_query.kind == ASTDropQuery::Kind::Drop && std::uniform_real_distribution<>(0.0, 1.0)(thread_local_rng) <= settings.ignore_drop_queries_probability) + { + ast_drop_query.sync = false; + if (table->storesDataOnDisk()) + { + LOG_TEST(getLogger("InterpreterDropQuery"), "Ignore DROP TABLE query for table {}.{}", table_id.database_name, table_id.table_name); + return {}; + } + + LOG_TEST(getLogger("InterpreterDropQuery"), "Replace DROP TABLE query with TRUNCATE TABLE for table {}.{}", table_id.database_name, table_id.table_name); + ast_drop_query.kind = ASTDropQuery::Truncate; + } + /// Now get UUID, so we can wait for table data to be finally dropped table_id.uuid = database->tryGetTableUUID(table_id.table_name); @@ -412,6 +425,7 @@ BlockIO InterpreterDropQuery::executeToDatabaseImpl(const ASTDropQuery & query, table_context->setInternalQuery(true); /// Do not hold extra shared pointers to tables std::vector> tables_to_drop; + // NOTE: This means we wait for all tables to be loaded inside the getTablesIterator() call in case of `async_load_databases = true`.
for (auto iterator = database->getTablesIterator(table_context); iterator->isValid(); iterator->next()) { auto table_ptr = iterator->table(); diff --git a/src/Interpreters/InterpreterDropQuery.h b/src/Interpreters/InterpreterDropQuery.h index 08668f47225a..cd0c63f9ea70 100644 --- a/src/Interpreters/InterpreterDropQuery.h +++ b/src/Interpreters/InterpreterDropQuery.h @@ -41,7 +41,7 @@ class InterpreterDropQuery : public IInterpreter, WithMutableContext BlockIO executeToDatabaseImpl(const ASTDropQuery & query, DatabasePtr & database, std::vector & uuids_to_wait); BlockIO executeToTable(ASTDropQuery & query); - BlockIO executeToTableImpl(ContextPtr context_, ASTDropQuery & query, DatabasePtr & db, UUID & uuid_to_wait); + BlockIO executeToTableImpl(const ContextPtr& context_, ASTDropQuery & query, DatabasePtr & db, UUID & uuid_to_wait); static void waitForTableToBeActuallyDroppedOrDetached(const ASTDropQuery & query, const DatabasePtr & db, const UUID & uuid_to_wait); diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index fc58f7b50988..12677c422b8a 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -340,13 +340,10 @@ bool InterpreterInsertQuery::shouldAddSquashingFroStorage(const StoragePtr & tab { auto context_ptr = getContext(); const Settings & settings = context_ptr->getSettingsRef(); - const ASTInsertQuery * query = nullptr; - if (query_ptr) - query = query_ptr->as(); /// Do not squash blocks if it is a sync INSERT into Distributed, since it lead to double bufferization on client and server side. /// Client-side bufferization might cause excessive timeouts (especially in case of big blocks). - return !(settings.distributed_foreground_insert && table->isRemote()) && !async_insert && !no_squash && !(query && query->watch); + return !(settings.distributed_foreground_insert && table->isRemote()) && !async_insert && !no_squash; } Chain InterpreterInsertQuery::buildPreSinkChain( @@ -429,7 +426,7 @@ BlockIO InterpreterInsertQuery::execute() std::vector presink_chains; std::vector sink_chains; - if (!distributed_pipeline || query.watch) + if (!distributed_pipeline) { /// Number of streams works like this: /// * For the SELECT, use `max_threads`, or `max_insert_threads`, or whatever @@ -522,7 +519,8 @@ BlockIO InterpreterInsertQuery::execute() auto views = DatabaseCatalog::instance().getDependentViews(table_id); /// It breaks some views-related tests and we have dedicated `parallel_view_processing` for views, so let's just skip them. - const bool resize_to_max_insert_threads = !table->isView() && views.empty(); + /// Also it doesn't make sense to reshuffle data if storage doesn't support parallel inserts. + const bool resize_to_max_insert_threads = !table->isView() && views.empty() && table->supportsParallelInsert(); pre_streams_size = resize_to_max_insert_threads ? 
settings.max_insert_threads : std::min(settings.max_insert_threads, pipeline.getNumStreams()); @@ -560,11 +558,6 @@ BlockIO InterpreterInsertQuery::execute() } } } - else if (query.watch) - { - InterpreterWatchQuery interpreter_watch{ query.watch, getContext() }; - pipeline = interpreter_watch.buildQueryPipeline(); - } ThreadGroupPtr running_group; if (current_thread) @@ -591,7 +584,7 @@ BlockIO InterpreterInsertQuery::execute() { res.pipeline = std::move(*distributed_pipeline); } - else if (query.select || query.watch) + else if (query.select) { const auto & header = presink_chains.at(0).getInputHeader(); auto actions_dag = ActionsDAG::makeConvertingActions( diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index dee522a21847..efc37fd76afd 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -18,6 +18,7 @@ #include #include +#include #include #include @@ -412,8 +413,8 @@ InterpreterSelectQuery::InterpreterSelectQuery( if (!options.is_subquery) { if (context->getSettingsRef().enable_global_with_statement) - ApplyWithAliasVisitor().visit(query_ptr); - ApplyWithSubqueryVisitor().visit(query_ptr); + ApplyWithAliasVisitor::visit(query_ptr); + ApplyWithSubqueryVisitor::visit(query_ptr); } query_info.query = query_ptr->clone(); @@ -609,7 +610,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( if (view) { query_info.is_parameterized_view = view->isParameterizedView(); - view->replaceWithSubquery(getSelectQuery(), view_table, metadata_snapshot, view->isParameterizedView()); + StorageView::replaceWithSubquery(getSelectQuery(), view_table, metadata_snapshot, view->isParameterizedView()); } syntax_analyzer_result = TreeRewriter(context).analyzeSelect( @@ -629,7 +630,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( if (view) { /// Restore original view name. Save rewritten subquery for future usage in StorageView. 
- query_info.view_query = view->restoreViewName(getSelectQuery(), view_table); + query_info.view_query = StorageView::restoreViewName(getSelectQuery(), view_table); view = nullptr; } @@ -798,7 +799,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( != parallel_replicas_before_analysis) { context->setSetting("allow_experimental_parallel_reading_from_replicas", Field(0)); - context->setSetting("max_parallel_replicas", UInt64{0}); + context->setSetting("max_parallel_replicas", UInt64{1}); need_analyze_again = true; } @@ -945,7 +946,7 @@ bool InterpreterSelectQuery::adjustParallelReplicasAfterAnalysis() if (number_of_replicas_to_use <= 1) { context->setSetting("allow_experimental_parallel_reading_from_replicas", Field(0)); - context->setSetting("max_parallel_replicas", UInt64{0}); + context->setSetting("max_parallel_replicas", UInt64{1}); LOG_DEBUG(log, "Disabling parallel replicas because there aren't enough rows to read"); return true; } @@ -1165,13 +1166,13 @@ static FillColumnDescription getWithFillDescription(const ASTOrderByElement & or { FillColumnDescription descr; - if (order_by_elem.fill_from) - std::tie(descr.fill_from, descr.fill_from_type) = getWithFillFieldValue(order_by_elem.fill_from, context); - if (order_by_elem.fill_to) - std::tie(descr.fill_to, descr.fill_to_type) = getWithFillFieldValue(order_by_elem.fill_to, context); + if (order_by_elem.getFillFrom()) + std::tie(descr.fill_from, descr.fill_from_type) = getWithFillFieldValue(order_by_elem.getFillFrom(), context); + if (order_by_elem.getFillTo()) + std::tie(descr.fill_to, descr.fill_to_type) = getWithFillFieldValue(order_by_elem.getFillTo(), context); - if (order_by_elem.fill_step) - std::tie(descr.fill_step, descr.step_kind) = getWithFillStep(order_by_elem.fill_step, context); + if (order_by_elem.getFillStep()) + std::tie(descr.fill_step, descr.step_kind) = getWithFillStep(order_by_elem.getFillStep(), context); else descr.fill_step = order_by_elem.direction; @@ -1217,8 +1218,8 @@ SortDescription InterpreterSelectQuery::getSortDescription(const ASTSelectQuery const auto & order_by_elem = elem->as(); std::shared_ptr collator; - if (order_by_elem.collation) - collator = std::make_shared(order_by_elem.collation->as().value.get()); + if (order_by_elem.getCollation()) + collator = std::make_shared(order_by_elem.getCollation()->as().value.get()); if (order_by_elem.with_fill) { @@ -2434,7 +2435,7 @@ void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum proc agg_count.create(place); SCOPE_EXIT_MEMORY_SAFE(agg_count.destroy(place)); - agg_count.set(place, *num_rows); + AggregateFunctionCount::set(place, *num_rows); auto column = ColumnAggregateFunction::create(func); column->insertFrom(place); diff --git a/src/Interpreters/JIT/compileFunction.cpp b/src/Interpreters/JIT/compileFunction.cpp index f50a122f9a28..31d3920ccfdd 100644 --- a/src/Interpreters/JIT/compileFunction.cpp +++ b/src/Interpreters/JIT/compileFunction.cpp @@ -2,16 +2,17 @@ #if USE_EMBEDDED_COMPILER -#include -#include -#include - -#include -#include -#include -#include -#include -#include +# include +# include +# include +# include +# include +# include +# include + +# include +# include +# include namespace { diff --git a/src/Interpreters/JIT/compileFunction.h b/src/Interpreters/JIT/compileFunction.h index 84abfa0925a1..551e4d0bb114 100644 --- a/src/Interpreters/JIT/compileFunction.h +++ b/src/Interpreters/JIT/compileFunction.h @@ -4,10 +4,10 @@ #if USE_EMBEDDED_COMPILER -#include -#include -#include -#include +# include +# include +# include 
+# include namespace DB diff --git a/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp b/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp index fd7ffca28723..4821d607d0eb 100644 --- a/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp +++ b/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp @@ -498,14 +498,12 @@ ASTs InterpreterCreateImpl::getRewrittenQueries( columns->columns->children.emplace_back(create_materialized_column_declaration(version_column_name, "UInt64", UInt64(1))); /// Add minmax skipping index for _version column. - auto version_index = std::make_shared(); - version_index->name = version_column_name; auto index_expr = std::make_shared(version_column_name); auto index_type = makeASTFunction("minmax"); index_type->no_empty_args = true; - version_index->set(version_index->expr, index_expr); - version_index->set(version_index->type, index_type); + auto version_index = std::make_shared(index_expr, index_type, version_column_name); version_index->granularity = 1; + ASTPtr indices = std::make_shared(); indices->children.push_back(version_index); columns->set(columns->indices, indices); diff --git a/src/Interpreters/RewriteFunctionToSubcolumnVisitor.cpp b/src/Interpreters/RewriteFunctionToSubcolumnVisitor.cpp index 0717abd47821..f02021997529 100644 --- a/src/Interpreters/RewriteFunctionToSubcolumnVisitor.cpp +++ b/src/Interpreters/RewriteFunctionToSubcolumnVisitor.cpp @@ -1,9 +1,10 @@ -#include -#include #include +#include +#include +#include #include #include -#include +#include namespace DB { diff --git a/src/Interpreters/RewriteOrderByVisitor.cpp b/src/Interpreters/RewriteOrderByVisitor.cpp index 694dec84b7a0..26817b70dc05 100644 --- a/src/Interpreters/RewriteOrderByVisitor.cpp +++ b/src/Interpreters/RewriteOrderByVisitor.cpp @@ -39,9 +39,8 @@ void RewriteOrderBy::visit(ASTPtr & ast, Data &) { // clone w/o children auto clone = std::make_shared(*order_by_elem); - clone->children.clear(); - clone->children.emplace_back(identifier); + clone->children[0] = identifier; new_order_by->children.emplace_back(clone); } if (!new_order_by->children.empty()) diff --git a/src/Interpreters/ServerAsynchronousMetrics.cpp b/src/Interpreters/ServerAsynchronousMetrics.cpp index fe7ccd64ffe1..7703a3521303 100644 --- a/src/Interpreters/ServerAsynchronousMetrics.cpp +++ b/src/Interpreters/ServerAsynchronousMetrics.cpp @@ -278,7 +278,8 @@ void ServerAsynchronousMetrics::updateImpl(TimePoint update_time, TimePoint curr bool is_system = db.first == DatabaseCatalog::SYSTEM_DATABASE; - for (auto iterator = db.second->getTablesIterator(getContext()); iterator->isValid(); iterator->next()) + // Note that we skip tables that are not yet loaded, so if `async_load_databases = true` the metrics may be lower than expected just after server start, until all databases are fully loaded.
+ for (auto iterator = db.second->getTablesIterator(getContext(), {}, /*skip_not_loaded=*/true); iterator->isValid(); iterator->next()) { ++total_number_of_tables; if (is_system) @@ -408,7 +409,7 @@ void ServerAsynchronousMetrics::updateDetachedPartsStats() if (!db.second->canContainMergeTreeTables()) continue; - for (auto iterator = db.second->getTablesIterator(getContext()); iterator->isValid(); iterator->next()) + for (auto iterator = db.second->getTablesIterator(getContext(), {}, true); iterator->isValid(); iterator->next()) { const auto & table = iterator->table(); if (!table) diff --git a/src/Interpreters/Set.cpp b/src/Interpreters/Set.cpp index 8f11754b3bef..d1520c92dbc2 100644 --- a/src/Interpreters/Set.cpp +++ b/src/Interpreters/Set.cpp @@ -168,7 +168,7 @@ void Set::setHeader(const ColumnsWithTypeAndName & header) } /// Choose data structure to use for the set. - data.init(data.chooseMethod(key_columns, key_sizes)); + data.init(SetVariants::chooseMethod(key_columns, key_sizes)); } void Set::fillSetElements() diff --git a/src/Interpreters/ThreadStatusExt.cpp b/src/Interpreters/ThreadStatusExt.cpp index 1c24c4f85c99..2b8e8bef6d4a 100644 --- a/src/Interpreters/ThreadStatusExt.cpp +++ b/src/Interpreters/ThreadStatusExt.cpp @@ -21,6 +21,7 @@ #include #include #include +#include #if defined(OS_LINUX) # include @@ -474,12 +475,22 @@ void ThreadStatus::initQueryProfiler() try { if (settings.query_profiler_real_time_period_ns > 0) - query_profiler_real = std::make_unique(thread_id, - /* period= */ static_cast(settings.query_profiler_real_time_period_ns)); + { + if (!query_profiler_real) + query_profiler_real = std::make_unique(thread_id, + /* period= */ static_cast(settings.query_profiler_real_time_period_ns)); + else + query_profiler_real->setPeriod(static_cast(settings.query_profiler_real_time_period_ns)); + } if (settings.query_profiler_cpu_time_period_ns > 0) - query_profiler_cpu = std::make_unique(thread_id, - /* period= */ static_cast(settings.query_profiler_cpu_time_period_ns)); + { + if (!query_profiler_cpu) + query_profiler_cpu = std::make_unique(thread_id, + /* period= */ static_cast(settings.query_profiler_cpu_time_period_ns)); + else + query_profiler_cpu->setPeriod(static_cast(settings.query_profiler_cpu_time_period_ns)); + } } catch (...) { diff --git a/src/Interpreters/TreeOptimizer.cpp b/src/Interpreters/TreeOptimizer.cpp index b71a8e3681d9..7b9790881702 100644 --- a/src/Interpreters/TreeOptimizer.cpp +++ b/src/Interpreters/TreeOptimizer.cpp @@ -144,7 +144,7 @@ void optimizeGroupBy(ASTSelectQuery * select_query, ContextPtr context) } else { - FunctionOverloadResolverPtr function_builder = UserDefinedExecutableFunctionFactory::instance().tryGet(function->name, context); + FunctionOverloadResolverPtr function_builder = UserDefinedExecutableFunctionFactory::instance().tryGet(function->name, context); /// NOLINT(readability-static-accessed-through-instance) if (!function_builder) function_builder = function_factory.get(function->name, context); @@ -277,7 +277,7 @@ void optimizeDuplicatesInOrderBy(const ASTSelectQuery * select_query) const auto & order_by_elem = elem->as(); if (order_by_elem.with_fill /// Always keep elements WITH FILL as they affects other. - || elems_set.emplace(name, order_by_elem.collation ? order_by_elem.collation->getColumnName() : "").second) + || elems_set.emplace(name, order_by_elem.getCollation() ? 
order_by_elem.getCollation()->getColumnName() : "").second) unique_elems.emplace_back(elem); } diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index 5588fc55a642..914b3c3037d1 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -1587,7 +1587,7 @@ void TreeRewriter::normalize( /// already normalized on initiator node, or not normalized and should remain unnormalized for /// compatibility. if (context_->getClientInfo().query_kind != ClientInfo::QueryKind::SECONDARY_QUERY && settings.normalize_function_names) - FunctionNameNormalizer().visit(query.get()); + FunctionNameNormalizer::visit(query.get()); if (settings.optimize_move_to_prewhere) { diff --git a/src/Interpreters/WindowDescription.cpp b/src/Interpreters/WindowDescription.cpp index 8a7a5024d692..31a881001e3b 100644 --- a/src/Interpreters/WindowDescription.cpp +++ b/src/Interpreters/WindowDescription.cpp @@ -1,10 +1,10 @@ -#include - +#include #include -#include -#include #include +#include #include +#include +#include namespace DB diff --git a/src/Interpreters/WindowDescription.h b/src/Interpreters/WindowDescription.h index d14908fe9937..05269c9d2c38 100644 --- a/src/Interpreters/WindowDescription.h +++ b/src/Interpreters/WindowDescription.h @@ -1,12 +1,12 @@ #pragma once +#include #include -#include -#include -#include -#include #include +#include #include +#include +#include namespace DB { diff --git a/src/Interpreters/evaluateConstantExpression.cpp b/src/Interpreters/evaluateConstantExpression.cpp index b5c3e00e2997..4e1a2bcf5ee4 100644 --- a/src/Interpreters/evaluateConstantExpression.cpp +++ b/src/Interpreters/evaluateConstantExpression.cpp @@ -73,7 +73,7 @@ std::optional evaluateConstantExpressionImpl(c /// already normalized on initiator node, or not normalized and should remain unnormalized for /// compatibility. if (context->getClientInfo().query_kind != ClientInfo::QueryKind::SECONDARY_QUERY && context->getSettingsRef().normalize_function_names) - FunctionNameNormalizer().visit(ast.get()); + FunctionNameNormalizer::visit(ast.get()); auto syntax_result = TreeRewriter(context, no_throw).analyze(ast, source_columns); if (!syntax_result) diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index ea2f69bd2b15..db8753000d8e 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -97,6 +97,7 @@ namespace DB namespace ErrorCodes { extern const int QUERY_CACHE_USED_WITH_NONDETERMINISTIC_FUNCTIONS; + extern const int QUERY_CACHE_USED_WITH_SYSTEM_TABLE; extern const int INTO_OUTFILE_NOT_ALLOWED; extern const int INVALID_TRANSACTION; extern const int LOGICAL_ERROR; @@ -644,15 +645,6 @@ void logExceptionBeforeStart( } } -static void setQuerySpecificSettings(ASTPtr & ast, ContextMutablePtr context) -{ - if (auto * ast_insert_into = ast->as()) - { - if (ast_insert_into->watch) - context->setSetting("output_format_enable_streaming", 1); - } -} - void validateAnalyzerSettings(ASTPtr ast, bool context_value) { if (ast->as()) @@ -898,8 +890,6 @@ static std::tuple executeQueryImpl( if (auto * insert_query = ast->as()) insert_query->tail = istr; - setQuerySpecificSettings(ast, context); - /// There is an option of probabilistic logging of queries. /// If it is used - do the random sampling and "collapse" the settings. 
/// It allows to consistently log queries with all the subqueries in distributed query processing @@ -923,7 +913,7 @@ static std::tuple executeQueryImpl( /// Propagate WITH statement to children ASTSelect. if (settings.enable_global_with_statement) { - ApplyWithGlobalVisitor().visit(ast); + ApplyWithGlobalVisitor::visit(ast); } { @@ -1198,15 +1188,26 @@ static std::tuple executeQueryImpl( /// top of the pipeline which stores the result in the query cache. if (can_use_query_cache && settings.enable_writes_to_query_cache) { + /// Only use the query cache if the query does not contain non-deterministic functions or system tables (which are typically non-deterministic) + const bool ast_contains_nondeterministic_functions = astContainsNonDeterministicFunctions(ast, context); + const bool ast_contains_system_tables = astContainsSystemTables(ast, context); + const QueryCacheNondeterministicFunctionHandling nondeterministic_function_handling = settings.query_cache_nondeterministic_function_handling; + const QueryCacheSystemTableHandling system_table_handling = settings.query_cache_system_table_handling; if (ast_contains_nondeterministic_functions && nondeterministic_function_handling == QueryCacheNondeterministicFunctionHandling::Throw) throw Exception(ErrorCodes::QUERY_CACHE_USED_WITH_NONDETERMINISTIC_FUNCTIONS, "The query result was not cached because the query contains a non-deterministic function." " Use setting `query_cache_nondeterministic_function_handling = 'save'` or `= 'ignore'` to cache the query result regardless or to omit caching"); - if (!ast_contains_nondeterministic_functions || nondeterministic_function_handling == QueryCacheNondeterministicFunctionHandling::Save) + if (ast_contains_system_tables && system_table_handling == QueryCacheSystemTableHandling::Throw) + throw Exception(ErrorCodes::QUERY_CACHE_USED_WITH_SYSTEM_TABLE, + "The query result was not cached because the query contains a system table." 
+ " Use setting `query_cache_system_table_handling = 'save'` or `= 'ignore'` to cache the query result regardless or to omit caching"); + + if ((!ast_contains_nondeterministic_functions || nondeterministic_function_handling == QueryCacheNondeterministicFunctionHandling::Save) + && (!ast_contains_system_tables || system_table_handling == QueryCacheSystemTableHandling::Save)) { QueryCache::Key key( ast, res.pipeline.getHeader(), diff --git a/src/Interpreters/tests/gtest_filecache.cpp b/src/Interpreters/tests/gtest_filecache.cpp index 09afb01d7ffb..41191ba16055 100644 --- a/src/Interpreters/tests/gtest_filecache.cpp +++ b/src/Interpreters/tests/gtest_filecache.cpp @@ -364,7 +364,7 @@ TEST_F(FileCacheTest, LRUPolicy) std::cerr << "Step 1\n"; auto cache = DB::FileCache("1", settings); cache.initialize(); - auto key = cache.createKeyForPath("key1"); + auto key = DB::FileCache::createKeyForPath("key1"); auto get_or_set = [&](size_t offset, size_t size) { @@ -728,7 +728,7 @@ TEST_F(FileCacheTest, LRUPolicy) auto cache2 = DB::FileCache("2", settings); cache2.initialize(); - auto key = cache2.createKeyForPath("key1"); + auto key = DB::FileCache::createKeyForPath("key1"); /// Get [2, 29] assertEqual( @@ -747,7 +747,7 @@ TEST_F(FileCacheTest, LRUPolicy) fs::create_directories(settings2.base_path); auto cache2 = DB::FileCache("3", settings2); cache2.initialize(); - auto key = cache2.createKeyForPath("key1"); + auto key = DB::FileCache::createKeyForPath("key1"); /// Get [0, 24] assertEqual( @@ -762,7 +762,7 @@ TEST_F(FileCacheTest, LRUPolicy) auto cache = FileCache("4", settings); cache.initialize(); - const auto key = cache.createKeyForPath("key10"); + const auto key = FileCache::createKeyForPath("key10"); const auto key_path = cache.getKeyPath(key, user); cache.removeAllReleasable(user.user_id); @@ -786,7 +786,7 @@ TEST_F(FileCacheTest, LRUPolicy) auto cache = DB::FileCache("5", settings); cache.initialize(); - const auto key = cache.createKeyForPath("key10"); + const auto key = FileCache::createKeyForPath("key10"); const auto key_path = cache.getKeyPath(key, user); cache.removeAllReleasable(user.user_id); @@ -823,7 +823,7 @@ TEST_F(FileCacheTest, writeBuffer) segment_settings.kind = FileSegmentKind::Temporary; segment_settings.unbounded = true; - auto cache_key = cache.createKeyForPath(key); + auto cache_key = FileCache::createKeyForPath(key); auto holder = cache.set(cache_key, 0, 3, segment_settings, user); /// The same is done in TemporaryDataOnDisk::createStreamToCacheFile. 
std::filesystem::create_directories(cache.getKeyPath(cache_key, user)); @@ -949,7 +949,7 @@ TEST_F(FileCacheTest, temporaryData) const auto user = FileCache::getCommonUser(); auto tmp_data_scope = std::make_shared(nullptr, &file_cache, TemporaryDataOnDiskSettings{}); - auto some_data_holder = file_cache.getOrSet(file_cache.createKeyForPath("some_data"), 0, 5_KiB, 5_KiB, CreateFileSegmentSettings{}, 0, user); + auto some_data_holder = file_cache.getOrSet(FileCache::createKeyForPath("some_data"), 0, 5_KiB, 5_KiB, CreateFileSegmentSettings{}, 0, user); { ASSERT_EQ(some_data_holder->size(), 5); @@ -1199,7 +1199,7 @@ TEST_F(FileCacheTest, SLRUPolicy) { auto cache = DB::FileCache(std::to_string(++file_cache_name), settings); cache.initialize(); - auto key = cache.createKeyForPath("key1"); + auto key = FileCache::createKeyForPath("key1"); auto add_range = [&](size_t offset, size_t size) { diff --git a/src/Loggers/Loggers.cpp b/src/Loggers/Loggers.cpp index c5862b82f34a..f794ad336e26 100644 --- a/src/Loggers/Loggers.cpp +++ b/src/Loggers/Loggers.cpp @@ -53,6 +53,8 @@ static std::string renderFileNameTemplate(time_t now, const std::string & file_p return path.replace_filename(ss.str()); } +/// NOLINTBEGIN(readability-static-accessed-through-instance) + void Loggers::buildLoggers(Poco::Util::AbstractConfiguration & config, Poco::Logger & logger /*_root*/, const std::string & cmd_name) { auto current_logger = config.getString("logger", ""); @@ -393,6 +395,8 @@ void Loggers::updateLevels(Poco::Util::AbstractConfiguration & config, Poco::Log } } +/// NOLINTEND(readability-static-accessed-through-instance) + void Loggers::closeLogs(Poco::Logger & logger) { if (log_file) diff --git a/src/Parsers/ASTFunction.cpp b/src/Parsers/ASTFunction.cpp index 07eea86ef811..cdc9a471e985 100644 --- a/src/Parsers/ASTFunction.cpp +++ b/src/Parsers/ASTFunction.cpp @@ -793,4 +793,15 @@ bool tryGetFunctionNameInto(const IAST * ast, String & name) return false; } +bool isASTLambdaFunction(const ASTFunction & function) +{ + if (function.name == "lambda" && function.arguments && function.arguments->children.size() == 2) + { + const auto * lambda_args_tuple = function.arguments->children.at(0)->as(); + return lambda_args_tuple && lambda_args_tuple->name == "tuple"; + } + + return false; +} + } diff --git a/src/Parsers/ASTFunction.h b/src/Parsers/ASTFunction.h index 631b6285bfa4..3a94691f25da 100644 --- a/src/Parsers/ASTFunction.h +++ b/src/Parsers/ASTFunction.h @@ -111,4 +111,7 @@ inline String getFunctionName(const ASTPtr & ast) { return getFunctionName(ast.g inline std::optional tryGetFunctionName(const ASTPtr & ast) { return tryGetFunctionName(ast.get()); } inline bool tryGetFunctionNameInto(const ASTPtr & ast, String & name) { return tryGetFunctionNameInto(ast.get(), name); } +/// Checks if function is a lambda function definition `lambda((x, y), x + y)` +bool isASTLambdaFunction(const ASTFunction & function); + } diff --git a/src/Parsers/ASTIndexDeclaration.cpp b/src/Parsers/ASTIndexDeclaration.cpp index 8dac5389c803..0c36644356f3 100644 --- a/src/Parsers/ASTIndexDeclaration.cpp +++ b/src/Parsers/ASTIndexDeclaration.cpp @@ -8,24 +8,63 @@ namespace DB { -ASTPtr ASTIndexDeclaration::clone() const +namespace ErrorCodes { - auto res = std::make_shared(); + extern const int LOGICAL_ERROR; +} - res->name = name; - if (granularity) - res->granularity = granularity; + +ASTIndexDeclaration::ASTIndexDeclaration(ASTPtr expression, ASTPtr type, const String & name_) + : name(name_) +{ + if (!expression) + throw 
Exception(ErrorCodes::LOGICAL_ERROR, "Index declaration must have an expression"); + children.push_back(expression); + + if (type) + { + if (!dynamic_cast(type.get())) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Index declaration type must be a function"); + children.push_back(type); + } +} + +ASTPtr ASTIndexDeclaration::clone() const +{ + ASTPtr expr = getExpression(); if (expr) - res->set(res->expr, expr->clone()); + expr = expr->clone(); + + ASTPtr type = getType(); if (type) - res->set(res->type, type->clone()); + type = type->clone(); + + auto res = std::make_shared(expr, type, name); + res->granularity = granularity; + return res; } +ASTPtr ASTIndexDeclaration::getExpression() const +{ + if (children.size() <= expression_idx) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Index declaration must have an expression"); + return children[expression_idx]; +} + +std::shared_ptr ASTIndexDeclaration::getType() const +{ + if (children.size() <= type_idx) + return nullptr; + auto func_ast = std::dynamic_pointer_cast(children[type_idx]); + if (!func_ast) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Index declaration type must be a function"); + return func_ast; +} void ASTIndexDeclaration::formatImpl(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const { - if (expr) + if (auto expr = getExpression()) { if (part_of_create_index_query) { @@ -46,11 +85,12 @@ void ASTIndexDeclaration::formatImpl(const FormatSettings & s, FormatState & sta } } - if (type) + if (auto type = getType()) { s.ostr << (s.hilite ? hilite_keyword : "") << " TYPE " << (s.hilite ? hilite_none : ""); type->formatImpl(s, state, frame); } + if (granularity) { s.ostr << (s.hilite ? hilite_keyword : "") << " GRANULARITY " << (s.hilite ? hilite_none : ""); diff --git a/src/Parsers/ASTIndexDeclaration.h b/src/Parsers/ASTIndexDeclaration.h index 1fbf5e126959..dd05ad081848 100644 --- a/src/Parsers/ASTIndexDeclaration.h +++ b/src/Parsers/ASTIndexDeclaration.h @@ -16,9 +16,9 @@ class ASTIndexDeclaration : public IAST static const auto DEFAULT_ANNOY_INDEX_GRANULARITY = 100'000'000uz; static const auto DEFAULT_USEARCH_INDEX_GRANULARITY = 100'000'000uz; + ASTIndexDeclaration(ASTPtr expression, ASTPtr type, const String & name_); + String name; - IAST * expr; - ASTFunction * type; UInt64 granularity; bool part_of_create_index_query = false; @@ -28,11 +28,12 @@ class ASTIndexDeclaration : public IAST ASTPtr clone() const override; void formatImpl(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const override; - void forEachPointerToChild(std::function f) override - { - f(reinterpret_cast(&expr)); - f(reinterpret_cast(&type)); - } + ASTPtr getExpression() const; + std::shared_ptr getType() const; + +private: + static constexpr size_t expression_idx = 0; + static constexpr size_t type_idx = 1; }; } diff --git a/src/Parsers/ASTInsertQuery.cpp b/src/Parsers/ASTInsertQuery.cpp index 72a569fe0471..8e3458539f3b 100644 --- a/src/Parsers/ASTInsertQuery.cpp +++ b/src/Parsers/ASTInsertQuery.cpp @@ -123,13 +123,8 @@ void ASTInsertQuery::formatImpl(const FormatSettings & settings, FormatState & s settings.ostr << delim; select->formatImpl(settings, state, frame); } - else if (watch) - { - settings.ostr << delim; - watch->formatImpl(settings, state, frame); - } - if (!select && !watch) + if (!select) { if (!format.empty()) { diff --git a/src/Parsers/ASTInsertQuery.h b/src/Parsers/ASTInsertQuery.h index b0f444ed7558..aeab0f148bec 100644 --- a/src/Parsers/ASTInsertQuery.h +++ 
b/src/Parsers/ASTInsertQuery.h @@ -24,7 +24,6 @@ class ASTInsertQuery : public IAST ASTPtr settings_ast; ASTPtr select; - ASTPtr watch; ASTPtr infile; ASTPtr compression; @@ -63,7 +62,6 @@ class ASTInsertQuery : public IAST if (partition_by) { res->partition_by = partition_by->clone(); res->children.push_back(res->partition_by); } if (settings_ast) { res->settings_ast = settings_ast->clone(); res->children.push_back(res->settings_ast); } if (select) { res->select = select->clone(); res->children.push_back(res->select); } - if (watch) { res->watch = watch->clone(); res->children.push_back(res->watch); } if (infile) { res->infile = infile->clone(); res->children.push_back(res->infile); } if (compression) { res->compression = compression->clone(); res->children.push_back(res->compression); } diff --git a/src/Parsers/ASTOrderByElement.cpp b/src/Parsers/ASTOrderByElement.cpp index 318849812aa5..be0416359a18 100644 --- a/src/Parsers/ASTOrderByElement.cpp +++ b/src/Parsers/ASTOrderByElement.cpp @@ -31,7 +31,7 @@ void ASTOrderByElement::formatImpl(const FormatSettings & settings, FormatState << (settings.hilite ? hilite_none : ""); } - if (collation) + if (auto collation = getCollation()) { settings.ostr << (settings.hilite ? hilite_keyword : "") << " COLLATE " << (settings.hilite ? hilite_none : ""); collation->formatImpl(settings, state, frame); @@ -40,17 +40,17 @@ void ASTOrderByElement::formatImpl(const FormatSettings & settings, FormatState if (with_fill) { settings.ostr << (settings.hilite ? hilite_keyword : "") << " WITH FILL" << (settings.hilite ? hilite_none : ""); - if (fill_from) + if (auto fill_from = getFillFrom()) { settings.ostr << (settings.hilite ? hilite_keyword : "") << " FROM " << (settings.hilite ? hilite_none : ""); fill_from->formatImpl(settings, state, frame); } - if (fill_to) + if (auto fill_to = getFillTo()) { settings.ostr << (settings.hilite ? hilite_keyword : "") << " TO " << (settings.hilite ? hilite_none : ""); fill_to->formatImpl(settings, state, frame); } - if (fill_step) + if (auto fill_step = getFillStep()) { settings.ostr << (settings.hilite ? hilite_keyword : "") << " STEP " << (settings.hilite ? hilite_none : ""); fill_step->formatImpl(settings, state, frame); diff --git a/src/Parsers/ASTOrderByElement.h b/src/Parsers/ASTOrderByElement.h index 4cebc30be31b..6edf84d7bde9 100644 --- a/src/Parsers/ASTOrderByElement.h +++ b/src/Parsers/ASTOrderByElement.h @@ -10,18 +10,34 @@ namespace DB */ class ASTOrderByElement : public IAST { +private: + enum class Child : uint8_t + { + EXPRESSION, + COLLATION, + FILL_FROM, + FILL_TO, + FILL_STEP, + }; + public: int direction = 0; /// 1 for ASC, -1 for DESC int nulls_direction = 0; /// Same as direction for NULLS LAST, opposite for NULLS FIRST. bool nulls_direction_was_explicitly_specified = false; + bool with_fill = false; + /** Collation for locale-specific string comparison. If empty, then sorting done by bytes. */ - ASTPtr collation; + void setCollation(ASTPtr node) { setChild(Child::COLLATION, node); } + void setFillFrom(ASTPtr node) { setChild(Child::FILL_FROM, node); } + void setFillTo(ASTPtr node) { setChild(Child::FILL_TO, node); } + void setFillStep(ASTPtr node) { setChild(Child::FILL_STEP, node); } - bool with_fill = false; - ASTPtr fill_from; - ASTPtr fill_to; - ASTPtr fill_step; + /** Collation for locale-specific string comparison. If empty, then sorting done by bytes. 
*/ + ASTPtr getCollation() const { return getChild(Child::COLLATION); } + ASTPtr getFillFrom() const { return getChild(Child::FILL_FROM); } + ASTPtr getFillTo() const { return getChild(Child::FILL_TO); } + ASTPtr getFillStep() const { return getChild(Child::FILL_STEP); } String getID(char) const override { return "OrderByElement"; } @@ -36,6 +52,34 @@ class ASTOrderByElement : public IAST protected: void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; +private: + + ASTPtr getChild(Child child) const + { + auto it = positions.find(child); + if (it != positions.end()) + return children[it->second]; + return {}; + } + + void setChild(Child child, ASTPtr node) + { + if (node == nullptr) + return; + + auto it = positions.find(child); + if (it != positions.end()) + { + children[it->second] = node; + } + else + { + positions[child] = children.size(); + children.push_back(node); + } + } + + std::unordered_map positions; }; } diff --git a/src/Parsers/Access/ParserPublicSSHKey.cpp b/src/Parsers/Access/ParserPublicSSHKey.cpp index bc033e25bbb9..9102044900de 100644 --- a/src/Parsers/Access/ParserPublicSSHKey.cpp +++ b/src/Parsers/Access/ParserPublicSSHKey.cpp @@ -1,6 +1,6 @@ #include -#include +#include #include #include diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index 67f4a306292d..2c8ab65d1fc6 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -5,9 +5,10 @@ #include #include -#include -#include #include +#include +#include +#include #include "Parsers/CommonParsers.h" #include @@ -1545,8 +1546,8 @@ bool ParserColumnsTransformers::parseImpl(Pos & pos, ASTPtr & node, Expected & e { if (auto * func = lambda->as(); func && func->name == "lambda") { - if (func->arguments->children.size() != 2) - throw Exception(ErrorCodes::SYNTAX_ERROR, "lambda requires two arguments"); + if (!isASTLambdaFunction(*func)) + throw Exception(ErrorCodes::SYNTAX_ERROR, "Lambda function definition expects two arguments, first argument must be a tuple of arguments"); const auto * lambda_args_tuple = func->arguments->children.at(0)->as(); if (!lambda_args_tuple || lambda_args_tuple->name != "tuple") @@ -2120,17 +2121,16 @@ bool ParserOrderByElement::parseImpl(Pos & pos, ASTPtr & node, Expected & expect auto elem = std::make_shared(); + elem->children.push_back(expr_elem); + elem->direction = direction; elem->nulls_direction = nulls_direction; elem->nulls_direction_was_explicitly_specified = nulls_direction_was_explicitly_specified; - elem->collation = locale_node; + elem->setCollation(locale_node); elem->with_fill = has_with_fill; - elem->fill_from = fill_from; - elem->fill_to = fill_to; - elem->fill_step = fill_step; - elem->children.push_back(expr_elem); - if (locale_node) - elem->children.push_back(locale_node); + elem->setFillFrom(fill_from); + elem->setFillTo(fill_to); + elem->setFillStep(fill_step); node = elem; diff --git a/src/Parsers/ParserCreateIndexQuery.cpp b/src/Parsers/ParserCreateIndexQuery.cpp index 3b1b9d8ec848..fd2bbbab1778 100644 --- a/src/Parsers/ParserCreateIndexQuery.cpp +++ b/src/Parsers/ParserCreateIndexQuery.cpp @@ -54,19 +54,18 @@ bool ParserCreateIndexDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected return false; } - auto index = std::make_shared(); + /// name is set below in ParserCreateIndexQuery + auto index = std::make_shared(expr, type, ""); index->part_of_create_index_query = true; - index->set(index->expr, expr); - if (type) 
- index->set(index->type, type); if (granularity) index->granularity = granularity->as().value.safeGet(); else { - if (index->type && index->type->name == "annoy") + auto index_type = index->getType(); + if (index_type && index_type->name == "annoy") index->granularity = ASTIndexDeclaration::DEFAULT_ANNOY_INDEX_GRANULARITY; - else if (index->type && index->type->name == "usearch") + else if (index_type && index_type->name == "usearch") index->granularity = ASTIndexDeclaration::DEFAULT_USEARCH_INDEX_GRANULARITY; else index->granularity = ASTIndexDeclaration::DEFAULT_INDEX_GRANULARITY; diff --git a/src/Parsers/ParserCreateQuery.cpp b/src/Parsers/ParserCreateQuery.cpp index 1510cc8e1954..ff88b58760b3 100644 --- a/src/Parsers/ParserCreateQuery.cpp +++ b/src/Parsers/ParserCreateQuery.cpp @@ -205,18 +205,16 @@ bool ParserIndexDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected & expe return false; } - auto index = std::make_shared(); - index->name = name->as().name(); - index->set(index->expr, expr); - index->set(index->type, type); + auto index = std::make_shared(expr, type, name->as().name()); if (granularity) index->granularity = granularity->as().value.safeGet(); else { - if (index->type->name == "annoy") + auto index_type = index->getType(); + if (index_type->name == "annoy") index->granularity = ASTIndexDeclaration::DEFAULT_ANNOY_INDEX_GRANULARITY; - else if (index->type->name == "usearch") + else if (index_type->name == "usearch") index->granularity = ASTIndexDeclaration::DEFAULT_USEARCH_INDEX_GRANULARITY; else index->granularity = ASTIndexDeclaration::DEFAULT_INDEX_GRANULARITY; diff --git a/src/Parsers/ParserInsertQuery.cpp b/src/Parsers/ParserInsertQuery.cpp index d1171dd48155..9373e6a1c936 100644 --- a/src/Parsers/ParserInsertQuery.cpp +++ b/src/Parsers/ParserInsertQuery.cpp @@ -36,7 +36,6 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ParserKeyword s_format(Keyword::FORMAT); ParserKeyword s_settings(Keyword::SETTINGS); ParserKeyword s_select(Keyword::SELECT); - ParserKeyword s_watch(Keyword::WATCH); ParserKeyword s_partition_by(Keyword::PARTITION_BY); ParserKeyword s_with(Keyword::WITH); ParserToken s_lparen(TokenType::OpeningRoundBracket); @@ -56,7 +55,6 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ASTPtr columns; ASTPtr format; ASTPtr select; - ASTPtr watch; ASTPtr table_function; ASTPtr settings_ast; ASTPtr partition_by_expr; @@ -143,7 +141,7 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) String format_str; Pos before_values = pos; - /// VALUES or FORMAT or SELECT or WITH or WATCH. + /// VALUES or FORMAT or SELECT or WITH. /// After FROM INFILE we expect FORMAT, SELECT, WITH or nothing. if (!infile && s_values.ignore(pos, expected)) { @@ -175,14 +173,6 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) tryGetIdentifierNameInto(format, format_str); } - else if (!infile && s_watch.ignore(pos, expected)) - { - /// If WATCH is defined, return to position before WATCH and parse - /// rest of query as WATCH query. 
- pos = before_values; - ParserWatchQuery watch_p; - watch_p.parse(pos, watch, expected); - } else if (!infile) { /// If all previous conditions were false and it's not FROM INFILE, query is incorrect @@ -286,7 +276,6 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) query->columns = columns; query->format = std::move(format_str); query->select = select; - query->watch = watch; query->settings_ast = settings_ast; query->data = data != end ? data : nullptr; query->end = end; @@ -295,8 +284,6 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) query->children.push_back(columns); if (select) query->children.push_back(select); - if (watch) - query->children.push_back(watch); if (settings_ast) query->children.push_back(settings_ast); diff --git a/src/Parsers/ParserSetQuery.cpp b/src/Parsers/ParserSetQuery.cpp index 13b881635cd7..f08d2b978c62 100644 --- a/src/Parsers/ParserSetQuery.cpp +++ b/src/Parsers/ParserSetQuery.cpp @@ -210,12 +210,8 @@ bool ParserSetQuery::parseNameValuePair(SettingChange & change, IParser::Pos & p if (!s_eq.ignore(pos, expected)) return false; - if (ParserKeyword(Keyword::TRUE_KEYWORD).ignore(pos, expected)) - value = std::make_shared(Field(static_cast(1))); - else if (ParserKeyword(Keyword::FALSE_KEYWORD).ignore(pos, expected)) - value = std::make_shared(Field(static_cast(0))); /// for SETTINGS disk=disk(type='s3', path='', ...) - else if (function_p.parse(pos, function_ast, expected) && function_ast->as()->name == "disk") + if (function_p.parse(pos, function_ast, expected) && function_ast->as()->name == "disk") { tryGetIdentifierNameInto(name, change.name); change.value = createFieldFromAST(function_ast); @@ -276,11 +272,7 @@ bool ParserSetQuery::parseNameValuePairWithParameterOrDefault( } /// Setting - if (ParserKeyword(Keyword::TRUE_KEYWORD).ignore(pos, expected)) - node = std::make_shared(Field(static_cast(1))); - else if (ParserKeyword(Keyword::FALSE_KEYWORD).ignore(pos, expected)) - node = std::make_shared(Field(static_cast(0))); - else if (function_p.parse(pos, function_ast, expected) && function_ast->as()->name == "disk") + if (function_p.parse(pos, function_ast, expected) && function_ast->as()->name == "disk") { change.name = name; change.value = createFieldFromAST(function_ast); diff --git a/src/Planner/CollectSets.cpp b/src/Planner/CollectSets.cpp index e150b8a5956b..f00b1e6ab164 100644 --- a/src/Planner/CollectSets.cpp +++ b/src/Planner/CollectSets.cpp @@ -89,27 +89,8 @@ class CollectSetsVisitor : public ConstInDepthQueryTreeVisitoras()) - { - auto storage_snapshot = table_node->getStorageSnapshot(); - auto columns_to_select = storage_snapshot->getColumns(GetColumnsOptions(GetColumnsOptions::Ordinary)); - size_t columns_to_select_size = columns_to_select.size(); - auto column_nodes_to_select = std::make_shared(); - column_nodes_to_select->getNodes().reserve(columns_to_select_size); - NamesAndTypes projection_columns; - projection_columns.reserve(columns_to_select_size); - for (auto & column : columns_to_select) - { - column_nodes_to_select->getNodes().emplace_back(std::make_shared(column, subquery_to_execute)); - projection_columns.emplace_back(column.name, column.type); - } - auto subquery_for_table = std::make_shared(Context::createCopy(planner_context.getQueryContext())); - subquery_for_table->setIsSubquery(true); - subquery_for_table->getProjectionNode() = std::move(column_nodes_to_select); - subquery_for_table->getJoinTree() = std::move(subquery_to_execute); - 
subquery_for_table->resolveProjectionColumns(std::move(projection_columns)); - subquery_to_execute = std::move(subquery_for_table); - } + if (in_second_argument->as()) + subquery_to_execute = buildSubqueryToReadColumnsFromTableExpression(std::move(subquery_to_execute), planner_context.getQueryContext()); sets.addFromSubquery(set_key, std::move(subquery_to_execute), settings); } diff --git a/src/Planner/CollectTableExpressionData.cpp b/src/Planner/CollectTableExpressionData.cpp index 385381f13552..27b5909c13b0 100644 --- a/src/Planner/CollectTableExpressionData.cpp +++ b/src/Planner/CollectTableExpressionData.cpp @@ -235,7 +235,9 @@ class CollectPrewhereTableExpressionVisitor : public ConstInDepthQueryTreeVisito static bool needChildVisit(const QueryTreeNodePtr &, const QueryTreeNodePtr & child_node) { auto child_node_type = child_node->getNodeType(); - return !(child_node_type == QueryTreeNodeType::QUERY || child_node_type == QueryTreeNodeType::UNION); + return child_node_type != QueryTreeNodeType::QUERY && + child_node_type != QueryTreeNodeType::UNION && + child_node_type != QueryTreeNodeType::LAMBDA; } private: diff --git a/src/Planner/Planner.cpp b/src/Planner/Planner.cpp index 5f73bba67a66..d75573c8d999 100644 --- a/src/Planner/Planner.cpp +++ b/src/Planner/Planner.cpp @@ -1,9 +1,10 @@ #include +#include +#include #include -#include #include -#include +#include #include diff --git a/src/Planner/PlannerActionsVisitor.cpp b/src/Planner/PlannerActionsVisitor.cpp index 326dd6833438..02c1c56fae2e 100644 --- a/src/Planner/PlannerActionsVisitor.cpp +++ b/src/Planner/PlannerActionsVisitor.cpp @@ -39,6 +39,7 @@ namespace ErrorCodes extern const int UNSUPPORTED_METHOD; extern const int LOGICAL_ERROR; extern const int BAD_ARGUMENTS; + extern const int INCORRECT_QUERY; } namespace @@ -169,7 +170,7 @@ class ActionNodeNameHelper { const auto & in_first_argument_node = function_node.getArguments().getNodes().at(0); const auto & in_second_argument_node = function_node.getArguments().getNodes().at(1); - in_function_second_argument_node_name = planner_context.createSetKey(in_first_argument_node->getResultType(), in_second_argument_node); + in_function_second_argument_node_name = PlannerContext::createSetKey(in_first_argument_node->getResultType(), in_second_argument_node); } WriteBufferFromOwnString buffer; @@ -500,7 +501,41 @@ class PlannerActionsVisitorImpl ActionsDAG::NodeRawConstPtrs visit(QueryTreeNodePtr expression_node); private: - using NodeNameAndNodeMinLevel = std::pair; + + class Levels + { + public: + explicit Levels(size_t level) { set(level); } + + void set(size_t level) + { + check(level); + if (level) + mask |= (uint64_t(1) << (level - 1)); + } + + void reset(size_t level) + { + check(level); + if (level) + mask &= ~(uint64_t(1) << (level - 1)); + } + + void add(Levels levels) { mask |= levels.mask; } + + size_t max() const { return 64 - getLeadingZeroBits(mask); } + + private: + uint64_t mask = 0; + + void check(size_t level) + { + if (level > 64) + throw Exception(ErrorCodes::INCORRECT_QUERY, "Maximum lambda depth exceeded. 
Maximum 64."); + } + }; + + using NodeNameAndNodeMinLevel = std::pair; NodeNameAndNodeMinLevel visitImpl(QueryTreeNodePtr node); @@ -586,11 +621,11 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi column_source->getNodeType() == QueryTreeNodeType::LAMBDA && actions_stack[i].getScopeNode().get() == column_source.get()) { - return {column_node_name, i}; + return {column_node_name, Levels(i)}; } } - return {column_node_name, 0}; + return {column_node_name, Levels(0)}; } PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::visitConstant(const QueryTreeNodePtr & node) @@ -660,7 +695,7 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi actions_stack_node.addInputConstantColumnIfNecessary(constant_node_name, column); } - return {constant_node_name, 0}; + return {constant_node_name, Levels(0)}; } @@ -688,7 +723,7 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi auto lambda_actions_dag = std::make_shared(); actions_stack.emplace_back(lambda_actions_dag, node); - auto [lambda_expression_node_name, level] = visitImpl(lambda_node.getExpression()); + auto [lambda_expression_node_name, levels] = visitImpl(lambda_node.getExpression()); lambda_actions_dag->getOutputs().push_back(actions_stack.back().getNodeOrThrow(lambda_expression_node_name)); lambda_actions_dag->removeUnusedActions(Names(1, lambda_expression_node_name)); @@ -699,8 +734,9 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi ActionsDAG::NodeRawConstPtrs lambda_children; Names required_column_names = lambda_actions->getRequiredColumns(); - if (level == actions_stack.size() - 1) - --level; + actions_stack.pop_back(); + levels.reset(actions_stack.size()); + size_t level = levels.max(); const auto & lambda_argument_names = lambda_node.getArgumentNames(); @@ -718,7 +754,6 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi auto lambda_node_name = calculateActionNodeName(node, *planner_context); auto function_capture = std::make_shared( lambda_actions, captured_column_names, lambda_arguments_names_and_types, lambda_node.getExpression()->getResultType(), lambda_expression_node_name); - actions_stack.pop_back(); // TODO: Pass IFunctionBase here not FunctionCaptureOverloadResolver. 
const auto * actions_node = actions_stack[level].addFunctionIfNecessary(lambda_node_name, std::move(lambda_children), function_capture); @@ -735,7 +770,7 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi actions_stack_node.addInputColumnIfNecessary(lambda_node_name, result_type); } - return {lambda_node_name, level}; + return {lambda_node_name, levels}; } PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::makeSetForInFunction(const QueryTreeNodePtr & node) @@ -799,7 +834,7 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::ma actions_stack_node.addInputConstantColumnIfNecessary(column.name, column); } - return {column.name, 0}; + return {column.name, Levels(0)}; } PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::visitIndexHintFunction(const QueryTreeNodePtr & node) @@ -833,7 +868,7 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi size_t index_hint_function_level = actions_stack.size() - 1; actions_stack[index_hint_function_level].addFunctionIfNecessary(function_node_name, {}, index_hint_function_overload_resolver); - return {function_node_name, index_hint_function_level}; + return {function_node_name, Levels(index_hint_function_level)}; } PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::visitFunction(const QueryTreeNodePtr & node) @@ -868,7 +903,7 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi actions_stack_node.addInputColumnIfNecessary(function_node_name, function_node.getResultType()); } - return {function_node_name, 0}; + return {function_node_name, Levels(0)}; } const auto & function_arguments = function_node.getArguments().getNodes(); @@ -877,14 +912,14 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi Names function_arguments_node_names; function_arguments_node_names.reserve(function_arguments_size); - size_t level = 0; + Levels levels(0); for (size_t function_argument_index = 0; function_argument_index < function_arguments_size; ++function_argument_index) { if (in_function_second_argument_node_name_with_level && function_argument_index == 1) { - auto & [node_name, node_min_level] = *in_function_second_argument_node_name_with_level; + auto & [node_name, node_levels] = *in_function_second_argument_node_name_with_level; function_arguments_node_names.push_back(std::move(node_name)); - level = std::max(level, node_min_level); + levels.add(node_levels); continue; } @@ -892,20 +927,21 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi if (argument->getNodeType() == QueryTreeNodeType::LAMBDA) { - auto [node_name, node_min_level] = visitLambda(argument); + auto [node_name, node_levels] = visitLambda(argument); function_arguments_node_names.push_back(std::move(node_name)); - level = std::max(level, node_min_level); + levels.add(node_levels); continue; } - auto [node_name, node_min_level] = visitImpl(argument); + auto [node_name, node_levels] = visitImpl(argument); function_arguments_node_names.push_back(std::move(node_name)); - level = std::max(level, node_min_level); + levels.add(node_levels); } ActionsDAG::NodeRawConstPtrs children; children.reserve(function_arguments_size); + size_t level = levels.max(); for (auto & function_argument_node_name : function_arguments_node_names) children.push_back(actions_stack[level].getNodeOrThrow(function_argument_node_name)); @@ -930,7 +966,7 @@ 
PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi actions_stack_node.addInputColumnIfNecessary(function_node_name, function_node.getResultType()); } - return {function_node_name, level}; + return {function_node_name, levels}; } } diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp index d2f37ff1ad4c..ba4f97181074 100644 --- a/src/Planner/PlannerJoinTree.cpp +++ b/src/Planner/PlannerJoinTree.cpp @@ -294,7 +294,7 @@ bool applyTrivialCountIfPossible( /// The query could use trivial count if it didn't use parallel replicas, so let's disable it query_context->setSetting("allow_experimental_parallel_reading_from_replicas", Field(0)); - query_context->setSetting("max_parallel_replicas", UInt64{0}); + query_context->setSetting("max_parallel_replicas", UInt64{1}); LOG_TRACE(getLogger("Planner"), "Disabling parallel replicas to be able to use a trivial count optimization"); } @@ -305,7 +305,7 @@ bool applyTrivialCountIfPossible( AggregateDataPtr place = state.data(); agg_count.create(place); SCOPE_EXIT_MEMORY_SAFE(agg_count.destroy(place)); - agg_count.set(place, num_rows.value()); + AggregateFunctionCount::set(place, num_rows.value()); auto column = ColumnAggregateFunction::create(function_node.getAggregateFunction()); column->insertFrom(place); @@ -777,7 +777,7 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres { planner_context->getMutableQueryContext()->setSetting( "allow_experimental_parallel_reading_from_replicas", Field(0)); - planner_context->getMutableQueryContext()->setSetting("max_parallel_replicas", UInt64{0}); + planner_context->getMutableQueryContext()->setSetting("max_parallel_replicas", UInt64{1}); LOG_DEBUG(getLogger("Planner"), "Disabling parallel replicas because there aren't enough rows to read"); } else if (number_of_replicas_to_use < settings.max_parallel_replicas) @@ -814,7 +814,8 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres bool optimize_move_to_prewhere = settings.optimize_move_to_prewhere && (!is_final || settings.optimize_move_to_prewhere_if_final); - if (storage->supportsPrewhere() && optimize_move_to_prewhere) + auto supported_prewhere_columns = storage->supportedPrewhereColumns(); + if (storage->canMoveConditionsToPrewhere() && optimize_move_to_prewhere && (!supported_prewhere_columns || supported_prewhere_columns->contains(filter_info.column_name))) { if (!prewhere_info) prewhere_info = std::make_shared(); diff --git a/src/Processors/Executors/ExecutorTasks.cpp b/src/Processors/Executors/ExecutorTasks.cpp index ec1fc539884a..1039cf0e97a1 100644 --- a/src/Processors/Executors/ExecutorTasks.cpp +++ b/src/Processors/Executors/ExecutorTasks.cpp @@ -121,7 +121,7 @@ void ExecutorTasks::pushTasks(Queue & queue, Queue & async_queue, ExecutionThrea /// Take local task from queue if has one. 
if (!queue.empty() && !context.hasAsyncTasks() - && context.num_scheduled_local_tasks < context.max_scheduled_local_tasks) + && context.num_scheduled_local_tasks < ExecutionThreadContext::max_scheduled_local_tasks) { ++context.num_scheduled_local_tasks; context.setTask(queue.front()); diff --git a/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp b/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp index 206e244c75f6..fc9a827be667 100644 --- a/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp @@ -86,7 +86,7 @@ Chunk ArrowBlockInputFormat::read() /// If defaults_for_omitted_fields is true, calculate the default values from default expression for omitted fields. /// Otherwise fill the missing columns with zero values of its type. BlockMissingValues * block_missing_values_ptr = format_settings.defaults_for_omitted_fields ? &block_missing_values : nullptr; - arrow_column_to_ch_column->arrowTableToCHChunk(res, *table_result, (*table_result)->num_rows(), block_missing_values_ptr); + res = arrow_column_to_ch_column->arrowTableToCHChunk(*table_result, (*table_result)->num_rows(), block_missing_values_ptr); /// There is no easy way to get original record batch size from Arrow metadata. /// Let's just use the number of bytes read from read buffer. diff --git a/src/Processors/Formats/Impl/ArrowBufferedStreams.cpp b/src/Processors/Formats/Impl/ArrowBufferedStreams.cpp index c6e951ada6a8..84375ccd5ce9 100644 --- a/src/Processors/Formats/Impl/ArrowBufferedStreams.cpp +++ b/src/Processors/Formats/Impl/ArrowBufferedStreams.cpp @@ -4,6 +4,7 @@ #if USE_ARROW || USE_ORC || USE_PARQUET #include +#include #include #include #include @@ -41,9 +42,18 @@ arrow::Result ArrowBufferedOutputStream::Tell() const arrow::Status ArrowBufferedOutputStream::Write(const void * data, int64_t length) { - out.write(reinterpret_cast(data), length); - total_length += length; - return arrow::Status::OK(); + try + { + out.write(reinterpret_cast(data), length); + total_length += length; + return arrow::Status::OK(); + } + catch (...) + { + auto message = getCurrentExceptionMessage(false); + LOG_ERROR(getLogger("ArrowBufferedOutputStream"), "Error while writing to arrow stream: {}", message); + return arrow::Status::IOError(message); + } } RandomAccessFileFromSeekableReadBuffer::RandomAccessFileFromSeekableReadBuffer(ReadBuffer & in_, std::optional file_size_, bool avoid_buffering_) @@ -74,9 +84,18 @@ arrow::Result RandomAccessFileFromSeekableReadBuffer::Tell() const arrow::Result RandomAccessFileFromSeekableReadBuffer::Read(int64_t nbytes, void * out) { - if (avoid_buffering) - in.setReadUntilPosition(seekable_in.getPosition() + nbytes); - return in.readBig(reinterpret_cast(out), nbytes); + try + { + if (avoid_buffering) + in.setReadUntilPosition(seekable_in.getPosition() + nbytes); + return in.readBig(reinterpret_cast(out), nbytes); + } + catch (...) + { + auto message = getCurrentExceptionMessage(false); + LOG_ERROR(getLogger("ArrowBufferedOutputStream"), "Error while reading from arrow stream: {}", message); + return arrow::Status::IOError(message); + } } arrow::Result> RandomAccessFileFromSeekableReadBuffer::Read(int64_t nbytes) @@ -98,14 +117,23 @@ arrow::Future> RandomAccessFileFromSeekableReadBu arrow::Status RandomAccessFileFromSeekableReadBuffer::Seek(int64_t position) { - if (avoid_buffering) + try { - // Seeking to a position above a previous setReadUntilPosition() confuses some of the - // ReadBuffer implementations. 
- in.setReadUntilEnd(); + if (avoid_buffering) + { + // Seeking to a position above a previous setReadUntilPosition() confuses some of the + // ReadBuffer implementations. + in.setReadUntilEnd(); + } + seekable_in.seek(position, SEEK_SET); + return arrow::Status::OK(); + } + catch (...) + { + auto message = getCurrentExceptionMessage(false); + LOG_ERROR(getLogger("ArrowBufferedOutputStream"), "Error while seeking arrow file: {}", message); + return arrow::Status::IOError(message); } - seekable_in.seek(position, SEEK_SET); - return arrow::Status::OK(); } @@ -115,7 +143,16 @@ ArrowInputStreamFromReadBuffer::ArrowInputStreamFromReadBuffer(ReadBuffer & in_) arrow::Result ArrowInputStreamFromReadBuffer::Read(int64_t nbytes, void * out) { - return in.readBig(reinterpret_cast(out), nbytes); + try + { + return in.readBig(reinterpret_cast(out), nbytes); + } + catch (...) + { + auto message = getCurrentExceptionMessage(false); + LOG_ERROR(getLogger("ArrowBufferedOutputStream"), "Error while reading from arrow stream: {}", message); + return arrow::Status::IOError(message); + } } arrow::Result> ArrowInputStreamFromReadBuffer::Read(int64_t nbytes) @@ -154,7 +191,16 @@ arrow::Result RandomAccessFileFromRandomAccessReadBuffer::GetSize() arrow::Result RandomAccessFileFromRandomAccessReadBuffer::ReadAt(int64_t position, int64_t nbytes, void* out) { - return in.readBigAt(reinterpret_cast(out), nbytes, position, nullptr); + try + { + return in.readBigAt(reinterpret_cast(out), nbytes, position, nullptr); + } + catch (...) + { + auto message = getCurrentExceptionMessage(false); + LOG_ERROR(getLogger("ArrowBufferedOutputStream"), "Error while reading from arrow stream: {}", message); + return arrow::Status::IOError(message); + } } arrow::Result> RandomAccessFileFromRandomAccessReadBuffer::ReadAt(int64_t position, int64_t nbytes) diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp index 65704c85026c..ec2d17d73cb4 100644 --- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp +++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp @@ -78,7 +78,7 @@ namespace ErrorCodes /// Inserts numeric data right into internal column data to reduce an overhead template > -static ColumnWithTypeAndName readColumnWithNumericData(std::shared_ptr & arrow_column, const String & column_name) +static ColumnWithTypeAndName readColumnWithNumericData(const std::shared_ptr & arrow_column, const String & column_name) { auto internal_type = std::make_shared>(); auto internal_column = internal_type->createColumn(); @@ -103,7 +103,7 @@ static ColumnWithTypeAndName readColumnWithNumericData(std::shared_ptr -static ColumnWithTypeAndName readColumnWithStringData(std::shared_ptr & arrow_column, const String & column_name) +static ColumnWithTypeAndName readColumnWithStringData(const std::shared_ptr & arrow_column, const String & column_name) { auto internal_type = std::make_shared(); auto internal_column = internal_type->createColumn(); @@ -147,7 +147,7 @@ static ColumnWithTypeAndName readColumnWithStringData(std::shared_ptr & arrow_column, const String & column_name) +static ColumnWithTypeAndName readColumnWithFixedStringData(const std::shared_ptr & arrow_column, const String & column_name) { const auto * fixed_type = assert_cast(arrow_column->type().get()); size_t fixed_len = fixed_type->byte_width(); @@ -166,7 +166,7 @@ static ColumnWithTypeAndName readColumnWithFixedStringData(std::shared_ptr -static ColumnWithTypeAndName 
readColumnWithBigIntegerFromFixedBinaryData(std::shared_ptr & arrow_column, const String & column_name, const DataTypePtr & column_type) +static ColumnWithTypeAndName readColumnWithBigIntegerFromFixedBinaryData(const std::shared_ptr & arrow_column, const String & column_name, const DataTypePtr & column_type) { const auto * fixed_type = assert_cast(arrow_column->type().get()); size_t fixed_len = fixed_type->byte_width(); @@ -193,7 +193,7 @@ static ColumnWithTypeAndName readColumnWithBigIntegerFromFixedBinaryData(std::sh } template -static ColumnWithTypeAndName readColumnWithBigNumberFromBinaryData(std::shared_ptr & arrow_column, const String & column_name, const DataTypePtr & column_type) +static ColumnWithTypeAndName readColumnWithBigNumberFromBinaryData(const std::shared_ptr & arrow_column, const String & column_name, const DataTypePtr & column_type) { size_t total_size = 0; for (int chunk_i = 0, num_chunks = arrow_column->num_chunks(); chunk_i < num_chunks; ++chunk_i) @@ -229,7 +229,7 @@ static ColumnWithTypeAndName readColumnWithBigNumberFromBinaryData(std::shared_p return {std::move(internal_column), column_type, column_name}; } -static ColumnWithTypeAndName readColumnWithBooleanData(std::shared_ptr & arrow_column, const String & column_name) +static ColumnWithTypeAndName readColumnWithBooleanData(const std::shared_ptr & arrow_column, const String & column_name) { auto internal_type = DataTypeFactory::instance().get("Bool"); auto internal_column = internal_type->createColumn(); @@ -248,7 +248,7 @@ static ColumnWithTypeAndName readColumnWithBooleanData(std::shared_ptr & arrow_column, const String & column_name, +static ColumnWithTypeAndName readColumnWithDate32Data(const std::shared_ptr & arrow_column, const String & column_name, const DataTypePtr & type_hint, FormatSettings::DateTimeOverflowBehavior date_time_overflow_behavior) { DataTypePtr internal_type; @@ -310,7 +310,7 @@ static ColumnWithTypeAndName readColumnWithDate32Data(std::shared_ptr & arrow_column, const String & column_name) +static ColumnWithTypeAndName readColumnWithDate64Data(const std::shared_ptr & arrow_column, const String & column_name) { auto internal_type = std::make_shared(); auto internal_column = internal_type->createColumn(); @@ -329,7 +329,7 @@ static ColumnWithTypeAndName readColumnWithDate64Data(std::shared_ptr & arrow_column, const String & column_name) +static ColumnWithTypeAndName readColumnWithTimestampData(const std::shared_ptr & arrow_column, const String & column_name) { const auto & arrow_type = static_cast(*(arrow_column->type())); const UInt8 scale = arrow_type.unit() * 3; @@ -350,7 +350,7 @@ static ColumnWithTypeAndName readColumnWithTimestampData(std::shared_ptr -static ColumnWithTypeAndName readColumnWithTimeData(std::shared_ptr & arrow_column, const String & column_name) +static ColumnWithTypeAndName readColumnWithTimeData(const std::shared_ptr & arrow_column, const String & column_name) { const auto & arrow_type = static_cast(*(arrow_column->type())); const UInt8 scale = arrow_type.unit() * 3; @@ -373,18 +373,18 @@ static ColumnWithTypeAndName readColumnWithTimeData(std::shared_ptr & arrow_column, const String & column_name) +static ColumnWithTypeAndName readColumnWithTime32Data(const std::shared_ptr & arrow_column, const String & column_name) { return readColumnWithTimeData(arrow_column, column_name); } -static ColumnWithTypeAndName readColumnWithTime64Data(std::shared_ptr & arrow_column, const String & column_name) +static ColumnWithTypeAndName readColumnWithTime64Data(const std::shared_ptr 
& arrow_column, const String & column_name) { return readColumnWithTimeData(arrow_column, column_name); } template -static ColumnWithTypeAndName readColumnWithDecimalDataImpl(std::shared_ptr & arrow_column, const String & column_name, DataTypePtr internal_type) +static ColumnWithTypeAndName readColumnWithDecimalDataImpl(const std::shared_ptr & arrow_column, const String & column_name, DataTypePtr internal_type) { auto internal_column = internal_type->createColumn(); auto & column = assert_cast &>(*internal_column); @@ -403,7 +403,7 @@ static ColumnWithTypeAndName readColumnWithDecimalDataImpl(std::shared_ptr -static ColumnWithTypeAndName readColumnWithDecimalData(std::shared_ptr & arrow_column, const String & column_name) +static ColumnWithTypeAndName readColumnWithDecimalData(const std::shared_ptr & arrow_column, const String & column_name) { const auto * arrow_decimal_type = static_cast(arrow_column->type().get()); size_t precision = arrow_decimal_type->precision(); @@ -418,7 +418,7 @@ static ColumnWithTypeAndName readColumnWithDecimalData(std::shared_ptr & arrow_column) +static ColumnPtr readByteMapFromArrowColumn(const std::shared_ptr & arrow_column) { if (!arrow_column->null_count()) return ColumnUInt8::create(arrow_column->length(), 0); @@ -453,7 +453,7 @@ struct ArrowOffsetArray }; template -static ColumnPtr readOffsetsFromArrowListColumn(std::shared_ptr & arrow_column) +static ColumnPtr readOffsetsFromArrowListColumn(const std::shared_ptr & arrow_column) { auto offsets_column = ColumnUInt64::create(); ColumnArray::Offsets & offsets_data = assert_cast &>(*offsets_column).getData(); @@ -463,7 +463,7 @@ static ColumnPtr readOffsetsFromArrowListColumn(std::shared_ptr(*(arrow_column->chunk(chunk_i))); auto arrow_offsets_array = list_chunk.offsets(); - auto & arrow_offsets = dynamic_cast::type &>(*arrow_offsets_array); + auto & arrow_offsets = dynamic_cast::type &>(*arrow_offsets_array); /* * CH uses element size as "offsets", while arrow uses actual offsets as offsets. 
@@ -620,7 +620,7 @@ static ColumnPtr readColumnWithIndexesData(std::shared_ptr } template -static std::shared_ptr getNestedArrowColumn(std::shared_ptr & arrow_column) +static std::shared_ptr getNestedArrowColumn(const std::shared_ptr & arrow_column) { arrow::ArrayVector array_vector; array_vector.reserve(arrow_column->num_chunks()); @@ -648,7 +648,7 @@ static std::shared_ptr getNestedArrowColumn(std::shared_ptr return std::make_shared(array_vector); } -static ColumnWithTypeAndName readIPv6ColumnFromBinaryData(std::shared_ptr & arrow_column, const String & column_name) +static ColumnWithTypeAndName readIPv6ColumnFromBinaryData(const std::shared_ptr & arrow_column, const String & column_name) { size_t total_size = 0; for (int chunk_i = 0, num_chunks = arrow_column->num_chunks(); chunk_i < num_chunks; ++chunk_i) @@ -684,7 +684,7 @@ static ColumnWithTypeAndName readIPv6ColumnFromBinaryData(std::shared_ptr & arrow_column, const String & column_name) +static ColumnWithTypeAndName readIPv4ColumnWithInt32Data(const std::shared_ptr & arrow_column, const String & column_name) { auto internal_type = std::make_shared(); auto internal_column = internal_type->createColumn(); @@ -705,35 +705,31 @@ static ColumnWithTypeAndName readIPv4ColumnWithInt32Data(std::shared_ptr & arrow_column, - const std::string & column_name, - const std::string & format_name, - bool is_nullable, - std::unordered_map & dictionary_infos, - bool allow_null_type, - bool skip_columns_with_unsupported_types, - bool & skipped, - FormatSettings::DateTimeOverflowBehavior date_time_overflow_behavior = FormatSettings::DateTimeOverflowBehavior::Ignore, - DataTypePtr type_hint = nullptr, - bool is_map_nested = false) +struct ReadColumnFromArrowColumnSettings { - if (!is_nullable && (arrow_column->null_count() || (type_hint && type_hint->isNullable())) && arrow_column->type()->id() != arrow::Type::LIST - && arrow_column->type()->id() != arrow::Type::MAP && arrow_column->type()->id() != arrow::Type::STRUCT && - arrow_column->type()->id() != arrow::Type::DICTIONARY) - { - DataTypePtr nested_type_hint; - if (type_hint) - nested_type_hint = removeNullable(type_hint); - auto nested_column = readColumnFromArrowColumn(arrow_column, column_name, format_name, true, dictionary_infos, allow_null_type, skip_columns_with_unsupported_types, skipped, date_time_overflow_behavior, nested_type_hint); - if (skipped) - return {}; - auto nullmap_column = readByteMapFromArrowColumn(arrow_column); - auto nullable_type = std::make_shared(std::move(nested_column.type)); - auto nullable_column = ColumnNullable::create(nested_column.column, nullmap_column); - return {std::move(nullable_column), std::move(nullable_type), column_name}; - } + std::string format_name; + FormatSettings::DateTimeOverflowBehavior date_time_overflow_behavior; + bool allow_arrow_null_type; + bool skip_columns_with_unsupported_types; +}; +static ColumnWithTypeAndName readColumnFromArrowColumn( + const std::shared_ptr & arrow_column, + std::string column_name, + std::unordered_map dictionary_infos, + DataTypePtr type_hint, + bool is_nullable_column, + bool is_map_nested_column, + const ReadColumnFromArrowColumnSettings & settings); + +static ColumnWithTypeAndName readNonNullableColumnFromArrowColumn( + const std::shared_ptr & arrow_column, + std::string column_name, + std::unordered_map dictionary_infos, + DataTypePtr type_hint, + bool is_map_nested_column, + const ReadColumnFromArrowColumnSettings & settings) +{ switch (arrow_column->type()->id()) { case arrow::Type::STRING: @@ -790,7 +786,7 
@@ static ColumnWithTypeAndName readColumnFromArrowColumn( case arrow::Type::BOOL: return readColumnWithBooleanData(arrow_column, column_name); case arrow::Type::DATE32: - return readColumnWithDate32Data(arrow_column, column_name, type_hint, date_time_overflow_behavior); + return readColumnWithDate32Data(arrow_column, column_name, type_hint, settings.date_time_overflow_behavior); case arrow::Type::DATE64: return readColumnWithDate64Data(arrow_column, column_name); // ClickHouse writes Date as arrow UINT16 and DateTime as arrow UINT32, @@ -837,9 +833,16 @@ static ColumnWithTypeAndName readColumnFromArrowColumn( key_type_hint = map_type_hint->getKeyType(); } } + auto arrow_nested_column = getNestedArrowColumn(arrow_column); - auto nested_column = readColumnFromArrowColumn(arrow_nested_column, column_name, format_name, false, dictionary_infos, allow_null_type, skip_columns_with_unsupported_types, skipped, date_time_overflow_behavior, nested_type_hint, true); - if (skipped) + auto nested_column = readColumnFromArrowColumn(arrow_nested_column, + column_name, + dictionary_infos, + nested_type_hint, + false /*is_nullable_column*/, + true /*is_map_nested_column*/, + settings); + if (!nested_column.column) return {}; auto offsets_column = readOffsetsFromArrowListColumn(arrow_column); @@ -866,7 +869,7 @@ static ColumnWithTypeAndName readColumnFromArrowColumn( case arrow::Type::LIST: case arrow::Type::LARGE_LIST: { - bool is_large = arrow_column->type()->id() == arrow::Type::LARGE_LIST; + bool is_large_list = arrow_column->type()->id() == arrow::Type::LARGE_LIST; DataTypePtr nested_type_hint; if (type_hint) { @@ -874,12 +877,33 @@ static ColumnWithTypeAndName readColumnFromArrowColumn( if (array_type_hint) nested_type_hint = array_type_hint->getNestedType(); } - auto arrow_nested_column = is_large ? getNestedArrowColumn(arrow_column) : getNestedArrowColumn(arrow_column); - auto nested_column = readColumnFromArrowColumn(arrow_nested_column, column_name, format_name, false, dictionary_infos, allow_null_type, skip_columns_with_unsupported_types, skipped, date_time_overflow_behavior, nested_type_hint); - if (skipped) + + bool is_nested_nullable_column = false; + if (is_large_list) + { + auto * arrow_large_list_type = assert_cast(arrow_column->type().get()); + is_nested_nullable_column = arrow_large_list_type->value_field()->nullable(); + } + else + { + auto * arrow_list_type = assert_cast(arrow_column->type().get()); + is_nested_nullable_column = arrow_list_type->value_field()->nullable(); + } + + auto arrow_nested_column = is_large_list ? getNestedArrowColumn(arrow_column) : getNestedArrowColumn(arrow_column); + auto nested_column = readColumnFromArrowColumn(arrow_nested_column, + column_name, + dictionary_infos, + nested_type_hint, + is_nested_nullable_column, + false /*is_map_nested_column*/, + settings); + if (!nested_column.column) return {}; - auto offsets_column = is_large ? readOffsetsFromArrowListColumn(arrow_column) : readOffsetsFromArrowListColumn(arrow_column); + + auto offsets_column = is_large_list ? 
readOffsetsFromArrowListColumn(arrow_column) : readOffsetsFromArrowListColumn(arrow_column); auto array_column = ColumnArray::create(nested_column.column, offsets_column); + DataTypePtr array_type; /// If type hint is Nested, we should return Nested type, /// because we differentiate Nested and simple Array(Tuple) @@ -913,11 +937,13 @@ static ColumnWithTypeAndName readColumnFromArrowColumn( for (int i = 0; i != arrow_struct_type->num_fields(); ++i) { - auto field_name = arrow_struct_type->field(i)->name(); + const auto & field = arrow_struct_type->field(i); + const auto & field_name = field->name(); + DataTypePtr nested_type_hint; if (tuple_type_hint) { - if (tuple_type_hint->haveExplicitNames() && !is_map_nested) + if (tuple_type_hint->haveExplicitNames() && !is_map_nested_column) { auto pos = tuple_type_hint->tryGetPositionByName(field_name); if (pos) @@ -926,13 +952,21 @@ static ColumnWithTypeAndName readColumnFromArrowColumn( else if (size_t(i) < tuple_type_hint->getElements().size()) nested_type_hint = tuple_type_hint->getElement(i); } + auto nested_arrow_column = std::make_shared(nested_arrow_columns[i]); - auto element = readColumnFromArrowColumn(nested_arrow_column, field_name, format_name, false, dictionary_infos, allow_null_type, skip_columns_with_unsupported_types, skipped, date_time_overflow_behavior, nested_type_hint); - if (skipped) + auto column_with_type_and_name = readColumnFromArrowColumn(nested_arrow_column, + field_name, + dictionary_infos, + nested_type_hint, + field->nullable(), + false /*is_map_nested_column*/, + settings); + if (!column_with_type_and_name.column) return {}; - tuple_elements.emplace_back(std::move(element.column)); - tuple_types.emplace_back(std::move(element.type)); - tuple_names.emplace_back(std::move(element.name)); + + tuple_elements.emplace_back(std::move(column_with_type_and_name.column)); + tuple_types.emplace_back(std::move(column_with_type_and_name.type)); + tuple_names.emplace_back(std::move(column_with_type_and_name.name)); } auto tuple_column = ColumnTuple::create(std::move(tuple_elements)); @@ -953,8 +987,19 @@ static ColumnWithTypeAndName readColumnFromArrowColumn( arrow::DictionaryArray & dict_chunk = dynamic_cast(*(arrow_column->chunk(chunk_i))); dict_array.emplace_back(dict_chunk.dictionary()); } + auto arrow_dict_column = std::make_shared(dict_array); - auto dict_column = readColumnFromArrowColumn(arrow_dict_column, column_name, format_name, false, dictionary_infos, allow_null_type, skip_columns_with_unsupported_types, skipped, date_time_overflow_behavior); + auto dict_column = readColumnFromArrowColumn(arrow_dict_column, + column_name, + dictionary_infos, + nullptr /*nested_type_hint*/, + false /*is_nullable_column*/, + false /*is_map_nested_column*/, + settings); + + if (!dict_column.column) + return {}; + for (size_t i = 0; i != dict_column.column->size(); ++i) { if (dict_column.column->isDefaultAt(i)) @@ -963,6 +1008,7 @@ static ColumnWithTypeAndName readColumnFromArrowColumn( break; } } + auto lc_type = std::make_shared(is_lc_nullable ? makeNullable(dict_column.type) : dict_column.type); auto tmp_lc_column = lc_type->createColumn(); auto tmp_dict_column = IColumn::mutate(assert_cast(tmp_lc_column.get())->getDictionaryPtr()); @@ -1002,7 +1048,7 @@ static ColumnWithTypeAndName readColumnFromArrowColumn( // TODO: read UUID as a string? 
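Note on the refactored Arrow reader above: the old `bool & skipped` out-parameter is gone; a column that has to be skipped (for example an unsupported type during schema inference) is now signalled by returning an empty `ColumnWithTypeAndName`, and every caller checks the result, as in the `if (!nested_column.column) return {};` and `if (!dict_column.column) return {};` lines above. A toy, self-contained illustration of that convention with stand-in types (not the real ClickHouse classes):

    #include <iostream>
    #include <memory>
    #include <stdexcept>
    #include <string>
    #include <vector>

    /// Stand-ins for ColumnPtr / ColumnWithTypeAndName: a default-constructed result
    /// (null column) plays the role of the removed `skipped` flag.
    using ToyColumnPtr = std::shared_ptr<std::vector<int>>;

    struct ToyColumnWithTypeAndName
    {
        ToyColumnPtr column;
        std::string name;
    };

    ToyColumnWithTypeAndName readToyColumn(bool type_is_supported, bool skip_unsupported_types)
    {
        if (!type_is_supported)
        {
            if (skip_unsupported_types)
                return {}; /// the old `skipped = true; return {};` collapses into one empty result
            throw std::runtime_error("Unsupported column type");
        }
        return {std::make_shared<std::vector<int>>(std::vector<int>{1, 2, 3}), "col"};
    }

    int main()
    {
        auto result = readToyColumn(/*type_is_supported=*/ false, /*skip_unsupported_types=*/ true);
        std::cout << (result.column ? "read" : "skipped") << '\n'; /// prints "skipped"
    }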
case arrow::Type::NA: { - if (allow_null_type) + if (settings.allow_arrow_null_type) { auto type = std::make_shared(); auto column = ColumnNothing::create(arrow_column->length()); @@ -1012,11 +1058,8 @@ static ColumnWithTypeAndName readColumnFromArrowColumn( } default: { - if (skip_columns_with_unsupported_types) - { - skipped = true; + if (settings.skip_columns_with_unsupported_types) return {}; - } throw Exception( ErrorCodes::UNKNOWN_TYPE, @@ -1024,14 +1067,59 @@ static ColumnWithTypeAndName readColumnFromArrowColumn( "If it happens during schema inference and you want to skip columns with " "unsupported types, you can enable setting input_format_{}" "_skip_columns_with_unsupported_types_in_schema_inference", - format_name, + settings.format_name, arrow_column->type()->name(), column_name, - boost::algorithm::to_lower_copy(format_name)); + boost::algorithm::to_lower_copy(settings.format_name)); } } } +static ColumnWithTypeAndName readColumnFromArrowColumn( + const std::shared_ptr & arrow_column, + std::string column_name, + std::unordered_map dictionary_infos, + DataTypePtr type_hint, + bool is_nullable_column, + bool is_map_nested_column, + const ReadColumnFromArrowColumnSettings & settings) +{ + bool read_as_nullable_column = arrow_column->null_count() || is_nullable_column || (type_hint && type_hint->isNullable()); + if (read_as_nullable_column && + arrow_column->type()->id() != arrow::Type::LIST && + arrow_column->type()->id() != arrow::Type::LARGE_LIST && + arrow_column->type()->id() != arrow::Type::MAP && + arrow_column->type()->id() != arrow::Type::STRUCT && + arrow_column->type()->id() != arrow::Type::DICTIONARY) + { + DataTypePtr nested_type_hint; + if (type_hint) + nested_type_hint = removeNullable(type_hint); + + auto nested_column = readNonNullableColumnFromArrowColumn(arrow_column, + column_name, + dictionary_infos, + nested_type_hint, + is_map_nested_column, + settings); + + if (!nested_column.column) + return {}; + + auto nullmap_column = readByteMapFromArrowColumn(arrow_column); + auto nullable_type = std::make_shared(std::move(nested_column.type)); + auto nullable_column = ColumnNullable::create(nested_column.column, nullmap_column); + + return {std::move(nullable_column), std::move(nullable_type), column_name}; + } + + return readNonNullableColumnFromArrowColumn(arrow_column, + column_name, + dictionary_infos, + type_hint, + is_map_nested_column, + settings); +} // Creating CH header by arrow schema. Will be useful in task about inserting // data from file without knowing table structure. 
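Note on the `readColumnFromArrowColumn` wrapper above: a column is read as Nullable when the Arrow field is declared nullable, the chunk actually contains nulls, or the type hint is Nullable; the plain values are read first and a null map produced by `readByteMapFromArrowColumn` is attached on top. The essential transformation is inverting Arrow's validity flags (1 = value present) into ClickHouse's null-map bytes (1 = NULL). A self-contained toy version, with plain vectors standing in for the real column classes:

    #include <cstdint>
    #include <iostream>
    #include <string>
    #include <vector>

    struct ToyNullableColumn
    {
        std::vector<int64_t> values;
        std::vector<uint8_t> null_map; /// 1 = NULL, same length as values
    };

    /// Arrow keeps a validity bitmap per array chunk (1 = valid);
    /// ClickHouse keeps a separate byte map where 1 marks a NULL row.
    ToyNullableColumn wrapAsNullable(std::vector<int64_t> values, const std::vector<bool> & arrow_validity)
    {
        ToyNullableColumn result;
        result.null_map.reserve(values.size());
        for (bool is_valid : arrow_validity)
            result.null_map.push_back(is_valid ? 0 : 1);
        result.values = std::move(values);
        return result;
    }

    int main()
    {
        auto column = wrapAsNullable({10, 20, 30}, {true, false, true});
        for (size_t i = 0; i < column.values.size(); ++i)
            std::cout << (column.null_map[i] ? "NULL" : std::to_string(column.values[i])) << '\n';
    }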
@@ -1042,44 +1130,56 @@ static void checkStatus(const arrow::Status & status, const String & column_name throw Exception{ErrorCodes::UNKNOWN_EXCEPTION, "Error with a {} column '{}': {}.", format_name, column_name, status.ToString()}; } +/// Create empty arrow column using specified field +static std::shared_ptr createArrowColumn(const std::shared_ptr & field, const String & format_name) +{ + arrow::MemoryPool * pool = arrow::default_memory_pool(); + std::unique_ptr array_builder; + arrow::Status status = MakeBuilder(pool, field->type(), &array_builder); + checkStatus(status, field->name(), format_name); + + std::shared_ptr arrow_array; + status = array_builder->Finish(&arrow_array); + checkStatus(status, field->name(), format_name); + + return std::make_shared(arrow::ArrayVector{arrow_array}); +} Block ArrowColumnToCHColumn::arrowSchemaToCHHeader( - const arrow::Schema & schema, const std::string & format_name, - bool skip_columns_with_unsupported_types, const Block * hint_header, bool ignore_case) + const arrow::Schema & schema, + const std::string & format_name, + bool skip_columns_with_unsupported_types) { + ReadColumnFromArrowColumnSettings settings + { + .format_name = format_name, + .date_time_overflow_behavior = FormatSettings::DateTimeOverflowBehavior::Ignore, + .allow_arrow_null_type = false, + .skip_columns_with_unsupported_types = skip_columns_with_unsupported_types + }; + ColumnsWithTypeAndName sample_columns; - std::unordered_set nested_table_names; - if (hint_header) - nested_table_names = Nested::getAllTableNames(*hint_header, ignore_case); for (const auto & field : schema.fields()) { - if (hint_header && !hint_header->has(field->name(), ignore_case) - && !nested_table_names.contains(ignore_case ? boost::to_lower_copy(field->name()) : field->name())) - continue; - /// Create empty arrow column by it's type and convert it to ClickHouse column. 
- arrow::MemoryPool * pool = arrow::default_memory_pool(); - std::unique_ptr array_builder; - arrow::Status status = MakeBuilder(pool, field->type(), &array_builder); - checkStatus(status, field->name(), format_name); + auto arrow_column = createArrowColumn(field, format_name); - std::shared_ptr arrow_array; - status = array_builder->Finish(&arrow_array); - checkStatus(status, field->name(), format_name); - - arrow::ArrayVector array_vector = {arrow_array}; - auto arrow_column = std::make_shared(array_vector); std::unordered_map dict_infos; - bool skipped = false; - bool allow_null_type = false; - if (hint_header && hint_header->has(field->name()) && hint_header->getByName(field->name()).type->isNullable()) - allow_null_type = true; - ColumnWithTypeAndName sample_column = readColumnFromArrowColumn( - arrow_column, field->name(), format_name, false, dict_infos, allow_null_type, skip_columns_with_unsupported_types, skipped); - if (!skipped) + + auto sample_column = readColumnFromArrowColumn( + arrow_column, + field->name(), + dict_infos, + nullptr /*nested_type_hint*/, + field->nullable() /*is_nullable_column*/, + false /*is_map_nested_column*/, + settings); + + if (sample_column.column) sample_columns.emplace_back(std::move(sample_column)); } + return Block(std::move(sample_columns)); } @@ -1101,30 +1201,43 @@ ArrowColumnToCHColumn::ArrowColumnToCHColumn( { } -void ArrowColumnToCHColumn::arrowTableToCHChunk(Chunk & res, std::shared_ptr & table, size_t num_rows, BlockMissingValues * block_missing_values) +Chunk ArrowColumnToCHColumn::arrowTableToCHChunk(const std::shared_ptr & table, size_t num_rows, BlockMissingValues * block_missing_values) { - NameToColumnPtr name_to_column_ptr; + NameToArrowColumn name_to_arrow_column; + for (auto column_name : table->ColumnNames()) { - std::shared_ptr arrow_column = table->GetColumnByName(column_name); + auto arrow_column = table->GetColumnByName(column_name); if (!arrow_column) throw Exception(ErrorCodes::DUPLICATE_COLUMN, "Column '{}' is duplicated", column_name); + auto arrow_field = table->schema()->GetFieldByName(column_name); + if (case_insensitive_matching) boost::to_lower(column_name); - name_to_column_ptr[std::move(column_name)] = arrow_column; + + name_to_arrow_column[std::move(column_name)] = {std::move(arrow_column), std::move(arrow_field)}; } - arrowColumnsToCHChunk(res, name_to_column_ptr, num_rows, block_missing_values); + return arrowColumnsToCHChunk(name_to_arrow_column, num_rows, block_missing_values); } -void ArrowColumnToCHColumn::arrowColumnsToCHChunk(Chunk & res, NameToColumnPtr & name_to_column_ptr, size_t num_rows, BlockMissingValues * block_missing_values) +Chunk ArrowColumnToCHColumn::arrowColumnsToCHChunk(const NameToArrowColumn & name_to_arrow_column, size_t num_rows, BlockMissingValues * block_missing_values) { - Columns columns_list; - columns_list.reserve(header.columns()); + ReadColumnFromArrowColumnSettings settings + { + .format_name = format_name, + .date_time_overflow_behavior = date_time_overflow_behavior, + .allow_arrow_null_type = true, + .skip_columns_with_unsupported_types = false + }; + + Columns columns; + columns.reserve(header.columns()); + std::unordered_map>> nested_tables; - bool skipped = false; - for (size_t column_i = 0, columns = header.columns(); column_i < columns; ++column_i) + + for (size_t column_i = 0, header_columns = header.columns(); column_i < header_columns; ++column_i) { const ColumnWithTypeAndName & header_column = header.getByPosition(column_i); @@ -1133,15 +1246,17 @@ void 
ArrowColumnToCHColumn::arrowColumnsToCHChunk(Chunk & res, NameToColumnPtr & boost::to_lower(search_column_name); ColumnWithTypeAndName column; - if (!name_to_column_ptr.contains(search_column_name)) + if (!name_to_arrow_column.contains(search_column_name)) { bool read_from_nested = false; + /// Check if it's a subcolumn from some struct. String nested_table_name = Nested::extractTableName(header_column.name); String search_nested_table_name = nested_table_name; if (case_insensitive_matching) boost::to_lower(search_nested_table_name); - if (name_to_column_ptr.contains(search_nested_table_name)) + + if (name_to_arrow_column.contains(search_nested_table_name)) { if (!nested_tables.contains(search_nested_table_name)) { @@ -1153,10 +1268,19 @@ void ArrowColumnToCHColumn::arrowColumnsToCHChunk(Chunk & res, NameToColumnPtr & } auto nested_table_type = Nested::collect(nested_columns).front().type; - std::shared_ptr arrow_column = name_to_column_ptr[search_nested_table_name]; - ColumnsWithTypeAndName cols = { - readColumnFromArrowColumn(arrow_column, nested_table_name, format_name, false, dictionary_infos, true, false, - skipped, date_time_overflow_behavior, nested_table_type)}; + const auto & arrow_column = name_to_arrow_column.find(search_nested_table_name)->second; + + ColumnsWithTypeAndName cols = + { + readColumnFromArrowColumn(arrow_column.column, + nested_table_name, + dictionary_infos, + nested_table_type, + arrow_column.field->nullable() /*is_nullable_column*/, + false /*is_map_nested_column*/, + settings) + }; + BlockPtr block_ptr = std::make_shared(cols); auto column_extractor = std::make_shared(*block_ptr, case_insensitive_matching); nested_tables[search_nested_table_name] = {block_ptr, column_extractor}; @@ -1180,7 +1304,7 @@ void ArrowColumnToCHColumn::arrowColumnsToCHChunk(Chunk & res, NameToColumnPtr & column.name = header_column.name; column.type = header_column.type; column.column = header_column.column->cloneResized(num_rows); - columns_list.push_back(std::move(column.column)); + columns.push_back(std::move(column.column)); if (block_missing_values) block_missing_values->setBits(column_i, num_rows); continue; @@ -1189,9 +1313,14 @@ void ArrowColumnToCHColumn::arrowColumnsToCHChunk(Chunk & res, NameToColumnPtr & } else { - auto arrow_column = name_to_column_ptr[search_column_name]; - column = readColumnFromArrowColumn( - arrow_column, header_column.name, format_name, false, dictionary_infos, true, false, skipped, date_time_overflow_behavior, header_column.type); + const auto & arrow_column = name_to_arrow_column.find(search_column_name)->second; + column = readColumnFromArrowColumn(arrow_column.column, + header_column.name, + dictionary_infos, + header_column.type, + arrow_column.field->nullable(), + false /*is_map_nested_column*/, + settings); } if (null_as_default) @@ -1216,10 +1345,10 @@ void ArrowColumnToCHColumn::arrowColumnsToCHChunk(Chunk & res, NameToColumnPtr & } column.type = header_column.type; - columns_list.push_back(std::move(column.column)); + columns.push_back(std::move(column.column)); } - res.setColumns(columns_list, num_rows); + return Chunk(std::move(columns), num_rows); } } diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h index 079e03749176..27e9afdf763a 100644 --- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h +++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h @@ -19,8 +19,6 @@ class Chunk; class ArrowColumnToCHColumn { public: - using NameToColumnPtr = std::unordered_map>; - 
ArrowColumnToCHColumn( const Block & header_, const std::string & format_name_, @@ -30,18 +28,13 @@ class ArrowColumnToCHColumn bool case_insensitive_matching_ = false, bool is_stream_ = false); - void arrowTableToCHChunk(Chunk & res, std::shared_ptr & table, size_t num_rows, BlockMissingValues * block_missing_values = nullptr); - - void arrowColumnsToCHChunk(Chunk & res, NameToColumnPtr & name_to_column_ptr, size_t num_rows, BlockMissingValues * block_missing_values = nullptr); + Chunk arrowTableToCHChunk(const std::shared_ptr & table, size_t num_rows, BlockMissingValues * block_missing_values = nullptr); - /// Transform arrow schema to ClickHouse header. If hint_header is provided, - /// we will skip columns in schema that are not in hint_header. + /// Transform arrow schema to ClickHouse header static Block arrowSchemaToCHHeader( const arrow::Schema & schema, const std::string & format_name, - bool skip_columns_with_unsupported_types = false, - const Block * hint_header = nullptr, - bool ignore_case = false); + bool skip_columns_with_unsupported_types = false); struct DictionaryInfo { @@ -52,6 +45,16 @@ class ArrowColumnToCHColumn private: + struct ArrowColumn + { + std::shared_ptr column; + std::shared_ptr field; + }; + + using NameToArrowColumn = std::unordered_map; + + Chunk arrowColumnsToCHChunk(const NameToArrowColumn & name_to_arrow_column, size_t num_rows, BlockMissingValues * block_missing_values); + const Block & header; const std::string format_name; /// If false, throw exception if some columns in header not exists in arrow table. diff --git a/src/Processors/Formats/Impl/HiveTextRowInputFormat.cpp b/src/Processors/Formats/Impl/HiveTextRowInputFormat.cpp index 9f2a795427c5..b64318e40930 100644 --- a/src/Processors/Formats/Impl/HiveTextRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/HiveTextRowInputFormat.cpp @@ -19,6 +19,7 @@ static FormatSettings updateFormatSettings(const FormatSettings & settings, cons updated.date_time_input_format = FormatSettings::DateTimeInputFormat::BestEffort; updated.defaults_for_omitted_fields = true; updated.csv.delimiter = updated.hive_text.fields_delimiter; + updated.csv.allow_variable_number_of_columns = settings.hive_text.allow_variable_number_of_columns; if (settings.hive_text.input_field_names.empty()) updated.hive_text.input_field_names = header.getNames(); return updated; @@ -44,9 +45,6 @@ HiveTextFormatReader::HiveTextFormatReader(PeekableReadBuffer & buf_, const Form std::vector HiveTextFormatReader::readNames() { - PeekableReadBufferCheckpoint checkpoint{*buf, true}; - auto values = readHeaderRow(); - input_field_names.resize(values.size()); return input_field_names; } diff --git a/src/Processors/Formats/Impl/NativeFormat.cpp b/src/Processors/Formats/Impl/NativeFormat.cpp index 73ffc02bbc1c..a7a49ab6a8c6 100644 --- a/src/Processors/Formats/Impl/NativeFormat.cpp +++ b/src/Processors/Formats/Impl/NativeFormat.cpp @@ -82,7 +82,7 @@ class NativeOutputFormat final : public IOutputFormat std::string getContentType() const override { - return writer.getContentType(); + return NativeWriter::getContentType(); } protected: diff --git a/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp b/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp index a41eacf26b7a..aa83b87b2d27 100644 --- a/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp @@ -71,12 +71,10 @@ Chunk ORCBlockInputFormat::read() approx_bytes_read_for_chunk = 
file_reader->GetRawORCReader()->getStripe(stripe_current)->getDataLength(); ++stripe_current; - Chunk res; /// If defaults_for_omitted_fields is true, calculate the default values from default expression for omitted fields. /// Otherwise fill the missing columns with zero values of its type. BlockMissingValues * block_missing_values_ptr = format_settings.defaults_for_omitted_fields ? &block_missing_values : nullptr; - arrow_column_to_ch_column->arrowTableToCHChunk(res, table, num_rows, block_missing_values_ptr); - return res; + return arrow_column_to_ch_column->arrowTableToCHChunk(table, num_rows, block_missing_values_ptr); } void ORCBlockInputFormat::resetParser() diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp index 62e576d49535..d41cb3447deb 100644 --- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp @@ -601,7 +601,7 @@ void ParquetBlockInputFormat::decodeOneChunk(size_t row_group_batch_idx, std::un /// If defaults_for_omitted_fields is true, calculate the default values from default expression for omitted fields. /// Otherwise fill the missing columns with zero values of its type. BlockMissingValues * block_missing_values_ptr = format_settings.defaults_for_omitted_fields ? &res.block_missing_values : nullptr; - row_group_batch.arrow_column_to_ch_column->arrowTableToCHChunk(res.chunk, *tmp_table, (*tmp_table)->num_rows(), block_missing_values_ptr); + res.chunk = row_group_batch.arrow_column_to_ch_column->arrowTableToCHChunk(*tmp_table, (*tmp_table)->num_rows(), block_missing_values_ptr); lock.lock(); diff --git a/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.h b/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.h index aa2215731516..db8ee66ab2b5 100644 --- a/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.h +++ b/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.h @@ -1,9 +1,10 @@ #pragma once +#include #include -#include #include #include +#include namespace DB { diff --git a/src/Processors/Merges/Algorithms/Graphite.cpp b/src/Processors/Merges/Algorithms/Graphite.cpp index 817961b709c2..0865ec8c0bb3 100644 --- a/src/Processors/Merges/Algorithms/Graphite.cpp +++ b/src/Processors/Merges/Algorithms/Graphite.cpp @@ -1,16 +1,17 @@ -#include -#include #include +#include #include #include +#include #include +#include +#include #include #include #include #include -#include #include @@ -61,6 +62,23 @@ RuleType ruleType(const String & s) throw Exception(DB::ErrorCodes::BAD_ARGUMENTS, "invalid rule type: {}", s); } +void Pattern::updateHash(SipHash & hash) const +{ + hash.update(rule_type); + hash.update(regexp_str); + if (function) + { + hash.update(function->getName()); + for (const auto & p : function->getParameters()) + hash.update(toString(p)); + } + for (const auto & r : retentions) + { + hash.update(r.age); + hash.update(r.precision); + } +} + static const Graphite::Pattern undef_pattern = { /// empty pattern for selectPatternForPath .rule_type = RuleTypeAll, @@ -68,7 +86,7 @@ static const Graphite::Pattern undef_pattern = .regexp_str = "", .function = nullptr, .retentions = Graphite::Retentions(), - .type = undef_pattern.TypeUndef, + .type = Graphite::Pattern::TypeUndef, }; inline static const Patterns & selectPatternsForMetricType(const Graphite::Params & params, std::string_view path) @@ -76,7 +94,7 @@ inline static const Patterns & selectPatternsForMetricType(const Graphite::Param if 
(params.patterns_typed) { std::string_view path_view = path; - if (path_view.find("?"sv) == path_view.npos) + if (path_view.find("?"sv) == std::string::npos) return params.patterns_plain; else return params.patterns_tagged; @@ -100,18 +118,18 @@ Graphite::RollupRule selectPatternForPath( if (!pattern.regexp) { /// Default pattern - if (first_match->type == first_match->TypeUndef && pattern.type == pattern.TypeAll) + if (first_match->type == Graphite::Pattern::TypeUndef && pattern.type == Graphite::Pattern::TypeAll) { /// There is only default pattern for both retention and aggregation return {&pattern, &pattern}; } if (pattern.type != first_match->type) { - if (first_match->type == first_match->TypeRetention) + if (first_match->type == Graphite::Pattern::TypeRetention) { return {first_match, &pattern}; } - if (first_match->type == first_match->TypeAggregation) + if (first_match->type == Graphite::Pattern::TypeAggregation) { return {&pattern, first_match}; } @@ -122,23 +140,23 @@ Graphite::RollupRule selectPatternForPath( if (pattern.regexp->match(path.data(), path.size())) { /// General pattern with matched path - if (pattern.type == pattern.TypeAll) + if (pattern.type == Graphite::Pattern::TypeAll) { /// Only for not default patterns with both function and retention parameters return {&pattern, &pattern}; } - if (first_match->type == first_match->TypeUndef) + if (first_match->type == Graphite::Pattern::TypeUndef) { first_match = &pattern; continue; } if (pattern.type != first_match->type) { - if (first_match->type == first_match->TypeRetention) + if (first_match->type == Graphite::Pattern::TypeRetention) { return {first_match, &pattern}; } - if (first_match->type == first_match->TypeAggregation) + if (first_match->type == Graphite::Pattern::TypeAggregation) { return {&pattern, first_match}; } @@ -397,24 +415,24 @@ static const Pattern & appendGraphitePattern( if (!pattern.function) { - pattern.type = pattern.TypeRetention; + pattern.type = Graphite::Pattern::TypeRetention; } else if (pattern.retentions.empty()) { - pattern.type = pattern.TypeAggregation; + pattern.type = Graphite::Pattern::TypeAggregation; } else { - pattern.type = pattern.TypeAll; + pattern.type = Graphite::Pattern::TypeAll; } - if (pattern.type & pattern.TypeAggregation) /// TypeAggregation or TypeAll + if (pattern.type & Graphite::Pattern::TypeAggregation) /// TypeAggregation or TypeAll if (pattern.function->allocatesMemoryInArena()) throw Exception(DB::ErrorCodes::NOT_IMPLEMENTED, "Aggregate function {} isn't supported in GraphiteMergeTree", pattern.function->getName()); /// retention should be in descending order of age. - if (pattern.type & pattern.TypeRetention) /// TypeRetention or TypeAll + if (pattern.type & Graphite::Pattern::TypeRetention) /// TypeRetention or TypeAll ::sort(pattern.retentions.begin(), pattern.retentions.end(), compareRetentions); patterns.emplace_back(pattern); diff --git a/src/Processors/Merges/Algorithms/Graphite.h b/src/Processors/Merges/Algorithms/Graphite.h index 04bb4548c146..ce3331053d14 100644 --- a/src/Processors/Merges/Algorithms/Graphite.h +++ b/src/Processors/Merges/Algorithms/Graphite.h @@ -1,8 +1,9 @@ #pragma once -#include +#include +#include #include -#include +#include /** Intended for implementation of "rollup" - aggregation (rounding) of older data * for a table with Graphite data (Graphite is the system for time series monitoring). @@ -123,22 +124,7 @@ struct Pattern AggregateFunctionPtr function; Retentions retentions; /// Must be ordered by 'age' descending. 
enum { TypeUndef, TypeRetention, TypeAggregation, TypeAll } type = TypeAll; /// The type of defined pattern, filled automatically - void updateHash(SipHash & hash) const - { - hash.update(rule_type); - hash.update(regexp_str); - if (function) - { - hash.update(function->getName()); - for (const auto & p : function->getParameters()) - hash.update(toString(p)); - } - for (const auto & r : retentions) - { - hash.update(r.age); - hash.update(r.precision); - } - } + void updateHash(SipHash & hash) const; }; bool operator==(const Pattern & a, const Pattern & b); diff --git a/src/Processors/QueryPlan/AggregatingStep.cpp b/src/Processors/QueryPlan/AggregatingStep.cpp index 74f293e5682f..0d7e05af1de1 100644 --- a/src/Processors/QueryPlan/AggregatingStep.cpp +++ b/src/Processors/QueryPlan/AggregatingStep.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp b/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp index c8d230c87d92..d45454824770 100644 --- a/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp +++ b/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp @@ -68,6 +68,10 @@ std::unique_ptr createLocalPlan( if (context->getSettingsRef().allow_experimental_analyzer) { + /// For Analyzer, identifier in GROUP BY/ORDER BY/LIMIT BY lists has been resolved to + /// ConstantNode in QueryTree if it is an alias of a constant, so we should not replace + /// ConstantNode with ProjectionNode again(https://github.com/ClickHouse/ClickHouse/issues/62289). + new_context->setSetting("enable_positional_arguments", Field(false)); auto interpreter = InterpreterSelectQueryAnalyzer(query_ast, new_context, select_query_options); query_plan = std::make_unique(std::move(interpreter).extractQueryPlan()); } diff --git a/src/Processors/QueryPlan/FilterStep.h b/src/Processors/QueryPlan/FilterStep.h index e020cd3c4d3d..939d0900c867 100644 --- a/src/Processors/QueryPlan/FilterStep.h +++ b/src/Processors/QueryPlan/FilterStep.h @@ -24,6 +24,7 @@ class FilterStep : public ITransformingStep void describeActions(FormatSettings & settings) const override; const ActionsDAGPtr & getExpression() const { return actions_dag; } + ActionsDAGPtr & getExpression() { return actions_dag; } const String & getFilterColumnName() const { return filter_column_name; } bool removesFilterColumn() const { return remove_filter_column; } diff --git a/src/Processors/QueryPlan/JoinStep.cpp b/src/Processors/QueryPlan/JoinStep.cpp index 1931b1eb3a14..8fe2515e3233 100644 --- a/src/Processors/QueryPlan/JoinStep.cpp +++ b/src/Processors/QueryPlan/JoinStep.cpp @@ -31,7 +31,7 @@ std::vector> describeJoinActions(const JoinPtr & join) description.emplace_back("ASOF inequality", toString(table_join.getAsofInequality())); if (!table_join.getClauses().empty()) - description.emplace_back("Clauses", table_join.formatClauses(table_join.getClauses(), true /*short_format*/)); + description.emplace_back("Clauses", TableJoin::formatClauses(table_join.getClauses(), true /*short_format*/)); return description; } diff --git a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp index e71bcc5602aa..ebf780bb692e 100644 --- a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp +++ b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp @@ -100,7 +100,7 @@ static NameSet findIdentifiersOfNode(const ActionsDAG::Node * node) return res; } -static ActionsDAGPtr splitFilter(QueryPlan::Node * 
parent_node, const Names & allowed_inputs, size_t child_idx = 0) +static ActionsDAGPtr splitFilter(QueryPlan::Node * parent_node, const Names & available_inputs, size_t child_idx = 0) { QueryPlan::Node * child_node = parent_node->children.front(); checkChildrenSize(child_node, child_idx + 1); @@ -114,14 +114,12 @@ static ActionsDAGPtr splitFilter(QueryPlan::Node * parent_node, const Names & al bool removes_filter = filter->removesFilterColumn(); const auto & all_inputs = child->getInputStreams()[child_idx].header.getColumnsWithTypeAndName(); - - auto split_filter = expression->cloneActionsForFilterPushDown(filter_column_name, removes_filter, allowed_inputs, all_inputs); - return split_filter; + return expression->splitActionsForFilterPushDown(filter_column_name, removes_filter, available_inputs, all_inputs); } static size_t -tryAddNewFilterStep(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes, const ActionsDAGPtr & split_filter, - bool can_remove_filter = true, size_t child_idx = 0) +addNewFilterStepOrThrow(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes, const ActionsDAGPtr & split_filter, + bool can_remove_filter = true, size_t child_idx = 0, bool update_parent_filter = true) { QueryPlan::Node * child_node = parent_node->children.front(); checkChildrenSize(child_node, child_idx + 1); @@ -134,21 +132,18 @@ tryAddNewFilterStep(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes, con const auto & filter_column_name = filter->getFilterColumnName(); const auto * filter_node = expression->tryFindInOutputs(filter_column_name); - if (!filter_node && !filter->removesFilterColumn()) + if (update_parent_filter && !filter_node && !filter->removesFilterColumn()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Filter column {} was removed from ActionsDAG but it is needed in result. DAG:\n{}", filter_column_name, expression->dumpDAG()); - /// Filter column was replaced to constant. - const bool filter_is_constant = filter_node && filter_node->column && isColumnConst(*filter_node->column); - - /// Add new Filter step before Aggregating. - /// Expression/Filter -> Aggregating -> Something + /// Add new Filter step before Child. + /// Expression/Filter -> Child -> Something auto & node = nodes.emplace_back(); node.children.emplace_back(&node); std::swap(node.children[0], child_node->children[child_idx]); - /// Expression/Filter -> Aggregating -> Filter -> Something + /// Expression/Filter -> Child -> Filter -> Something /// New filter column is the first one. String split_filter_column_name = split_filter->getOutputs().front()->result_name; @@ -171,12 +166,22 @@ tryAddNewFilterStep(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes, con ErrorCodes::LOGICAL_ERROR, "We are trying to push down a filter through a step for which we cannot update input stream"); } - if (!filter_node || filter_is_constant) - /// This means that all predicates of filter were pushed down. - /// Replace current actions to expression, as we don't need to filter anything. - parent = std::make_unique(child->getOutputStream(), expression); - else - filter->updateInputStream(child->getOutputStream()); + if (update_parent_filter) + { + /// Filter column was replaced to constant. + const bool filter_is_constant = filter_node && filter_node->column && isColumnConst(*filter_node->column); + + if (!filter_node || filter_is_constant) + { + /// This means that all predicates of filter were pushed down. + /// Replace current actions to expression, as we don't need to filter anything. 
+ parent = std::make_unique(child->getOutputStream(), expression); + } + else + { + filter->updateInputStream(child->getOutputStream()); + } + } return 3; } @@ -186,7 +191,7 @@ tryAddNewFilterStep(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes, con bool can_remove_filter = true, size_t child_idx = 0) { if (auto split_filter = splitFilter(parent_node, allowed_inputs, child_idx)) - return tryAddNewFilterStep(parent_node, nodes, split_filter, can_remove_filter, child_idx); + return addNewFilterStepOrThrow(parent_node, nodes, split_filter, can_remove_filter, child_idx); return 0; } @@ -204,6 +209,204 @@ static size_t simplePushDownOverStep(QueryPlan::Node * parent_node, QueryPlan::N return 0; } +static size_t tryPushDownOverJoinStep(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes, QueryPlanStepPtr & child) +{ + auto & parent = parent_node->step; + auto * filter = assert_cast(parent.get()); + + auto * join = typeid_cast(child.get()); + auto * filled_join = typeid_cast(child.get()); + + if (!join && !filled_join) + return 0; + + /** For equivalent JOIN with condition `ON lhs.x_1 = rhs.y_1 AND lhs.x_2 = rhs.y_2 ...`, we can build equivalent sets of columns and this + * will allow to push conditions that only use columns from equivalent sets to both sides of JOIN, without considering JOIN type. + * + * For example: `FROM lhs INNER JOIN rhs ON lhs.id = rhs.id AND lhs.value = rhs.value` + * In this example columns `id` and `value` from both tables are equivalent. + * + * During filter push down for different JOIN types filter push down logic is different: + * + * 1. For INNER JOIN we can push all valid conditions to both sides of JOIN. We also can push all valid conditions that use columns from + * equivalent sets to both sides of JOIN. + * 2. For LEFT/RIGHT JOIN we can push conditions that use columns from LEFT/RIGHT stream to LEFT/RIGHT JOIN side. We can also push conditions + * that use columns from LEFT/RIGHT equivalent sets to RIGHT/LEFT JOIN side. + * + * Additional filter push down optimizations: + * 1. TODO: Support building equivalent sets for more than 2 JOINS. It is possible, but will require more complex analysis step. + * 2. TODO: Support building equivalent sets for JOINs with more than 1 clause. + * 3. TODO: For LEFT/RIGHT JOIN, we can assume that RIGHT/LEFT columns used in filter will be default/NULL constants and + * check if filter will always be false, in those scenario we can transform LEFT/RIGHT JOIN into INNER JOIN and push conditions to both tables. + * 4. TODO: It is possible to pull up filter conditions from LEFT/RIGHT stream and push conditions that use columns from LEFT/RIGHT equivalent sets + * to RIGHT/LEFT JOIN side. + */ + + const auto & join_header = child->getOutputStream().header; + const auto & table_join = join ? 
join->getJoin()->getTableJoin() : filled_join->getJoin()->getTableJoin(); + const auto & left_stream_input_header = child->getInputStreams().front().header; + const auto & right_stream_input_header = child->getInputStreams().back().header; + + if (table_join.kind() == JoinKind::Full) + return 0; + + std::unordered_map equivalent_left_stream_column_to_right_stream_column; + std::unordered_map equivalent_right_stream_column_to_left_stream_column; + + bool has_single_clause = table_join.getClauses().size() == 1; + + if (has_single_clause) + { + const auto & join_clause = table_join.getClauses()[0]; + size_t key_names_size = join_clause.key_names_left.size(); + + for (size_t i = 0; i < key_names_size; ++i) + { + const auto & left_table_key_name = join_clause.key_names_left[i]; + const auto & right_table_key_name = join_clause.key_names_right[i]; + + if (!join_header.has(left_table_key_name) || !join_header.has(right_table_key_name)) + continue; + + const auto & left_table_column = left_stream_input_header.getByName(left_table_key_name); + const auto & right_table_column = right_stream_input_header.getByName(right_table_key_name); + + if (!left_table_column.type->equals(*right_table_column.type)) + continue; + + equivalent_left_stream_column_to_right_stream_column[left_table_key_name] = right_table_column; + equivalent_right_stream_column_to_left_stream_column[right_table_key_name] = left_table_column; + } + } + + auto get_available_columns_for_filter = [&](bool push_to_left_stream, bool filter_push_down_input_columns_available) + { + Names available_input_columns_for_filter; + + if (!filter_push_down_input_columns_available) + return available_input_columns_for_filter; + + const auto & input_header = push_to_left_stream ? left_stream_input_header : right_stream_input_header; + const auto & input_columns_names = input_header.getNames(); + + for (const auto & name : input_columns_names) + { + if (!join_header.has(name)) + continue; + + /// Skip if type is changed. Push down expression expect equal types. + if (!input_header.getByName(name).type->equals(*join_header.getByName(name).type)) + continue; + + available_input_columns_for_filter.push_back(name); + } + + return available_input_columns_for_filter; + }; + + bool left_stream_filter_push_down_input_columns_available = true; + bool right_stream_filter_push_down_input_columns_available = true; + + if (table_join.kind() == JoinKind::Left) + right_stream_filter_push_down_input_columns_available = false; + else if (table_join.kind() == JoinKind::Right) + left_stream_filter_push_down_input_columns_available = false; + + /** We disable push down to right table in cases: + * 1. Right side is already filled. Example: JOIN with Dictionary. + * 2. ASOF Right join is not supported. 
+ */ + bool allow_push_down_to_right = join && join->allowPushDownToRight() && table_join.strictness() != JoinStrictness::Asof; + if (!allow_push_down_to_right) + right_stream_filter_push_down_input_columns_available = false; + + Names equivalent_columns_to_push_down; + + if (left_stream_filter_push_down_input_columns_available) + { + for (const auto & [name, _] : equivalent_left_stream_column_to_right_stream_column) + equivalent_columns_to_push_down.push_back(name); + } + + if (right_stream_filter_push_down_input_columns_available) + { + for (const auto & [name, _] : equivalent_right_stream_column_to_left_stream_column) + equivalent_columns_to_push_down.push_back(name); + } + + Names left_stream_available_columns_to_push_down = get_available_columns_for_filter(true /*push_to_left_stream*/, left_stream_filter_push_down_input_columns_available); + Names right_stream_available_columns_to_push_down = get_available_columns_for_filter(false /*push_to_left_stream*/, right_stream_filter_push_down_input_columns_available); + + auto join_filter_push_down_actions = filter->getExpression()->splitActionsForJOINFilterPushDown(filter->getFilterColumnName(), + filter->removesFilterColumn(), + left_stream_available_columns_to_push_down, + left_stream_input_header.getColumnsWithTypeAndName(), + right_stream_available_columns_to_push_down, + right_stream_input_header.getColumnsWithTypeAndName(), + equivalent_columns_to_push_down, + equivalent_left_stream_column_to_right_stream_column, + equivalent_right_stream_column_to_left_stream_column); + + size_t updated_steps = 0; + + if (join_filter_push_down_actions.left_stream_filter_to_push_down) + { + updated_steps += addNewFilterStepOrThrow(parent_node, + nodes, + join_filter_push_down_actions.left_stream_filter_to_push_down, + join_filter_push_down_actions.left_stream_filter_removes_filter, + 0 /*child_idx*/, + false /*update_parent_filter*/); + LOG_DEBUG(&Poco::Logger::get("QueryPlanOptimizations"), + "Pushed down filter {} to the {} side of join", + join_filter_push_down_actions.left_stream_filter_to_push_down->getOutputs()[0]->result_name, + JoinKind::Left); + } + + if (join_filter_push_down_actions.right_stream_filter_to_push_down) + { + updated_steps += addNewFilterStepOrThrow(parent_node, + nodes, + join_filter_push_down_actions.right_stream_filter_to_push_down, + join_filter_push_down_actions.right_stream_filter_removes_filter, + 1 /*child_idx*/, + false /*update_parent_filter*/); + LOG_DEBUG(&Poco::Logger::get("QueryPlanOptimizations"), + "Pushed down filter {} to the {} side of join", + join_filter_push_down_actions.right_stream_filter_to_push_down->getOutputs()[0]->result_name, + JoinKind::Right); + } + + if (updated_steps > 0) + { + const auto & filter_column_name = filter->getFilterColumnName(); + const auto & filter_expression = filter->getExpression(); + + const auto * filter_node = filter_expression->tryFindInOutputs(filter_column_name); + if (!filter_node && !filter->removesFilterColumn()) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Filter column {} was removed from ActionsDAG but it is needed in result. DAG:\n{}", + filter_column_name, filter_expression->dumpDAG()); + + + /// Filter column was replaced to constant. + const bool filter_is_constant = filter_node && filter_node->column && isColumnConst(*filter_node->column); + + if (!filter_node || filter_is_constant) + { + /// This means that all predicates of filter were pushed down. + /// Replace current actions to expression, as we don't need to filter anything. 
+ parent = std::make_unique(child->getOutputStream(), filter_expression); + } + else + { + filter->updateInputStream(child->getOutputStream()); + } + } + + return updated_steps; +} + size_t tryPushDownFilter(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes) { if (parent_node->children.size() != 1) @@ -317,9 +520,6 @@ size_t tryPushDownFilter(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes if (!keys.contains(column.name)) allowed_inputs.push_back(column.name); - // for (const auto & name : allowed_inputs) - // std::cerr << name << std::endl; - if (auto updated_steps = tryAddNewFilterStep(parent_node, nodes, allowed_inputs)) return updated_steps; } @@ -327,77 +527,8 @@ size_t tryPushDownFilter(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes if (auto updated_steps = simplePushDownOverStep(parent_node, nodes, child)) return updated_steps; - auto * join = typeid_cast(child.get()); - auto * filled_join = typeid_cast(child.get()); - - if (join || filled_join) - { - auto join_push_down = [&](JoinKind kind) -> size_t - { - const auto & table_join = join ? join->getJoin()->getTableJoin() : filled_join->getJoin()->getTableJoin(); - - /// Only inner, cross and left(/right) join are supported. Other types may generate default values for left table keys. - /// So, if we push down a condition like `key != 0`, not all rows may be filtered. - if (table_join.kind() != JoinKind::Inner && table_join.kind() != JoinKind::Cross && table_join.kind() != kind) - return 0; - - /// There is no ASOF Right join, so we're talking about pushing to the right side - if (kind == JoinKind::Right && table_join.strictness() == JoinStrictness::Asof) - return 0; - - bool is_left = kind == JoinKind::Left; - const auto & input_header = is_left ? child->getInputStreams().front().header : child->getInputStreams().back().header; - const auto & res_header = child->getOutputStream().header; - Names allowed_keys; - const auto & source_columns = input_header.getNames(); - for (const auto & name : source_columns) - { - /// Skip key if it is renamed. - /// I don't know if it is possible. Just in case. - if (!input_header.has(name) || !res_header.has(name)) - continue; - - /// Skip if type is changed. Push down expression expect equal types. - if (!input_header.getByName(name).type->equals(*res_header.getByName(name).type)) - continue; - - allowed_keys.push_back(name); - } - - /// For left JOIN, push down to the first child; for right - to the second one. - const auto child_idx = is_left ? 0 : 1; - ActionsDAGPtr split_filter = splitFilter(parent_node, allowed_keys, child_idx); - if (!split_filter) - return 0; - /* - * We should check the presence of a split filter column name in `source_columns` to avoid removing the required column. - * - * Example: - * A filter expression is `a AND b = c`, but `b` and `c` belong to another side of the join and not in `allowed_keys`, so the final split filter is just `a`. - * In this case `a` can be in `source_columns` but not `and(a, equals(b, c))`. - * - * New filter column is the first one. 
- */ - const String & split_filter_column_name = split_filter->getOutputs().front()->result_name; - bool can_remove_filter = source_columns.end() == std::find(source_columns.begin(), source_columns.end(), split_filter_column_name); - const size_t updated_steps = tryAddNewFilterStep(parent_node, nodes, split_filter, can_remove_filter, child_idx); - if (updated_steps > 0) - { - LOG_DEBUG(getLogger("QueryPlanOptimizations"), "Pushed down filter {} to the {} side of join", split_filter_column_name, kind); - } - return updated_steps; - }; - - if (size_t updated_steps = join_push_down(JoinKind::Left)) - return updated_steps; - - /// For full sorting merge join we push down both to the left and right tables, because left and right streams are not independent. - if (join && join->allowPushDownToRight()) - { - if (size_t updated_steps = join_push_down(JoinKind::Right)) - return updated_steps; - } - } + if (auto updated_steps = tryPushDownOverJoinStep(parent_node, nodes, child)) + return updated_steps; /// TODO. /// We can filter earlier if expression does not depend on WITH FILL columns. diff --git a/src/Processors/QueryPlan/PartsSplitter.cpp b/src/Processors/QueryPlan/PartsSplitter.cpp index 2af1bcb02605..64af48dd53c6 100644 --- a/src/Processors/QueryPlan/PartsSplitter.cpp +++ b/src/Processors/QueryPlan/PartsSplitter.cpp @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -125,14 +126,20 @@ int compareValues(const Values & lhs, const Values & rhs) class IndexAccess { public: - explicit IndexAccess(const RangesInDataParts & parts_) : parts(parts_) { } + explicit IndexAccess(const RangesInDataParts & parts_) : parts(parts_) + { + /// Some suffix of index columns might not be loaded (see `primary_key_ratio_of_unique_prefix_values_to_skip_suffix_columns`) + /// and we need to use the same set of index columns across all parts. 
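        /// Editor's note (illustrative addition, not part of the original patch): for example, if the primary key
        /// is (CounterID, Date, EventTime) and one part has all three index columns loaded while another part loaded
        /// only the first two, `loaded_columns` below becomes 2 and getValue() compares parts only by the common
        /// prefix (CounterID, Date). The column names here are hypothetical.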
+ for (const auto & part : parts) + loaded_columns = std::min(loaded_columns, part.data_part->getIndex().size()); + } Values getValue(size_t part_idx, size_t mark) const { const auto & index = parts[part_idx].data_part->getIndex(); - size_t size = index.size(); - Values values(size); - for (size_t i = 0; i < size; ++i) + chassert(index.size() >= loaded_columns); + Values values(loaded_columns); + for (size_t i = 0; i < loaded_columns; ++i) { index[i]->get(mark, values[i]); if (values[i].isNull()) @@ -199,6 +206,7 @@ class IndexAccess } private: const RangesInDataParts & parts; + size_t loaded_columns = std::numeric_limits::max(); }; class RangesInDataPartsBuilder diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index f4607cad040a..bee42c3dddec 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -1408,8 +1408,8 @@ static void buildIndexes( if (metadata_snapshot->hasPartitionKey()) { const auto & partition_key = metadata_snapshot->getPartitionKey(); - auto minmax_columns_names = data.getMinMaxColumnsNames(partition_key); - auto minmax_expression_actions = data.getMinMaxExpr(partition_key, ExpressionActionsSettings::fromContext(context)); + auto minmax_columns_names = MergeTreeData::getMinMaxColumnsNames(partition_key); + auto minmax_expression_actions = MergeTreeData::getMinMaxExpr(partition_key, ExpressionActionsSettings::fromContext(context)); indexes->minmax_idx_condition.emplace(filter_actions_dag, context, minmax_columns_names, minmax_expression_actions); indexes->partition_pruner.emplace(metadata_snapshot, filter_actions_dag, context, false /* strict */); diff --git a/src/Processors/QueryPlan/WindowStep.cpp b/src/Processors/QueryPlan/WindowStep.cpp index bb4f429d6268..3d1faf7c0797 100644 --- a/src/Processors/QueryPlan/WindowStep.cpp +++ b/src/Processors/QueryPlan/WindowStep.cpp @@ -1,10 +1,10 @@ +#include +#include +#include #include - -#include #include +#include #include -#include -#include #include namespace DB diff --git a/src/Processors/Transforms/DistinctTransform.cpp b/src/Processors/Transforms/DistinctTransform.cpp index 3619fa51bf6c..d528303a642b 100644 --- a/src/Processors/Transforms/DistinctTransform.cpp +++ b/src/Processors/Transforms/DistinctTransform.cpp @@ -55,6 +55,7 @@ void DistinctTransform::transform(Chunk & chunk) /// Convert to full column, because SetVariant for sparse column is not implemented. 
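    /// Editor's note (illustrative addition, not part of the original patch): the convertToFullIfConst(chunk)
    /// call added just below handles constant columns the same way: a constant column stores a single value plus
    /// a row count, and presumably the SetVariant used for DISTINCT also expects one materialized value per row,
    /// so constant columns are expanded to full columns before the rows are inserted into the set.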
convertToFullIfSparse(chunk); + convertToFullIfConst(chunk); const auto num_rows = chunk.getNumRows(); auto columns = chunk.detachColumns(); diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp index 02d2762dab30..f43b9a2e7948 100644 --- a/src/Processors/Transforms/WindowTransform.cpp +++ b/src/Processors/Transforms/WindowTransform.cpp @@ -1,23 +1,23 @@ -#include - -#include - #include -#include -#include -#include -#include -#include -#include #include +#include +#include #include +#include +#include +#include #include #include -#include -#include +#include #include #include -#include +#include +#include +#include +#include +#include + +#include /// See https://fmt.dev/latest/api.html#formatting-user-defined-types diff --git a/src/Processors/Transforms/WindowTransform.h b/src/Processors/Transforms/WindowTransform.h index 347c2516230f..43fa6b28019e 100644 --- a/src/Processors/Transforms/WindowTransform.h +++ b/src/Processors/Transforms/WindowTransform.h @@ -21,6 +21,8 @@ using ExpressionActionsPtr = std::shared_ptr; class Arena; +class IWindowFunction; + // Runtime data for computing one window function. struct WindowFunctionWorkspace { diff --git a/src/QueryPipeline/Pipe.cpp b/src/QueryPipeline/Pipe.cpp index 8050c7cc6714..34602ecccee2 100644 --- a/src/QueryPipeline/Pipe.cpp +++ b/src/QueryPipeline/Pipe.cpp @@ -13,8 +13,6 @@ #include #include -#include - namespace DB { diff --git a/src/Server/HTTP/ReadHeaders.cpp b/src/Server/HTTP/ReadHeaders.cpp index b70575010646..d6c7b8ddc0fe 100644 --- a/src/Server/HTTP/ReadHeaders.cpp +++ b/src/Server/HTTP/ReadHeaders.cpp @@ -77,7 +77,7 @@ void readHeaders( skipToNextLineOrEOF(in); Poco::trimRightInPlace(value); - headers.add(name, headers.decodeWord(value)); + headers.add(name, Poco::Net::MessageHeader::decodeWord(value)); ++fields; } } diff --git a/src/Server/ReplicasStatusHandler.cpp b/src/Server/ReplicasStatusHandler.cpp index 91c6bd722d32..67823117758f 100644 --- a/src/Server/ReplicasStatusHandler.cpp +++ b/src/Server/ReplicasStatusHandler.cpp @@ -51,7 +51,10 @@ void ReplicasStatusHandler::handleRequest(HTTPServerRequest & request, HTTPServe if (!db.second->canContainMergeTreeTables()) continue; - for (auto iterator = db.second->getTablesIterator(getContext()); iterator->isValid(); iterator->next()) + // Note that in case `async_load_databases = true` we do not want replica status handler to be hanging + // and waiting (in getTablesIterator() call) for every table to be load, so we just skip not-yet-loaded tables. + // If they have some lag it will be reflected as soon as they are load. 
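        // Editor's note (illustrative addition, not part of the original patch): the extra arguments in the call
        // below are, presumably, an empty table-name filter ({}) and a flag telling the iterator to skip tables
        // whose asynchronous loading has not finished yet, which is what keeps this handler from blocking.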
+ for (auto iterator = db.second->getTablesIterator(getContext(), {}, true); iterator->isValid(); iterator->next()) { const auto & table = iterator->table(); if (!table) diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 5c08c6974346..4e3d6ab69f65 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -1371,17 +1371,6 @@ std::string formatHTTPErrorResponseWhenUserIsConnectedToWrongPort(const Poco::Ut return result; } -[[ maybe_unused ]] String createChallenge() -{ -#if USE_SSL - pcg64_fast rng(randomSeed()); - UInt64 rand = rng(); - return encodeSHA256(&rand, sizeof(rand)); -#else - throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Can't generate challenge, because ClickHouse was built without OpenSSL"); -#endif -} - } std::unique_ptr TCPHandler::makeSession() @@ -1399,16 +1388,6 @@ std::unique_ptr TCPHandler::makeSession() return res; } -String TCPHandler::prepareStringForSshValidation(String username, String challenge) -{ - String output; - output.append(std::to_string(client_tcp_protocol_version)); - output.append(default_database); - output.append(username); - output.append(challenge); - return output; -} - void TCPHandler::receiveHello() { /// Receive `hello` packet. @@ -1466,11 +1445,9 @@ void TCPHandler::receiveHello() return; } - is_ssh_based_auth = startsWith(user, EncodedUserInfo::SSH_KEY_AUTHENTICAION_MARKER) && password.empty(); + is_ssh_based_auth = user.starts_with(EncodedUserInfo::SSH_KEY_AUTHENTICAION_MARKER) && password.empty(); if (is_ssh_based_auth) - { - user.erase(0, String(EncodedUserInfo::SSH_KEY_AUTHENTICAION_MARKER).size()); - } + user.erase(0, std::string_view(EncodedUserInfo::SSH_KEY_AUTHENTICAION_MARKER).size()); session = makeSession(); const auto & client_info = session->getClientInfo(); @@ -1498,7 +1475,9 @@ void TCPHandler::receiveHello() } } } +#endif +#if USE_SSH /// Perform handshake for SSH authentication if (is_ssh_based_auth) { @@ -1512,7 +1491,14 @@ void TCPHandler::receiveHello() if (packet_type != Protocol::Client::SSHChallengeRequest) throw Exception(ErrorCodes::UNEXPECTED_PACKET_FROM_CLIENT, "Server expected to receive a packet for requesting a challenge string"); - auto challenge = createChallenge(); + auto create_challenge = []() + { + pcg64_fast rng(randomSeed()); + UInt64 rand = rng(); + return encodeSHA256(&rand, sizeof(rand)); + }; + + String challenge = create_challenge(); writeVarUInt(Protocol::Server::SSHChallenge, *out); writeStringBinary(challenge, *out); out->next(); @@ -1523,7 +1509,17 @@ void TCPHandler::receiveHello() throw Exception(ErrorCodes::UNEXPECTED_PACKET_FROM_CLIENT, "Server expected to receive a packet with a response for a challenge"); readStringBinary(signature, *in); - auto cred = SshCredentials(user, signature, prepareStringForSshValidation(user, challenge)); + auto prepare_string_for_ssh_validation = [&](const String & username, const String & challenge_) + { + String output; + output.append(std::to_string(client_tcp_protocol_version)); + output.append(default_database); + output.append(username); + output.append(challenge_); + return output; + }; + + auto cred = SshCredentials(user, signature, prepare_string_for_ssh_validation(user, challenge)); session->authenticate(cred, getClientAddress(client_info)); return; } diff --git a/src/Server/TCPHandler.h b/src/Server/TCPHandler.h index 28259d3a3257..191617f19050 100644 --- a/src/Server/TCPHandler.h +++ b/src/Server/TCPHandler.h @@ -216,7 +216,7 @@ class TCPHandler : public Poco::Net::TCPServerConnection String default_database; - 
bool is_ssh_based_auth = false; + bool is_ssh_based_auth = false; /// authentication is via SSH pub-key challenge /// For inter-server secret (remote_server.*.secret) bool is_interserver_mode = false; bool is_interserver_authenticated = false; @@ -248,7 +248,6 @@ class TCPHandler : public Poco::Net::TCPServerConnection void extractConnectionSettingsFromContext(const ContextPtr & context); std::unique_ptr makeSession(); - String prepareStringForSshValidation(String user, String challenge); bool receiveProxyHeader(); void receiveHello(); diff --git a/src/Storages/AlterCommands.cpp b/src/Storages/AlterCommands.cpp index eae5e1a8a478..281fc72dfc42 100644 --- a/src/Storages/AlterCommands.cpp +++ b/src/Storages/AlterCommands.cpp @@ -1143,7 +1143,7 @@ void AlterCommands::apply(StorageInMemoryMetadata & metadata, ContextPtr context { auto minmax_columns = metadata_copy.getColumnsRequiredForPartitionKey(); auto partition_key = metadata_copy.partition_key.expression_list_ast->clone(); - FunctionNameNormalizer().visit(partition_key.get()); + FunctionNameNormalizer::visit(partition_key.get()); auto primary_key_asts = metadata_copy.primary_key.expression_list_ast->children; metadata_copy.minmax_count_projection.emplace(ProjectionDescription::getMinMaxCountProjection( metadata_copy.columns, partition_key, minmax_columns, primary_key_asts, context)); diff --git a/src/Storages/DataLakes/DeltaLakeMetadataParser.cpp b/src/Storages/DataLakes/DeltaLakeMetadataParser.cpp index 3584f137225d..14a912a180de 100644 --- a/src/Storages/DataLakes/DeltaLakeMetadataParser.cpp +++ b/src/Storages/DataLakes/DeltaLakeMetadataParser.cpp @@ -282,11 +282,10 @@ struct DeltaLakeMetadataParser::Impl format_settings.date_time_overflow_behavior, /* case_insensitive_column_matching */false); - Chunk res; std::shared_ptr table; THROW_ARROW_NOT_OK(reader->ReadTable(&table)); - column_reader.arrowTableToCHChunk(res, table, reader->parquet_reader()->metadata()->num_rows()); + Chunk res = column_reader.arrowTableToCHChunk(table, reader->parquet_reader()->metadata()->num_rows()); const auto & res_columns = res.getColumns(); if (res_columns.size() != 2) diff --git a/src/Storages/Distributed/DistributedSink.cpp b/src/Storages/Distributed/DistributedSink.cpp index ddbcc6d473f9..b89a8d7bcfd5 100644 --- a/src/Storages/Distributed/DistributedSink.cpp +++ b/src/Storages/Distributed/DistributedSink.cpp @@ -613,7 +613,7 @@ IColumn::Selector DistributedSink::createSelector(const Block & source_block) co const auto & key_column = current_block_with_sharding_key_expr.getByName(storage.getShardingKeyColumnName()); - return storage.createSelector(cluster, key_column); + return StorageDistributed::createSelector(cluster, key_column); } diff --git a/src/Storages/FileLog/StorageFileLog.cpp b/src/Storages/FileLog/StorageFileLog.cpp index 7b0cfdf6a6cc..d3e31101d3b4 100644 --- a/src/Storages/FileLog/StorageFileLog.cpp +++ b/src/Storages/FileLog/StorageFileLog.cpp @@ -152,7 +152,7 @@ StorageFileLog::StorageFileLog( if (!fileOrSymlinkPathStartsWith(path, getContext()->getUserFilesPath())) { - if (LoadingStrictnessLevel::ATTACH <= mode) + if (LoadingStrictnessLevel::SECONDARY_CREATE <= mode) { LOG_ERROR(log, "The absolute data path should be inside `user_files_path`({})", getContext()->getUserFilesPath()); return; @@ -467,7 +467,7 @@ void StorageFileLog::openFilesAndSetPos() auto & reader = file_ctx.reader.value(); assertStreamGood(reader); - reader.seekg(0, reader.end); + reader.seekg(0, reader.end); /// NOLINT(readability-static-accessed-through-instance) 
assertStreamGood(reader); auto file_end = reader.tellg(); diff --git a/src/Storages/FileLog/StorageFileLog.h b/src/Storages/FileLog/StorageFileLog.h index 91d58540c943..0434213c5580 100644 --- a/src/Storages/FileLog/StorageFileLog.h +++ b/src/Storages/FileLog/StorageFileLog.h @@ -177,7 +177,7 @@ class StorageFileLog final : public IStorage, WithContext }; std::shared_ptr task; - std::unique_ptr directory_watch = nullptr; + std::unique_ptr directory_watch; void loadFiles(); diff --git a/src/Storages/IndicesDescription.cpp b/src/Storages/IndicesDescription.cpp index 14555dca63b4..cef8fd85f97d 100644 --- a/src/Storages/IndicesDescription.cpp +++ b/src/Storages/IndicesDescription.cpp @@ -85,22 +85,23 @@ IndexDescription IndexDescription::getIndexFromAST(const ASTPtr & definition_ast if (index_definition->name.empty()) throw Exception(ErrorCodes::INCORRECT_QUERY, "Skip index must have name in definition."); - if (!index_definition->type) + auto index_type = index_definition->getType(); + if (!index_type) throw Exception(ErrorCodes::INCORRECT_QUERY, "TYPE is required for index"); - if (index_definition->type->parameters && !index_definition->type->parameters->children.empty()) + if (index_type->parameters && !index_type->parameters->children.empty()) throw Exception(ErrorCodes::INCORRECT_QUERY, "Index type cannot have parameters"); IndexDescription result; result.definition_ast = index_definition->clone(); result.name = index_definition->name; - result.type = Poco::toLower(index_definition->type->name); + result.type = Poco::toLower(index_type->name); result.granularity = index_definition->granularity; ASTPtr expr_list; - if (index_definition->expr) + if (auto index_expression = index_definition->getExpression()) { - expr_list = extractKeyExpressionList(index_definition->expr->clone()); + expr_list = extractKeyExpressionList(index_expression); ReplaceAliasToExprVisitor::Data data{columns}; ReplaceAliasToExprVisitor{data}.visit(expr_list); @@ -125,12 +126,11 @@ IndexDescription IndexDescription::getIndexFromAST(const ASTPtr & definition_ast result.data_types.push_back(elem.type); } - const auto & definition_arguments = index_definition->type->arguments; - if (definition_arguments) + if (index_type && index_type->arguments) { - for (size_t i = 0; i < definition_arguments->children.size(); ++i) + for (size_t i = 0; i < index_type->arguments->children.size(); ++i) { - const auto * argument = definition_arguments->children[i]->as(); + const auto * argument = index_type->arguments->children[i]->as(); if (!argument) throw Exception(ErrorCodes::INCORRECT_QUERY, "Only literals can be skip index arguments"); result.arguments.emplace_back(argument->value); diff --git a/src/Storages/KeyDescription.cpp b/src/Storages/KeyDescription.cpp index d63b40e2b11e..2a697fa56547 100644 --- a/src/Storages/KeyDescription.cpp +++ b/src/Storages/KeyDescription.cpp @@ -172,7 +172,7 @@ KeyDescription KeyDescription::parse(const String & str, const ColumnsDescriptio ParserExpression parser; ASTPtr ast = parseQuery(parser, "(" + str + ")", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); - FunctionNameNormalizer().visit(ast.get()); + FunctionNameNormalizer::visit(ast.get()); return getKeyFromAST(ast, columns, context); } diff --git a/src/Storages/MemorySettings.cpp b/src/Storages/MemorySettings.cpp index f5e182b3484e..30ae4e126684 100644 --- a/src/Storages/MemorySettings.cpp +++ b/src/Storages/MemorySettings.cpp @@ -1,6 +1,5 @@ #include #include -#include #include #include @@ -11,6 +10,7 @@ namespace DB 
namespace ErrorCodes { extern const int UNKNOWN_SETTING; + extern const int SETTING_CONSTRAINT_VIOLATION; } IMPLEMENT_SETTINGS_TRAITS(memorySettingsTraits, MEMORY_SETTINGS) @@ -32,5 +32,31 @@ void MemorySettings::loadFromQuery(ASTStorage & storage_def) } } +ASTPtr MemorySettings::getSettingsChangesQuery() +{ + auto settings_ast = std::make_shared(); + settings_ast->is_standalone = false; + for (const auto & change : changes()) + settings_ast->changes.push_back(change); + + return settings_ast; +} + +void MemorySettings::sanityCheck() const +{ + if (min_bytes_to_keep > max_bytes_to_keep) + throw Exception(ErrorCodes::SETTING_CONSTRAINT_VIOLATION, + "Setting `min_bytes_to_keep` cannot be higher than the `max_bytes_to_keep`. `min_bytes_to_keep`: {}, `max_bytes_to_keep`: {}", + min_bytes_to_keep, + max_bytes_to_keep); + + + if (min_rows_to_keep > max_rows_to_keep) + throw Exception(ErrorCodes::SETTING_CONSTRAINT_VIOLATION, + "Setting `min_rows_to_keep` cannot be higher than the `max_rows_to_keep`. `min_rows_to_keep`: {}, `max_rows_to_keep`: {}", + min_rows_to_keep, + max_rows_to_keep); +} + } diff --git a/src/Storages/MemorySettings.h b/src/Storages/MemorySettings.h index ac6cdf73329c..f650746c4b28 100644 --- a/src/Storages/MemorySettings.h +++ b/src/Storages/MemorySettings.h @@ -1,6 +1,7 @@ #pragma once #include +#include namespace DB @@ -24,6 +25,8 @@ DECLARE_SETTINGS_TRAITS(memorySettingsTraits, MEMORY_SETTINGS) struct MemorySettings : public BaseSettings { void loadFromQuery(ASTStorage & storage_def); + ASTPtr getSettingsChangesQuery(); + void sanityCheck() const; }; } diff --git a/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp b/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp index 18e4c87b298d..052e3ba4b744 100644 --- a/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp +++ b/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -13,6 +14,7 @@ #include #include #include +#include #include namespace DB @@ -64,7 +66,7 @@ std::optional DataPartStorageOnDiskBase::getRelativePathForPrefix(Logger auto full_relative_path = fs::path(root_path); if (detached) - full_relative_path /= "detached"; + full_relative_path /= MergeTreeData::DETACHED_DIR_NAME; std::optional original_checksums_content; std::optional original_files_list; @@ -109,7 +111,7 @@ bool DataPartStorageOnDiskBase::looksLikeBrokenDetachedPartHasTheSameContent(con if (!exists("checksums.txt")) return false; - auto storage_from_detached = create(volume, fs::path(root_path) / "detached", detached_part_path, /*initialize=*/ true); + auto storage_from_detached = create(volume, fs::path(root_path) / MergeTreeData::DETACHED_DIR_NAME, detached_part_path, /*initialize=*/ true); if (!storage_from_detached->exists("checksums.txt")) return false; @@ -490,7 +492,7 @@ MutableDataPartStoragePtr DataPartStorageOnDiskBase::freeze( auto single_disk_volume = std::make_shared(disk->getName(), disk, 0); /// Do not initialize storage in case of DETACH because part may be broken. 
- bool to_detached = dir_path.starts_with("detached/"); + bool to_detached = dir_path.starts_with(std::string_view((fs::path(MergeTreeData::DETACHED_DIR_NAME) / "").string())); return create(single_disk_volume, to, dir_path, /*initialize=*/ !to_detached && !params.external_transaction); } @@ -618,7 +620,7 @@ void DataPartStorageOnDiskBase::remove( if (part_dir_without_slash.has_parent_path()) { auto parent_path = part_dir_without_slash.parent_path(); - if (parent_path == "detached") + if (parent_path == MergeTreeData::DETACHED_DIR_NAME) throw Exception( ErrorCodes::LOGICAL_ERROR, "Trying to remove detached part {} with path {} in remove function. It shouldn't happen", diff --git a/src/Storages/MergeTree/DataPartsExchange.cpp b/src/Storages/MergeTree/DataPartsExchange.cpp index 6bb5ff5a4ab4..d8445410ccc7 100644 --- a/src/Storages/MergeTree/DataPartsExchange.cpp +++ b/src/Storages/MergeTree/DataPartsExchange.cpp @@ -14,13 +14,14 @@ #include #include #include +#include #include #include +#include #include #include #include #include -#include #include @@ -120,6 +121,10 @@ void Service::processQuery(const HTMLForm & params, ReadBuffer & /*body*/, Write LOG_TRACE(log, "Sending part {}", part_name); + static const auto test_delay = data.getContext()->getConfigRef().getUInt64("test.data_parts_exchange.delay_before_sending_part_ms", 0); + if (test_delay) + randomDelayForMaxMilliseconds(test_delay, log, "DataPartsExchange: Before sending part"); + MergeTreeData::DataPartPtr part; auto report_broken_part = [&]() @@ -313,7 +318,7 @@ MergeTreeData::DataPart::Checksums Service::sendPartFromDisk( } if (!from_remote_disk && isFullPartStorage(part->getDataPartStorage())) - part->checksums.checkEqual(data_checksums, false); + part->checksums.checkEqual(data_checksums, false, part->name); return data_checksums; } @@ -798,7 +803,7 @@ MergeTreeData::MutableDataPartPtr Fetcher::downloadPartToDisk( throw Exception(ErrorCodes::LOGICAL_ERROR, "`tmp_prefix` and `part_name` cannot be empty or contain '.' or '/' characters."); auto part_dir = tmp_prefix + part_name; - auto part_relative_path = data.getRelativeDataPath() + String(to_detached ? "detached/" : ""); + auto part_relative_path = data.getRelativeDataPath() + String(to_detached ? MergeTreeData::DETACHED_DIR_NAME : ""); auto volume = std::make_shared("volume_" + part_name, disk); /// Create temporary part storage to write sent files. 
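For illustration only (an editor's addition, not part of the patch): the freeze() change above builds the "detached/" prefix from MergeTreeData::DETACHED_DIR_NAME instead of hard-coding the string. The standalone sketch below shows the std::filesystem detail it relies on: appending an empty path component adds a trailing directory separator. The part names used are made up.

// --- illustrative sketch (editor's addition) ---
#include <filesystem>
#include <iostream>
#include <string>

int main()
{
    namespace fs = std::filesystem;

    /// Stand-in for MergeTreeData::DETACHED_DIR_NAME.
    const std::string detached_dir_name = "detached";

    /// Appending an empty component yields "detached/" (with a trailing separator), not "detached".
    const std::string prefix = (fs::path(detached_dir_name) / "").string();

    std::cout << prefix << '\n';                                                     // detached/
    std::cout << std::boolalpha
              << std::string("detached/all_1_1_0").starts_with(prefix) << '\n'       // true
              << std::string("detached_old/all_1_1_0").starts_with(prefix) << '\n';  // false
    return 0;
}
// --- end of sketch ---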
@@ -901,7 +906,7 @@ MergeTreeData::MutableDataPartPtr Fetcher::downloadPartToDisk( else { if (isFullPartStorage(new_data_part->getDataPartStorage())) - new_data_part->checksums.checkEqual(data_checksums, false); + new_data_part->checksums.checkEqual(data_checksums, false, new_data_part->name); LOG_DEBUG(log, "Download of part {} onto disk {} finished.", part_name, disk->getName()); } diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 570175f66147..441437855abf 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -79,8 +79,8 @@ void IMergeTreeDataPart::MinMaxIndex::load(const MergeTreeData & data, const Par auto metadata_snapshot = data.getInMemoryMetadataPtr(); const auto & partition_key = metadata_snapshot->getPartitionKey(); - auto minmax_column_names = data.getMinMaxColumnsNames(partition_key); - auto minmax_column_types = data.getMinMaxColumnsTypes(partition_key); + auto minmax_column_names = MergeTreeData::getMinMaxColumnsNames(partition_key); + auto minmax_column_types = MergeTreeData::getMinMaxColumnsTypes(partition_key); size_t minmax_idx_size = minmax_column_types.size(); hyperrectangle.reserve(minmax_idx_size); @@ -112,8 +112,8 @@ IMergeTreeDataPart::MinMaxIndex::WrittenFiles IMergeTreeDataPart::MinMaxIndex::s auto metadata_snapshot = data.getInMemoryMetadataPtr(); const auto & partition_key = metadata_snapshot->getPartitionKey(); - auto minmax_column_names = data.getMinMaxColumnsNames(partition_key); - auto minmax_column_types = data.getMinMaxColumnsTypes(partition_key); + auto minmax_column_names = MergeTreeData::getMinMaxColumnsNames(partition_key); + auto minmax_column_types = MergeTreeData::getMinMaxColumnsTypes(partition_key); return store(minmax_column_names, minmax_column_types, part_storage, out_checksums); } @@ -204,7 +204,7 @@ void IMergeTreeDataPart::MinMaxIndex::appendFiles(const MergeTreeData & data, St { auto metadata_snapshot = data.getInMemoryMetadataPtr(); const auto & partition_key = metadata_snapshot->getPartitionKey(); - auto minmax_column_names = data.getMinMaxColumnsNames(partition_key); + auto minmax_column_names = MergeTreeData::getMinMaxColumnsNames(partition_key); size_t minmax_idx_size = minmax_column_names.size(); for (size_t i = 0; i < minmax_idx_size; ++i) { @@ -1213,7 +1213,7 @@ void IMergeTreeDataPart::appendFilesOfPartitionAndMinMaxIndex(Strings & files) c return; if (!parent_part) - partition.appendFiles(storage, files); + MergeTreePartition::appendFiles(storage, files); if (!parent_part) minmax_idx->appendFiles(storage, files); @@ -1844,7 +1844,7 @@ try } catch (...) { - if (startsWith(new_relative_path, "detached/")) + if (startsWith(new_relative_path, fs::path(MergeTreeData::DETACHED_DIR_NAME) / "")) { // Don't throw when the destination is to the detached folder. 
It might be able to // recover in some cases, such as fetching parts into multi-disks while some of the @@ -1957,7 +1957,7 @@ std::optional IMergeTreeDataPart::getRelativePathForDetachedPart(const S DetachedPartInfo::DETACH_REASONS.end(), prefix) != DetachedPartInfo::DETACH_REASONS.end()); if (auto path = getRelativePathForPrefix(prefix, /* detached */ true, broken)) - return "detached/" + *path; + return fs::path(MergeTreeData::DETACHED_DIR_NAME) / *path; return {}; } @@ -2061,7 +2061,7 @@ void IMergeTreeDataPart::checkConsistencyBase() const if (!isEmpty() && !parent_part) { - for (const String & col_name : storage.getMinMaxColumnsNames(partition_key)) + for (const String & col_name : MergeTreeData::getMinMaxColumnsNames(partition_key)) { if (!checksums.files.contains("minmax_" + escapeForFileName(col_name) + ".idx")) throw Exception(ErrorCodes::NO_FILE_IN_DATA_PART, "No minmax idx file checksum for column {}", col_name); @@ -2101,7 +2101,7 @@ void IMergeTreeDataPart::checkConsistencyBase() const if (!parent_part) { - for (const String & col_name : storage.getMinMaxColumnsNames(partition_key)) + for (const String & col_name : MergeTreeData::getMinMaxColumnsNames(partition_key)) check_file_not_empty("minmax_" + escapeForFileName(col_name) + ".idx"); } } diff --git a/src/Storages/MergeTree/MergeFromLogEntryTask.cpp b/src/Storages/MergeTree/MergeFromLogEntryTask.cpp index 3d1c5db07b59..e8d55f75b08a 100644 --- a/src/Storages/MergeTree/MergeFromLogEntryTask.cpp +++ b/src/Storages/MergeTree/MergeFromLogEntryTask.cpp @@ -426,7 +426,7 @@ bool MergeFromLogEntryTask::finalize(ReplicatedMergeMutateTaskBase::PartLogWrite ProfileEvents::increment(ProfileEvents::ReplicatedPartMerges); write_part_log({}); - storage.incrementMergedPartsProfileEvent(part->getType()); + StorageReplicatedMergeTree::incrementMergedPartsProfileEvent(part->getType()); return true; } diff --git a/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp b/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp index c218acce903c..866a63911c3c 100644 --- a/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp +++ b/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp @@ -149,7 +149,7 @@ void MergePlainMergeTreeTask::finish() ThreadFuzzer::maybeInjectMemoryLimitException(); write_part_log({}); - storage.incrementMergedPartsProfileEvent(new_part->getType()); + StorageMergeTree::incrementMergedPartsProfileEvent(new_part->getType()); transfer_profile_counters_to_initial_query(); if (auto txn_ = txn_holder.getTransaction()) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 8faed72b198d..5d4c3ab078e5 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -8,7 +8,6 @@ #include #include #include -#include "Common/logger_useful.h" #include #include #include @@ -262,7 +261,7 @@ void MergeTreeData::initializeDirectoriesAndFormatVersion(const std::string & re if (need_create_directories) { disk->createDirectories(relative_data_path); - disk->createDirectories(fs::path(relative_data_path) / MergeTreeData::DETACHED_DIR_NAME); + disk->createDirectories(fs::path(relative_data_path) / DETACHED_DIR_NAME); } if (disk->exists(format_version_path)) @@ -652,8 +651,10 @@ void MergeTreeData::checkProperties( if (!allow_suspicious_indices && !attach) { const auto * index_ast = typeid_cast(index.definition_ast.get()); - if (const auto * index_function = typeid_cast(index_ast->expr)) - checkSuspiciousIndices(index_function); + ASTPtr index_expression = index_ast ? 
index_ast->getExpression() : nullptr; + const auto * index_expression_ptr = index_expression ? typeid_cast(index_expression.get()) : nullptr; + if (index_expression_ptr) + checkSuspiciousIndices(index_expression_ptr); } MergeTreeIndexFactory::instance().validate(index, attach); @@ -1312,7 +1313,8 @@ MergeTreeData::LoadPartResult MergeTreeData::loadDataPart( /// during loading, such as "not enough memory" or network error. if (isRetryableException(std::current_exception())) throw; - LOG_DEBUG(log, "Failed to load data part {}, unknown exception", part_name); + + LOG_DEBUG(log, "Failed to load data part {} with exception: {}", part_name, getExceptionMessage(std::current_exception(), false)); mark_broken(); return res; } @@ -1343,6 +1345,7 @@ MergeTreeData::LoadPartResult MergeTreeData::loadDataPart( /// during loading, such as "not enough memory" or network error. if (isRetryableException(std::current_exception())) throw; + mark_broken(); return res; } @@ -1461,25 +1464,9 @@ MergeTreeData::LoadPartResult MergeTreeData::loadDataPartWithRetries( if (try_no + 1 == max_tries) throw; - String exception_message; - try - { - rethrow_exception(exception_ptr); - } - catch (const Exception & e) - { - exception_message = e.message(); - } - #if USE_AZURE_BLOB_STORAGE - catch (const Azure::Core::RequestFailedException & e) - { - exception_message = e.Message; - } - #endif - - - LOG_DEBUG(log, "Failed to load data part {} at try {} with retryable error: {}. Will retry in {} ms", - part_name, try_no, exception_message, initial_backoff_ms); + LOG_DEBUG(log, + "Failed to load data part {} at try {} with retryable error: {}. Will retry in {} ms", + part_name, try_no, getExceptionMessage(exception_ptr, false), initial_backoff_ms); std::this_thread::sleep_for(std::chrono::milliseconds(initial_backoff_ms)); initial_backoff_ms = std::min(initial_backoff_ms * 2, max_backoff_ms); @@ -1711,7 +1698,7 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks, std::optionalname(), "tmp") || it->name() == MergeTreeData::FORMAT_VERSION_FILE_NAME - || it->name() == MergeTreeData::DETACHED_DIR_NAME) + || it->name() == DETACHED_DIR_NAME) continue; if (auto part_info = MergeTreePartInfo::tryParsePartName(it->name(), format_version)) @@ -1984,6 +1971,15 @@ void MergeTreeData::waitForOutdatedPartsToBeLoaded() const TSA_NO_THREAD_SAFETY_ if (isStaticStorage()) return; + /// If waiting is not required, do NOT log and do NOT enable/disable turbo mode to make `waitForOutdatedPartsToBeLoaded` a lightweight check + { + std::unique_lock lock(outdated_data_parts_mutex); + if (outdated_data_parts_loading_canceled) + throw Exception(ErrorCodes::NOT_INITIALIZED, "Loading of outdated data parts was already canceled"); + if (outdated_data_parts_loading_finished) + return; + } + /// We need to load parts as fast as possible getOutdatedPartsLoadingThreadPool().enableTurboMode(); SCOPE_EXIT({ @@ -2794,7 +2790,7 @@ void MergeTreeData::dropAllData() && settings_ptr->allow_remote_fs_zero_copy_replication; try { - bool keep_shared = removeDetachedPart(part.disk, fs::path(relative_data_path) / "detached" / part.dir_name / "", part.dir_name); + bool keep_shared = removeDetachedPart(part.disk, fs::path(relative_data_path) / DETACHED_DIR_NAME / part.dir_name / "", part.dir_name); LOG_DEBUG(log, "Dropped detached part {}, keep shared data: {}", part.dir_name, keep_shared); } catch (...) @@ -2877,8 +2873,8 @@ void MergeTreeData::dropIfEmpty() if (disk->isBroken()) continue; /// Non recursive, exception is thrown if there are more files. 
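A recurring change in this diff is calling static members through the class name (e.g. AlterCommands::hasInvertedIndex, MergeTreeData::getMinMaxColumnsNames, StorageDistributed::createSelector) instead of through an object. The standalone sketch below is an editor's illustration of that pattern with made-up stand-in types; it is not ClickHouse code.

// --- illustrative sketch (editor's addition) ---
#include <iostream>
#include <string>
#include <vector>

struct PartitionKeyStub            /// hypothetical stand-in type
{
    std::vector<std::string> column_names;
};

struct MergeTreeDataStub           /// hypothetical stand-in for a class with a static helper
{
    /// Static member: it uses no instance state at all.
    static std::vector<std::string> getMinMaxColumnsNames(const PartitionKeyStub & key)
    {
        return key.column_names;
    }
};

int main()
{
    PartitionKeyStub key{{"date", "region"}};
    MergeTreeDataStub data;

    /// Both calls do exactly the same thing; the second form makes it explicit that no instance
    /// state is involved, which is the style this diff converts call sites to.
    auto via_instance = data.getMinMaxColumnsNames(key);
    auto via_class = MergeTreeDataStub::getMinMaxColumnsNames(key);

    std::cout << via_instance.size() << ' ' << via_class.size() << '\n';  // prints: 2 2
    return 0;
}
// --- end of sketch ---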
- disk->removeFileIfExists(fs::path(relative_data_path) / MergeTreeData::FORMAT_VERSION_FILE_NAME); - disk->removeDirectory(fs::path(relative_data_path) / MergeTreeData::DETACHED_DIR_NAME); + disk->removeFileIfExists(fs::path(relative_data_path) / FORMAT_VERSION_FILE_NAME); + disk->removeDirectory(fs::path(relative_data_path) / DETACHED_DIR_NAME); disk->removeDirectory(relative_data_path); } } @@ -2992,7 +2988,7 @@ void MergeTreeData::checkAlterIsPossible(const AlterCommands & commands, Context commands.apply(new_metadata, local_context); - if (commands.hasInvertedIndex(new_metadata) && !settings.allow_experimental_inverted_index) + if (AlterCommands::hasInvertedIndex(new_metadata) && !settings.allow_experimental_inverted_index) throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Experimental Inverted Index feature is not enabled (turn on setting 'allow_experimental_inverted_index')"); @@ -3441,7 +3437,7 @@ void MergeTreeData::changeSettings( { auto disk = new_storage_policy->getDiskByName(disk_name); disk->createDirectories(relative_data_path); - disk->createDirectories(fs::path(relative_data_path) / MergeTreeData::DETACHED_DIR_NAME); + disk->createDirectories(fs::path(relative_data_path) / DETACHED_DIR_NAME); } /// FIXME how would that be done while reloading configuration??? @@ -5362,7 +5358,7 @@ void MergeTreeData::restoreDataFromBackup(RestorerFromBackup & restorer, const S return; if (!restorer.isNonEmptyTableAllowed() && getTotalActiveSizeInBytes() && backup->hasFiles(data_path_in_backup)) - restorer.throwTableIsNotEmpty(getStorageID()); + RestorerFromBackup::throwTableIsNotEmpty(getStorageID()); restorePartsFromBackup(restorer, data_path_in_backup, partitions); } @@ -6035,7 +6031,7 @@ DetachedPartsInfo MergeTreeData::getDetachedParts() const for (const auto & disk : getDisks()) { - String detached_path = fs::path(relative_data_path) / MergeTreeData::DETACHED_DIR_NAME; + String detached_path = fs::path(relative_data_path) / DETACHED_DIR_NAME; /// Note: we don't care about TOCTOU issue here. if (disk->exists(detached_path)) @@ -6061,7 +6057,7 @@ void MergeTreeData::validateDetachedPartName(const String & name) void MergeTreeData::dropDetached(const ASTPtr & partition, bool part, ContextPtr local_context) { - PartsTemporaryRename renamed_parts(*this, "detached/"); + PartsTemporaryRename renamed_parts(*this, DETACHED_DIR_NAME); if (part) { @@ -6086,7 +6082,7 @@ void MergeTreeData::dropDetached(const ASTPtr & partition, bool part, ContextPtr for (auto & [old_name, new_name, disk] : renamed_parts.old_and_new_names) { - bool keep_shared = removeDetachedPart(disk, fs::path(relative_data_path) / "detached" / new_name / "", old_name); + bool keep_shared = removeDetachedPart(disk, fs::path(relative_data_path) / DETACHED_DIR_NAME / new_name / "", old_name); LOG_DEBUG(log, "Dropped detached part {}, keep shared data: {}", old_name, keep_shared); old_name.clear(); } @@ -6095,14 +6091,14 @@ void MergeTreeData::dropDetached(const ASTPtr & partition, bool part, ContextPtr MergeTreeData::MutableDataPartsVector MergeTreeData::tryLoadPartsToAttach(const ASTPtr & partition, bool attach_part, ContextPtr local_context, PartsTemporaryRename & renamed_parts) { - const String source_dir = "detached/"; + const fs::path source_dir = DETACHED_DIR_NAME; /// Let's compose a list of parts that should be added. 
if (attach_part) { const String part_id = partition->as().value.safeGet(); validateDetachedPartName(part_id); - if (temporary_parts.contains(String(DETACHED_DIR_NAME) + "/" + part_id)) + if (temporary_parts.contains(source_dir / part_id)) { LOG_WARNING(log, "Will not try to attach part {} because its directory is temporary, " "probably it's being detached right now", part_id); @@ -6179,7 +6175,7 @@ MergeTreeData::MutableDataPartsVector MergeTreeData::tryLoadPartsToAttach(const LOG_DEBUG(log, "Checking part {}", new_name); auto single_disk_volume = std::make_shared("volume_" + old_name, disk); - auto part = getDataPartBuilder(old_name, single_disk_volume, source_dir + new_name) + auto part = getDataPartBuilder(old_name, single_disk_volume, source_dir / new_name) .withPartFormatFromDisk() .build(); @@ -6687,7 +6683,7 @@ Block MergeTreeData::getMinMaxCountProjectionBlock( auto * place = arena.alignedAlloc(size_of_state, align_of_state); func->create(place); if (const AggregateFunctionCount * agg_count = typeid_cast(func.get())) - agg_count->set(place, value.get()); + AggregateFunctionCount::set(place, value.get()); else { auto value_column = func->getArgumentTypes().front()->createColumnConst(1, value)->convertToFullColumnIfConst(); @@ -7210,11 +7206,10 @@ String MergeTreeData::getFullPathOnDisk(const DiskPtr & disk) const DiskPtr MergeTreeData::tryGetDiskForDetachedPart(const String & part_name) const { - String additional_path = "detached/"; const auto disks = getStoragePolicy()->getDisks(); for (const DiskPtr & disk : disks) - if (disk->exists(fs::path(relative_data_path) / additional_path / part_name)) + if (disk->exists(fs::path(relative_data_path) / DETACHED_DIR_NAME / part_name)) return disk; return nullptr; @@ -7789,7 +7784,7 @@ MovePartsOutcome MergeTreeData::moveParts(const CurrentlyMovingPartsTaggerPtr & return result; } -bool MergeTreeData::partsContainSameProjections(const DataPartPtr & left, const DataPartPtr & right, String & out_reason) +bool MergeTreeData::partsContainSameProjections(const DataPartPtr & left, const DataPartPtr & right, PreformattedMessage & out_reason) { auto remove_broken_parts_from_consideration = [](auto & parts) { @@ -7811,7 +7806,7 @@ bool MergeTreeData::partsContainSameProjections(const DataPartPtr & left, const if (left_projection_parts.size() != right_projection_parts.size()) { - out_reason = fmt::format( + out_reason = PreformattedMessage::create( "Parts have different number of projections: {} in part '{}' and {} in part '{}'", left_projection_parts.size(), left->name, @@ -7825,7 +7820,7 @@ bool MergeTreeData::partsContainSameProjections(const DataPartPtr & left, const { if (!right_projection_parts.contains(name)) { - out_reason = fmt::format( + out_reason = PreformattedMessage::create( "The part '{}' doesn't have projection '{}' while part '{}' does", right->name, name, left->name ); return false; diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 0d56b902f1a5..d21f87c337ef 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -418,7 +418,7 @@ class MergeTreeData : public IStorage, public WithMutableContext static ReservationPtr tryReserveSpace(UInt64 expected_size, const IDataPartStorage & data_part_storage); static ReservationPtr reserveSpace(UInt64 expected_size, const IDataPartStorage & data_part_storage); - static bool partsContainSameProjections(const DataPartPtr & left, const DataPartPtr & right, String & out_reason); + static bool 
partsContainSameProjections(const DataPartPtr & left, const DataPartPtr & right, PreformattedMessage & out_reason); StoragePolicyPtr getStoragePolicy() const override; diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index 53d49b51e8fe..2d49e1df19b3 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -136,7 +136,7 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectPartsToMerge( const AllowedMergingPredicate & can_merge_callback, bool merge_with_ttl_allowed, const MergeTreeTransactionPtr & txn, - String & out_disable_reason, + PreformattedMessage & out_disable_reason, const PartitionIdsHint * partitions_hint) { MergeTreeData::DataPartsVector data_parts = getDataPartsToSelectMergeFrom(txn, partitions_hint); @@ -145,7 +145,7 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectPartsToMerge( if (data_parts.empty()) { - out_disable_reason = "There are no parts in the table"; + out_disable_reason = PreformattedMessage::create("There are no parts in the table"); return SelectPartsDecision::CANNOT_SELECT; } @@ -153,7 +153,7 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectPartsToMerge( if (info.parts_selected_precondition == 0) { - out_disable_reason = "No parts satisfy preconditions for merge"; + out_disable_reason = PreformattedMessage::create("No parts satisfy preconditions for merge"); return SelectPartsDecision::CANNOT_SELECT; } @@ -177,9 +177,9 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectPartsToMerge( /*optimize_skip_merged_partitions=*/true); } - if (!out_disable_reason.empty()) - out_disable_reason += ". "; - out_disable_reason += "There is no need to merge parts according to merge selector algorithm"; + if (!out_disable_reason.text.empty()) + out_disable_reason.text += ". "; + out_disable_reason.text += "There is no need to merge parts according to merge selector algorithm"; return SelectPartsDecision::CANNOT_SELECT; } @@ -196,7 +196,7 @@ MergeTreeDataMergerMutator::PartitionIdsHint MergeTreeDataMergerMutator::getPart auto metadata_snapshot = data.getInMemoryMetadataPtr(); - String out_reason; + PreformattedMessage out_reason; MergeSelectingInfo info = getPossibleMergeRanges(data_parts, can_merge_callback, txn, out_reason); if (info.parts_selected_precondition == 0) @@ -223,7 +223,7 @@ MergeTreeDataMergerMutator::PartitionIdsHint MergeTreeDataMergerMutator::getPart for (size_t i = 0; i < all_partition_ids.size(); ++i) { auto future_part = std::make_shared(); - String out_disable_reason; + PreformattedMessage out_disable_reason; /// This method should have been const, but something went wrong... 
it's const with dry_run = true auto status = const_cast(this)->selectPartsToMergeFromRanges( future_part, /*aggressive*/ false, max_total_size_to_merge, merge_with_ttl_allowed, @@ -232,7 +232,7 @@ MergeTreeDataMergerMutator::PartitionIdsHint MergeTreeDataMergerMutator::getPart if (status == SelectPartsDecision::SELECTED) res.insert(all_partition_ids[i]); else - LOG_TEST(log, "Nothing to merge in partition {}: {}", all_partition_ids[i], out_disable_reason); + LOG_TEST(log, "Nothing to merge in partition {}: {}", all_partition_ids[i], out_disable_reason.text); } String best_partition_id_to_optimize = getBestPartitionToOptimizeEntire(info.partitions_info); @@ -331,7 +331,7 @@ MergeTreeDataMergerMutator::MergeSelectingInfo MergeTreeDataMergerMutator::getPo const MergeTreeData::DataPartsVector & data_parts, const AllowedMergingPredicate & can_merge_callback, const MergeTreeTransactionPtr & txn, - String & out_disable_reason) const + PreformattedMessage & out_disable_reason) const { MergeSelectingInfo res; @@ -444,7 +444,7 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectPartsToMergeFromRanges( const StorageMetadataPtr & metadata_snapshot, const IMergeSelector::PartsRanges & parts_ranges, const time_t & current_time, - String & out_disable_reason, + PreformattedMessage & out_disable_reason, bool dry_run) { const auto data_settings = data.getSettings(); @@ -515,7 +515,7 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectPartsToMergeFromRanges( if (parts_to_merge.empty()) { - out_disable_reason = "Did not find any parts to merge (with usual merge selectors)"; + out_disable_reason = PreformattedMessage::create("Did not find any parts to merge (with usual merge selectors)"); return SelectPartsDecision::CANNOT_SELECT; } } @@ -573,20 +573,20 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectAllPartsToMergeWithinParti bool final, const StorageMetadataPtr & metadata_snapshot, const MergeTreeTransactionPtr & txn, - String & out_disable_reason, + PreformattedMessage & out_disable_reason, bool optimize_skip_merged_partitions) { MergeTreeData::DataPartsVector parts = selectAllPartsFromPartition(partition_id); if (parts.empty()) { - out_disable_reason = "There are no parts inside partition"; + out_disable_reason = PreformattedMessage::create("There are no parts inside partition"); return SelectPartsDecision::CANNOT_SELECT; } if (!final && parts.size() == 1) { - out_disable_reason = "There is only one part inside partition"; + out_disable_reason = PreformattedMessage::create("There is only one part inside partition"); return SelectPartsDecision::CANNOT_SELECT; } @@ -595,7 +595,7 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectAllPartsToMergeWithinParti if (final && optimize_skip_merged_partitions && parts.size() == 1 && parts[0]->info.level > 0 && (!metadata_snapshot->hasAnyTTL() || parts[0]->checkAllTTLCalculated(metadata_snapshot))) { - out_disable_reason = "Partition skipped due to optimize_skip_merged_partitions"; + out_disable_reason = PreformattedMessage::create("Partition skipped due to optimize_skip_merged_partitions"); return SelectPartsDecision::NOTHING_TO_MERGE; } @@ -636,7 +636,7 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectAllPartsToMergeWithinParti static_cast((DISK_USAGE_COEFFICIENT_TO_SELECT - 1.0) * 100)); } - out_disable_reason = fmt::format("Insufficient available disk space, required {}", ReadableSize(required_disk_space)); + out_disable_reason = PreformattedMessage::create("Insufficient available disk space, required {}", 
ReadableSize(required_disk_space)); return SelectPartsDecision::CANNOT_SELECT; } diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h index 669ee040af33..aad34bfb914c 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h @@ -43,7 +43,7 @@ class MergeTreeDataMergerMutator using AllowedMergingPredicate = std::function; + PreformattedMessage &)>; explicit MergeTreeDataMergerMutator(MergeTreeData & data_); @@ -92,7 +92,7 @@ class MergeTreeDataMergerMutator const MergeTreeData::DataPartsVector & data_parts, const AllowedMergingPredicate & can_merge_callback, const MergeTreeTransactionPtr & txn, - String & out_disable_reason) const; + PreformattedMessage & out_disable_reason) const; /// The third step of selecting parts to merge: takes ranges that we can merge, and selects parts that we want to merge SelectPartsDecision selectPartsToMergeFromRanges( @@ -103,7 +103,7 @@ class MergeTreeDataMergerMutator const StorageMetadataPtr & metadata_snapshot, const IMergeSelector::PartsRanges & parts_ranges, const time_t & current_time, - String & out_disable_reason, + PreformattedMessage & out_disable_reason, bool dry_run = false); String getBestPartitionToOptimizeEntire(const PartitionsInfo & partitions_info) const; @@ -129,7 +129,7 @@ class MergeTreeDataMergerMutator const AllowedMergingPredicate & can_merge, bool merge_with_ttl_allowed, const MergeTreeTransactionPtr & txn, - String & out_disable_reason, + PreformattedMessage & out_disable_reason, const PartitionIdsHint * partitions_hint = nullptr); /** Select all the parts in the specified partition for merge, if possible. @@ -144,7 +144,7 @@ class MergeTreeDataMergerMutator bool final, const StorageMetadataPtr & metadata_snapshot, const MergeTreeTransactionPtr & txn, - String & out_disable_reason, + PreformattedMessage & out_disable_reason, bool optimize_skip_merged_partitions = false); /** Creates a task to merge parts. 
diff --git a/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp b/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp index d60f4cc73540..7c9e4a371ab5 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp @@ -28,33 +28,34 @@ namespace ErrorCodes } -void MergeTreeDataPartChecksum::checkEqual(const MergeTreeDataPartChecksum & rhs, bool have_uncompressed, const String & name) const +void MergeTreeDataPartChecksum::checkEqual(const MergeTreeDataPartChecksum & rhs, bool have_uncompressed, const String & name, const String & part_name) const { if (is_compressed && have_uncompressed) { if (!rhs.is_compressed) - throw Exception(ErrorCodes::CHECKSUM_DOESNT_MATCH, "No uncompressed checksum for file {}", name); + throw Exception(ErrorCodes::CHECKSUM_DOESNT_MATCH, "No uncompressed checksum for file {}, data part {}", name, part_name); + if (rhs.uncompressed_size != uncompressed_size) { - throw Exception(ErrorCodes::BAD_SIZE_OF_FILE_IN_DATA_PART, "Unexpected uncompressed size of file {} in data part ({} vs {})", - name, uncompressed_size, rhs.uncompressed_size); + throw Exception(ErrorCodes::BAD_SIZE_OF_FILE_IN_DATA_PART, "Unexpected uncompressed size of file {} in data part {} ({} vs {})", + name, part_name, uncompressed_size, rhs.uncompressed_size); } if (rhs.uncompressed_hash != uncompressed_hash) { - throw Exception(ErrorCodes::CHECKSUM_DOESNT_MATCH, "Checksum mismatch for uncompressed file {} in data part ({} vs {})", - name, getHexUIntLowercase(uncompressed_hash), getHexUIntLowercase(rhs.uncompressed_hash)); + throw Exception(ErrorCodes::CHECKSUM_DOESNT_MATCH, "Checksum mismatch for uncompressed file {} in data part {} ({} vs {})", + name, part_name, getHexUIntLowercase(uncompressed_hash), getHexUIntLowercase(rhs.uncompressed_hash)); } return; } if (rhs.file_size != file_size) { - throw Exception(ErrorCodes::BAD_SIZE_OF_FILE_IN_DATA_PART, "Unexpected size of file {} in data part ({} vs {})", - name, file_size, rhs.file_size); + throw Exception(ErrorCodes::BAD_SIZE_OF_FILE_IN_DATA_PART, "Unexpected size of file {} in data part {} ({} vs {})", + name, part_name, file_size, rhs.file_size); } if (rhs.file_hash != file_hash) { - throw Exception(ErrorCodes::CHECKSUM_DOESNT_MATCH, "Checksum mismatch for file {} in data part ({} vs {})", - name, getHexUIntLowercase(file_hash), getHexUIntLowercase(rhs.file_hash)); + throw Exception(ErrorCodes::CHECKSUM_DOESNT_MATCH, "Checksum mismatch for file {} in data part {} ({} vs {})", + name, part_name, getHexUIntLowercase(file_hash), getHexUIntLowercase(rhs.file_hash)); } } @@ -79,7 +80,7 @@ void MergeTreeDataPartChecksum::checkSize(const IDataPartStorage & storage, cons } -void MergeTreeDataPartChecksums::checkEqual(const MergeTreeDataPartChecksums & rhs, bool have_uncompressed) const +void MergeTreeDataPartChecksums::checkEqual(const MergeTreeDataPartChecksums & rhs, bool have_uncompressed, const String & part_name) const { for (const auto & [name, _] : rhs.files) if (!files.contains(name)) @@ -95,7 +96,7 @@ void MergeTreeDataPartChecksums::checkEqual(const MergeTreeDataPartChecksums & r if (it == rhs.files.end()) throw Exception(ErrorCodes::NO_FILE_IN_DATA_PART, "No file {} in data part", name); - checksum.checkEqual(it->second, have_uncompressed, name); + checksum.checkEqual(it->second, have_uncompressed, name, part_name); } } @@ -435,19 +436,19 @@ String MinimalisticDataPartChecksums::getSerializedString(const MergeTreeDataPar return checksums.getSerializedString(); } -void 
MinimalisticDataPartChecksums::checkEqual(const MinimalisticDataPartChecksums & rhs, bool check_uncompressed_hash_in_compressed_files) const +void MinimalisticDataPartChecksums::checkEqual(const MinimalisticDataPartChecksums & rhs, bool check_uncompressed_hash_in_compressed_files, const String & part_name) const { if (full_checksums && rhs.full_checksums) - full_checksums->checkEqual(*rhs.full_checksums, check_uncompressed_hash_in_compressed_files); + full_checksums->checkEqual(*rhs.full_checksums, check_uncompressed_hash_in_compressed_files, part_name); // If full checksums were checked, check total checksums just in case checkEqualImpl(rhs, check_uncompressed_hash_in_compressed_files); } -void MinimalisticDataPartChecksums::checkEqual(const MergeTreeDataPartChecksums & rhs, bool check_uncompressed_hash_in_compressed_files) const +void MinimalisticDataPartChecksums::checkEqual(const MergeTreeDataPartChecksums & rhs, bool check_uncompressed_hash_in_compressed_files, const String & part_name) const { if (full_checksums) - full_checksums->checkEqual(rhs, check_uncompressed_hash_in_compressed_files); + full_checksums->checkEqual(rhs, check_uncompressed_hash_in_compressed_files, part_name); // If full checksums were checked, check total checksums just in case MinimalisticDataPartChecksums rhs_minimalistic; diff --git a/src/Storages/MergeTree/MergeTreeDataPartChecksum.h b/src/Storages/MergeTree/MergeTreeDataPartChecksum.h index d4980a67a43d..05178dc3a609 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartChecksum.h +++ b/src/Storages/MergeTree/MergeTreeDataPartChecksum.h @@ -32,7 +32,7 @@ struct MergeTreeDataPartChecksum : file_size(file_size_), file_hash(file_hash_), is_compressed(true), uncompressed_size(uncompressed_size_), uncompressed_hash(uncompressed_hash_) {} - void checkEqual(const MergeTreeDataPartChecksum & rhs, bool have_uncompressed, const String & name) const; + void checkEqual(const MergeTreeDataPartChecksum & rhs, bool have_uncompressed, const String & name, const String & part_name) const; void checkSize(const IDataPartStorage & storage, const String & name) const; }; @@ -61,7 +61,7 @@ struct MergeTreeDataPartChecksums /// Checks that the set of columns and their checksums are the same. If not, throws an exception. /// If have_uncompressed, for compressed files it compares the checksums of the decompressed data. /// Otherwise, it compares only the checksums of the files. 
- void checkEqual(const MergeTreeDataPartChecksums & rhs, bool have_uncompressed) const; + void checkEqual(const MergeTreeDataPartChecksums & rhs, bool have_uncompressed, const String & part_name) const; static bool isBadChecksumsErrorCode(int code); @@ -132,8 +132,8 @@ struct MinimalisticDataPartChecksums String getSerializedString() const; static String getSerializedString(const MergeTreeDataPartChecksums & full_checksums, bool minimalistic); - void checkEqual(const MinimalisticDataPartChecksums & rhs, bool check_uncompressed_hash_in_compressed_files) const; - void checkEqual(const MergeTreeDataPartChecksums & rhs, bool check_uncompressed_hash_in_compressed_files) const; + void checkEqual(const MinimalisticDataPartChecksums & rhs, bool check_uncompressed_hash_in_compressed_files, const String & part_name) const; + void checkEqual(const MergeTreeDataPartChecksums & rhs, bool check_uncompressed_hash_in_compressed_files, const String & part_name) const; void checkEqualImpl(const MinimalisticDataPartChecksums & rhs, bool check_uncompressed_hash_in_compressed_files) const; }; diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 6471f510291b..bcc936c57396 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -513,11 +513,11 @@ void MergeTreeDataSelectExecutor::filterPartsByPartition( { chassert(minmax_idx_condition && partition_pruner); const auto & partition_key = metadata_snapshot->getPartitionKey(); - minmax_columns_types = data.getMinMaxColumnsTypes(partition_key); + minmax_columns_types = MergeTreeData::getMinMaxColumnsTypes(partition_key); if (settings.force_index_by_date && (minmax_idx_condition->alwaysUnknownOrTrue() && partition_pruner->isUseless())) { - auto minmax_columns_names = data.getMinMaxColumnsNames(partition_key); + auto minmax_columns_names = MergeTreeData::getMinMaxColumnsNames(partition_key); throw Exception(ErrorCodes::INDEX_NOT_USED, "Neither MinMax index by columns ({}) nor partition expr is used and setting 'force_index_by_date' is set", fmt::join(minmax_columns_names, ", ")); diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp index cadd94867eca..64d4b1fd7ffd 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -426,7 +426,7 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempPartImpl( column.type = block.getByName(column.name).type; auto minmax_idx = std::make_shared(); - minmax_idx->update(block, data.getMinMaxColumnsNames(metadata_snapshot->getPartitionKey())); + minmax_idx->update(block, MergeTreeData::getMinMaxColumnsNames(metadata_snapshot->getPartitionKey())); MergeTreePartition partition(block_with_partition.partition); @@ -656,7 +656,7 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeProjectionPartImpl( /// Size of part would not be greater than block.bytes() + epsilon size_t expected_size = block.bytes(); // just check if there is enough space on parent volume - data.reserveSpace(expected_size, parent_part->getDataPartStorage()); + MergeTreeData::reserveSpace(expected_size, parent_part->getDataPartStorage()); part_type = data.choosePartFormatOnDisk(expected_size, block.rows()).part_type; auto new_data_part = parent_part->getProjectionPartBuilder(part_name, is_temp).withPartType(part_type).build(); diff --git 
a/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp b/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp index f506230b5eaf..7ab90dac5b03 100644 --- a/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp @@ -590,7 +590,7 @@ bool MergeTreeIndexConditionBloomFilter::traverseTreeEquals( for (const auto & f : value_field.get()) { - if ((f.isNull() && !is_nullable) || f.isDecimal(f.getType())) + if ((f.isNull() && !is_nullable) || f.isDecimal(f.getType())) /// NOLINT(readability-static-accessed-through-instance) return false; auto converted = convertFieldToType(f, *actual_type); diff --git a/src/Storages/MergeTree/MergeTreeMarksLoader.cpp b/src/Storages/MergeTree/MergeTreeMarksLoader.cpp index 6798f97e4942..1e9a320fa953 100644 --- a/src/Storages/MergeTree/MergeTreeMarksLoader.cpp +++ b/src/Storages/MergeTree/MergeTreeMarksLoader.cpp @@ -210,7 +210,7 @@ MarkCache::MappedPtr MergeTreeMarksLoader::loadMarksSync() if (mark_cache) { - auto key = mark_cache->hash(fs::path(data_part_storage->getFullPath()) / mrk_path); + auto key = MarkCache::hash(fs::path(data_part_storage->getFullPath()) / mrk_path); if (save_marks_in_cache) { auto callback = [this] { return loadMarksImpl(); }; diff --git a/src/Storages/MergeTree/MergeTreePartsMover.cpp b/src/Storages/MergeTree/MergeTreePartsMover.cpp index d32bc6d18262..1db70162bff3 100644 --- a/src/Storages/MergeTree/MergeTreePartsMover.cpp +++ b/src/Storages/MergeTree/MergeTreePartsMover.cpp @@ -158,7 +158,7 @@ bool MergeTreePartsMover::selectPartsForMove( { auto destination = data->getDestinationForMoveTTL(*ttl_entry); if (destination && !data->isPartInTTLDestination(*ttl_entry, *part)) - reservation = data->tryReserveSpace(part->getBytesOnDisk(), data->getDestinationForMoveTTL(*ttl_entry)); + reservation = MergeTreeData::tryReserveSpace(part->getBytesOnDisk(), data->getDestinationForMoveTTL(*ttl_entry)); } if (reservation) /// Found reservation by TTL rule. 
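Several of the hunks above (MergeTreeDataSelectExecutor, MergeTreeDataWriter, MergeTreeMarksLoader, MergeTreePartsMover) rewrite calls such as data.getMinMaxColumnsNames(...) and mark_cache->hash(...) into class-qualified forms like MergeTreeData::getMinMaxColumnsNames(...) and MarkCache::hash(...). Qualifying a static member with its class name makes it explicit that no instance state is read, which is what readability-static-accessed-through-instance flags. A small self-contained illustration of the two call styles follows; the MiniCache type is invented for the example and is not part of the patch.

#include <functional>
#include <iostream>
#include <string>

/// Invented type: only the static/non-static distinction matters for the example.
struct MiniCache
{
    /// Static member: the result depends only on the argument, not on any cache instance.
    static size_t hash(const std::string & path) { return std::hash<std::string>{}(path); }

    /// Non-static member: needs per-instance state.
    size_t sizeInBytes() const { return bytes; }

    size_t bytes = 0;
};

int main()
{
    MiniCache cache;
    cache.bytes = 4096;

    /// Compiles, but misleadingly suggests the call uses `cache` (the style the patch removes).
    size_t key_via_instance = cache.hash("store/all_1_1_0/data.mrk3");

    /// Class-qualified call, the style the patch switches to.
    size_t key_via_class = MiniCache::hash("store/all_1_1_0/data.mrk3");

    std::cout << (key_via_instance == key_via_class) << ' ' << cache.sizeInBytes() << '\n';
    return 0;
}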
diff --git a/src/Storages/MergeTree/MergeTreeSettings.cpp b/src/Storages/MergeTree/MergeTreeSettings.cpp index b42da22239eb..5d6f08d3c530 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.cpp +++ b/src/Storages/MergeTree/MergeTreeSettings.cpp @@ -230,7 +230,7 @@ void MergeTreeColumnSettings::validate(const SettingsChanges & changes) "Setting {} is unknown or not supported at column level, supported settings: {}", change.name, fmt::join(allowed_column_level_settings, ", ")); - merge_tree_settings.checkCanSet(change.name, change.value); + MergeTreeSettings::checkCanSet(change.name, change.value); } } diff --git a/src/Storages/MergeTree/MergeTreeSink.cpp b/src/Storages/MergeTree/MergeTreeSink.cpp index 3ead766cba91..b7dede3cb002 100644 --- a/src/Storages/MergeTree/MergeTreeSink.cpp +++ b/src/Storages/MergeTree/MergeTreeSink.cpp @@ -63,7 +63,7 @@ void MergeTreeSink::consume(Chunk chunk) if (!storage_snapshot->object_columns.empty()) convertDynamicColumnsToTuples(block, storage_snapshot); - auto part_blocks = storage.writer.splitBlockIntoParts(std::move(block), max_parts_per_block, metadata_snapshot, context); + auto part_blocks = MergeTreeDataWriter::splitBlockIntoParts(std::move(block), max_parts_per_block, metadata_snapshot, context); using DelayedPartitions = std::vector; DelayedPartitions partitions; @@ -195,7 +195,7 @@ void MergeTreeSink::finishDelayedChunk() { auto counters_snapshot = std::make_shared(partition.part_counters.getPartiallyAtomicSnapshot()); PartLog::addNewPart(storage.getContext(), PartLog::PartLogEntry(part, partition.elapsed_ns, counters_snapshot)); - storage.incrementInsertedPartsProfileEvent(part->getType()); + StorageMergeTree::incrementInsertedPartsProfileEvent(part->getType()); /// Initiate async merge - it will be done if it's good time for merge and if there are space in 'background_pool'. storage.background_operations_assignee.trigger(); diff --git a/src/Storages/MergeTree/MutateFromLogEntryTask.cpp b/src/Storages/MergeTree/MutateFromLogEntryTask.cpp index 7536eb45903f..3415b08cebb4 100644 --- a/src/Storages/MergeTree/MutateFromLogEntryTask.cpp +++ b/src/Storages/MergeTree/MutateFromLogEntryTask.cpp @@ -116,7 +116,7 @@ ReplicatedMergeMutateTaskBase::PrepareResult MutateFromLogEntryTask::prepare() /// Once we mutate part, we must reserve space on the same disk, because mutations can possibly create hardlinks. /// Can throw an exception. 
- reserved_space = storage.reserveSpace(estimated_space_for_result, source_part->getDataPartStorage()); + reserved_space = StorageReplicatedMergeTree::reserveSpace(estimated_space_for_result, source_part->getDataPartStorage()); future_mutated_part->updatePath(storage, reserved_space.get()); table_lock_holder = storage.lockForShare( diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 90e1cb0606e1..a971c4fda1c6 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -980,13 +980,13 @@ struct MutationContext QueryPipelineBuilder mutating_pipeline_builder; QueryPipeline mutating_pipeline; // in - std::unique_ptr mutating_executor{nullptr}; + std::unique_ptr mutating_executor; ProgressCallback progress_callback; Block updated_header; std::unique_ptr interpreter; - UInt64 watch_prev_elapsed{0}; - std::unique_ptr stage_progress{nullptr}; + UInt64 watch_prev_elapsed = 0; + std::unique_ptr stage_progress; MutationCommands commands_for_part; MutationCommands for_interpreter; @@ -998,12 +998,12 @@ struct MutationContext NameSet materialized_statistics; MergeTreeData::MutableDataPartPtr new_data_part; - IMergedBlockOutputStreamPtr out{nullptr}; + IMergedBlockOutputStreamPtr out; String mrk_extension; std::vector projections_to_build; - IMergeTreeDataPart::MinMaxIndexPtr minmax_idx{nullptr}; + IMergeTreeDataPart::MinMaxIndexPtr minmax_idx; std::set indices_to_recalc; std::set stats_to_recalc; @@ -1283,7 +1283,7 @@ bool PartMergerWriter::mutateOriginalPartAndPrepareProjections() if (MutationHelpers::checkOperationIsNotCanceled(*ctx->merges_blocker, ctx->mutate_entry) && ctx->mutating_executor->pull(cur_block)) { if (ctx->minmax_idx) - ctx->minmax_idx->update(cur_block, ctx->data->getMinMaxColumnsNames(ctx->metadata_snapshot->getPartitionKey())); + ctx->minmax_idx->update(cur_block, MergeTreeData::getMinMaxColumnsNames(ctx->metadata_snapshot->getPartitionKey())); ctx->out->write(cur_block); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp index 181f54688f91..d7601e6e6387 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp @@ -359,7 +359,7 @@ ReplicatedCheckResult ReplicatedMergeTreePartCheckThread::checkPartImpl(const St if (local_part_header.getColumnsHash() != zk_part_header.getColumnsHash()) throw Exception(ErrorCodes::TABLE_DIFFERS_TOO_MUCH, "Columns of local part {} are different from ZooKeeper", part_name); - zk_part_header.getChecksums().checkEqual(local_part_header.getChecksums(), true); + zk_part_header.getChecksums().checkEqual(local_part_header.getChecksums(), true, part_name); checkDataPart( part, diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index ee4ed87d456a..da94916d514b 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -8,10 +8,8 @@ #include #include #include -#include "Storages/MutationCommands.h" #include #include - #include #include @@ -221,6 +219,43 @@ void ReplicatedMergeTreeQueue::createLogEntriesToFetchBrokenParts() broken_parts_to_enqueue_fetches_on_loading.clear(); } +void ReplicatedMergeTreeQueue::addDropReplaceIntent(const MergeTreePartInfo & intent) +{ + std::lock_guard lock{state_mutex}; + drop_replace_range_intents.push_back(intent); +} + +void 
ReplicatedMergeTreeQueue::removeDropReplaceIntent(const MergeTreePartInfo & intent) +{ + std::lock_guard lock{state_mutex}; + auto it = std::find(drop_replace_range_intents.begin(), drop_replace_range_intents.end(), intent); + chassert(it != drop_replace_range_intents.end()); + drop_replace_range_intents.erase(it); +} + +bool ReplicatedMergeTreeQueue::isIntersectingWithDropReplaceIntent( + const LogEntry & entry, const String & part_name, String & out_reason, std::unique_lock & /*state_mutex lock*/) const +{ + const auto part_info = MergeTreePartInfo::fromPartName(part_name, format_version); + for (const auto & intent : drop_replace_range_intents) + { + if (!intent.isDisjoint(part_info)) + { + constexpr auto fmt_string = "Not executing {} of type {} for part {} (actual part {}) " + "because there is a drop or replace intent with part name {}."; + LOG_INFO( + LogToStr(out_reason, log), + fmt_string, + entry.znode_name, + entry.type, + entry.new_part_name, + part_name, + intent.getPartNameForLogs()); + return true; + } + } + return false; +} void ReplicatedMergeTreeQueue::insertUnlocked( const LogEntryPtr & entry, std::optional & min_unprocessed_insert_time_changed, @@ -1175,6 +1210,33 @@ void ReplicatedMergeTreeQueue::removePartProducingOpsInRange( entry->execution_complete.wait(lock, [&entry] { return !entry->currently_executing; }); } +void ReplicatedMergeTreeQueue::waitForCurrentlyExecutingOpsInRange(const MergeTreePartInfo & part_info) const +{ + Queue to_wait; + + std::unique_lock lock(state_mutex); + + for (const auto& entry : queue) + { + if (!entry->currently_executing) + continue; + + const auto virtual_part_names = entry->getVirtualPartNames(format_version); + for (const auto & virtual_part_name : virtual_part_names) + { + if (!part_info.isDisjoint(MergeTreePartInfo::fromPartName(virtual_part_name, format_version))) + { + to_wait.push_back(entry); + break; + } + } + } + + LOG_DEBUG(log, "Waiting for {} entries that are currently executing.", to_wait.size()); + + for (LogEntryPtr & entry : to_wait) + entry->execution_complete.wait(lock, [&entry] { return !entry->currently_executing; }); +} bool ReplicatedMergeTreeQueue::isCoveredByFuturePartsImpl(const LogEntry & entry, const String & new_part_name, String & out_reason, std::unique_lock & /* queue_lock */, @@ -1303,6 +1365,9 @@ bool ReplicatedMergeTreeQueue::shouldExecuteLogEntry( /// We can wait in worker threads, but not in scheduler. 
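The intent bookkeeping added above relies on every addDropReplaceIntent() call being matched by exactly one removeDropReplaceIntent() call, even if the DROP_RANGE/REPLACE_RANGE code path throws in between. One way a caller could keep the pair balanced is an RAII guard; the sketch below is an illustration only, with invented stand-in types (PartRange, QueueStandIn, IntentGuard are not part of the patch).

#include <algorithm>
#include <cassert>
#include <mutex>
#include <vector>

/// Minimal stand-ins so the sketch compiles on its own.
struct PartRange
{
    int min_block = 0;
    int max_block = 0;
    bool operator==(const PartRange & other) const { return min_block == other.min_block && max_block == other.max_block; }
};

struct QueueStandIn
{
    void addDropReplaceIntent(const PartRange & intent)
    {
        std::lock_guard<std::mutex> lock(mutex);
        intents.push_back(intent);
    }

    void removeDropReplaceIntent(const PartRange & intent)
    {
        std::lock_guard<std::mutex> lock(mutex);
        auto it = std::find(intents.begin(), intents.end(), intent);
        assert(it != intents.end());
        intents.erase(it);
    }

    std::mutex mutex;
    std::vector<PartRange> intents;
};

/// Hypothetical RAII helper: registers the intent on construction and always removes it
/// on scope exit, so the add/remove calls stay balanced even when an exception is thrown.
class IntentGuard
{
public:
    IntentGuard(QueueStandIn & queue_, PartRange intent_) : queue(queue_), intent(intent_)
    {
        queue.addDropReplaceIntent(intent);
    }

    ~IntentGuard() { queue.removeDropReplaceIntent(intent); }

    IntentGuard(const IntentGuard &) = delete;
    IntentGuard & operator=(const IntentGuard &) = delete;

private:
    QueueStandIn & queue;
    PartRange intent;
};

int main()
{
    QueueStandIn queue;
    {
        IntentGuard guard(queue, PartRange{0, 100});
        /// ... drop or replace the covered range; queue entries intersecting it are postponed
        /// while the intent is registered ...
    }   /// the intent is removed here, even if the block above had thrown
    assert(queue.intents.empty());
    return 0;
}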
if (isCoveredByFuturePartsImpl(entry, new_part_name, out_postpone_reason, state_lock, /* covered_entries_to_wait */ nullptr)) return false; + + if (isIntersectingWithDropReplaceIntent(entry, new_part_name, out_postpone_reason, state_lock)) + return false; } if (entry.type != LogEntry::DROP_RANGE && entry.type != LogEntry::DROP_PART) @@ -2287,7 +2352,7 @@ bool BaseMergePredicate::operator()( const MergeTreeData::DataPartPtr & left, const MergeTreeData::DataPartPtr & right, const MergeTreeTransaction *, - String & out_reason) const + PreformattedMessage & out_reason) const { if (left) return canMergeTwoParts(left, right, out_reason); @@ -2299,7 +2364,7 @@ template bool BaseMergePredicate::canMergeTwoParts( const MergeTreeData::DataPartPtr & left, const MergeTreeData::DataPartPtr & right, - String & out_reason) const + PreformattedMessage & out_reason) const { /// A sketch of a proof of why this method actually works: /// @@ -2343,19 +2408,19 @@ bool BaseMergePredicate::canMergeTwoParts( { if (pinned_part_uuids_ && pinned_part_uuids_->part_uuids.contains(part->uuid)) { - out_reason = "Part " + part->name + " has uuid " + toString(part->uuid) + " which is currently pinned"; + out_reason = PreformattedMessage::create("Part {} has uuid {} which is currently pinned", part->name, part->uuid); return false; } if (inprogress_quorum_part_ && part->name == *inprogress_quorum_part_) { - out_reason = "Quorum insert for part " + part->name + " is currently in progress"; + out_reason = PreformattedMessage::create("Quorum insert for part {} is currently in progress", part->name); return false; } if (prev_virtual_parts_ && prev_virtual_parts_->getContainingPart(part->info).empty()) { - out_reason = "Entry for part " + part->name + " hasn't been read from the replication log yet"; + out_reason = PreformattedMessage::create("Entry for part {} hasn't been read from the replication log yet", part->name); return false; } } @@ -2369,7 +2434,7 @@ bool BaseMergePredicate::canMergeTwoParts( { if (partition_ids_hint && !partition_ids_hint->contains(left->info.partition_id)) { - out_reason = fmt::format("Uncommitted block were not loaded for unexpected partition {}", left->info.partition_id); + out_reason = PreformattedMessage::create("Uncommitted block were not loaded for unexpected partition {}", left->info.partition_id); return false; } @@ -2381,8 +2446,7 @@ bool BaseMergePredicate::canMergeTwoParts( auto block_it = block_numbers.upper_bound(left_max_block); if (block_it != block_numbers.end() && *block_it < right_min_block) { - out_reason = "Block number " + toString(*block_it) + " is still being inserted between parts " - + left->name + " and " + right->name; + out_reason = PreformattedMessage::create("Block number {} is still being inserted between parts {} and {}", *block_it, left->name, right->name); return false; } } @@ -2401,7 +2465,7 @@ bool BaseMergePredicate::canMergeTwoParts( String containing_part = virtual_parts_->getContainingPart(part->info); if (containing_part != part->name) { - out_reason = "Part " + part->name + " has already been assigned a merge into " + containing_part; + out_reason = PreformattedMessage::create("Part {} has already been assigned a merge into {}", part->name, containing_part); return false; } } @@ -2418,9 +2482,9 @@ bool BaseMergePredicate::canMergeTwoParts( Strings covered = virtual_parts_->getPartsCoveredBy(gap_part_info); if (!covered.empty()) { - out_reason = "There are " + toString(covered.size()) + " parts (from " + covered.front() - + " to " + covered.back() + ") that 
are still not present or being processed by " - + " other background process on this replica between " + left->name + " and " + right->name; + out_reason = PreformattedMessage::create("There are {} parts (from {} to {}) " + "that are still not present or being processed by other background process " + "on this replica between {} and {}", covered.size(), covered.front(), covered.back(), left->name, right->name); return false; } } @@ -2436,8 +2500,8 @@ bool BaseMergePredicate::canMergeTwoParts( if (left_mutation_ver != right_mutation_ver) { - out_reason = "Current mutation versions of parts " + left->name + " and " + right->name + " differ: " - + toString(left_mutation_ver) + " and " + toString(right_mutation_ver) + " respectively"; + out_reason = PreformattedMessage::create("Current mutation versions of parts {} and {} differ: " + "{} and {} respectively", left->name, right->name, left_mutation_ver, right_mutation_ver); return false; } } @@ -2448,23 +2512,23 @@ bool BaseMergePredicate::canMergeTwoParts( template bool BaseMergePredicate::canMergeSinglePart( const MergeTreeData::DataPartPtr & part, - String & out_reason) const + PreformattedMessage & out_reason) const { if (pinned_part_uuids_ && pinned_part_uuids_->part_uuids.contains(part->uuid)) { - out_reason = fmt::format("Part {} has uuid {} which is currently pinned", part->name, part->uuid); + out_reason = PreformattedMessage::create("Part {} has uuid {} which is currently pinned", part->name, part->uuid); return false; } if (inprogress_quorum_part_ && part->name == *inprogress_quorum_part_) { - out_reason = fmt::format("Quorum insert for part {} is currently in progress", part->name); + out_reason = PreformattedMessage::create("Quorum insert for part {} is currently in progress", part->name); return false; } if (prev_virtual_parts_ && prev_virtual_parts_->getContainingPart(part->info).empty()) { - out_reason = fmt::format("Entry for part {} hasn't been read from the replication log yet", part->name); + out_reason = PreformattedMessage::create("Entry for part {} hasn't been read from the replication log yet", part->name); return false; } @@ -2479,7 +2543,7 @@ bool BaseMergePredicate::canMergeSinglePart( String containing_part = virtual_parts_->getContainingPart(part->info); if (containing_part != part->name) { - out_reason = fmt::format("Part {} has already been assigned a merge into {}", part->name, containing_part); + out_reason = PreformattedMessage::create("Part {} has already been assigned a merge into {}", part->name, containing_part); return false; } } @@ -2488,7 +2552,7 @@ bool BaseMergePredicate::canMergeSinglePart( } -bool ReplicatedMergeTreeMergePredicate::partParticipatesInReplaceRange(const MergeTreeData::DataPartPtr & part, String & out_reason) const +bool ReplicatedMergeTreeMergePredicate::partParticipatesInReplaceRange(const MergeTreeData::DataPartPtr & part, PreformattedMessage & out_reason) const { std::lock_guard lock(queue.state_mutex); for (const auto & entry : queue.queue) @@ -2501,7 +2565,7 @@ bool ReplicatedMergeTreeMergePredicate::partParticipatesInReplaceRange(const Mer if (part->info.isDisjoint(MergeTreePartInfo::fromPartName(part_name, queue.format_version))) continue; - out_reason = fmt::format("Part {} participates in REPLACE_RANGE {} ({})", part_name, entry->new_part_name, entry->znode_name); + out_reason = PreformattedMessage::create("Part {} participates in REPLACE_RANGE {} ({})", part_name, entry->new_part_name, entry->znode_name); return true; } } diff --git 
a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h index b17e78199463..df4176f5e3de 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h @@ -107,6 +107,8 @@ class ReplicatedMergeTreeQueue */ ActiveDataPartSet virtual_parts; + /// Used to prevent operations from starting in ranges that will be affected by DROP_RANGE/REPLACE_RANGE + std::vector drop_replace_range_intents; /// We do not add DROP_PARTs to virtual_parts because they can intersect, /// so we store them separately in this structure. @@ -251,6 +253,10 @@ class ReplicatedMergeTreeQueue std::optional min_unprocessed_insert_time_changed, std::optional max_processed_insert_time_changed) const; + bool isIntersectingWithDropReplaceIntent( + const LogEntry & entry, + const String & part_name, String & out_reason, std::unique_lock & /*state_mutex lock*/) const; + /// Marks the element of the queue as running. class CurrentlyExecuting { @@ -349,6 +355,9 @@ class ReplicatedMergeTreeQueue const MergeTreePartInfo & part_info, const std::optional & covering_entry); + /// Wait for the execution of currently executing actions with virtual parts intersecting with part_info + void waitForCurrentlyExecutingOpsInRange(const MergeTreePartInfo & part_info) const; + /** In the case where there are not enough parts to perform the merge in part_name * - move actions with merged parts to the end of the queue * (in order to download a already merged part from another replica). @@ -490,6 +499,12 @@ class ReplicatedMergeTreeQueue void setBrokenPartsToEnqueueFetchesOnLoading(Strings && parts_to_fetch); /// Must be called right after queue loading. void createLogEntriesToFetchBrokenParts(); + + /// Add an intent to block operations from starting in the range. All intents must be removed by calling + /// removeDropReplaceIntent(). The same intent can be added multiple times, but it has to be removed exactly + /// the same number of times. + void addDropReplaceIntent(const MergeTreePartInfo& intent); + void removeDropReplaceIntent(const MergeTreePartInfo& intent); }; using CommittingBlocks = std::unordered_map>; @@ -505,19 +520,19 @@ class BaseMergePredicate bool operator()(const MergeTreeData::DataPartPtr & left, const MergeTreeData::DataPartPtr & right, const MergeTreeTransaction * txn, - String & out_reason) const; + PreformattedMessage & out_reason) const; /// Can we assign a merge with these two parts? /// (assuming that no merge was assigned after the predicate was constructed) /// If we can't and out_reason is not nullptr, set it to the reason why we can't merge. bool canMergeTwoParts(const MergeTreeData::DataPartPtr & left, const MergeTreeData::DataPartPtr & right, - String & out_reason) const; + PreformattedMessage & out_reason) const; /// Can we assign a merge this part and some other part? /// For example a merge of a part and itself is needed for TTL. /// This predicate is checked for the first part of each range. 
- bool canMergeSinglePart(const MergeTreeData::DataPartPtr & part, String & out_reason) const; + bool canMergeSinglePart(const MergeTreeData::DataPartPtr & part, PreformattedMessage & out_reason) const; CommittingBlocks getCommittingBlocks(zkutil::ZooKeeperPtr & zookeeper, const std::string & zookeeper_path, LoggerPtr log_); @@ -561,7 +576,7 @@ class ReplicatedMergeTreeMergePredicate : public BaseMergePredicate::ReplicatedMergeTreeSinkImpl( bool deduplicate_, bool majority_quorum, ContextPtr context_, - bool is_attach_) + bool is_attach_, + bool allow_attach_while_readonly_) : SinkToStorage(metadata_snapshot_->getSampleBlock()) , storage(storage_) , metadata_snapshot(metadata_snapshot_) @@ -137,6 +138,7 @@ ReplicatedMergeTreeSinkImpl::ReplicatedMergeTreeSinkImpl( , quorum_timeout_ms(quorum_timeout_ms_) , max_parts_per_block(max_parts_per_block_) , is_attach(is_attach_) + , allow_attach_while_readonly(allow_attach_while_readonly_) , quorum_parallel(quorum_parallel_) , deduplicate(deduplicate_) , log(getLogger(storage.getLogName() + " (Replicated OutputStream)")) @@ -289,7 +291,7 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk chunk) throw Exception(ErrorCodes::LOGICAL_ERROR, "No chunk info for async inserts"); } - auto part_blocks = storage.writer.splitBlockIntoParts(std::move(block), max_parts_per_block, metadata_snapshot, context, async_insert_info); + auto part_blocks = MergeTreeDataWriter::splitBlockIntoParts(std::move(block), max_parts_per_block, metadata_snapshot, context, async_insert_info); using DelayedPartition = typename ReplicatedMergeTreeSinkImpl::DelayedChunk::Partition; using DelayedPartitions = std::vector; @@ -441,7 +443,7 @@ void ReplicatedMergeTreeSinkImpl::finishDelayedChunk(const ZooKeeperWithF try { - bool deduplicated = commitPart(zookeeper, part, partition.block_id, delayed_chunk->replicas_num, false).second; + bool deduplicated = commitPart(zookeeper, part, partition.block_id, delayed_chunk->replicas_num).second; last_block_is_duplicate = last_block_is_duplicate || deduplicated; @@ -449,7 +451,7 @@ void ReplicatedMergeTreeSinkImpl::finishDelayedChunk(const ZooKeeperWithF int error = (deduplicate && deduplicated) ? ErrorCodes::INSERT_WAS_DEDUPLICATED : 0; auto counters_snapshot = std::make_shared(partition.part_counters.getPartiallyAtomicSnapshot()); PartLog::addNewPart(storage.getContext(), PartLog::PartLogEntry(part, partition.elapsed_ns, counters_snapshot), ExecutionStatus(error)); - storage.incrementInsertedPartsProfileEvent(part->getType()); + StorageReplicatedMergeTree::incrementInsertedPartsProfileEvent(part->getType()); } catch (...) { @@ -486,7 +488,7 @@ void ReplicatedMergeTreeSinkImpl::finishDelayedChunk(const ZooKeeperWithFa while (true) { partition.temp_part.finalize(); - auto conflict_block_ids = commitPart(zookeeper, partition.temp_part.part, partition.block_id, delayed_chunk->replicas_num, false).first; + auto conflict_block_ids = commitPart(zookeeper, partition.temp_part.part, partition.block_id, delayed_chunk->replicas_num).first; if (conflict_block_ids.empty()) { auto counters_snapshot = std::make_shared(partition.part_counters.getPartiallyAtomicSnapshot()); @@ -557,7 +559,7 @@ bool ReplicatedMergeTreeSinkImpl::writeExistingPart(MergeTreeData::Mutabl { part->version.setCreationTID(Tx::PrehistoricTID, nullptr); String block_id = deduplicate ? 
fmt::format("{}_{}", part->info.partition_id, part->checksums.getTotalChecksumHex()) : ""; - bool deduplicated = commitPart(zookeeper, part, block_id, replicas_num, /* writing_existing_part */ true).second; + bool deduplicated = commitPart(zookeeper, part, block_id, replicas_num).second; /// Set a special error code if the block is duplicate int error = (deduplicate && deduplicated) ? ErrorCodes::INSERT_WAS_DEDUPLICATED : 0; @@ -648,8 +650,7 @@ std::pair, bool> ReplicatedMergeTreeSinkImpl:: const ZooKeeperWithFaultInjectionPtr & zookeeper, MergeTreeData::MutableDataPartPtr & part, const BlockIDsType & block_id, - size_t replicas_num, - bool writing_existing_part) + size_t replicas_num) { /// It is possible that we alter a part with different types of source columns. /// In this case, if column was not altered, the result type will be different with what we have in metadata. @@ -799,9 +800,9 @@ std::pair, bool> ReplicatedMergeTreeSinkImpl:: throw Exception( ErrorCodes::TABLE_IS_READ_ONLY, "Table is in readonly mode due to shutdown: replica_path={}", storage.replica_path); - /// When we attach existing parts it's okay to be in read-only mode - /// For example during RESTORE REPLICA. - if (!writing_existing_part) + /// Usually parts should not be attached in read-only mode. So we retry until the table is not read-only. + /// However there is one case when it's necessary to attach in read-only mode - during execution of the RESTORE REPLICA command. + if (!allow_attach_while_readonly) { retries_ctl.setUserError( Exception(ErrorCodes::TABLE_IS_READ_ONLY, "Table is in readonly mode: replica_path={}", storage.replica_path)); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.h b/src/Storages/MergeTree/ReplicatedMergeTreeSink.h index 29f3183be646..39623c205840 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.h @@ -45,7 +45,8 @@ class ReplicatedMergeTreeSinkImpl : public SinkToStorage ContextPtr context_, // special flag to determine the ALTER TABLE ATTACH PART without the query context, // needed to set the special LogEntryType::ATTACH_PART - bool is_attach_ = false); + bool is_attach_ = false, + bool allow_attach_while_readonly_ = false); ~ReplicatedMergeTreeSinkImpl() override; @@ -93,8 +94,7 @@ class ReplicatedMergeTreeSinkImpl : public SinkToStorage const ZooKeeperWithFaultInjectionPtr & zookeeper, MergeTreeData::MutableDataPartPtr & part, const BlockIDsType & block_id, - size_t replicas_num, - bool writing_existing_part); + size_t replicas_num); /// Wait for quorum to be satisfied on path (quorum_path) form part (part_name) @@ -123,6 +123,7 @@ class ReplicatedMergeTreeSinkImpl : public SinkToStorage UInt64 cache_version = 0; bool is_attach = false; + bool allow_attach_while_readonly = false; bool quorum_parallel = false; const bool deduplicate = true; bool last_block_is_duplicate = false; diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp index 0ca7a4d74d93..287a4d20543c 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp @@ -33,7 +33,7 @@ static String formattedASTNormalized(const ASTPtr & ast) if (!ast) return ""; auto ast_normalized = ast->clone(); - FunctionNameNormalizer().visit(ast_normalized.get()); + FunctionNameNormalizer::visit(ast_normalized.get()); WriteBufferFromOwnString buf; formatAST(*ast_normalized, buf, false, true); return buf.str(); 
@@ -43,7 +43,7 @@ ReplicatedMergeTreeTableMetadata::ReplicatedMergeTreeTableMetadata(const MergeTr { if (data.format_version < MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING) { - auto minmax_idx_column_names = data.getMinMaxColumnsNames(metadata_snapshot->getPartitionKey()); + auto minmax_idx_column_names = MergeTreeData::getMinMaxColumnsNames(metadata_snapshot->getPartitionKey()); date_column = minmax_idx_column_names[data.minmax_idx_date_column_pos]; } diff --git a/src/Storages/MergeTree/ReplicatedTableStatus.h b/src/Storages/MergeTree/ReplicatedTableStatus.h index ce9ad3640f41..786a5fdb44d0 100644 --- a/src/Storages/MergeTree/ReplicatedTableStatus.h +++ b/src/Storages/MergeTree/ReplicatedTableStatus.h @@ -24,8 +24,8 @@ struct ReplicatedTableStatus UInt64 log_max_index; UInt64 log_pointer; UInt64 absolute_delay; - UInt8 total_replicas; - UInt8 active_replicas; + UInt32 total_replicas; + UInt32 active_replicas; UInt64 lost_part_count; String last_queue_update_exception; /// If the error has happened fetching the info from ZooKeeper, this field will be set. diff --git a/src/Storages/MergeTree/checkDataPart.cpp b/src/Storages/MergeTree/checkDataPart.cpp index d64568e0c3e0..b4d32e71d0d0 100644 --- a/src/Storages/MergeTree/checkDataPart.cpp +++ b/src/Storages/MergeTree/checkDataPart.cpp @@ -1,5 +1,4 @@ #include -#include #include #include @@ -16,11 +15,9 @@ #include #include #include +#include #include -#if USE_AZURE_BLOB_STORAGE -#include -#endif namespace CurrentMetrics { @@ -66,33 +63,28 @@ bool isRetryableException(std::exception_ptr exception_ptr) #if USE_AWS_S3 catch (const S3Exception & s3_exception) { - if (s3_exception.isRetryableError()) - return true; + return s3_exception.isRetryableError(); } #endif #if USE_AZURE_BLOB_STORAGE - catch (const Azure::Core::RequestFailedException &) + catch (const Azure::Core::RequestFailedException & e) { - return true; + return isRetryableAzureException(e); } #endif catch (const ErrnoException & e) { - if (e.getErrno() == EMFILE) - return true; + return e.getErrno() == EMFILE; } - catch (const Coordination::Exception & e) + catch (const Coordination::Exception & e) { - if (Coordination::isHardwareError(e.code)) - return true; + return Coordination::isHardwareError(e.code); } catch (const Exception & e) { - if (isNotEnoughMemoryErrorCode(e.code())) - return true; - - if (e.code() == ErrorCodes::NETWORK_ERROR || e.code() == ErrorCodes::SOCKET_TIMEOUT) - return true; + return isNotEnoughMemoryErrorCode(e.code()) + || e.code() == ErrorCodes::NETWORK_ERROR + || e.code() == ErrorCodes::SOCKET_TIMEOUT; } catch (const Poco::Net::NetException &) { @@ -102,10 +94,12 @@ bool isRetryableException(std::exception_ptr exception_ptr) { return true; } - - /// In fact, there can be other similar situations. - /// But it is OK, because there is a safety guard against deleting too many parts. - return false; + catch (...) + { + /// In fact, there can be other similar situations. + /// But it is OK, because there is a safety guard against deleting too many parts. 
+ return false; + } } @@ -350,7 +344,7 @@ static IMergeTreeDataPart::Checksums checkDataPart( return {}; if (require_checksums || !checksums_txt.files.empty()) - checksums_txt.checkEqual(checksums_data, check_uncompressed); + checksums_txt.checkEqual(checksums_data, check_uncompressed, data_part->name); return checksums_data; } diff --git a/src/Storages/MergeTree/registerStorageMergeTree.cpp b/src/Storages/MergeTree/registerStorageMergeTree.cpp index 9a3c17923d87..d552a4b6fa5a 100644 --- a/src/Storages/MergeTree/registerStorageMergeTree.cpp +++ b/src/Storages/MergeTree/registerStorageMergeTree.cpp @@ -585,7 +585,7 @@ static StoragePtr create(const StorageFactory::Arguments & args) auto minmax_columns = metadata.getColumnsRequiredForPartitionKey(); auto partition_key = metadata.partition_key.expression_list_ast->clone(); - FunctionNameNormalizer().visit(partition_key.get()); + FunctionNameNormalizer::visit(partition_key.get()); auto primary_key_asts = metadata.primary_key.expression_list_ast->children; metadata.minmax_count_projection.emplace(ProjectionDescription::getMinMaxCountProjection( columns, partition_key, minmax_columns, primary_key_asts, context)); @@ -694,7 +694,7 @@ static StoragePtr create(const StorageFactory::Arguments & args) auto minmax_columns = metadata.getColumnsRequiredForPartitionKey(); auto partition_key = metadata.partition_key.expression_list_ast->clone(); - FunctionNameNormalizer().visit(partition_key.get()); + FunctionNameNormalizer::visit(partition_key.get()); auto primary_key_asts = metadata.primary_key.expression_list_ast->children; metadata.minmax_count_projection.emplace(ProjectionDescription::getMinMaxCountProjection( columns, partition_key, minmax_columns, primary_key_asts, context)); diff --git a/src/Storages/StorageAzureBlob.cpp b/src/Storages/StorageAzureBlob.cpp index 306a5eac8e59..86e96f295806 100644 --- a/src/Storages/StorageAzureBlob.cpp +++ b/src/Storages/StorageAzureBlob.cpp @@ -432,7 +432,8 @@ AzureClientPtr StorageAzureBlob::createClient(StorageAzureBlob::Configuration co try { result = std::make_unique(blob_service_client->CreateBlobContainer(configuration.container).Value); - } catch (const Azure::Storage::StorageException & e) + } + catch (const Azure::Storage::StorageException & e) { if (e.StatusCode == Azure::Core::Http::HttpStatusCode::Conflict && e.ReasonPhrase == "The specified container already exists.") diff --git a/src/Storages/StorageDictionary.cpp b/src/Storages/StorageDictionary.cpp index a0c4156a7048..447fd87cdc96 100644 --- a/src/Storages/StorageDictionary.cpp +++ b/src/Storages/StorageDictionary.cpp @@ -116,7 +116,7 @@ StorageDictionary::StorageDictionary( : StorageDictionary( table_id, table_id.getFullNameNotQuoted(), - context_->getExternalDictionariesLoader().getDictionaryStructure(*dictionary_configuration), + context_->getExternalDictionariesLoader().getDictionaryStructure(*dictionary_configuration), /// NOLINT(readability-static-accessed-through-instance) dictionary_configuration->getString("dictionary.comment", ""), Location::SameDatabaseAndNameAsDictionary, context_) diff --git a/src/Storages/StorageDummy.h b/src/Storages/StorageDummy.h index e9d8f90f755f..ae9bf2483e13 100644 --- a/src/Storages/StorageDummy.h +++ b/src/Storages/StorageDummy.h @@ -19,6 +19,12 @@ class StorageDummy final : public IStorage bool supportsSampling() const override { return true; } bool supportsFinal() const override { return true; } bool supportsPrewhere() const override { return true; } + + std::optional supportedPrewhereColumns() const 
override + { + return original_storage_snapshot ? original_storage_snapshot->storage.supportedPrewhereColumns() : std::nullopt; + } + bool supportsSubcolumns() const override { return true; } bool supportsDynamicSubcolumns() const override { return true; } bool canMoveConditionsToPrewhere() const override diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 0d220f2fd5da..f747bbf6b28d 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -1741,7 +1741,7 @@ class StorageFileSink final : public SinkToStorage, WithContext void initialize() { - std::unique_ptr naked_buffer = nullptr; + std::unique_ptr naked_buffer; if (use_table_fd) { naked_buffer = std::make_unique(table_fd, DBMS_DEFAULT_BUFFER_SIZE); diff --git a/src/Storages/StorageFile.h b/src/Storages/StorageFile.h index 588429284f01..945ee4f369f5 100644 --- a/src/Storages/StorageFile.h +++ b/src/Storages/StorageFile.h @@ -286,7 +286,7 @@ class StorageFileSource : public SourceWithKeyCondition, WithContext std::unique_ptr reader; std::shared_ptr archive_reader; - std::unique_ptr file_enumerator = nullptr; + std::unique_ptr file_enumerator; ColumnsDescription columns_description; NamesAndTypesList requested_columns; diff --git a/src/Storages/StorageFuzzJSON.cpp b/src/Storages/StorageFuzzJSON.cpp index 87790dd2fdc5..fbfc67f4c7c1 100644 --- a/src/Storages/StorageFuzzJSON.cpp +++ b/src/Storages/StorageFuzzJSON.cpp @@ -364,7 +364,7 @@ JSONNode & fuzzSingleJSONNode(JSONNode & n, const StorageFuzzJSON::Configuration if (val.fixed) val.fixed = generateRandomFixedValue(config, rnd); - else if (val.array && val.array->size() < config.max_array_size && node_count + val.array->size() < config.value_number_limit) + else if (val.array && val.array->size() < config.max_array_size && node_count + val.array->size() < StorageFuzzJSON::Configuration::value_number_limit) { if (val.array->empty()) val.array->push_back(generateRandomJSONNode(config, rnd, /*with_key*/ false, depth)); @@ -377,7 +377,7 @@ JSONNode & fuzzSingleJSONNode(JSONNode & n, const StorageFuzzJSON::Configuration } ++node_count; } - else if (val.object && val.object->size() < config.max_object_size && node_count + val.object->size() < config.value_number_limit) + else if (val.object && val.object->size() < config.max_object_size && node_count + val.object->size() < StorageFuzzJSON::Configuration::value_number_limit) { val.object->push_back(generateRandomJSONNode(config, rnd, /*with_key*/ true, depth)); ++node_count; @@ -437,7 +437,7 @@ void fuzzJSONObject( bool first = true; for (const auto & ptr : node_list) { - if (node_count >= config.value_number_limit) + if (node_count >= StorageFuzzJSON::Configuration::value_number_limit) break; WriteBufferFromOwnString child_out; @@ -619,11 +619,11 @@ void StorageFuzzJSON::processNamedCollectionResult(Configuration & configuration { configuration.max_output_length = collection.get("max_output_length"); - if (configuration.max_output_length < 2 || configuration.max_output_length > configuration.output_length_limit) + if (configuration.max_output_length < 2 || configuration.max_output_length > StorageFuzzJSON::Configuration::output_length_limit) throw Exception( ErrorCodes::BAD_ARGUMENTS, "The value of the 'max_output_length' argument must be within the interval [2, {}.]", - configuration.output_length_limit); + StorageFuzzJSON::Configuration::output_length_limit); } if (collection.has("max_nesting_level")) @@ -638,11 +638,11 @@ void StorageFuzzJSON::processNamedCollectionResult(Configuration & 
configuration if (collection.has("max_string_value_length")) { auto max_string_value_length = collection.get("max_string_value_length"); - if (max_string_value_length > configuration.output_length_limit) + if (max_string_value_length > StorageFuzzJSON::Configuration::output_length_limit) throw Exception( ErrorCodes::BAD_ARGUMENTS, "The value of the 'max_string_value_length' argument must be at most {}.", - configuration.output_length_limit); + StorageFuzzJSON::Configuration::output_length_limit); configuration.max_string_value_length = std::min(max_string_value_length, configuration.max_output_length); } @@ -650,11 +650,11 @@ void StorageFuzzJSON::processNamedCollectionResult(Configuration & configuration if (collection.has("max_key_length")) { auto max_key_length = collection.get("max_key_length"); - if (max_key_length > configuration.output_length_limit) + if (max_key_length > StorageFuzzJSON::Configuration::output_length_limit) throw Exception( ErrorCodes::BAD_ARGUMENTS, "The value of the 'max_key_length' argument must be less or equal than {}.", - configuration.output_length_limit); + StorageFuzzJSON::Configuration::output_length_limit); configuration.max_key_length = std::min(max_key_length, configuration.max_output_length); configuration.min_key_length = std::min(configuration.min_key_length, configuration.max_key_length); } diff --git a/src/Storages/StorageLog.cpp b/src/Storages/StorageLog.cpp index 549cfca1b6c4..b652750346f3 100644 --- a/src/Storages/StorageLog.cpp +++ b/src/Storages/StorageLog.cpp @@ -39,6 +39,8 @@ #include #include +#include + #define DBMS_STORAGE_LOG_DATA_FILE_EXTENSION ".bin" #define DBMS_STORAGE_LOG_MARKS_FILE_NAME "__marks.mrk" diff --git a/src/Storages/StorageMemory.cpp b/src/Storages/StorageMemory.cpp index c6222d2124ea..f69c4adb5521 100644 --- a/src/Storages/StorageMemory.cpp +++ b/src/Storages/StorageMemory.cpp @@ -46,7 +46,6 @@ namespace ErrorCodes extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int CANNOT_RESTORE_TABLE; extern const int NOT_IMPLEMENTED; - extern const int SETTING_CONSTRAINT_VIOLATION; } class MemorySink : public SinkToStorage @@ -76,7 +75,7 @@ class MemorySink : public SinkToStorage convertDynamicColumnsToTuples(block, storage_snapshot); } - if (storage.compress) + if (storage.getMemorySettingsRef().compress) { Block compressed_block; for (const auto & elem : block) @@ -106,15 +105,16 @@ class MemorySink : public SinkToStorage auto new_data = std::make_unique(*(storage.data.get())); UInt64 new_total_rows = storage.total_size_rows.load(std::memory_order_relaxed) + inserted_rows; UInt64 new_total_bytes = storage.total_size_bytes.load(std::memory_order_relaxed) + inserted_bytes; + const auto & memory_settings = storage.getMemorySettingsRef(); while (!new_data->empty() - && ((storage.max_bytes_to_keep && new_total_bytes > storage.max_bytes_to_keep) - || (storage.max_rows_to_keep && new_total_rows > storage.max_rows_to_keep))) + && ((memory_settings.max_bytes_to_keep && new_total_bytes > memory_settings.max_bytes_to_keep) + || (memory_settings.max_rows_to_keep && new_total_rows > memory_settings.max_rows_to_keep))) { Block oldest_block = new_data->front(); UInt64 rows_to_remove = oldest_block.rows(); UInt64 bytes_to_remove = oldest_block.allocatedBytes(); - if (new_total_bytes - bytes_to_remove < storage.min_bytes_to_keep - || new_total_rows - rows_to_remove < storage.min_rows_to_keep) + if (new_total_bytes - bytes_to_remove < memory_settings.min_bytes_to_keep + || new_total_rows - rows_to_remove < 
memory_settings.min_rows_to_keep) { break; // stop - removing next block will put us under min_bytes / min_rows threshold } @@ -145,15 +145,16 @@ StorageMemory::StorageMemory( ColumnsDescription columns_description_, ConstraintsDescription constraints_, const String & comment, - const MemorySettings & settings) - : IStorage(table_id_), data(std::make_unique()), compress(settings.compress), - min_rows_to_keep(settings.min_rows_to_keep), max_rows_to_keep(settings.max_rows_to_keep), - min_bytes_to_keep(settings.min_bytes_to_keep), max_bytes_to_keep(settings.max_bytes_to_keep) + const MemorySettings & memory_settings_) + : IStorage(table_id_) + , data(std::make_unique()) + , memory_settings(memory_settings_) { StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns(std::move(columns_description_)); storage_metadata.setConstraints(std::move(constraints_)); storage_metadata.setComment(comment); + storage_metadata.setSettingsChanges(memory_settings.getSettingsChangesQuery()); setInMemoryMetadata(storage_metadata); } @@ -239,7 +240,7 @@ void StorageMemory::mutate(const MutationCommands & commands, ContextPtr context Block block; while (executor.pull(block)) { - if (compress) + if (memory_settings.compress) for (auto & elem : block) elem.column = elem.column->compress(); @@ -294,6 +295,59 @@ void StorageMemory::truncate( total_size_rows.store(0, std::memory_order_relaxed); } +void StorageMemory::alter(const DB::AlterCommands & params, DB::ContextPtr context, DB::IStorage::AlterLockHolder & /*alter_lock_holder*/) +{ + auto table_id = getStorageID(); + StorageInMemoryMetadata new_metadata = getInMemoryMetadata(); + params.apply(new_metadata, context); + + if (params.isSettingsAlter()) + { + auto & settings_changes = new_metadata.settings_changes->as(); + auto changed_settings = memory_settings; + changed_settings.applyChanges(settings_changes.changes); + changed_settings.sanityCheck(); + + /// When modifying the values of max_bytes_to_keep and max_rows_to_keep to be smaller than the old values, + /// the old data needs to be removed. 
+ if (!memory_settings.max_bytes_to_keep || memory_settings.max_bytes_to_keep > changed_settings.max_bytes_to_keep + || !memory_settings.max_rows_to_keep || memory_settings.max_rows_to_keep > changed_settings.max_rows_to_keep) + { + std::lock_guard lock(mutex); + + auto new_data = std::make_unique(*(data.get())); + UInt64 new_total_rows = total_size_rows.load(std::memory_order_relaxed); + UInt64 new_total_bytes = total_size_bytes.load(std::memory_order_relaxed); + while (!new_data->empty() + && ((changed_settings.max_bytes_to_keep && new_total_bytes > changed_settings.max_bytes_to_keep) + || (changed_settings.max_rows_to_keep && new_total_rows > changed_settings.max_rows_to_keep))) + { + Block oldest_block = new_data->front(); + UInt64 rows_to_remove = oldest_block.rows(); + UInt64 bytes_to_remove = oldest_block.allocatedBytes(); + if (new_total_bytes - bytes_to_remove < changed_settings.min_bytes_to_keep + || new_total_rows - rows_to_remove < changed_settings.min_rows_to_keep) + { + break; // stop - removing next block will put us under min_bytes / min_rows threshold + } + + // delete old block from current storage table + new_total_rows -= rows_to_remove; + new_total_bytes -= bytes_to_remove; + new_data->erase(new_data->begin()); + } + + data.set(std::move(new_data)); + total_size_rows.store(new_total_rows, std::memory_order_relaxed); + total_size_bytes.store(new_total_bytes, std::memory_order_relaxed); + } + memory_settings = std::move(changed_settings); + } + + DatabaseCatalog::instance().getDatabase(table_id.database_name)->alterTable(context, table_id, new_metadata); + setInMemoryMetadata(new_metadata); +} + namespace { @@ -499,7 +553,7 @@ void StorageMemory::restoreDataImpl(const BackupPtr & backup, const String & dat while (auto block = block_in.read()) { - if (compress) + if (memory_settings.compress) { Block compressed_block; for (const auto & elem : block) @@ -534,7 +588,8 @@ void StorageMemory::checkAlterIsPossible(const AlterCommands & commands, Context { if (command.type != AlterCommand::Type::ADD_COLUMN && command.type != AlterCommand::Type::MODIFY_COLUMN && command.type != AlterCommand::Type::DROP_COLUMN && command.type != AlterCommand::Type::COMMENT_COLUMN - && command.type != AlterCommand::Type::COMMENT_TABLE && command.type != AlterCommand::Type::RENAME_COLUMN) + && command.type != AlterCommand::Type::COMMENT_TABLE && command.type != AlterCommand::Type::RENAME_COLUMN + && command.type != AlterCommand::Type::MODIFY_SETTING) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Alter of type '{}' is not supported by storage {}", command.type, getName()); } @@ -566,9 +621,7 @@ void registerStorageMemory(StorageFactory & factory) if (has_settings) settings.loadFromQuery(*args.storage_def); - if (settings.min_bytes_to_keep > settings.max_bytes_to_keep - || settings.min_rows_to_keep > settings.max_rows_to_keep) - throw Exception(ErrorCodes::SETTING_CONSTRAINT_VIOLATION, "Min. 
bytes / rows must be set with a max."); + settings.sanityCheck(); return std::make_shared(args.table_id, args.columns, args.constraints, args.comment, settings); }, diff --git a/src/Storages/StorageMemory.h b/src/Storages/StorageMemory.h index 13f1c971d823..50581aa0d61a 100644 --- a/src/Storages/StorageMemory.h +++ b/src/Storages/StorageMemory.h @@ -31,7 +31,7 @@ friend class MemorySink; ColumnsDescription columns_description_, ConstraintsDescription constraints_, const String & comment, - const MemorySettings & settings = MemorySettings()); + const MemorySettings & memory_settings_ = MemorySettings()); String getName() const override { return "Memory"; } @@ -46,6 +46,8 @@ friend class MemorySink; StorageSnapshotPtr getStorageSnapshot(const StorageMetadataPtr & metadata_snapshot, ContextPtr query_context) const override; + const MemorySettings & getMemorySettingsRef() const { return memory_settings; } + void read( QueryPlan & query_plan, const Names & column_names, @@ -78,6 +80,7 @@ friend class MemorySink; void restoreDataFromBackup(RestorerFromBackup & restorer, const String & data_path_in_backup, const std::optional & partitions) override; void checkAlterIsPossible(const AlterCommands & commands, ContextPtr local_context) const override; + void alter(const AlterCommands & params, ContextPtr context, AlterLockHolder & alter_lock_holder) override; std::optional totalRows(const Settings &) const override; std::optional totalBytes(const Settings &) const override; @@ -134,12 +137,7 @@ friend class MemorySink; std::atomic total_size_bytes = 0; std::atomic total_size_rows = 0; - bool compress; - UInt64 min_rows_to_keep; - UInt64 max_rows_to_keep; - UInt64 min_bytes_to_keep; - UInt64 max_bytes_to_keep; - + MemorySettings memory_settings; friend class ReadFromMemoryStorageStep; }; diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index aad4fc36a1bd..86af02be8990 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -436,7 +436,7 @@ CurrentlyMergingPartsTagger::CurrentlyMergingPartsTagger( /// if we mutate part, than we should reserve space on the same disk, because mutations possible can create hardlinks if (is_mutation) { - reserved_space = storage.tryReserveSpace(total_size, future_part->parts[0]->getDataPartStorage()); + reserved_space = StorageMergeTree::tryReserveSpace(total_size, future_part->parts[0]->getDataPartStorage()); } else { @@ -933,7 +933,7 @@ MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMerge( bool aggressive, const String & partition_id, bool final, - String & out_disable_reason, + PreformattedMessage & out_disable_reason, TableLockHolder & /* table_lock_holder */, std::unique_lock & lock, const MergeTreeTransactionPtr & txn, @@ -951,7 +951,7 @@ MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMerge( CurrentlyMergingPartsTaggerPtr merging_tagger; MergeList::EntryPtr merge_entry; - auto can_merge = [this, &lock](const DataPartPtr & left, const DataPartPtr & right, const MergeTreeTransaction * tx, String & disable_reason) -> bool + auto can_merge = [this, &lock](const DataPartPtr & left, const DataPartPtr & right, const MergeTreeTransaction * tx, PreformattedMessage & disable_reason) -> bool { if (tx) { @@ -960,7 +960,7 @@ MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMerge( if ((left && !left->version.isVisible(tx->getSnapshot(), Tx::EmptyTID)) || (right && !right->version.isVisible(tx->getSnapshot(), Tx::EmptyTID))) { - disable_reason = "Some part is not visible in 
transaction"; + disable_reason = PreformattedMessage::create("Some part is not visible in transaction"); return false; } @@ -968,7 +968,7 @@ MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMerge( if ((left && left->version.isRemovalTIDLocked()) || (right && right->version.isRemovalTIDLocked())) { - disable_reason = "Some part is locked for removal in another cuncurrent transaction"; + disable_reason = PreformattedMessage::create("Some part is locked for removal in another cuncurrent transaction"); return false; } } @@ -979,7 +979,7 @@ MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMerge( { if (currently_merging_mutating_parts.contains(right)) { - disable_reason = "Some part currently in a merging or mutating process"; + disable_reason = PreformattedMessage::create("Some part currently in a merging or mutating process"); return false; } else @@ -988,13 +988,13 @@ MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMerge( if (currently_merging_mutating_parts.contains(left) || currently_merging_mutating_parts.contains(right)) { - disable_reason = "Some part currently in a merging or mutating process"; + disable_reason = PreformattedMessage::create("Some part currently in a merging or mutating process"); return false; } if (getCurrentMutationVersion(left, lock) != getCurrentMutationVersion(right, lock)) { - disable_reason = "Some parts have different mutation version"; + disable_reason = PreformattedMessage::create("Some parts have different mutation version"); return false; } @@ -1004,7 +1004,7 @@ MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMerge( auto max_possible_level = getMaxLevelInBetween(left, right); if (max_possible_level > std::max(left->info.level, right->info.level)) { - disable_reason = fmt::format("There is an outdated part in a gap between two active parts ({}, {}) with merge level {} higher than these active parts have", left->name, right->name, max_possible_level); + disable_reason = PreformattedMessage::create("There is an outdated part in a gap between two active parts ({}, {}) with merge level {} higher than these active parts have", left->name, right->name, max_possible_level); return false; } @@ -1013,11 +1013,11 @@ MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMerge( SelectPartsDecision select_decision = SelectPartsDecision::CANNOT_SELECT; - auto is_background_memory_usage_ok = [](String & disable_reason) -> bool + auto is_background_memory_usage_ok = [](PreformattedMessage & disable_reason) -> bool { if (canEnqueueBackgroundTask()) return true; - disable_reason = fmt::format("Current background tasks memory usage ({}) is more than the limit ({})", + disable_reason = PreformattedMessage::create("Current background tasks memory usage ({}) is more than the limit ({})", formatReadableSizeWithBinarySuffix(background_memory_tracker.get()), formatReadableSizeWithBinarySuffix(background_memory_tracker.getSoftLimit())); return false; @@ -1045,7 +1045,7 @@ MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMerge( out_disable_reason); } else - out_disable_reason = "Current value of max_source_parts_size is zero"; + out_disable_reason = PreformattedMessage::create("Current value of max_source_parts_size is zero"); } } else @@ -1086,7 +1086,7 @@ MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMerge( if (std::cv_status::timeout == currently_processing_in_background_condition.wait_for(lock, timeout)) { - out_disable_reason = fmt::format("Timeout ({} ms) while waiting for already running merges before running 
OPTIMIZE with FINAL", timeout_ms); + out_disable_reason = PreformattedMessage::create("Timeout ({} ms) while waiting for already running merges before running OPTIMIZE with FINAL", timeout_ms); break; } } @@ -1102,9 +1102,9 @@ MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMerge( if (select_decision != SelectPartsDecision::SELECTED) { - if (!out_disable_reason.empty()) - out_disable_reason += ". "; - out_disable_reason += "Cannot select parts for optimization"; + if (!out_disable_reason.text.empty()) + out_disable_reason.text += ". "; + out_disable_reason.text += "Cannot select parts for optimization"; return {}; } @@ -1125,7 +1125,7 @@ bool StorageMergeTree::merge( const Names & deduplicate_by_columns, bool cleanup, const MergeTreeTransactionPtr & txn, - String & out_disable_reason, + PreformattedMessage & out_disable_reason, bool optimize_skip_merged_partitions) { auto table_lock_holder = lockForShare(RWLockImpl::NO_QUERY, getSettings()->lock_acquire_timeout_for_background_operations); @@ -1180,7 +1180,7 @@ bool StorageMergeTree::partIsAssignedToBackgroundOperation(const DataPartPtr & p } MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMutate( - const StorageMetadataPtr & metadata_snapshot, String & /* disable_reason */, TableLockHolder & /* table_lock_holder */, + const StorageMetadataPtr & metadata_snapshot, PreformattedMessage & /* disable_reason */, TableLockHolder & /* table_lock_holder */, std::unique_lock & /*currently_processing_in_background_mutex_lock*/) { if (current_mutations_by_version.empty()) @@ -1396,7 +1396,7 @@ bool StorageMergeTree::scheduleDataProcessingJob(BackgroundJobsAssignee & assign if (merger_mutator.merges_blocker.isCancelled()) return false; - String out_reason; + PreformattedMessage out_reason; merge_entry = selectPartsToMerge(metadata_snapshot, false, {}, false, out_reason, shared_lock, lock, txn); if (!merge_entry && !current_mutations_by_version.empty()) @@ -1559,14 +1559,12 @@ bool StorageMergeTree::optimize( auto txn = local_context->getCurrentTransaction(); - String disable_reason; + PreformattedMessage disable_reason; if (!partition && final) { if (cleanup && this->merging_params.mode != MergingParams::Mode::Replacing) { - constexpr const char * message = "Cannot OPTIMIZE with CLEANUP table: {}"; - disable_reason = "only ReplacingMergeTree can be CLEANUP"; - throw Exception(ErrorCodes::CANNOT_ASSIGN_OPTIMIZE, message, disable_reason); + throw Exception(ErrorCodes::CANNOT_ASSIGN_OPTIMIZE, "Cannot OPTIMIZE with CLEANUP table: only ReplacingMergeTree can be CLEANUP"); } if (cleanup && !getSettings()->allow_experimental_replacing_merge_with_cleanup) @@ -1592,12 +1590,12 @@ bool StorageMergeTree::optimize( local_context->getSettingsRef().optimize_skip_merged_partitions)) { constexpr auto message = "Cannot OPTIMIZE table: {}"; - if (disable_reason.empty()) - disable_reason = "unknown reason"; - LOG_INFO(log, message, disable_reason); + if (disable_reason.text.empty()) + disable_reason = PreformattedMessage::create("unknown reason"); + LOG_INFO(log, message, disable_reason.text); if (local_context->getSettingsRef().optimize_throw_if_noop) - throw Exception(ErrorCodes::CANNOT_ASSIGN_OPTIMIZE, message, disable_reason); + throw Exception(ErrorCodes::CANNOT_ASSIGN_OPTIMIZE, message, disable_reason.text); return false; } } @@ -1620,12 +1618,12 @@ bool StorageMergeTree::optimize( local_context->getSettingsRef().optimize_skip_merged_partitions)) { constexpr auto message = "Cannot OPTIMIZE table: {}"; - if (disable_reason.empty()) - 
disable_reason = "unknown reason"; - LOG_INFO(log, message, disable_reason); + if (disable_reason.text.empty()) + disable_reason = PreformattedMessage::create("unknown reason"); + LOG_INFO(log, message, disable_reason.text); if (local_context->getSettingsRef().optimize_throw_if_noop) - throw Exception(ErrorCodes::CANNOT_ASSIGN_OPTIMIZE, message, disable_reason); + throw Exception(ErrorCodes::CANNOT_ASSIGN_OPTIMIZE, message, disable_reason.text); return false; } } @@ -2024,7 +2022,7 @@ PartitionCommandsResultInfo StorageMergeTree::attachPartition( bool attach_part, ContextPtr local_context) { PartitionCommandsResultInfo results; - PartsTemporaryRename renamed_parts(*this, "detached/"); + PartsTemporaryRename renamed_parts(*this, DETACHED_DIR_NAME); MutableDataPartsVector loaded_parts = tryLoadPartsToAttach(partition, attach_part, local_context, renamed_parts); for (size_t i = 0; i < loaded_parts.size(); ++i) @@ -2319,7 +2317,7 @@ std::optional StorageMergeTree::checkDataNext(DataValidationTasksPt try { auto calculated_checksums = checkDataPart(part, false, noop, /* is_cancelled */[]{ return false; }, /* throw_on_broken_projection */true); - calculated_checksums.checkEqual(part->checksums, true); + calculated_checksums.checkEqual(part->checksums, true, part->name); auto & part_mutable = const_cast(*part); part_mutable.writeChecksums(part->checksums, local_context->getWriteSettings()); diff --git a/src/Storages/StorageMergeTree.h b/src/Storages/StorageMergeTree.h index d864b3e626c5..d6e97c556042 100644 --- a/src/Storages/StorageMergeTree.h +++ b/src/Storages/StorageMergeTree.h @@ -175,7 +175,7 @@ class StorageMergeTree final : public MergeTreeData const Names & deduplicate_by_columns, bool cleanup, const MergeTreeTransactionPtr & txn, - String & out_disable_reason, + PreformattedMessage & out_disable_reason, bool optimize_skip_merged_partitions = false); void renameAndCommitEmptyParts(MutableDataPartsVector & new_parts, Transaction & transaction); @@ -202,7 +202,7 @@ class StorageMergeTree final : public MergeTreeData bool aggressive, const String & partition_id, bool final, - String & disable_reason, + PreformattedMessage & disable_reason, TableLockHolder & table_lock_holder, std::unique_lock & lock, const MergeTreeTransactionPtr & txn, @@ -211,7 +211,7 @@ class StorageMergeTree final : public MergeTreeData MergeMutateSelectedEntryPtr selectPartsToMutate( - const StorageMetadataPtr & metadata_snapshot, String & disable_reason, + const StorageMetadataPtr & metadata_snapshot, PreformattedMessage & disable_reason, TableLockHolder & table_lock_holder, std::unique_lock & currently_processing_in_background_mutex_lock); /// For current mutations queue, returns maximum version of mutation for a part, diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 8ca061db4ecf..15d1b7f40103 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -18,6 +18,7 @@ #include #include #include +#include #include @@ -1524,8 +1525,13 @@ void StorageReplicatedMergeTree::paranoidCheckForCoveredPartsInZooKeeperOnStart( if (!found) { - LOG_WARNING(log, "Part {} exists in ZooKeeper and covered by another part in ZooKeeper ({}), but doesn't exist on any disk. " - "It may cause false-positive 'part is lost forever' messages", part_name, covering_part); + LOG_WARNING( + log, + "Part {} of table {} exists in ZooKeeper and covered by another part in ZooKeeper ({}), but doesn't exist on any disk. 
" + "It may cause false-positive 'part is lost forever' messages", + part_name, + getStorageID().getNameForLogs(), + covering_part); ProfileEvents::increment(ProfileEvents::ReplicatedCoveredPartsInZooKeeperOnStart); chassert(false); } @@ -1831,7 +1837,7 @@ bool StorageReplicatedMergeTree::checkPartChecksumsAndAddCommitOps( "(it may rarely happen on race condition with KILL MUTATION).", part_name, replica); } - replica_part_header.getChecksums().checkEqual(local_part_header.getChecksums(), true); + replica_part_header.getChecksums().checkEqual(local_part_header.getChecksums(), true, part_name); break; } @@ -1977,7 +1983,7 @@ MergeTreeData::MutableDataPartPtr StorageReplicatedMergeTree::attachPartHelperFo for (const DiskPtr & disk : getStoragePolicy()->getDisks()) { - for (const auto it = disk->iterateDirectory(fs::path(relative_data_path) / "detached/"); it->isValid(); it->next()) + for (const auto it = disk->iterateDirectory(fs::path(relative_data_path) / DETACHED_DIR_NAME); it->isValid(); it->next()) { const auto part_info = MergeTreePartInfo::tryParsePartName(it->name(), format_version); @@ -1987,7 +1993,7 @@ MergeTreeData::MutableDataPartPtr StorageReplicatedMergeTree::attachPartHelperFo const auto part_old_name = part_info->getPartNameV1(); const auto volume = std::make_shared("volume_" + part_old_name, disk); - auto part = getDataPartBuilder(entry.new_part_name, volume, fs::path("detached") / part_old_name) + auto part = getDataPartBuilder(entry.new_part_name, volume, fs::path(DETACHED_DIR_NAME) / part_old_name) .withPartFormatFromDisk() .build(); @@ -2351,8 +2357,12 @@ MergeTreeData::MutableDataPartPtr StorageReplicatedMergeTree::executeFetchShared } } -static void paranoidCheckForCoveredPartsInZooKeeper(const ZooKeeperPtr & zookeeper, const String & replica_path, - MergeTreeDataFormatVersion format_version, const String & covering_part_name) +static void paranoidCheckForCoveredPartsInZooKeeper( + const ZooKeeperPtr & zookeeper, + const String & replica_path, + MergeTreeDataFormatVersion format_version, + const String & covering_part_name, + const StorageReplicatedMergeTree & storage) { #ifdef ABORT_ON_LOGICAL_ERROR constexpr bool paranoid_check_for_covered_parts_default = true; @@ -2371,8 +2381,12 @@ static void paranoidCheckForCoveredPartsInZooKeeper(const ZooKeeperPtr & zookeep { auto part_info = MergeTreePartInfo::fromPartName(part_name, format_version); if (drop_range_info.contains(part_info)) - throw Exception(ErrorCodes::LOGICAL_ERROR, - "Part {} remains in ZooKeeper after DROP_RANGE {}", part_name, covering_part_name); + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Part {} from table {} remains in ZooKeeper after DROP_RANGE {}", + part_name, + storage.getStorageID().getNameForLogs(), + covering_part_name); } } @@ -2426,7 +2440,7 @@ void StorageReplicatedMergeTree::executeDropRange(const LogEntry & entry) { String part_dir = part_to_detach->getDataPartStorage().getPartDirectory(); LOG_INFO(log, "Detaching {}", part_dir); - auto holder = getTemporaryPartDirectoryHolder(String(DETACHED_DIR_NAME) + "/" + part_dir); + auto holder = getTemporaryPartDirectoryHolder(fs::path(DETACHED_DIR_NAME) / part_dir); part_to_detach->makeCloneInDetached("", metadata_snapshot, /*disk_transaction*/ {}); } } @@ -2434,7 +2448,7 @@ void StorageReplicatedMergeTree::executeDropRange(const LogEntry & entry) /// Forcibly remove parts from ZooKeeper removePartsFromZooKeeperWithRetries(parts_to_remove); - paranoidCheckForCoveredPartsInZooKeeper(getZooKeeper(), replica_path, format_version, 
entry.new_part_name); + paranoidCheckForCoveredPartsInZooKeeper(getZooKeeper(), replica_path, format_version, entry.new_part_name, *this); if (entry.detach) LOG_DEBUG(log, "Detached {} parts inside {}.", parts_to_remove.size(), entry.new_part_name); @@ -2572,7 +2586,8 @@ bool StorageReplicatedMergeTree::executeReplaceRange(LogEntry & entry) LOG_INFO(log, "All parts from REPLACE PARTITION command have been already attached"); removePartsFromZooKeeperWithRetries(parts_to_remove); if (replace) - paranoidCheckForCoveredPartsInZooKeeper(getZooKeeper(), replica_path, format_version, entry_replace.drop_range_part_name); + paranoidCheckForCoveredPartsInZooKeeper( + getZooKeeper(), replica_path, format_version, entry_replace.drop_range_part_name, *this); return true; } @@ -2893,7 +2908,7 @@ bool StorageReplicatedMergeTree::executeReplaceRange(LogEntry & entry) removePartsFromZooKeeperWithRetries(parts_to_remove); if (replace) - paranoidCheckForCoveredPartsInZooKeeper(getZooKeeper(), replica_path, format_version, entry_replace.drop_range_part_name); + paranoidCheckForCoveredPartsInZooKeeper(getZooKeeper(), replica_path, format_version, entry_replace.drop_range_part_name, *this); res_parts.clear(); parts_to_remove.clear(); cleanup_thread.wakeup(); @@ -2952,7 +2967,7 @@ void StorageReplicatedMergeTree::executeClonePartFromShard(const LogEntry & entr part = get_part(); // The fetched part is valuable and should not be cleaned like a temp part. part->is_temp = false; - part->renameTo("detached/" + entry.new_part_name, true); + part->renameTo(fs::path(DETACHED_DIR_NAME) / entry.new_part_name, true); LOG_INFO(log, "Cloned part {} to detached directory", part->name); } @@ -3115,6 +3130,10 @@ void StorageReplicatedMergeTree::cloneReplica(const String & source_replica, Coo Strings active_parts = get_part_set.getParts(); /// Remove local parts if source replica does not have them, because such parts will never be fetched by other replicas. + static const auto test_delay = getContext()->getConfigRef().getUInt64("test.clone_replica.delay_before_removing_local_parts_ms", 0); + if (test_delay) + randomDelayForMaxMilliseconds(test_delay, log.load(), "cloneReplica: Before removing local parts"); + Strings local_parts_in_zk = zookeeper->getChildren(fs::path(replica_path) / "parts"); Strings parts_to_remove_from_zk; @@ -3813,7 +3832,7 @@ void StorageReplicatedMergeTree::mergeSelectingTask() merge_pred.emplace(queue.getMergePredicate(zookeeper, partitions_to_merge_in)); } - String out_reason; + PreformattedMessage out_reason; if (can_assign_merge && merger_mutator.selectPartsToMerge(future_merged_part, false, max_source_parts_size_for_merge, *merge_pred, merge_with_ttl_allowed, NO_TRANSACTION_PTR, out_reason, &partitions_to_merge_in) == SelectPartsDecision::SELECTED) @@ -4968,7 +4987,7 @@ bool StorageReplicatedMergeTree::fetchPart( { // The fetched part is valuable and should not be cleaned like a temp part. 
part->is_temp = false; - part->renameTo(fs::path("detached") / part_name, true); + part->renameTo(fs::path(DETACHED_DIR_NAME) / part_name, true); } } catch (const Exception & e) @@ -5656,7 +5675,7 @@ std::optional StorageReplicatedMergeTree::distributedWriteFromClu { auto connection = std::make_shared( node.host_name, node.port, query_context->getGlobalContext()->getCurrentDatabase(), - node.user, node.password, ssh::SSHKey(), node.quota_key, node.cluster, node.cluster_secret, + node.user, node.password, SSHKey(), node.quota_key, node.cluster, node.cluster_secret, "ParallelInsertSelectInititiator", node.compression, node.secure @@ -5795,7 +5814,7 @@ bool StorageReplicatedMergeTree::optimize( future_merged_part->uuid = UUIDHelpers::generateV4(); constexpr const char * unknown_disable_reason = "unknown reason"; - String disable_reason = unknown_disable_reason; + PreformattedMessage disable_reason = PreformattedMessage::create(unknown_disable_reason); SelectPartsDecision select_decision = SelectPartsDecision::CANNOT_SELECT; if (partition_id.empty()) @@ -5818,10 +5837,10 @@ bool StorageReplicatedMergeTree::optimize( if (select_decision != SelectPartsDecision::SELECTED) { constexpr const char * message_fmt = "Cannot select parts for optimization: {}"; - assert(disable_reason != unknown_disable_reason); + assert(disable_reason.text != unknown_disable_reason); if (!partition_id.empty()) - disable_reason += fmt::format(" (in partition {})", partition_id); - return handle_noop(message_fmt, disable_reason); + disable_reason.text += fmt::format(" (in partition {})", partition_id); + return handle_noop(message_fmt, disable_reason.text); } ReplicatedMergeTreeLogEntryData merge_entry; @@ -6528,13 +6547,13 @@ PartitionCommandsResultInfo StorageReplicatedMergeTree::attachPartition( assertNotReadonly(); PartitionCommandsResultInfo results; - PartsTemporaryRename renamed_parts(*this, "detached/"); + PartsTemporaryRename renamed_parts(*this, DETACHED_DIR_NAME); MutableDataPartsVector loaded_parts = tryLoadPartsToAttach(partition, attach_part, query_context, renamed_parts); /// TODO Allow to use quorum here. ReplicatedMergeTreeSink output(*this, metadata_snapshot, /* quorum */ 0, /* quorum_timeout_ms */ 0, /* max_parts_per_block */ 0, /* quorum_parallel */ false, query_context->getSettingsRef().insert_deduplicate, - /* majority_quorum */ false, query_context, /*is_attach*/true); + /* majority_quorum */ false, query_context, /* is_attach */ true, /* allow_attach_while_readonly */ true); for (size_t i = 0; i < loaded_parts.size(); ++i) { @@ -7003,7 +7022,7 @@ void StorageReplicatedMergeTree::getStatus(ReplicatedTableStatus & res, bool wit } res.log_pointer = log_pointer_str.empty() ? 
0 : parse(log_pointer_str); - res.total_replicas = all_replicas.size(); + res.total_replicas = UInt32(all_replicas.size()); if (get_result[1].error == Coordination::Error::ZNONODE) res.lost_part_count = 0; else @@ -8008,6 +8027,20 @@ void StorageReplicatedMergeTree::replacePartitionFrom( assert(replace == !LogEntry::ReplaceRangeEntry::isMovePartitionOrAttachFrom(drop_range)); + scope_guard intent_guard; + if (replace) + { + queue.addDropReplaceIntent(drop_range); + intent_guard = scope_guard{[this, my_drop_range = drop_range]() { queue.removeDropReplaceIntent(my_drop_range); }}; + + getContext()->getMergeList().cancelInPartition(getStorageID(), drop_range.partition_id, drop_range.max_block); + queue.waitForCurrentlyExecutingOpsInRange(drop_range); + { + auto pause_checking_parts = part_check_thread.pausePartsCheck(); + part_check_thread.cancelRemovedPartsCheck(drop_range); + } + } + String drop_range_fake_part_name = getPartNamePossiblyFake(format_version, drop_range); std::set replaced_parts; @@ -8176,8 +8209,11 @@ void StorageReplicatedMergeTree::replacePartitionFrom( lock2.reset(); lock1.reset(); - /// We need to pull the DROP_RANGE before cleaning the replaced parts (otherwise CHeckThread may decide that parts are lost) + /// We need to pull the REPLACE_RANGE before cleaning the replaced parts (otherwise CHeckThread may decide that parts are lost) queue.pullLogsToQueue(getZooKeeperAndAssertNotReadonly(), {}, ReplicatedMergeTreeQueue::SYNC); + // No need to block operations further, especially that in case we have to wait for mutation to finish, the intent would block + // the execution of REPLACE_RANGE + intent_guard.reset(); parts_holder.clear(); cleanup_thread.wakeup(); @@ -8229,11 +8265,23 @@ void StorageReplicatedMergeTree::movePartitionToTable(const StoragePtr & dest_ta Coordination::Stat alter_partition_version_stat; zookeeper->get(alter_partition_version_path, &alter_partition_version_stat); - MergeTreePartInfo drop_range; std::optional delimiting_block_lock; + MergeTreePartInfo drop_range; getFakePartCoveringAllPartsInPartition(partition_id, drop_range, delimiting_block_lock, true); String drop_range_fake_part_name = getPartNamePossiblyFake(format_version, drop_range); + queue.addDropReplaceIntent(drop_range); + // Let's copy drop_range to make sure it doesn't get modified, otherwise we might run into issue on removal + scope_guard intent_guard{[this, my_drop_range = drop_range]() { queue.removeDropReplaceIntent(my_drop_range); }}; + + getContext()->getMergeList().cancelInPartition(getStorageID(), drop_range.partition_id, drop_range.max_block); + + queue.waitForCurrentlyExecutingOpsInRange(drop_range); + { + auto pause_checking_parts = part_check_thread.pausePartsCheck(); + part_check_thread.cancelRemovedPartsCheck(drop_range); + } + DataPartPtr covering_part; DataPartsVector src_all_parts; { @@ -8438,6 +8486,9 @@ void StorageReplicatedMergeTree::movePartitionToTable(const StoragePtr & dest_ta /// We need to pull the DROP_RANGE before cleaning the replaced parts (otherwise CHeckThread may decide that parts are lost) queue.pullLogsToQueue(getZooKeeperAndAssertNotReadonly(), {}, ReplicatedMergeTreeQueue::SYNC); + // No need to block operations further, especially that in case we have to wait for mutation to finish, the intent would block + // the execution of DROP_RANGE + intent_guard.reset(); parts_holder.clear(); cleanup_thread.wakeup(); @@ -8490,9 +8541,9 @@ void StorageReplicatedMergeTree::movePartitionToShard( } /// canMergeSinglePart is overlapping with dropPart, let's try 
to use the same code. - String out_reason; + PreformattedMessage out_reason; if (!merge_pred.canMergeSinglePart(part, out_reason)) - throw Exception(ErrorCodes::PART_IS_TEMPORARILY_LOCKED, "Part is busy, reason: {}", out_reason); + throw Exception(ErrorCodes::PART_IS_TEMPORARILY_LOCKED, "Part is busy, reason: {}", out_reason.text); } { @@ -8750,18 +8801,18 @@ bool StorageReplicatedMergeTree::dropPartImpl( /// There isn't a lot we can do otherwise. Can't cancel merges because it is possible that a replica already /// finished the merge. - String out_reason; + PreformattedMessage out_reason; if (!merge_pred.canMergeSinglePart(part, out_reason)) { if (throw_if_noop) - throw Exception::createDeprecated(out_reason, ErrorCodes::PART_IS_TEMPORARILY_LOCKED); + throw Exception(out_reason, ErrorCodes::PART_IS_TEMPORARILY_LOCKED); return false; } if (merge_pred.partParticipatesInReplaceRange(part, out_reason)) { if (throw_if_noop) - throw Exception::createDeprecated(out_reason, ErrorCodes::PART_IS_TEMPORARILY_LOCKED); + throw Exception(out_reason, ErrorCodes::PART_IS_TEMPORARILY_LOCKED); return false; } @@ -9967,7 +10018,7 @@ bool StorageReplicatedMergeTree::checkIfDetachedPartExists(const String & part_n { fs::directory_iterator dir_end; for (const std::string & path : getDataPaths()) - for (fs::directory_iterator dir_it{fs::path(path) / "detached/"}; dir_it != dir_end; ++dir_it) + for (fs::directory_iterator dir_it{fs::path(path) / DETACHED_DIR_NAME}; dir_it != dir_end; ++dir_it) if (dir_it->path().filename().string() == part_name) return true; return false; @@ -9980,7 +10031,7 @@ bool StorageReplicatedMergeTree::checkIfDetachedPartitionExists(const String & p for (const std::string & path : getDataPaths()) { - for (fs::directory_iterator dir_it{fs::path(path) / "detached/"}; dir_it != dir_end; ++dir_it) + for (fs::directory_iterator dir_it{fs::path(path) / DETACHED_DIR_NAME}; dir_it != dir_end; ++dir_it) { const String file_name = dir_it->path().filename().string(); auto part_info = MergeTreePartInfo::tryParsePartName(file_name, format_version); @@ -10512,7 +10563,7 @@ void StorageReplicatedMergeTree::restoreDataFromBackup(RestorerFromBackup & rest } auto backup = restorer.getBackup(); if (!empty && backup->hasFiles(data_path_in_backup)) - restorer.throwTableIsNotEmpty(getStorageID()); + RestorerFromBackup::throwTableIsNotEmpty(getStorageID()); } restorePartsFromBackup(restorer, data_path_in_backup, partitions); @@ -10521,7 +10572,11 @@ void StorageReplicatedMergeTree::restoreDataFromBackup(RestorerFromBackup & rest void StorageReplicatedMergeTree::attachRestoredParts(MutableDataPartsVector && parts) { auto metadata_snapshot = getInMemoryMetadataPtr(); - auto sink = std::make_shared(*this, metadata_snapshot, 0, 0, 0, false, false, false, getContext(), /*is_attach*/true); + + auto sink = std::make_shared( + *this, metadata_snapshot, /* quorum */ 0, /* quorum_timeout_ms */ 0, /* max_parts_per_block */ 0, /* quorum_parallel */ false, + /* deduplicate */ false, /* majority_quorum */ false, getContext(), /* is_attach */ true, /* allow_attach_while_readonly */ false); + for (auto part : parts) sink->writeExistingPart(part); } diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index 2d3aef312bf9..6cda0fca60bb 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -207,7 +207,7 @@ class StorageS3Source::DisclosedGlobIterator::Impl : WithContext , list_objects_scheduler(threadPoolCallbackRunner(list_objects_pool, "ListObjects")) , 
file_progress_callback(file_progress_callback_) { - if (globbed_uri.bucket.find_first_of("*?{") != globbed_uri.bucket.npos) + if (globbed_uri.bucket.find_first_of("*?{") != std::string::npos) throw Exception(ErrorCodes::UNEXPECTED_EXPRESSION, "Expression can not have wildcards inside bucket name"); const String key_prefix = globbed_uri.key.substr(0, globbed_uri.key.find_first_of("*?{")); diff --git a/src/Storages/StorageS3Settings.cpp b/src/Storages/StorageS3Settings.cpp index 5887018268b1..04634bcf1b3f 100644 --- a/src/Storages/StorageS3Settings.cpp +++ b/src/Storages/StorageS3Settings.cpp @@ -292,7 +292,7 @@ void StorageS3Settings::loadFromConfig(const String & config_elem, const Poco::U } } -S3Settings StorageS3Settings::getSettings(const String & endpoint, const String & user) const +S3Settings StorageS3Settings::getSettings(const String & endpoint, const String & user, bool ignore_user) const { std::lock_guard lock(mutex); auto next_prefix_setting = s3_settings.upper_bound(endpoint); @@ -302,7 +302,7 @@ S3Settings StorageS3Settings::getSettings(const String & endpoint, const String { std::advance(possible_prefix_setting, -1); const auto & [endpoint_prefix, settings] = *possible_prefix_setting; - if (endpoint.starts_with(endpoint_prefix) && settings.auth_settings.canBeUsedByUser(user)) + if (endpoint.starts_with(endpoint_prefix) && (ignore_user || settings.auth_settings.canBeUsedByUser(user))) return possible_prefix_setting->second; } diff --git a/src/Storages/StorageS3Settings.h b/src/Storages/StorageS3Settings.h index 21b6264717eb..0f972db02b11 100644 --- a/src/Storages/StorageS3Settings.h +++ b/src/Storages/StorageS3Settings.h @@ -112,7 +112,7 @@ class StorageS3Settings public: void loadFromConfig(const String & config_elem, const Poco::Util::AbstractConfiguration & config, const Settings & settings); - S3Settings getSettings(const String & endpoint, const String & user) const; + S3Settings getSettings(const String & endpoint, const String & user, bool ignore_user = false) const; private: mutable std::mutex mutex; diff --git a/src/Storages/System/StorageSystemDataSkippingIndices.cpp b/src/Storages/System/StorageSystemDataSkippingIndices.cpp index ff782647c791..2afc03d0e5ea 100644 --- a/src/Storages/System/StorageSystemDataSkippingIndices.cpp +++ b/src/Storages/System/StorageSystemDataSkippingIndices.cpp @@ -131,8 +131,10 @@ class DataSkippingIndicesSource : public ISource // 'type_full' column if (column_mask[src_index++]) { - if (auto * expression = index.definition_ast->as(); expression && expression->type) - res_columns[res_index++]->insert(queryToString(*expression->type)); + auto * expression = index.definition_ast->as(); + auto index_type = expression ? 
expression->getType() : nullptr; + if (index_type) + res_columns[res_index++]->insert(queryToString(*index_type)); else res_columns[res_index++]->insertDefault(); } diff --git a/src/Storages/System/StorageSystemFilesystemCacheSettings.cpp b/src/Storages/System/StorageSystemFilesystemCacheSettings.cpp new file mode 100644 index 000000000000..8915032baf70 --- /dev/null +++ b/src/Storages/System/StorageSystemFilesystemCacheSettings.cpp @@ -0,0 +1,72 @@ +#include "StorageSystemFilesystemCacheSettings.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +ColumnsDescription StorageSystemFilesystemCacheSettings::getColumnsDescription() +{ + return ColumnsDescription + { + {"cache_name", std::make_shared(), "Name of the cache object"}, + {"path", std::make_shared(), "Cache directory"}, + {"max_size", std::make_shared(), "Cache size limit by the number of bytes"}, + {"max_elements", std::make_shared(), "Cache size limit by the number of elements"}, + {"current_size", std::make_shared(), "Current cache size by the number of bytes"}, + {"current_elements", std::make_shared(), "Current cache size by the number of elements"}, + {"max_file_segment_size", std::make_shared(), "Maximum allowed file segment size"}, + {"boundary_alignment", std::make_shared(), "Boundary alignment of file segments"}, + {"cache_on_write_operations", std::make_shared(), "Write-through cache enablement setting"}, + {"cache_hits_threshold", std::make_shared(), "Cache hits threshold enablement setting"}, + {"background_download_threads", std::make_shared(), "Number of background download threads"}, + {"background_download_queue_size_limit", std::make_shared(), "Queue size limit for background download"}, + {"load_metadata_threads", std::make_shared(), "Number of load metadata threads"}, + {"enable_bypass_cache_threshold", std::make_shared(), "Bypass cache threshold limit enablement setting"}, + }; +} + +StorageSystemFilesystemCacheSettings::StorageSystemFilesystemCacheSettings(const StorageID & table_id_) + : IStorageSystemOneBlock(table_id_, getColumnsDescription()) +{ +} + +void StorageSystemFilesystemCacheSettings::fillData( + MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const +{ + context->checkAccess(AccessType::SHOW_FILESYSTEM_CACHES); + + auto caches = FileCacheFactory::instance().getAll(); + + for (const auto & [cache_name, cache_data] : caches) + { + const auto & settings = cache_data->getSettings(); + const auto & cache = cache_data->cache; + + size_t i = 0; + res_columns[i++]->insert(cache_name); + res_columns[i++]->insert(settings.base_path); + res_columns[i++]->insert(settings.max_size); + res_columns[i++]->insert(settings.max_elements); + res_columns[i++]->insert(cache->getUsedCacheSize()); + res_columns[i++]->insert(cache->getFileSegmentsNum()); + res_columns[i++]->insert(settings.max_file_segment_size); + res_columns[i++]->insert(settings.boundary_alignment); + res_columns[i++]->insert(settings.cache_on_write_operations); + res_columns[i++]->insert(settings.cache_hits_threshold); + res_columns[i++]->insert(settings.background_download_threads); + res_columns[i++]->insert(settings.background_download_queue_size_limit); + res_columns[i++]->insert(settings.load_metadata_threads); + res_columns[i++]->insert(settings.enable_bypass_cache_with_threshold); + } +} + +} diff --git a/src/Storages/System/StorageSystemFilesystemCacheSettings.h b/src/Storages/System/StorageSystemFilesystemCacheSettings.h new file mode 
100644 index 000000000000..59a123c32c12 --- /dev/null +++ b/src/Storages/System/StorageSystemFilesystemCacheSettings.h @@ -0,0 +1,22 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class StorageSystemFilesystemCacheSettings final : public IStorageSystemOneBlock +{ +public: + explicit StorageSystemFilesystemCacheSettings(const StorageID & table_id_); + + std::string getName() const override { return "SystemFilesystemCacheSettings"; } + + static ColumnsDescription getColumnsDescription(); + +protected: + void fillData(MutableColumns & res_columns, ContextPtr, const ActionsDAG::Node *, std::vector) const override; +}; + +} diff --git a/src/Storages/System/StorageSystemFunctions.cpp b/src/Storages/System/StorageSystemFunctions.cpp index 967132e4d4a7..c5c912948011 100644 --- a/src/Storages/System/StorageSystemFunctions.cpp +++ b/src/Storages/System/StorageSystemFunctions.cpp @@ -179,7 +179,7 @@ void StorageSystemFunctions::fillData(MutableColumns & res_columns, ContextPtr c } const auto & user_defined_executable_functions_factory = UserDefinedExecutableFunctionFactory::instance(); - const auto & user_defined_executable_functions_names = user_defined_executable_functions_factory.getRegisteredNames(context); + const auto & user_defined_executable_functions_names = user_defined_executable_functions_factory.getRegisteredNames(context); /// NOLINT(readability-static-accessed-through-instance) for (const auto & function_name : user_defined_executable_functions_names) { fillRow(res_columns, function_name, 0, {0}, "", FunctionOrigin::EXECUTABLE_USER_DEFINED, user_defined_executable_functions_factory); diff --git a/src/Storages/System/StorageSystemGraphite.cpp b/src/Storages/System/StorageSystemGraphite.cpp index d8b760e1302d..ef13c3c24da0 100644 --- a/src/Storages/System/StorageSystemGraphite.cpp +++ b/src/Storages/System/StorageSystemGraphite.cpp @@ -1,7 +1,8 @@ -#include -#include +#include #include #include +#include +#include namespace DB diff --git a/src/Storages/System/StorageSystemMySQLBinlogs.cpp b/src/Storages/System/StorageSystemMySQLBinlogs.cpp index 32648d22ee8c..846fe3547d00 100644 --- a/src/Storages/System/StorageSystemMySQLBinlogs.cpp +++ b/src/Storages/System/StorageSystemMySQLBinlogs.cpp @@ -1,11 +1,11 @@ -#include - -#include -#include #include -#include +#include #include +#include +#include #include +#include +#include namespace DB diff --git a/src/Storages/System/StorageSystemRemoteDataPaths.cpp b/src/Storages/System/StorageSystemRemoteDataPaths.cpp index 0ca76430ceb4..f54fa220e830 100644 --- a/src/Storages/System/StorageSystemRemoteDataPaths.cpp +++ b/src/Storages/System/StorageSystemRemoteDataPaths.cpp @@ -1,14 +1,15 @@ #include "StorageSystemRemoteDataPaths.h" -#include +#include +#include +#include #include +#include #include +#include #include #include -#include -#include -#include #include -#include +#include namespace fs = std::filesystem; diff --git a/src/Storages/System/StorageSystemTables.cpp b/src/Storages/System/StorageSystemTables.cpp index edfc7213dcd6..9bd7ff945adb 100644 --- a/src/Storages/System/StorageSystemTables.cpp +++ b/src/Storages/System/StorageSystemTables.cpp @@ -25,6 +25,8 @@ #include #include +#include + namespace DB { diff --git a/src/Storages/System/attachSystemTables.cpp b/src/Storages/System/attachSystemTables.cpp index cd8be60e342c..6ff86b26ca9e 100644 --- a/src/Storages/System/attachSystemTables.cpp +++ b/src/Storages/System/attachSystemTables.cpp @@ -78,6 +78,7 @@ #include #include #include +#include #include #include #include @@ 
-213,6 +214,7 @@ void attachSystemTablesServer(ContextPtr context, IDatabase & system_database, b attach(context, system_database, "part_moves_between_shards", "Contains information about parts which are currently in a process of moving between shards and their progress."); attach(context, system_database, "asynchronous_inserts", "Contains information about pending asynchronous inserts in queue in server's memory."); attachNoDescription(context, system_database, "filesystem_cache", "Contains information about all entries inside filesystem cache for remote objects."); + attachNoDescription(context, system_database, "filesystem_cache_settings", "Contains information about all filesystem cache settings"); attachNoDescription(context, system_database, "query_cache", "Contains information about all entries inside query cache in server's memory."); attachNoDescription(context, system_database, "remote_data_paths", "Contains a mapping from a filename on local filesystem to a blob name inside object storage."); attach(context, system_database, "certificates", "Contains information about available certificates and their sources."); diff --git a/src/Storages/TTLDescription.cpp b/src/Storages/TTLDescription.cpp index 3d1ce76dff17..6e7ea32ee59a 100644 --- a/src/Storages/TTLDescription.cpp +++ b/src/Storages/TTLDescription.cpp @@ -426,7 +426,7 @@ TTLTableDescription TTLTableDescription::parse(const String & str, const Columns ParserTTLExpressionList parser; ASTPtr ast = parseQuery(parser, str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); - FunctionNameNormalizer().visit(ast.get()); + FunctionNameNormalizer::visit(ast.get()); return getTTLForTableFromAST(ast, columns, context, primary_key, context->getSettingsRef().allow_suspicious_ttl_expressions); } diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index 0b822b9aab3d..04c26053dbab 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -439,6 +439,7 @@ bool StorageWindowView::optimize( bool cleanup, ContextPtr local_context) { + throwIfWindowViewIsDisabled(local_context); auto storage_ptr = getInnerTable(); auto metadata_snapshot = storage_ptr->getInMemoryMetadataPtr(); return getInnerTable()->optimize(query, metadata_snapshot, partition, final, deduplicate, deduplicate_by_columns, cleanup, local_context); @@ -449,6 +450,7 @@ void StorageWindowView::alter( ContextPtr local_context, AlterLockHolder &) { + throwIfWindowViewIsDisabled(local_context); auto table_id = getStorageID(); StorageInMemoryMetadata new_metadata = getInMemoryMetadata(); StorageInMemoryMetadata old_metadata = getInMemoryMetadata(); @@ -508,8 +510,9 @@ void StorageWindowView::alter( startup(); } -void StorageWindowView::checkAlterIsPossible(const AlterCommands & commands, ContextPtr /*local_context*/) const +void StorageWindowView::checkAlterIsPossible(const AlterCommands & commands, ContextPtr local_context) const { + throwIfWindowViewIsDisabled(local_context); for (const auto & command : commands) { if (!command.isCommentAlter() && command.type != AlterCommand::MODIFY_QUERY) @@ -519,6 +522,7 @@ void StorageWindowView::checkAlterIsPossible(const AlterCommands & commands, Con std::pair StorageWindowView::getNewBlocks(UInt32 watermark) { + throwIfWindowViewIsDisabled(); UInt32 w_start = addTime(watermark, window_kind, -window_num_units, *time_zone); auto inner_table = getInnerTable(); @@ -654,6 +658,7 @@ std::pair 
StorageWindowView::getNewBlocks(UInt32 watermark) inline void StorageWindowView::fire(UInt32 watermark) { + throwIfWindowViewIsDisabled(); LOG_TRACE(log, "Watch streams number: {}, target table: {}", watch_streams.size(), target_table_id.empty() ? "None" : target_table_id.getNameForLogs()); @@ -722,6 +727,7 @@ inline void StorageWindowView::fire(UInt32 watermark) ASTPtr StorageWindowView::getSourceTableSelectQuery() { + throwIfWindowViewIsDisabled(); auto query = select_query->clone(); auto & modified_select = query->as(); @@ -947,6 +953,7 @@ UInt32 StorageWindowView::getWindowUpperBound(UInt32 time_sec) void StorageWindowView::addFireSignal(std::set & signals) { + throwIfWindowViewIsDisabled(); std::lock_guard lock(fire_signal_mutex); for (const auto & signal : signals) fire_signal.push_back(signal); @@ -962,6 +969,7 @@ void StorageWindowView::updateMaxTimestamp(UInt32 timestamp) void StorageWindowView::updateMaxWatermark(UInt32 watermark) { + throwIfWindowViewIsDisabled(); if (is_proctime) { max_watermark = watermark; @@ -1014,6 +1022,7 @@ void StorageWindowView::cleanup() void StorageWindowView::threadFuncCleanup() { + throwIfWindowViewIsDisabled(); if (shutdown_called) return; @@ -1033,6 +1042,7 @@ void StorageWindowView::threadFuncCleanup() void StorageWindowView::threadFuncFireProc() { + throwIfWindowViewIsDisabled(); if (shutdown_called) return; @@ -1069,6 +1079,7 @@ void StorageWindowView::threadFuncFireProc() void StorageWindowView::threadFuncFireEvent() { + throwIfWindowViewIsDisabled(); std::lock_guard lock(fire_signal_mutex); LOG_TRACE(log, "Fire events: {}", fire_signal.size()); @@ -1100,6 +1111,7 @@ void StorageWindowView::read( const size_t max_block_size, const size_t num_streams) { + throwIfWindowViewIsDisabled(local_context); if (target_table_id.empty()) return; @@ -1140,6 +1152,7 @@ Pipe StorageWindowView::watch( size_t /*max_block_size*/, const size_t /*num_streams*/) { + throwIfWindowViewIsDisabled(local_context); ASTWatchQuery & query = typeid_cast(*query_info.query); bool has_limit = false; @@ -1178,8 +1191,10 @@ StorageWindowView::StorageWindowView( , clean_interval_usec(context_->getSettingsRef().window_view_clean_interval.totalMicroseconds()) { if (context_->getSettingsRef().allow_experimental_analyzer) - throw Exception(ErrorCodes::UNSUPPORTED_METHOD, - "Experimental WINDOW VIEW feature is not supported with new infrastructure for query analysis (the setting 'allow_experimental_analyzer')"); + disabled_due_to_analyzer = true; + + if (mode <= LoadingStrictnessLevel::CREATE) + throwIfWindowViewIsDisabled(); if (!query.select) throw Exception(ErrorCodes::INCORRECT_QUERY, "SELECT query is not specified for {}", getName()); @@ -1243,6 +1258,9 @@ StorageWindowView::StorageWindowView( } } + if (disabled_due_to_analyzer) + return; + clean_cache_task = getContext()->getSchedulePool().createTask(getStorageID().getFullTableName(), [this] { threadFuncCleanup(); }); fire_task = getContext()->getSchedulePool().createTask( getStorageID().getFullTableName(), [this] { is_proctime ? 
threadFuncFireProc() : threadFuncFireEvent(); }); @@ -1400,6 +1418,7 @@ void StorageWindowView::eventTimeParser(const ASTCreateQuery & query) void StorageWindowView::writeIntoWindowView( StorageWindowView & window_view, const Block & block, ContextPtr local_context) { + window_view.throwIfWindowViewIsDisabled(local_context); while (window_view.modifying_query) std::this_thread::sleep_for(std::chrono::milliseconds(100)); @@ -1589,6 +1608,9 @@ void StorageWindowView::writeIntoWindowView( void StorageWindowView::startup() { + if (disabled_due_to_analyzer) + return; + DatabaseCatalog::instance().addViewDependency(select_table_id, getStorageID()); fire_task->activate(); @@ -1602,6 +1624,8 @@ void StorageWindowView::startup() void StorageWindowView::shutdown(bool) { shutdown_called = true; + if (disabled_due_to_analyzer) + return; fire_condition.notify_all(); @@ -1657,6 +1681,7 @@ Block StorageWindowView::getInputHeader() const const Block & StorageWindowView::getOutputHeader() const { + throwIfWindowViewIsDisabled(); std::lock_guard lock(sample_block_lock); if (!output_header) { @@ -1681,6 +1706,13 @@ StoragePtr StorageWindowView::getTargetTable() const return DatabaseCatalog::instance().getTable(target_table_id, getContext()); } +void StorageWindowView::throwIfWindowViewIsDisabled(ContextPtr local_context) const +{ + if (disabled_due_to_analyzer || (local_context && local_context->getSettingsRef().allow_experimental_analyzer)) + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Experimental WINDOW VIEW feature is not supported " + "in the current infrastructure for query analysis (the setting 'allow_experimental_analyzer')"); +} + void registerStorageWindowView(StorageFactory & factory) { factory.registerStorage("WindowView", [](const StorageFactory::Arguments & args) diff --git a/src/Storages/WindowView/StorageWindowView.h b/src/Storages/WindowView/StorageWindowView.h index 0b7cd54e3a7f..f79867df424a 100644 --- a/src/Storages/WindowView/StorageWindowView.h +++ b/src/Storages/WindowView/StorageWindowView.h @@ -271,5 +271,9 @@ class StorageWindowView final : public IStorage, WithContext StoragePtr getSourceTable() const; StoragePtr getInnerTable() const; StoragePtr getTargetTable() const; + + bool disabled_due_to_analyzer = false; + + void throwIfWindowViewIsDisabled(ContextPtr local_context = nullptr) const; }; } diff --git a/src/Storages/buildQueryTreeForShard.cpp b/src/Storages/buildQueryTreeForShard.cpp index 5284f52a7e42..5bbdbe487b00 100644 --- a/src/Storages/buildQueryTreeForShard.cpp +++ b/src/Storages/buildQueryTreeForShard.cpp @@ -361,10 +361,14 @@ QueryTreeNodePtr buildQueryTreeForShard(const PlannerContextPtr & planner_contex { auto & in_function_subquery_node = in_function_node->getArguments().getNodes().at(1); auto in_function_node_type = in_function_subquery_node->getNodeType(); - if (in_function_node_type != QueryTreeNodeType::QUERY && in_function_node_type != QueryTreeNodeType::UNION) + if (in_function_node_type != QueryTreeNodeType::QUERY && in_function_node_type != QueryTreeNodeType::UNION && in_function_node_type != QueryTreeNodeType::TABLE) continue; - auto temporary_table_expression_node = executeSubqueryNode(in_function_subquery_node, + auto subquery_to_execute = in_function_subquery_node; + if (subquery_to_execute->as()) + subquery_to_execute = buildSubqueryToReadColumnsFromTableExpression(std::move(subquery_to_execute), planner_context->getQueryContext()); + + auto temporary_table_expression_node = executeSubqueryNode(subquery_to_execute, 
planner_context->getMutableQueryContext(), global_in_or_join_node.subquery_depth); diff --git a/src/Storages/checkAndGetLiteralArgument.cpp b/src/Storages/checkAndGetLiteralArgument.cpp index 5baf47fe91a9..39dc27cd5e81 100644 --- a/src/Storages/checkAndGetLiteralArgument.cpp +++ b/src/Storages/checkAndGetLiteralArgument.cpp @@ -1,5 +1,6 @@ #include #include +#include namespace DB { @@ -12,8 +13,14 @@ namespace ErrorCodes template T checkAndGetLiteralArgument(const ASTPtr & arg, const String & arg_name) { - if (arg && arg->as()) - return checkAndGetLiteralArgument(*arg->as(), arg_name); + if (arg) + { + if (const auto * func = arg->as(); func && func->name == "_CAST") + return checkAndGetLiteralArgument(func->arguments->children.at(0), arg_name); + + if (arg->as()) + return checkAndGetLiteralArgument(*arg->as(), arg_name); + } throw Exception( ErrorCodes::BAD_ARGUMENTS, diff --git a/src/TableFunctions/TableFunctionExplain.cpp b/src/TableFunctions/TableFunctionExplain.cpp index 8607597fa678..df2835dd6307 100644 --- a/src/TableFunctions/TableFunctionExplain.cpp +++ b/src/TableFunctions/TableFunctionExplain.cpp @@ -136,7 +136,7 @@ void TableFunctionExplain::parseArguments(const ASTPtr & ast_function, ContextPt ColumnsDescription TableFunctionExplain::getActualTableStructure(ContextPtr context, bool /*is_insert_query*/) const { - Block sample_block = getInterpreter(context).getSampleBlock(query->as()->getKind()); + Block sample_block = getInterpreter(context).getSampleBlock(query->as()->getKind()); /// NOLINT(readability-static-accessed-through-instance) ColumnsDescription columns_description; for (const auto & column : sample_block.getColumnsWithTypeAndName()) columns_description.add(ColumnDescription(column.name, column.type)); diff --git a/src/TableFunctions/TableFunctionGenerateRandom.cpp b/src/TableFunctions/TableFunctionGenerateRandom.cpp index af2845949870..157725620794 100644 --- a/src/TableFunctions/TableFunctionGenerateRandom.cpp +++ b/src/TableFunctions/TableFunctionGenerateRandom.cpp @@ -4,6 +4,7 @@ #include #include +#include #include #include @@ -88,7 +89,11 @@ void TableFunctionGenerateRandom::parseArguments(const ASTPtr & ast_function, Co // All the arguments must be literals. for (const auto & arg : args) { - if (!arg->as()) + const IAST * arg_raw = arg.get(); + if (const auto * func = arg_raw->as(); func && func->name == "_CAST") + arg_raw = func->arguments->children.at(0).get(); + + if (!arg_raw->as()) { throw Exception(ErrorCodes::BAD_ARGUMENTS, "All arguments of table function '{}' except structure argument must be literals. 
" @@ -107,7 +112,11 @@ void TableFunctionGenerateRandom::parseArguments(const ASTPtr & ast_function, Co if (args.size() >= arg_index + 1) { - const auto & literal = args[arg_index]->as(); + const IAST * arg_raw = args[arg_index].get(); + if (const auto * func = arg_raw->as(); func && func->name == "_CAST") + arg_raw = func->arguments->children.at(0).get(); + + const auto & literal = arg_raw->as(); ++arg_index; if (!literal.value.isNull()) random_seed = checkAndGetLiteralArgument(literal, "random_seed"); diff --git a/src/TableFunctions/TableFunctionMergeTreeIndex.cpp b/src/TableFunctions/TableFunctionMergeTreeIndex.cpp index 435ed4bdf0d6..06a48f0e25f7 100644 --- a/src/TableFunctions/TableFunctionMergeTreeIndex.cpp +++ b/src/TableFunctions/TableFunctionMergeTreeIndex.cpp @@ -10,6 +10,8 @@ #include #include +#include + namespace DB { diff --git a/src/TableFunctions/TableFunctionNumbers.cpp b/src/TableFunctions/TableFunctionNumbers.cpp index 2989eb5fbef0..16f56eab9812 100644 --- a/src/TableFunctions/TableFunctionNumbers.cpp +++ b/src/TableFunctions/TableFunctionNumbers.cpp @@ -20,6 +20,7 @@ namespace ErrorCodes { extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int ILLEGAL_TYPE_OF_ARGUMENT; +extern const int BAD_ARGUMENTS; } namespace @@ -78,6 +79,9 @@ StoragePtr TableFunctionNumbers::executeImpl( UInt64 length = arguments.size() >= 2 ? evaluateArgument(context, arguments[1]) : evaluateArgument(context, arguments[0]); UInt64 step = arguments.size() == 3 ? evaluateArgument(context, arguments[2]) : 1; + if (!step) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Table function {} requires step to be a positive number", getName()); + auto res = std::make_shared( StorageID(getDatabaseName(), table_name), multithreaded, std::string{"number"}, length, offset, step); res->startup(); diff --git a/tests/README.md b/tests/README.md index a1fc0f530f2e..7df5da8d0784 100644 --- a/tests/README.md +++ b/tests/README.md @@ -1 +1 @@ -Find CI documents and instructions on running CI checks localy [here](https://clickhouse.com/docs/en/development/continuous-integration). \ No newline at end of file +Find CI documents and instructions on running CI checks locally [here](https://clickhouse.com/docs/en/development/continuous-integration). 
diff --git a/tests/ci/ci.py b/tests/ci/ci.py index 36e9b1838052..8434355ce465 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -318,7 +318,7 @@ def fetch_records_data(self): self.update() if self.cache_data_fetched: - # there are no record w/o underling data - no need to fetch + # there are no records without fetched data - no need to fetch return self # clean up @@ -773,6 +773,7 @@ def create_from_pr_message( not pr_info.is_pr() and not debug_message ): # if commit_message is provided it's test/debug scenario - do not return # CI options can be configured in PRs only + # if debug_message is provided - it's a test return res message = debug_message or GitRunner(set_cwd_to_git_root=True).run( f"{GIT_PREFIX} log {pr_info.sha} --format=%B -n 1" @@ -790,9 +791,9 @@ def create_from_pr_message( print(f"CI tags from PR body: [{matches_pr}]") matches = list(set(matches + matches_pr)) - if "do not test" in pr_info.labels: - # do_not_test could be set in GH labels - res.do_not_test = True + if "do not test" in pr_info.labels: + # do_not_test could be set in GH labels + res.do_not_test = True for match in matches: if match.startswith("job_"): @@ -1756,6 +1757,32 @@ def _upload_build_profile_data( logging.error("Failed to insert binary_size_file for the build, continue") +def _add_build_to_version_history( + pr_info: PRInfo, + job_report: JobReport, + version: str, + docker_tag: str, + ch_helper: ClickHouseHelper, +) -> None: + # with some probability we will not silently break this logic + assert pr_info.sha and pr_info.commit_html_url and pr_info.head_ref and version + + data = { + "check_start_time": job_report.start_time, + "pull_request_number": pr_info.number, + "pull_request_url": pr_info.pr_html_url, + "commit_sha": pr_info.sha, + "commit_url": pr_info.commit_html_url, + "version": version, + "docker_tag": docker_tag, + "git_ref": pr_info.head_ref, + } + + print(f"::notice ::Log Adding record to versions history: {data}") + + ch_helper.insert_event_into(db="default", table="version_history", event=data) + + def _run_test(job_name: str, run_command: str) -> int: assert ( run_command or CI_CONFIG.get_job_config(job_name).run_command @@ -2113,6 +2140,15 @@ def main() -> int: ch_helper.insert_events_into( db="default", table="checks", events=prepared_events ) + + if "DockerServerImage" in args.job_name and indata is not None: + _add_build_to_version_history( + pr_info, + job_report, + indata["version"], + indata["build"], + ch_helper, + ) else: # no job report print(f"No job report for {[args.job_name]} - do nothing") diff --git a/tests/ci/commit_status_helper.py b/tests/ci/commit_status_helper.py index bda2db139919..56728c3d3ba7 100644 --- a/tests/ci/commit_status_helper.py +++ b/tests/ci/commit_status_helper.py @@ -148,6 +148,11 @@ def set_status_comment(commit: Commit, pr_info: PRInfo) -> None: """It adds or updates the comment status to all Pull Requests but for release one, so the method does nothing for simple pushes and pull requests with `release`/`release-lts` labels""" + + if pr_info.is_merge_queue(): + # skip report creation for the MQ + return + # to reduce number of parameters, the Github is constructed on the fly gh = Github() gh.__requester = commit._requester # type:ignore #pylint:disable=protected-access @@ -441,7 +446,9 @@ def update_mergeable_check(commit: Commit, pr_info: PRInfo, check_name: str) -> or pr_info.release_pr or pr_info.number == 0 ) - if not_run: + + # FIXME: For now, always set mergeable check in the Merge Queue. 
It's required to pass MQ + if not_run and not pr_info.is_merge_queue(): # Let's avoid unnecessary work return diff --git a/tests/ci/integration_tests_runner.py b/tests/ci/integration_tests_runner.py index f50124500cc5..90e2b08386fc 100755 --- a/tests/ci/integration_tests_runner.py +++ b/tests/ci/integration_tests_runner.py @@ -342,6 +342,8 @@ def _install_clickhouse(self, debs_path): "clickhouse-common-static_", "clickhouse-server_", "clickhouse-client", + "clickhouse-odbc-bridge_", + "clickhouse-library-bridge_", "clickhouse-common-static-dbg_", ): # order matters logging.info("Installing package %s", package) diff --git a/tests/ci/pr_info.py b/tests/ci/pr_info.py index ddf59c49e1f2..293004fc4f3a 100644 --- a/tests/ci/pr_info.py +++ b/tests/ci/pr_info.py @@ -26,6 +26,7 @@ DIFF_IN_DOCUMENTATION_EXT = [ ".html", ".md", + ".mdx", ".yml", ".txt", ".css", @@ -198,7 +199,6 @@ def __init__( EventType.MERGE_QUEUE in github_event ): # pull request and other similar events self.event_type = EventType.MERGE_QUEUE - # FIXME: need pr? we can parse it from ["head_ref": "refs/heads/gh-readonly-queue/test-merge-queue/pr-6751-4690229995a155e771c52e95fbd446d219c069bf"] self.number = 0 self.sha = github_event[EventType.MERGE_QUEUE]["head_sha"] self.base_ref = github_event[EventType.MERGE_QUEUE]["base_ref"] @@ -207,6 +207,8 @@ def __init__( self.base_name = github_event["repository"]["full_name"] # any_branch-name - the name of working branch name self.head_ref = github_event[EventType.MERGE_QUEUE]["head_ref"] + # parse underlying pr from ["head_ref": "refs/heads/gh-readonly-queue/test-merge-queue/pr-6751-4690229995a155e771c52e95fbd446d219c069bf"] + self.merged_pr = int(self.head_ref.split("/pr-")[-1].split("-")[0]) # UserName/ClickHouse or ClickHouse/ClickHouse self.head_name = self.base_name self.user_login = github_event["sender"]["login"] @@ -234,6 +236,8 @@ def __init__( if pull_request is None or pull_request["state"] == "closed": # it's merged PR to master self.number = 0 + if pull_request: + self.merged_pr = pull_request["number"] self.labels = set() self.pr_html_url = f"{repo_prefix}/commits/{ref}" self.base_ref = ref diff --git a/tests/ci/run_check.py b/tests/ci/run_check.py index 6187656983e8..435a5f726f25 100644 --- a/tests/ci/run_check.py +++ b/tests/ci/run_check.py @@ -201,14 +201,17 @@ def main(): ci_report_url = create_ci_report(pr_info, []) print("::notice ::Can run") - post_commit_status( - commit, - PENDING, - ci_report_url, - description, - CI_STATUS_NAME, - pr_info, - ) + + if not pr_info.is_merge_queue(): + # we need clean CI status for MQ to merge (no pending statuses) + post_commit_status( + commit, + PENDING, + ci_report_url, + description, + CI_STATUS_NAME, + pr_info, + ) if __name__ == "__main__": diff --git a/tests/ci/stress.py b/tests/ci/stress.py index e0601b86f00b..b1f5a28ec9ec 100755 --- a/tests/ci/stress.py +++ b/tests/ci/stress.py @@ -71,6 +71,11 @@ def get_options(i: int, upgrade_check: bool) -> str: if random.random() < 0.3: client_options.append(f"http_make_head_request={random.randint(0, 1)}") + # TODO: After release 24.3 use ignore_drop_queries_probability for both + # stress test and upgrade check + if not upgrade_check: + client_options.append("ignore_drop_queries_probability=0.5") + if client_options: options.append(" --client-option " + " ".join(client_options)) diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index 373fa7b316f1..4580f0076065 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -131,6 +131,11 @@ def main(): 
temp_path.mkdir(parents=True, exist_ok=True) pr_info = PRInfo() + + if pr_info.is_merge_queue() and args.push: + print("Auto style fix will be disabled for Merge Queue workflow") + args.push = False + run_cpp_check = True run_shell_check = True run_python_check = True diff --git a/tests/ci/sync_pr.py b/tests/ci/sync_pr.py new file mode 100644 index 000000000000..f33f6122f309 --- /dev/null +++ b/tests/ci/sync_pr.py @@ -0,0 +1,46 @@ +#!/usr/bin/env python + +"""Script for automatic sync PRs handling in private repos""" + +import sys + +from get_robot_token import get_best_robot_token +from pr_info import PRInfo +from github_helper import GitHub + + +def main(): + gh = GitHub(get_best_robot_token()) + + pr_info = PRInfo() + assert pr_info.merged_pr, "BUG. merged PR number could not been determined" + + prs = gh.get_pulls_from_search( + query=f"head:sync-upstream/pr/{pr_info.merged_pr} org:ClickHouse type:pr", + repo="ClickHouse/clickhouse-private", + ) + if len(prs) > 1: + print(f"WARNING: More than one PR found [{prs}] - exiting") + sys.exit(0) + if len(prs) == 0: + print("WARNING: No Sync PR found") + sys.exit(0) + + pr = prs[0] + + if pr.state == "closed": + print(f"Sync PR [{pr.number}] already closed - exiting") + sys.exit(0) + + if pr.state != "open": + print(f"WARNING: Unknown Sync PR [{pr.number}] state [{pr.state}] - exiting") + sys.exit(0) + + print(f"Trying to merge Sync PR [{pr.number}]") + if pr.draft: + gh.toggle_pr_draft(pr) + pr.merge() + + +if __name__ == "__main__": + main() diff --git a/tests/ci/version_helper.py b/tests/ci/version_helper.py index 30b0c2d96be2..f649732171fc 100755 --- a/tests/ci/version_helper.py +++ b/tests/ci/version_helper.py @@ -357,8 +357,9 @@ def update_contributors( # format: " 1016 Alexey Arno" shortlog = git_runner.run("git shortlog HEAD --summary") + escaping = str.maketrans({"\\": "\\\\", '"': '\\"'}) contributors = sorted( - [c.split(maxsplit=1)[-1].replace('"', r"\"") for c in shortlog.split("\n")], + [c.split(maxsplit=1)[-1].translate(escaping) for c in shortlog.split("\n")], ) contributors = [f' "{c}",' for c in contributors] diff --git a/tests/ci/worker/prepare-ci-ami.sh b/tests/ci/worker/prepare-ci-ami.sh index 281dff5b1c27..effc224c2d5d 100644 --- a/tests/ci/worker/prepare-ci-ami.sh +++ b/tests/ci/worker/prepare-ci-ami.sh @@ -9,7 +9,7 @@ set -xeuo pipefail echo "Running prepare script" export DEBIAN_FRONTEND=noninteractive -export RUNNER_VERSION=2.313.0 +export RUNNER_VERSION=2.315.0 export RUNNER_HOME=/home/ubuntu/actions-runner deb_arch() { diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 624512058bcf..b2077f6179c3 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -3175,6 +3175,8 @@ def parse_args(): help="Do not run shard related tests", ) + # TODO: Remove upgrade-check option after release 24.3 and use + # ignore_drop_queries_probability option in stress.py as in stress tests group.add_argument( "--upgrade-check", action="store_true", diff --git a/tests/config/config.d/serverwide_trace_collector.xml b/tests/config/config.d/serverwide_trace_collector.xml new file mode 100644 index 000000000000..602e07469f3e --- /dev/null +++ b/tests/config/config.d/serverwide_trace_collector.xml @@ -0,0 +1,4 @@ + + 1000000000 + 1000000000 + diff --git a/tests/config/config.d/storage_conf.xml b/tests/config/config.d/storage_conf.xml index 00d8cb3aea56..d40854247cd0 100644 --- a/tests/config/config.d/storage_conf.xml +++ b/tests/config/config.d/storage_conf.xml @@ -19,7 +19,7 @@ cache s3_disk s3_cache/ - 64Mi + 104857600 1 
100 LRU diff --git a/tests/config/install.sh b/tests/config/install.sh index 652d25a0a35b..06f2f5fe902b 100755 --- a/tests/config/install.sh +++ b/tests/config/install.sh @@ -67,6 +67,7 @@ ln -sf $SRC_PATH/config.d/validate_tcp_client_information.xml $DEST_SERVER_PATH/ ln -sf $SRC_PATH/config.d/zero_copy_destructive_operations.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/block_number.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/handlers.yaml $DEST_SERVER_PATH/config.d/ +ln -sf $SRC_PATH/config.d/serverwide_trace_collector.xml $DEST_SERVER_PATH/config.d/ # Not supported with fasttest. if [ "${DEST_SERVER_PATH}" = "/etc/clickhouse-server" ] diff --git a/tests/integration/test_backup_restore_azure_blob_storage/test.py b/tests/integration/test_backup_restore_azure_blob_storage/test.py index a7c7b4395604..55c2969d8d39 100644 --- a/tests/integration/test_backup_restore_azure_blob_storage/test.py +++ b/tests/integration/test_backup_restore_azure_blob_storage/test.py @@ -66,11 +66,11 @@ def cluster(): def azure_query( - node, query, expect_error="false", try_num=10, settings={}, query_on_retry=None + node, query, expect_error=False, try_num=10, settings={}, query_on_retry=None ): for i in range(try_num): try: - if expect_error == "true": + if expect_error: return node.query_and_get_error(query, settings=settings) else: return node.query(query, settings=settings) diff --git a/tests/integration/test_backup_restore_on_cluster/configs/remote_servers.xml b/tests/integration/test_backup_restore_on_cluster/configs/cluster.xml similarity index 56% rename from tests/integration/test_backup_restore_on_cluster/configs/remote_servers.xml rename to tests/integration/test_backup_restore_on_cluster/configs/cluster.xml index c3bb226c1f40..1f7cb8155eb3 100644 --- a/tests/integration/test_backup_restore_on_cluster/configs/remote_servers.xml +++ b/tests/integration/test_backup_restore_on_cluster/configs/cluster.xml @@ -20,21 +20,5 @@ - - - - node1 - 9000 - - - node2 - 9000 - - - node3 - 9000 - - - diff --git a/tests/integration/test_backup_restore_on_cluster/configs/cluster3.xml b/tests/integration/test_backup_restore_on_cluster/configs/cluster3.xml new file mode 100644 index 000000000000..a591f22447ec --- /dev/null +++ b/tests/integration/test_backup_restore_on_cluster/configs/cluster3.xml @@ -0,0 +1,20 @@ + + + + + + node1 + 9000 + + + node2 + 9000 + + + node3 + 9000 + + + + + diff --git a/tests/integration/test_backup_restore_on_cluster/configs/slow_replicated_merge_tree.xml b/tests/integration/test_backup_restore_on_cluster/configs/slow_replicated_merge_tree.xml new file mode 100644 index 000000000000..c6bc1e318024 --- /dev/null +++ b/tests/integration/test_backup_restore_on_cluster/configs/slow_replicated_merge_tree.xml @@ -0,0 +1,10 @@ + + + + 250 + + + 250 + + + diff --git a/tests/integration/test_backup_restore_on_cluster/test.py b/tests/integration/test_backup_restore_on_cluster/test.py index 2c60b096428d..0f0821d229c5 100644 --- a/tests/integration/test_backup_restore_on_cluster/test.py +++ b/tests/integration/test_backup_restore_on_cluster/test.py @@ -9,7 +9,8 @@ cluster = ClickHouseCluster(__file__) main_configs = [ - "configs/remote_servers.xml", + "configs/cluster.xml", + "configs/cluster3.xml", "configs/replicated_access_storage.xml", "configs/replicated_user_defined_sql_objects.xml", "configs/backups_disk.xml", diff --git a/tests/integration/test_backup_restore_on_cluster/test_slow_rmt.py b/tests/integration/test_backup_restore_on_cluster/test_slow_rmt.py new file mode 
100644 index 000000000000..15c344eadf85 --- /dev/null +++ b/tests/integration/test_backup_restore_on_cluster/test_slow_rmt.py @@ -0,0 +1,119 @@ +import pytest + +from helpers.cluster import ClickHouseCluster +from helpers.test_tools import TSV, assert_eq_with_retry, exec_query_with_retry + + +cluster = ClickHouseCluster(__file__) + +main_configs = [ + "configs/backups_disk.xml", + "configs/cluster.xml", + "configs/slow_replicated_merge_tree.xml", +] + +user_configs = [ + "configs/allow_database_types.xml", + "configs/zookeeper_retries.xml", +] + +node1 = cluster.add_instance( + "node1", + main_configs=main_configs, + user_configs=user_configs, + external_dirs=["/backups/"], + macros={"replica": "node1", "shard": "shard1"}, + with_zookeeper=True, +) + +node2 = cluster.add_instance( + "node2", + main_configs=main_configs, + user_configs=user_configs, + external_dirs=["/backups/"], + macros={"replica": "node2", "shard": "shard1"}, + with_zookeeper=True, +) + + +@pytest.fixture(scope="module", autouse=True) +def start_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +@pytest.fixture(autouse=True) +def drop_after_test(): + try: + yield + finally: + node1.query("DROP DATABASE IF EXISTS mydb ON CLUSTER 'cluster' SYNC") + + +backup_id_counter = 0 + + +def new_backup_name(): + global backup_id_counter + backup_id_counter += 1 + return f"Disk('backups', '{backup_id_counter}')" + + +def test_replicated_database_async(): + node1.query( + "CREATE DATABASE mydb ON CLUSTER 'cluster' ENGINE=Replicated('/clickhouse/path/','{shard}','{replica}')" + ) + + node1.query("CREATE TABLE mydb.tbl(x UInt8) ENGINE=ReplicatedMergeTree ORDER BY x") + + node1.query( + "CREATE TABLE mydb.tbl2(y String) ENGINE=ReplicatedMergeTree ORDER BY y" + ) + + node2.query("SYSTEM SYNC DATABASE REPLICA mydb") + + node1.query("INSERT INTO mydb.tbl VALUES (1)") + node1.query("INSERT INTO mydb.tbl VALUES (22)") + node2.query("INSERT INTO mydb.tbl2 VALUES ('a')") + node2.query("INSERT INTO mydb.tbl2 VALUES ('bb')") + node1.query("SYSTEM SYNC REPLICA ON CLUSTER 'cluster' mydb.tbl") + + backup_name = new_backup_name() + id, status = node1.query( + f"BACKUP DATABASE mydb ON CLUSTER 'cluster' TO {backup_name} ASYNC" + ).split("\t") + + assert status == "CREATING_BACKUP\n" or status == "BACKUP_CREATED\n" + + assert_eq_with_retry( + node1, + f"SELECT status, error FROM system.backups WHERE id='{id}'", + TSV([["BACKUP_CREATED", ""]]), + ) + + node1.query("DROP DATABASE mydb ON CLUSTER 'cluster' SYNC") + + id, status = node1.query( + f"RESTORE DATABASE mydb ON CLUSTER 'cluster' FROM {backup_name} ASYNC" + ).split("\t") + + assert status == "RESTORING\n" or status == "RESTORED\n" + + assert_eq_with_retry( + node1, + f"SELECT status, error FROM system.backups WHERE id='{id}'", + TSV([["RESTORED", ""]]), + ) + + # exec_query_with_retry() is here because `SYSTEM SYNC REPLICA` can throw `TABLE_IS_READ_ONLY` + # if any of these tables didn't start completely yet. 
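+ # (exec_query_with_retry() simply re-runs the query until it succeeds or the retry limit is reached.)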
+ exec_query_with_retry(node1, "SYSTEM SYNC REPLICA ON CLUSTER 'cluster' mydb.tbl") + exec_query_with_retry(node1, "SYSTEM SYNC REPLICA ON CLUSTER 'cluster' mydb.tbl2") + + assert node1.query("SELECT * FROM mydb.tbl ORDER BY x") == TSV([1, 22]) + assert node2.query("SELECT * FROM mydb.tbl2 ORDER BY y") == TSV(["a", "bb"]) + assert node2.query("SELECT * FROM mydb.tbl ORDER BY x") == TSV([1, 22]) + assert node1.query("SELECT * FROM mydb.tbl2 ORDER BY y") == TSV(["a", "bb"]) diff --git a/tests/integration/test_backup_restore_s3/configs/remote_servers.xml b/tests/integration/test_backup_restore_s3/configs/remote_servers.xml new file mode 100644 index 000000000000..9607aac20031 --- /dev/null +++ b/tests/integration/test_backup_restore_s3/configs/remote_servers.xml @@ -0,0 +1,12 @@ + + + + + + node + 9000 + + + + + diff --git a/tests/integration/test_backup_restore_s3/test.py b/tests/integration/test_backup_restore_s3/test.py index d65fc1f09d6b..05424887736e 100644 --- a/tests/integration/test_backup_restore_s3/test.py +++ b/tests/integration/test_backup_restore_s3/test.py @@ -1,4 +1,4 @@ -from typing import Dict, Iterable +from typing import Dict import pytest from helpers.cluster import ClickHouseCluster from helpers.test_tools import TSV @@ -13,11 +13,13 @@ "configs/named_collection_s3_backups.xml", "configs/s3_settings.xml", "configs/blob_log.xml", + "configs/remote_servers.xml", ], user_configs=[ "configs/zookeeper_retries.xml", ], with_minio=True, + with_zookeeper=True, ) @@ -544,9 +546,45 @@ def create_user(user): "SELECT * FROM s3('http://minio1:9001/root/data/backups/limited/backup1.zip', 'RawBLOB')", user="regularuser", ) + node.query( "SELECT * FROM s3('http://minio1:9001/root/data/backups/limited/backup1.zip', 'RawBLOB')", user="superuser1", ) + assert "Access Denied" in node.query_and_get_error( + "BACKUP TABLE specific_auth ON CLUSTER 'cluster' TO S3('http://minio1:9001/root/data/backups/limited/backup3/')", + user="regularuser", + ) + + node.query( + "BACKUP TABLE specific_auth ON CLUSTER 'cluster' TO S3('http://minio1:9001/root/data/backups/limited/backup3/')", + user="superuser1", + ) + + assert "Access Denied" in node.query_and_get_error( + "RESTORE TABLE specific_auth ON CLUSTER 'cluster' FROM S3('http://minio1:9001/root/data/backups/limited/backup3/')", + user="regularuser", + ) + + node.query( + "RESTORE TABLE specific_auth ON CLUSTER 'cluster' FROM S3('http://minio1:9001/root/data/backups/limited/backup3/')", + user="superuser1", + ) + + assert "Access Denied" in node.query_and_get_error( + "SELECT * FROM s3('http://minio1:9001/root/data/backups/limited/backup3/*', 'RawBLOB')", + user="regularuser", + ) + + node.query( + "SELECT * FROM s3('http://minio1:9001/root/data/backups/limited/backup3/*', 'RawBLOB')", + user="superuser1", + ) + + assert "Access Denied" in node.query_and_get_error( + "SELECT * FROM s3Cluster(cluster, 'http://minio1:9001/root/data/backups/limited/backup3/*', 'RawBLOB')", + user="regularuser", + ) + node.query("DROP TABLE IF EXISTS test.specific_auth") diff --git a/tests/integration/test_cluster_all_replicas/test.py b/tests/integration/test_cluster_all_replicas/test.py index eb406de6a8d0..d8bad180e1b4 100644 --- a/tests/integration/test_cluster_all_replicas/test.py +++ b/tests/integration/test_cluster_all_replicas/test.py @@ -42,6 +42,17 @@ def test_cluster(start_cluster): ) +def test_global_in(start_cluster): + node1.query("DROP TABLE IF EXISTS u;") + node1.query("CREATE TABLE u(uid Int16) ENGINE=Memory as select 0") + + assert set( + node1.query( + 
"""SELECT hostName(), * FROM clusterAllReplicas("one_shard_two_nodes", system.one) where dummy GLOBAL IN u""" + ).splitlines() + ) == {"node1\t0", "node2\t0"} + + @pytest.mark.parametrize( "cluster", [ diff --git a/tests/integration/test_distributed_config/test.py b/tests/integration/test_distributed_config/test.py index bf4bb5a4335c..e551e69b93f4 100644 --- a/tests/integration/test_distributed_config/test.py +++ b/tests/integration/test_distributed_config/test.py @@ -31,7 +31,7 @@ def test_distibuted_settings(start_cluster): DETACH TABLE dist_1; """ ) - assert "flush_on_detach = 1" in node.query("SHOW CREATE dist_1") + assert "flush_on_detach = true" in node.query("SHOW CREATE dist_1") # flush_on_detach=true, so data_1 should have 1 row assert int(node.query("SELECT count() FROM data_1")) == 1 diff --git a/tests/integration/test_merge_tree_azure_blob_storage/test.py b/tests/integration/test_merge_tree_azure_blob_storage/test.py index cffab672bd1e..7f77627e7935 100644 --- a/tests/integration/test_merge_tree_azure_blob_storage/test.py +++ b/tests/integration/test_merge_tree_azure_blob_storage/test.py @@ -714,7 +714,7 @@ def test_endpoint_error_check(cluster): """ expected_err_msg = "Expected container_name in endpoint" - assert expected_err_msg in azure_query(node, query, expect_error="true") + assert expected_err_msg in azure_query(node, query, expect_error=True) query = f""" DROP TABLE IF EXISTS test SYNC; @@ -731,7 +731,7 @@ def test_endpoint_error_check(cluster): """ expected_err_msg = "Expected account_name in endpoint" - assert expected_err_msg in azure_query(node, query, expect_error="true") + assert expected_err_msg in azure_query(node, query, expect_error=True) query = f""" DROP TABLE IF EXISTS test SYNC; @@ -748,4 +748,76 @@ def test_endpoint_error_check(cluster): """ expected_err_msg = "Expected container_name in endpoint" - assert expected_err_msg in azure_query(node, query, expect_error="true") + assert expected_err_msg in azure_query(node, query, expect_error=True) + + +def get_azure_client(container_name, port): + connection_string = ( + f"DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;" + f"AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;" + f"BlobEndpoint=http://127.0.0.1:{port}/devstoreaccount1;" + ) + + blob_service_client = BlobServiceClient.from_connection_string(connection_string) + return blob_service_client.get_container_client(container_name) + + +def test_azure_broken_parts(cluster): + node = cluster.instances[NODE_NAME] + account_name = "devstoreaccount1" + container_name = "cont5" + port = cluster.azurite_port + + query = f""" + DROP TABLE IF EXISTS t_azure_broken_parts SYNC; + + CREATE TABLE t_azure_broken_parts (a Int32) + ENGINE = MergeTree() ORDER BY tuple() + SETTINGS disk = disk( + type = azure_blob_storage, + endpoint = 'http://azurite1:{port}/{account_name}/{container_name}', + endpoint_contains_account_name = 'true', + account_name = 'devstoreaccount1', + account_key = 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', + skip_access_check = 0), min_bytes_for_wide_part = 0, min_bytes_for_full_part_storage = 0; + + INSERT INTO t_azure_broken_parts VALUES (1); + """ + + azure_query(node, query) + + result = azure_query(node, "SELECT count() FROM t_azure_broken_parts").strip() + assert int(result) == 1 + + result = azure_query( + node, + "SELECT count() FROM system.detached_parts WHERE table = 't_azure_broken_parts'", + ).strip() + + assert int(result) == 
0 + + data_path = azure_query( + node, + "SELECT data_paths[1] FROM system.tables WHERE name = 't_azure_broken_parts'", + ).strip() + + remote_path = azure_query( + node, + f"SELECT remote_path FROM system.remote_data_paths WHERE path || local_path = '{data_path}' || 'all_1_1_0/columns.txt'", + ).strip() + + client = get_azure_client(container_name, port) + client.delete_blob(remote_path) + + azure_query(node, "DETACH TABLE t_azure_broken_parts") + azure_query(node, "ATTACH TABLE t_azure_broken_parts") + + result = azure_query(node, "SELECT count() FROM t_azure_broken_parts").strip() + assert int(result) == 0 + + result = azure_query( + node, + "SELECT count() FROM system.detached_parts WHERE table = 't_azure_broken_parts'", + ).strip() + + assert int(result) == 1 diff --git a/tests/integration/test_storage_azure_blob_storage/test.py b/tests/integration/test_storage_azure_blob_storage/test.py index 7d30265e4f86..aabc93406581 100644 --- a/tests/integration/test_storage_azure_blob_storage/test.py +++ b/tests/integration/test_storage_azure_blob_storage/test.py @@ -36,11 +36,11 @@ def cluster(): def azure_query( - node, query, expect_error="false", try_num=10, settings={}, query_on_retry=None + node, query, expect_error=False, try_num=10, settings={}, query_on_retry=None ): for i in range(try_num): try: - if expect_error == "true": + if expect_error: return node.query_and_get_error(query, settings=settings) else: return node.query(query, settings=settings) @@ -793,7 +793,7 @@ def test_read_from_not_existing_container(cluster): f"'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'CSV', 'auto')" ) expected_err_msg = "container does not exist" - assert expected_err_msg in azure_query(node, query, expect_error="true") + assert expected_err_msg in azure_query(node, query, expect_error=True) def test_function_signatures(cluster): @@ -966,7 +966,7 @@ def test_union_schema_inference_mode(cluster): error = azure_query( node, f"desc azureBlobStorage('{storage_account_url}', 'cont', 'test_union_schema_inference*.jsonl', '{account_name}', '{account_key}', 'auto', 'auto', 'auto') settings schema_inference_mode='union', describe_compact_output=1 format TSV", - expect_error="true", + expect_error=True, ) assert "CANNOT_EXTRACT_TABLE_STRUCTURE" in error diff --git a/tests/integration/test_trace_collector_serverwide/__init__.py b/tests/integration/test_trace_collector_serverwide/__init__.py new file mode 100644 index 000000000000..e5a0d9b4834e --- /dev/null +++ b/tests/integration/test_trace_collector_serverwide/__init__.py @@ -0,0 +1 @@ +#!/usr/bin/env python3 diff --git a/tests/integration/test_trace_collector_serverwide/configs/global_profiler.xml b/tests/integration/test_trace_collector_serverwide/configs/global_profiler.xml new file mode 100644 index 000000000000..5112d2671825 --- /dev/null +++ b/tests/integration/test_trace_collector_serverwide/configs/global_profiler.xml @@ -0,0 +1,4 @@ + + 10000000 + 10000000 + diff --git a/tests/integration/test_trace_collector_serverwide/test.py b/tests/integration/test_trace_collector_serverwide/test.py new file mode 100644 index 000000000000..9bd107ac3659 --- /dev/null +++ b/tests/integration/test_trace_collector_serverwide/test.py @@ -0,0 +1,52 @@ +#!/usr/bin/env python3 + +import pytest +import time + +from helpers.cluster import ClickHouseCluster +from helpers.test_tools import assert_eq_with_retry + +cluster = ClickHouseCluster(__file__) + +node1 = cluster.add_instance("node1", 
main_configs=["configs/global_profiler.xml"]) + + +@pytest.fixture(scope="module") +def start_cluster(): + try: + cluster.start() + + yield cluster + finally: + cluster.shutdown() + + +def test_global_thread_profiler(start_cluster): + if node1.is_built_with_sanitizer(): + return + + node1.query( + "CREATE TABLE t (key UInt32, value String) Engine = MergeTree() ORDER BY key" + ) + + node1.query("INSERT INTO t SELECT number, toString(number) from numbers(100)") + node1.query("INSERT INTO t SELECT number, toString(number) from numbers(100)") + node1.query("INSERT INTO t SELECT number, toString(number) from numbers(100)") + node1.query("INSERT INTO t SELECT number, toString(number) from numbers(100)") + node1.query("INSERT INTO t SELECT number, toString(number) from numbers(100)") + node1.query("INSERT INTO t SELECT number, toString(number) from numbers(100)") + node1.query("INSERT INTO t SELECT number, toString(number) from numbers(100)") + node1.query("INSERT INTO t SELECT number, toString(number) from numbers(100)") + + time.sleep(5) + + node1.query("SYSTEM FLUSH LOGS") + + assert ( + int( + node1.query( + "SELECT count() FROM system.trace_log where trace_type='Real' and query_id = ''" + ).strip() + ) + > 0 + ) diff --git a/tests/performance/join_filter_pushdown_equivalent_sets.xml b/tests/performance/join_filter_pushdown_equivalent_sets.xml new file mode 100644 index 000000000000..caddcb295c96 --- /dev/null +++ b/tests/performance/join_filter_pushdown_equivalent_sets.xml @@ -0,0 +1,16 @@ + + CREATE TABLE test_table_1(id UInt64, value String) ENGINE=MergeTree ORDER BY id + CREATE TABLE test_table_2(id UInt64, value String) ENGINE=MergeTree ORDER BY id + + INSERT INTO test_table_1 SELECT number, number FROM numbers(5000000) + INSERT INTO test_table_2 SELECT number, number FROM numbers(5000000) + + SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs INNER JOIN test_table_2 AS rhs ON lhs.id = rhs.id WHERE lhs.id = 5 FORMAT Null + SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs INNER JOIN test_table_2 AS rhs ON lhs.id = rhs.id WHERE rhs.id = 5 FORMAT Null + SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs INNER JOIN test_table_2 AS rhs ON lhs.id = rhs.id WHERE lhs.id = 5 AND rhs.id = 5 FORMAT Null + SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs LEFT JOIN test_table_2 AS rhs ON lhs.id = rhs.id WHERE lhs.id = 5 FORMAT Null + SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs RIGHT JOIN test_table_2 AS rhs ON lhs.id = rhs.id WHERE rhs.id = 5 FORMAT Null + + DROP TABLE test_table_1 + DROP TABLE test_table_2 + diff --git a/tests/performance/scripts/README.md b/tests/performance/scripts/README.md index 0a0580c62a00..1a15189fe861 100644 --- a/tests/performance/scripts/README.md +++ b/tests/performance/scripts/README.md @@ -130,7 +130,7 @@ More stages are available, e.g. restart servers or run the tests. See the code. 
#### Run a single test on the already configured servers ``` -docker/test/performance-comparison/perf.py --host=localhost --port=9000 --runs=1 tests/performance/logical_functions_small.xml +tests/performance/scripts/perf.py --host=localhost --port=9000 --runs=1 tests/performance/logical_functions_small.xml ``` #### Run all tests on some custom configuration diff --git a/tests/queries/0_stateless/00918_json_functions.reference b/tests/queries/0_stateless/00918_json_functions.reference index 43b15ded93d3..078348cd20ff 100644 --- a/tests/queries/0_stateless/00918_json_functions.reference +++ b/tests/queries/0_stateless/00918_json_functions.reference @@ -286,3 +286,9 @@ v --show error: type should be const string --show error: index type should be integer --show error: key of map type should be String +\N +\N +Hello +Hello +Hello +Hello diff --git a/tests/queries/0_stateless/00918_json_functions.sql b/tests/queries/0_stateless/00918_json_functions.sql index e19dd17670e4..3d30ce841bac 100644 --- a/tests/queries/0_stateless/00918_json_functions.sql +++ b/tests/queries/0_stateless/00918_json_functions.sql @@ -326,3 +326,9 @@ SELECT JSONExtract('[]', JSONExtract('0', 'UInt256'), 'UInt256'); -- { serverErr SELECT '--show error: key of map type should be String'; SELECT JSONExtract('{"a": [100.0, 200], "b": [-100, 200.0, 300]}', 'Map(Int64, Array(Float64))'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT JSONExtract(materialize(toLowCardinality('{"string_value":null}')), materialize('string_value'), 'LowCardinality(Nullable(String))'); +SELECT JSONExtract(materialize('{"string_value":null}'), materialize('string_value'), 'LowCardinality(Nullable(String))'); +SELECT JSONExtract(materialize('{"string_value":"Hello"}'), materialize('string_value'), 'LowCardinality(Nullable(String))') AS x; +SELECT JSONExtract(materialize(toLowCardinality('{"string_value":"Hello"}')), materialize('string_value'), 'LowCardinality(Nullable(String))') AS x; +SELECT JSONExtract(materialize('{"string_value":"Hello"}'), materialize(toLowCardinality('string_value')), 'LowCardinality(Nullable(String))') AS x; +SELECT JSONExtract(materialize(toLowCardinality('{"string_value":"Hello"}')), materialize(toLowCardinality('string_value')), 'LowCardinality(Nullable(String))') AS x; diff --git a/tests/queries/0_stateless/01056_window_view_proc_hop_watch.py b/tests/queries/0_stateless/01056_window_view_proc_hop_watch.py index 4c3e3ead4554..2db14fcdddf3 100755 --- a/tests/queries/0_stateless/01056_window_view_proc_hop_watch.py +++ b/tests/queries/0_stateless/01056_window_view_proc_hop_watch.py @@ -28,6 +28,8 @@ client1.expect(prompt) client2.send("SET allow_experimental_window_view = 1") client2.expect(prompt) + client2.send("SET allow_experimental_analyzer = 0") + client2.expect(prompt) client1.send("CREATE DATABASE IF NOT EXISTS 01056_window_view_proc_hop_watch") client1.expect(prompt) diff --git a/tests/queries/0_stateless/01059_window_view_event_hop_watch_strict_asc.py b/tests/queries/0_stateless/01059_window_view_event_hop_watch_strict_asc.py index 9adff06442ed..2323ee5c8381 100755 --- a/tests/queries/0_stateless/01059_window_view_event_hop_watch_strict_asc.py +++ b/tests/queries/0_stateless/01059_window_view_event_hop_watch_strict_asc.py @@ -26,6 +26,8 @@ client1.expect(prompt) client1.send("SET window_view_heartbeat_interval = 1") client1.expect(prompt) + client2.send("SET allow_experimental_analyzer = 0") + client2.expect(prompt) client1.send("CREATE DATABASE IF NOT EXISTS db_01059_event_hop_watch_strict_asc") 
client1.expect(prompt) diff --git a/tests/queries/0_stateless/01062_window_view_event_hop_watch_asc.py b/tests/queries/0_stateless/01062_window_view_event_hop_watch_asc.py index bb40b1df2f07..db9e8cef6c55 100755 --- a/tests/queries/0_stateless/01062_window_view_event_hop_watch_asc.py +++ b/tests/queries/0_stateless/01062_window_view_event_hop_watch_asc.py @@ -28,6 +28,8 @@ client1.expect(prompt) client2.send("SET allow_experimental_window_view = 1") client2.expect(prompt) + client2.send("SET allow_experimental_analyzer = 0") + client2.expect(prompt) client1.send("CREATE DATABASE IF NOT EXISTS 01062_window_view_event_hop_watch_asc") client1.expect(prompt) diff --git a/tests/queries/0_stateless/01065_window_view_event_hop_watch_bounded.py b/tests/queries/0_stateless/01065_window_view_event_hop_watch_bounded.py index 7f00130b184e..b8d5ff02d379 100755 --- a/tests/queries/0_stateless/01065_window_view_event_hop_watch_bounded.py +++ b/tests/queries/0_stateless/01065_window_view_event_hop_watch_bounded.py @@ -27,6 +27,8 @@ client1.expect(prompt) client2.send("SET allow_experimental_window_view = 1") client2.expect(prompt) + client2.send("SET allow_experimental_analyzer = 0") + client2.expect(prompt) client1.send( "CREATE DATABASE IF NOT EXISTS 01065_window_view_event_hop_watch_bounded" diff --git a/tests/queries/0_stateless/01069_window_view_proc_tumble_watch.py b/tests/queries/0_stateless/01069_window_view_proc_tumble_watch.py index eb31b2ccbcf1..21c2e831afc5 100755 --- a/tests/queries/0_stateless/01069_window_view_proc_tumble_watch.py +++ b/tests/queries/0_stateless/01069_window_view_proc_tumble_watch.py @@ -28,6 +28,8 @@ client1.expect(prompt) client2.send("SET allow_experimental_window_view = 1") client2.expect(prompt) + client2.send("SET allow_experimental_analyzer = 0") + client2.expect(prompt) client1.send("CREATE DATABASE 01069_window_view_proc_tumble_watch") client1.expect(prompt) diff --git a/tests/queries/0_stateless/01070_window_view_watch_events.py b/tests/queries/0_stateless/01070_window_view_watch_events.py index 8aeff041cc1a..1cf7678a014b 100755 --- a/tests/queries/0_stateless/01070_window_view_watch_events.py +++ b/tests/queries/0_stateless/01070_window_view_watch_events.py @@ -28,6 +28,8 @@ client1.expect(prompt) client2.send("SET allow_experimental_window_view = 1") client2.expect(prompt) + client2.send("SET allow_experimental_analyzer = 0") + client2.expect(prompt) client1.send("CREATE DATABASE IF NOT EXISTS 01070_window_view_watch_events") client1.expect(prompt) diff --git a/tests/queries/0_stateless/01078_window_view_alter_query_watch.py b/tests/queries/0_stateless/01078_window_view_alter_query_watch.py index c32e508c5a55..3f3dfe0cda8b 100755 --- a/tests/queries/0_stateless/01078_window_view_alter_query_watch.py +++ b/tests/queries/0_stateless/01078_window_view_alter_query_watch.py @@ -28,10 +28,14 @@ client1.expect(prompt) client2.send("SET allow_experimental_window_view = 1") client2.expect(prompt) + client2.send("SET allow_experimental_analyzer = 0") + client2.expect(prompt) client3.send("SET allow_experimental_window_view = 1") client3.expect(prompt) client3.send("SET window_view_heartbeat_interval = 1") client3.expect(prompt) + client3.send("SET allow_experimental_analyzer = 0") + client3.expect(prompt) client1.send("CREATE DATABASE IF NOT EXISTS 01078_window_view_alter_query_watch") client1.expect(prompt) diff --git a/tests/queries/0_stateless/01082_window_view_watch_limit.py b/tests/queries/0_stateless/01082_window_view_watch_limit.py index 12c8d2955918..9938ebcab984 
100755 --- a/tests/queries/0_stateless/01082_window_view_watch_limit.py +++ b/tests/queries/0_stateless/01082_window_view_watch_limit.py @@ -27,6 +27,8 @@ client1.expect(prompt) client2.send("SET allow_experimental_window_view = 1") client2.expect(prompt) + client2.send("SET allow_experimental_analyzer = 0") + client2.expect(prompt) client1.send("CREATE DATABASE IF NOT EXISTS 01082_window_view_watch_limit") client1.expect(prompt) diff --git a/tests/queries/0_stateless/01087_table_function_generate.sql b/tests/queries/0_stateless/01087_table_function_generate.sql index 10657dbd63ac..ff7c3f3477db 100644 --- a/tests/queries/0_stateless/01087_table_function_generate.sql +++ b/tests/queries/0_stateless/01087_table_function_generate.sql @@ -195,3 +195,5 @@ SELECT a, b, c, d, e, f, g, hex(h) FROM test_table_2 ORDER BY a, b, c, d, e, f, SELECT '-'; DROP TABLE IF EXISTS test_table_2; + +select * from generateRandom('x UInt64', Null, 10, 2) limit 2 format Null; diff --git a/tests/queries/0_stateless/01592_long_window_functions1.sql b/tests/queries/0_stateless/01592_long_window_functions1.sql index c63c651fb0b8..d2d32e24eaab 100644 --- a/tests/queries/0_stateless/01592_long_window_functions1.sql +++ b/tests/queries/0_stateless/01592_long_window_functions1.sql @@ -1,5 +1,8 @@ -- Tags: long +-- test became more than an order of magnitude slower with max_bytes_before_external_sort=1 +set max_bytes_before_external_sort = 0; + drop table if exists stack; set max_insert_threads = 4; diff --git a/tests/queries/0_stateless/01601_accurate_cast.reference b/tests/queries/0_stateless/01601_accurate_cast.reference index dbf9666f4cd8..82138e6354a5 100644 --- a/tests/queries/0_stateless/01601_accurate_cast.reference +++ b/tests/queries/0_stateless/01601_accurate_cast.reference @@ -10,7 +10,6 @@ 1970-01-01 00:00:19 2023-05-30 1970-01-20 -\N true false true diff --git a/tests/queries/0_stateless/01601_accurate_cast.sql b/tests/queries/0_stateless/01601_accurate_cast.sql index d2ecede24023..471e4e34a4af 100644 --- a/tests/queries/0_stateless/01601_accurate_cast.sql +++ b/tests/queries/0_stateless/01601_accurate_cast.sql @@ -35,7 +35,7 @@ SELECT accurateCast('1xxx', 'Date'); -- { serverError CANNOT_PARSE_DATE } SELECT accurateCast('2023-05-30', 'Date'); SELECT accurateCast(19, 'Date'); -select accurateCast('test', 'Nullable(Bool)'); +select accurateCast('test', 'Nullable(Bool)'); -- { serverError CANNOT_PARSE_BOOL } select accurateCast('test', 'Bool'); -- { serverError CANNOT_PARSE_BOOL } select accurateCast('truex', 'Bool'); -- { serverError CANNOT_PARSE_BOOL } select accurateCast('xfalse', 'Bool'); -- { serverError CANNOT_PARSE_BOOL } diff --git a/tests/queries/0_stateless/01655_plan_optimizations.reference b/tests/queries/0_stateless/01655_plan_optimizations.reference index 436d06c50768..1b9755a74d5d 100644 --- a/tests/queries/0_stateless/01655_plan_optimizations.reference +++ b/tests/queries/0_stateless/01655_plan_optimizations.reference @@ -180,12 +180,14 @@ Filter column: notEquals(__table1.number, 1_UInt8) > one condition of filter is pushed down before INNER JOIN Join Join -Filter column: notEquals(number, 1) +Filter column: and(notEquals(number, 1), notEquals(number, 2)) Join +Filter column: and(notEquals(b, 2), notEquals(b, 1)) > (analyzer) one condition of filter is pushed down before INNER JOIN Join Join -Filter column: notEquals(__table1.number, 1_UInt8) +Filter column: and(notEquals(__table1.number, 1_UInt8), notEquals(__table1.number, 2_UInt8)) +Filter column: and(notEquals(__table2.b, 2_UInt8), 
notEquals(__table2.b, 1_UInt8)) 3 3 > filter is pushed down before UNION Union diff --git a/tests/queries/0_stateless/01655_plan_optimizations.sh b/tests/queries/0_stateless/01655_plan_optimizations.sh index 5a5172642439..864dd69412a7 100755 --- a/tests/queries/0_stateless/01655_plan_optimizations.sh +++ b/tests/queries/0_stateless/01655_plan_optimizations.sh @@ -248,14 +248,14 @@ $CLICKHOUSE_CLIENT --allow_experimental_analyzer=0 -q " select number as a, r.b from numbers(4) as l any inner join ( select number + 2 as b from numbers(3) ) as r on a = r.b where a != 1 and b != 2 settings enable_optimize_predicate_expression = 0" | - grep -o "Join\|Filter column: notEquals(number, 1)" + grep -o "Join\|Filter column: and(notEquals(number, 1), notEquals(number, 2))\|Filter column: and(notEquals(b, 2), notEquals(b, 1))" echo "> (analyzer) one condition of filter is pushed down before INNER JOIN" $CLICKHOUSE_CLIENT --allow_experimental_analyzer=1 -q " explain actions = 1 select number as a, r.b from numbers(4) as l any inner join ( select number + 2 as b from numbers(3) ) as r on a = r.b where a != 1 and b != 2 settings enable_optimize_predicate_expression = 0" | - grep -o "Join\|Filter column: notEquals(__table1.number, 1_UInt8)" + grep -o "Join\|Filter column: and(notEquals(__table1.number, 1_UInt8), notEquals(__table1.number, 2_UInt8))\|Filter column: and(notEquals(__table2.b, 2_UInt8), notEquals(__table2.b, 1_UInt8))" $CLICKHOUSE_CLIENT -q " select number as a, r.b from numbers(4) as l any inner join ( select number + 2 as b from numbers(3) diff --git a/tests/queries/0_stateless/02263_format_insert_settings.reference b/tests/queries/0_stateless/02263_format_insert_settings.reference index 2bba75f6788b..ea8b78faf8c2 100644 --- a/tests/queries/0_stateless/02263_format_insert_settings.reference +++ b/tests/queries/0_stateless/02263_format_insert_settings.reference @@ -21,10 +21,6 @@ INSERT INTO foo FORMAT Values INSERT INTO foo SELECT 1 [oneline] insert into foo select 1 INSERT INTO foo SELECT 1 -[multi] insert into foo watch bar -INSERT INTO foo WATCH bar -[oneline] insert into foo watch bar -INSERT INTO foo WATCH bar [multi] insert into foo format tsv INSERT INTO foo FORMAT tsv [oneline] insert into foo format tsv @@ -41,12 +37,6 @@ SETTINGS max_threads = 1 SELECT 1 [oneline] insert into foo settings max_threads=1 select 1 INSERT INTO foo SETTINGS max_threads = 1 SELECT 1 -[multi] insert into foo settings max_threads=1 watch bar -INSERT INTO foo -SETTINGS max_threads = 1 -WATCH bar -[oneline] insert into foo settings max_threads=1 watch bar -INSERT INTO foo SETTINGS max_threads = 1 WATCH bar [multi] insert into foo settings max_threads=1 format tsv INSERT INTO foo SETTINGS max_threads = 1 diff --git a/tests/queries/0_stateless/02263_format_insert_settings.sh b/tests/queries/0_stateless/02263_format_insert_settings.sh index 49aa56d6c0a2..808ab23ee59b 100755 --- a/tests/queries/0_stateless/02263_format_insert_settings.sh +++ b/tests/queries/0_stateless/02263_format_insert_settings.sh @@ -40,12 +40,10 @@ $CLICKHOUSE_CLIENT -q 'drop table data_02263' run_format_both 'insert into foo values' run_format_both 'insert into foo select 1' -run_format_both 'insert into foo watch bar' run_format_both 'insert into foo format tsv' run_format_both 'insert into foo settings max_threads=1 values' run_format_both 'insert into foo settings max_threads=1 select 1' -run_format_both 'insert into foo settings max_threads=1 watch bar' run_format_both 'insert into foo settings max_threads=1 format tsv' run_format_both 
'insert into foo select 1 settings max_threads=1' run_format_both 'insert into foo settings max_threads=1 select 1 settings max_threads=1' diff --git a/tests/queries/0_stateless/02303_cast_nullable_to_custom_types.reference b/tests/queries/0_stateless/02303_cast_nullable_to_custom_types.reference index 717484d46704..a257755481e7 100644 --- a/tests/queries/0_stateless/02303_cast_nullable_to_custom_types.reference +++ b/tests/queries/0_stateless/02303_cast_nullable_to_custom_types.reference @@ -39,7 +39,3 @@ fuzzer issue \N \N \N -\N -\N -\N -\N diff --git a/tests/queries/0_stateless/02303_cast_nullable_to_custom_types.sql b/tests/queries/0_stateless/02303_cast_nullable_to_custom_types.sql index b56ebc2b09dc..570fbcde01fa 100644 --- a/tests/queries/0_stateless/02303_cast_nullable_to_custom_types.sql +++ b/tests/queries/0_stateless/02303_cast_nullable_to_custom_types.sql @@ -24,7 +24,7 @@ select toIPv6OrNull(number % 2 ? '' : NULL) from numbers(2); select IPv6StringToNum(number % 2 ? '0000:0000:0000:0000:0000:0000:0000:0000' : NULL) from numbers(2); select 'fuzzer issue'; -SELECT CAST(if(number % 2, 'truetrue', NULL), 'Nullable(Bool)') FROM numbers(2); -SELECT CAST(if(number % 2, 'falsefalse', NULL), 'Nullable(Bool)') FROM numbers(2); +SELECT CAST(if(number % 2, 'truetrue', NULL), 'Nullable(Bool)') FROM numbers(2); -- {serverError CANNOT_PARSE_BOOL} +SELECT CAST(if(number % 2, 'falsefalse', NULL), 'Nullable(Bool)') FROM numbers(2); -- {serverError CANNOT_PARSE_BOOL} SELECT accurateCastOrNull(if(number % 2, NULL, 'truex'), 'Bool') FROM numbers(4); SELECT accurateCastOrNull(if(number % 2, 'truex', NULL), 'Bool') FROM numbers(4); diff --git a/tests/queries/0_stateless/02343_analyzer_lambdas.reference b/tests/queries/0_stateless/02343_analyzer_lambdas.reference index 8d29481c2555..62d9e9f47265 100644 --- a/tests/queries/0_stateless/02343_analyzer_lambdas.reference +++ b/tests/queries/0_stateless/02343_analyzer_lambdas.reference @@ -27,3 +27,11 @@ Lambda untuple Lambda carrying 2 1 1 0 +Lambda legacy syntax +[2,3,4] +[2,3,4] +[2,3,4] +['hello','world'] +[2,3,4] +[2,3,4] 2 +[2,3,4] 2 1 diff --git a/tests/queries/0_stateless/02343_analyzer_lambdas.sql b/tests/queries/0_stateless/02343_analyzer_lambdas.sql index b90f7b32b57a..0c257cf6f18b 100644 --- a/tests/queries/0_stateless/02343_analyzer_lambdas.sql +++ b/tests/queries/0_stateless/02343_analyzer_lambdas.sql @@ -65,5 +65,31 @@ SELECT 'Lambda carrying'; WITH (functor, x) -> functor(x) AS lambda, x -> x + 1 AS functor_1, x -> toString(x) AS functor_2 SELECT lambda(functor_1, 1), lambda(functor_2, 1); WITH (functor, x) -> functor(x) AS lambda, x -> x + 1 AS functor_1, x -> toString(x) AS functor_2 SELECT lambda(functor_1, id), lambda(functor_2, id) FROM test_table; + +SELECT 'Lambda legacy syntax'; + +SELECT arrayMap(lambda(tuple(x), x + 1), [1, 2, 3]); + +WITH 222 AS lambda +SELECT arrayMap(lambda(tuple(x), x + 1), [1, 2, 3]); + +SELECT arrayMap(lambda((x,), x + 1), [1, 2, 3]); + +SELECT arraySort(lambda((x, y), y), ['world', 'hello'], [2, 1]); + +WITH 222 AS lambda +SELECT arrayMap(lambda((x, ), x + 1), [1, 2, 3]); + +WITH x -> x + 1 AS lambda +SELECT arrayMap(lambda(tuple(x), x + 1), [1, 2, 3]), lambda(1); + +-- lambda(tuple(x), x + 1) is parsed as a lambda definition, not as a call of the lambda defined in WITH +WITH (x, y) -> y AS lambda +SELECT arrayMap(lambda(tuple(x), x + 1), [1, 2, 3]), lambda(tuple(x), x + 1), 1 AS x; -- { serverError BAD_ARGUMENTS } + +WITH (x, y) -> y AS lambda2 +SELECT arrayMap(lambda(tuple(x), x + 1), [1, 2, 3]),
lambda2(tuple(x), x + 1), 1 AS x; + + DROP TABLE test_table_tuple; DROP TABLE test_table; diff --git a/tests/queries/0_stateless/02389_analyzer_nested_lambda.sql b/tests/queries/0_stateless/02389_analyzer_nested_lambda.sql index 48e84246d1c9..be4b64888ca2 100644 --- a/tests/queries/0_stateless/02389_analyzer_nested_lambda.sql +++ b/tests/queries/0_stateless/02389_analyzer_nested_lambda.sql @@ -127,3 +127,14 @@ SELECT arrayMap(x -> splitByChar(toString(id), arrayMap(x -> toString(1), [NULL] DROP TABLE test_table; -- { echoOff } + +SELECT + groupArray(number) AS counts, + arraySum(arrayMap(x -> (x + 1), counts)) as hello, + arrayMap(x -> (x / hello), counts) AS res +FROM numbers(1000000) FORMAT Null; + +SELECT + arrayWithConstant(pow(10,6), 1) AS nums, + arrayMap(x -> x, nums) AS m, + arrayMap(x -> x + arraySum(m), m) AS res FORMAT Null; diff --git a/tests/queries/0_stateless/02474_extract_fixedstring_from_json.reference b/tests/queries/0_stateless/02474_extract_fixedstring_from_json.reference index 783d12fcf1a6..21ddf5d35123 100644 --- a/tests/queries/0_stateless/02474_extract_fixedstring_from_json.reference +++ b/tests/queries/0_stateless/02474_extract_fixedstring_from_json.reference @@ -8,3 +8,10 @@ \0\0\0\0\0 131231 131231 +1234 +1234 +{"b":131231} +\0\0\0\0 +1234567890 +18446744073709551615 +-9223372036854775807 diff --git a/tests/queries/0_stateless/02474_extract_fixedstring_from_json.sql b/tests/queries/0_stateless/02474_extract_fixedstring_from_json.sql index cfc47e00cbac..bbb9f55062bc 100644 --- a/tests/queries/0_stateless/02474_extract_fixedstring_from_json.sql +++ b/tests/queries/0_stateless/02474_extract_fixedstring_from_json.sql @@ -6,3 +6,10 @@ SELECT JSONExtract('{"a": 123456}', 'a', 'FixedString(5)'); SELECT JSONExtract('{"a": 123456}', 'a', 'FixedString(6)'); SELECT JSONExtract(materialize('{"a": 131231}'), 'a', 'LowCardinality(FixedString(5))') FROM numbers(2); SELECT JSONExtract(materialize('{"a": 131231}'), 'a', 'LowCardinality(FixedString(6))') FROM numbers(2); +SELECT JSONExtract(materialize('{"a": 131231, "b": 1234}'), 'b', 'LowCardinality(FixedString(4))'); +SELECT JSONExtract(materialize('{"a": 131231, "b": "1234"}'), 'b', 'LowCardinality(FixedString(4))'); +SELECT JSONExtract(materialize('{"a": {"b": 131231} }'), 'a', 'LowCardinality(FixedString(12))'); +SELECT JSONExtract(materialize('{"a": 131231, "b": 1234567890}'), 'b', 'LowCardinality(FixedString(4))'); +SELECT JSONExtract(materialize('{"a": 131231, "b": 1234567890}'), 'b', 'LowCardinality(FixedString(10))'); +SELECT JSONExtract(materialize('{"a": 18446744073709551615}'), 'a', 'LowCardinality(FixedString(20))'); +SELECT JSONExtract(materialize('{"a": -9223372036854775807}'), 'a', 'LowCardinality(FixedString(20))'); diff --git a/tests/queries/0_stateless/02494_query_cache_eligible_queries.sql b/tests/queries/0_stateless/02494_query_cache_eligible_queries.sql index 5c45ee8aedd4..f7ddb8f8bda3 100644 --- a/tests/queries/0_stateless/02494_query_cache_eligible_queries.sql +++ b/tests/queries/0_stateless/02494_query_cache_eligible_queries.sql @@ -7,6 +7,7 @@ DROP TABLE IF EXISTS eligible_test2; -- enable query cache session-wide but also force it individually in each of below statements SET use_query_cache = true; +SET query_cache_system_table_handling = 'save'; -- check that SELECT statements create entries in the query cache ... 
SELECT 1 SETTINGS use_query_cache = true; diff --git a/tests/queries/0_stateless/02494_query_cache_explain.sql b/tests/queries/0_stateless/02494_query_cache_explain.sql index d12938181c2e..bf376b47fdb0 100644 --- a/tests/queries/0_stateless/02494_query_cache_explain.sql +++ b/tests/queries/0_stateless/02494_query_cache_explain.sql @@ -2,6 +2,7 @@ -- Tag no-parallel: Messes with internal cache SET allow_experimental_analyzer = 1; +SET query_cache_system_table_handling = 'save'; SYSTEM DROP QUERY CACHE; diff --git a/tests/queries/0_stateless/02494_query_cache_secrets.reference b/tests/queries/0_stateless/02494_query_cache_secrets.reference index 306374eed4b1..82833f28369a 100644 --- a/tests/queries/0_stateless/02494_query_cache_secrets.reference +++ b/tests/queries/0_stateless/02494_query_cache_secrets.reference @@ -1,2 +1,2 @@ A2193552DCF8A9F99AC35F86BC4D2FFD -SELECT hex(encrypt(\'aes-128-ecb\', \'[HIDDEN]\')) SETTINGS use_query_cache = 1 +SELECT hex(encrypt(\'aes-128-ecb\', \'[HIDDEN]\')) SETTINGS use_query_cache = true diff --git a/tests/queries/0_stateless/02494_query_cache_sparse_columns.sql b/tests/queries/0_stateless/02494_query_cache_sparse_columns.sql index 4344d139d60b..6266996ac2eb 100644 --- a/tests/queries/0_stateless/02494_query_cache_sparse_columns.sql +++ b/tests/queries/0_stateless/02494_query_cache_sparse_columns.sql @@ -12,11 +12,10 @@ SYSTEM STOP MERGES t_cache_sparse; INSERT INTO t_cache_sparse SELECT number, number FROM numbers(10000); INSERT INTO t_cache_sparse SELECT number, 0 FROM numbers(10000); -SET use_query_cache = 1; SET max_threads = 1; -SELECT v FROM t_cache_sparse FORMAT Null; -SELECT v FROM t_cache_sparse FORMAT Null; +SELECT v FROM t_cache_sparse SETTINGS use_query_cache = 1, max_threads = 1 FORMAT Null; +SELECT v FROM t_cache_sparse SETTINGS use_query_cache = 1, max_threads = 1 FORMAT Null; SELECT count() FROM system.query_cache WHERE query LIKE 'SELECT v FROM t_cache_sparse%'; DROP TABLE t_cache_sparse; diff --git a/tests/queries/0_stateless/02494_query_cache_system_tables.reference b/tests/queries/0_stateless/02494_query_cache_system_tables.reference new file mode 100644 index 000000000000..e41e365766e5 --- /dev/null +++ b/tests/queries/0_stateless/02494_query_cache_system_tables.reference @@ -0,0 +1,13 @@ +The Default for query_cache_system_table_handling is = throw +0 +Check behavior of query_cache_system_table_handling = throw +0 +Check behavior of query_cache_system_table_handling = save +0 +1 +Check behavior of query_cache_system_table_handling = ignore +0 +0 +Other tests +0 +0 diff --git a/tests/queries/0_stateless/02494_query_cache_system_tables.sql b/tests/queries/0_stateless/02494_query_cache_system_tables.sql new file mode 100644 index 000000000000..7c9f01c4e91f --- /dev/null +++ b/tests/queries/0_stateless/02494_query_cache_system_tables.sql @@ -0,0 +1,64 @@ +-- Tags: no-parallel +-- Tag no-parallel: Messes with internal cache + +SYSTEM DROP QUERY CACHE; + +SELECT 'The Default for query_cache_system_table_handling is = throw'; +-- Test that the query cache rejects queries that involve system tables. +SELECT * FROM system.one SETTINGS use_query_cache = 1; -- { serverError QUERY_CACHE_USED_WITH_SYSTEM_TABLE } +SELECT count(*) FROM system.query_cache; + +SYSTEM DROP QUERY CACHE; + +SELECT 'Check behavior of query_cache_system_table_handling = throw'; +-- Test that the query cache rejects queries that involve system tables. 
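+-- Setting the value explicitly is expected to behave the same as the default ('throw') tested above.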
+SELECT * FROM system.one SETTINGS use_query_cache = 1, query_cache_system_table_handling = 'throw'; -- { serverError QUERY_CACHE_USED_WITH_SYSTEM_TABLE } +SELECT count(*) FROM system.query_cache; + +SYSTEM DROP QUERY CACHE; + +SELECT 'Check behavior of query_cache_system_table_handling = save'; +-- Test that the query cache saves the result of queries that involve system tables. +SELECT * FROM system.one SETTINGS use_query_cache = 1, query_cache_system_table_handling = 'save'; +SELECT count(*) FROM system.query_cache; + +SYSTEM DROP QUERY CACHE; + +SELECT 'Check behavior of query_cache_system_table_handling = ignore'; +-- Test that the query cache ignores the result of queries that involve system tables. +SELECT * FROM system.one SETTINGS use_query_cache = 1, query_cache_system_table_handling = 'ignore'; +SELECT count(*) FROM system.query_cache; + +SYSTEM DROP QUERY CACHE; + +SELECT 'Other tests'; + +-- Edge case which doesn't work well due to conceptual reasons (QueryCache is AST-based), test it anyway to have it documented. +USE system; +SELECT * FROM one SETTINGS use_query_cache = 1; -- doesn't throw but should + +-- This query uses system.zero internally. Since the query cache works at AST level it does not "see" system.zero and must not complain. +SELECT * SETTINGS use_query_cache = 1; + +-- information_schema is also treated as a system table +SELECT * FROM information_schema.tables SETTINGS use_query_cache = 1; -- { serverError QUERY_CACHE_USED_WITH_SYSTEM_TABLE } +SELECT * FROM INFORMATION_SCHEMA.TABLES SETTINGS use_query_cache = 1; -- { serverError QUERY_CACHE_USED_WITH_SYSTEM_TABLE } + +-- System tables can be "hidden" inside e.g. table functions +SELECT * FROM clusterAllReplicas('test_shard_localhost', system.one) SETTINGS use_query_cache = 1; -- {serverError QUERY_CACHE_USED_WITH_SYSTEM_TABLE } +SELECT * FROM clusterAllReplicas('test_shard_localhost', 'system.one') SETTINGS use_query_cache = 1; -- {serverError QUERY_CACHE_USED_WITH_SYSTEM_TABLE } + +-- Criminal edge case where a user creates a table named "system". The query cache must not reject queries against it. +DROP TABLE IF EXISTS system; +CREATE TABLE system (c UInt64) ENGINE = Memory; +SELECT * FROM system SETTINGS use_query_cache = 1; +DROP TABLE system; + +-- But queries against system.system are rejected.
+DROP TABLE IF EXISTS system.system; +CREATE TABLE system.system (c UInt64) ENGINE = Memory; +SElECT * FROM system.system SETTINGS use_query_cache = 1; -- { serverError QUERY_CACHE_USED_WITH_SYSTEM_TABLE } +DROP TABLE system.system; + +-- Cleanup +SYSTEM DROP QUERY CACHE; diff --git a/tests/queries/0_stateless/02783_parallel_replicas_trivial_count_optimization.sh b/tests/queries/0_stateless/02783_parallel_replicas_trivial_count_optimization.sh index bafab249b475..20b3efedd49e 100755 --- a/tests/queries/0_stateless/02783_parallel_replicas_trivial_count_optimization.sh +++ b/tests/queries/0_stateless/02783_parallel_replicas_trivial_count_optimization.sh @@ -25,7 +25,7 @@ function run_query_with_pure_parallel_replicas () { $CLICKHOUSE_CLIENT \ --query "$2" \ --query_id "${1}_disabled" \ - --max_parallel_replicas 0 + --max_parallel_replicas 1 $CLICKHOUSE_CLIENT \ --query "$2" \ @@ -50,7 +50,7 @@ function run_query_with_custom_key_parallel_replicas () { $CLICKHOUSE_CLIENT \ --query "$2" \ --query_id "${1}_disabled" \ - --max_parallel_replicas 0 + --max_parallel_replicas 1 $CLICKHOUSE_CLIENT \ --query "$2" \ diff --git a/tests/queries/0_stateless/02802_with_cube_with_totals.reference b/tests/queries/0_stateless/02802_with_cube_with_totals.reference index 206c32e562b0..c7b7b5704560 100644 --- a/tests/queries/0_stateless/02802_with_cube_with_totals.reference +++ b/tests/queries/0_stateless/02802_with_cube_with_totals.reference @@ -1,35 +1,5 @@ ((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 ((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 -((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 -((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 -((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 -((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 -((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 -((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 -((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 -((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 -((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 -((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 -((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 -((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 -((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 -((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 -((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 -((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 -((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 -((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 -((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 -((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 -((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 -((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 
-((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 -((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 -((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 -((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 -((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 -((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 -((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 -((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 ((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 \N diff --git a/tests/queries/0_stateless/02802_with_cube_with_totals.sql b/tests/queries/0_stateless/02802_with_cube_with_totals.sql index 168e4d61b681..77adb68eb4b7 100644 --- a/tests/queries/0_stateless/02802_with_cube_with_totals.sql +++ b/tests/queries/0_stateless/02802_with_cube_with_totals.sql @@ -1,3 +1,2 @@ -set allow_experimental_analyzer=1; SELECT tuple((2147483648, (-0., 1.1754943508222875e-38, 2147483646, '-9223372036854775808', NULL))), toInt128(0.0001) GROUP BY ((256, toInt64(1.1754943508222875e-38), NULL), NULL, -0., ((65535, '-92233720368547758.07'), 0.9999), tuple(((1., 3.4028234663852886e38, '1', 0.5), NULL, tuple('0.1')))) WITH CUBE WITH TOTALS; SELECT NULL GROUP BY toUUID(NULL, '0', NULL, '0.0000065535'), 1 WITH CUBE WITH TOTALS; diff --git a/tests/queries/0_stateless/02834_apache_arrow_abort.reference b/tests/queries/0_stateless/02834_apache_arrow_abort.reference new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/queries/0_stateless/02834_apache_arrow_abort.sql b/tests/queries/0_stateless/02834_apache_arrow_abort.sql new file mode 100644 index 000000000000..47db46f1e43a --- /dev/null +++ b/tests/queries/0_stateless/02834_apache_arrow_abort.sql @@ -0,0 +1,4 @@ +-- Tags: no-fasttest +-- This tests depends on internet access, but it does not matter, because it only has to check that there is no abort due to a bug in Apache Arrow library. 
+ +INSERT INTO TABLE FUNCTION url('https://clickhouse-public-datasets.s3.amazonaws.com/hits_compatible/hits.parquet') SELECT * FROM url('https://clickhouse-public-datasets.s3.amazonaws.com/hits_compatible/hits.parquet'); -- { serverError CANNOT_WRITE_TO_OSTREAM, RECEIVED_ERROR_FROM_REMOTE_IO_SERVER, POCO_EXCEPTION } diff --git a/tests/queries/0_stateless/02861_filter_pushdown_const_bug.reference b/tests/queries/0_stateless/02861_filter_pushdown_const_bug.reference index df8198bc8568..866d6cb7ec38 100644 --- a/tests/queries/0_stateless/02861_filter_pushdown_const_bug.reference +++ b/tests/queries/0_stateless/02861_filter_pushdown_const_bug.reference @@ -7,4 +7,4 @@ 1 1 1 1 1 -1 1 +1 2 diff --git a/tests/queries/0_stateless/02861_filter_pushdown_const_bug.sql b/tests/queries/0_stateless/02861_filter_pushdown_const_bug.sql index a299e50984f9..ea52df5d4b43 100644 --- a/tests/queries/0_stateless/02861_filter_pushdown_const_bug.sql +++ b/tests/queries/0_stateless/02861_filter_pushdown_const_bug.sql @@ -1,3 +1,5 @@ +SET allow_experimental_analyzer = 1; + DROP TABLE IF EXISTS t1; CREATE TABLE t1 (key UInt8) ENGINE = Memory; diff --git a/tests/queries/0_stateless/02864_restore_table_with_broken_part.sh b/tests/queries/0_stateless/02864_restore_table_with_broken_part.sh index d3252b29eb79..fe26784dab4d 100755 --- a/tests/queries/0_stateless/02864_restore_table_with_broken_part.sh +++ b/tests/queries/0_stateless/02864_restore_table_with_broken_part.sh @@ -5,33 +5,38 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -# Copy backups/with_broken_part.zip into the disk named "backups". -SRC_BACKUP_DIR=$CURDIR/backups -SRC_BACKUP_FILENAME=with_broken_part.zip +# Copies a predefined test backup from "/tests/queries/0_stateless/backups/" folder to the "backups" disk, +# returns the path to the backup relative to that disk. +function install_test_backup() +{ + local test_backup_filename="$1" + local test_backup_path="$CURDIR/backups/${test_backup_filename}" -BACKUPS_DISK=backups -BACKUPS_DIR=$($CLICKHOUSE_CLIENT --query "SELECT path FROM system.disks WHERE name='$BACKUPS_DISK'") + local backups_disk_root=$($CLICKHOUSE_CLIENT --query "SELECT path FROM system.disks WHERE name='backups'") -if [ -z "$BACKUPS_DIR" ]; then - echo Disk \'$BACKUPS_DISK\' not found - exit 1 -fi + if [ -z "${backups_disk_root}" ]; then + echo Disk \'backups\' not found + exit 1 + fi -BACKUP_FILENAME=$CLICKHOUSE_DATABASE/${SRC_BACKUP_FILENAME} -BACKUP_NAME="Disk('$BACKUPS_DISK', '$BACKUP_FILENAME')" + local install_path=${backups_disk_root}/${CLICKHOUSE_DATABASE}/${test_backup_filename} + mkdir -p "$(dirname "${install_path}")" + ln -s "${test_backup_path}" "${install_path}" -mkdir -p "$(dirname "$BACKUPS_DIR/$BACKUP_FILENAME")" -ln -s "$SRC_BACKUP_DIR/$SRC_BACKUP_FILENAME" "$BACKUPS_DIR/$BACKUP_FILENAME" + echo "${CLICKHOUSE_DATABASE}/${test_backup_filename}" +} + +backup_name="$(install_test_backup with_broken_part.zip)" $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS tbl" # First try to restore with the setting `restore_broken_parts_as_detached` set to false.
-$CLICKHOUSE_CLIENT --query "RESTORE TABLE default.tbl AS tbl FROM $BACKUP_NAME" 2>&1 | tr -d \\n | grep "data.bin doesn't exist" | grep "while restoring part all_2_2_0" > /dev/null && echo "OK" || echo "FAILED" +$CLICKHOUSE_CLIENT --query "RESTORE TABLE default.tbl AS tbl FROM Disk('backups', '${backup_name}')" 2>&1 | tr -d \\n | grep "data.bin doesn't exist" | grep "while restoring part all_2_2_0" > /dev/null && echo "OK" || echo "FAILED" $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS tbl" # Then try to restore with the setting `restore_broken_parts_as_detached` set to true. -$CLICKHOUSE_CLIENT --query "RESTORE TABLE default.tbl AS tbl FROM $BACKUP_NAME SETTINGS restore_broken_parts_as_detached = true" 2>/dev/null | awk -F '\t' '{print $2}' +$CLICKHOUSE_CLIENT --query "RESTORE TABLE default.tbl AS tbl FROM Disk('backups', '${backup_name}') SETTINGS restore_broken_parts_as_detached = true" 2>/dev/null | awk -F '\t' '{print $2}' $CLICKHOUSE_CLIENT --multiquery < t2.x OR t1.x IS NULL AND t1.y <=> t2.y AND 2 2 2 2 3 3 3 33 \N \N \N \N +SELECT * FROM t1 JOIN t2 ON (t1.x = t2.x OR t1.x IS NULL AND t2.x IS NULL) ORDER BY t1.x; +2 2 2 2 +3 3 3 33 +\N \N \N \N +SELECT * FROM t1 JOIN t2 ON t1.x <=> t2.x AND (t1.x = t1.y OR t1.x IS NULL AND t1.y IS NULL) ORDER BY t1.x; +2 2 2 2 +3 3 3 33 +\N \N \N \N SELECT * FROM t1 JOIN t2 ON (t1.x = t2.x OR t1.x IS NULL AND t2.x IS NULL) AND t1.y <=> t2.y ORDER BY t1.x NULLS LAST; 2 2 2 2 \N \N \N \N diff --git a/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.sql b/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.sql index 6a98a7bb57bb..5458370db8c8 100644 --- a/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.sql +++ b/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.sql @@ -14,6 +14,9 @@ SELECT * FROM t1 JOIN t2 ON (t1.x <=> t2.x OR (t1.x IS NULL AND t2.x IS NULL)) O SELECT * FROM t1 JOIN t2 ON (t1.x <=> t2.x OR t1.x IS NULL AND t1.y <=> t2.y AND t2.x IS NULL) ORDER BY t1.x NULLS LAST; +SELECT * FROM t1 JOIN t2 ON (t1.x = t2.x OR t1.x IS NULL AND t2.x IS NULL) ORDER BY t1.x; +SELECT * FROM t1 JOIN t2 ON t1.x <=> t2.x AND (t1.x = t1.y OR t1.x IS NULL AND t1.y IS NULL) ORDER BY t1.x; + SELECT * FROM t1 JOIN t2 ON (t1.x = t2.x OR t1.x IS NULL AND t2.x IS NULL) AND t1.y <=> t2.y ORDER BY t1.x NULLS LAST; SELECT * FROM t1 JOIN t2 ON (t1.x <=> t2.x OR t1.y <=> t2.y OR (t1.x IS NULL AND t1.y IS NULL AND t2.x IS NULL AND t2.y IS NULL)) ORDER BY t1.x NULLS LAST; diff --git a/tests/queries/0_stateless/02911_support_alias_column_in_indices.sql b/tests/queries/0_stateless/02911_support_alias_column_in_indices.sql index 46d915343398..4d68786d7db7 100644 --- a/tests/queries/0_stateless/02911_support_alias_column_in_indices.sql +++ b/tests/queries/0_stateless/02911_support_alias_column_in_indices.sql @@ -35,6 +35,6 @@ insert into test2 select * from numbers(10); insert into test2 select * from numbers(11, 20); explain indexes = 1 select * from test2 where a2 > 15 settings allow_experimental_analyzer = 0; -explain indexes = 1 select * from test2 where a2 > 15 settings allow_experimental_analyzer = 1; -- buggy, analyzer does not pick up index i +explain indexes = 1 select * from test2 where a2 > 15 settings allow_experimental_analyzer = 1; drop database 02911_support_alias_column_in_indices; diff --git a/tests/queries/0_stateless/02941_variant_type_1.sh b/tests/queries/0_stateless/02941_variant_type_1.sh index 773a8c4a5e43..4fb76532a058 100755 --- a/tests/queries/0_stateless/02941_variant_type_1.sh +++ 
b/tests/queries/0_stateless/02941_variant_type_1.sh @@ -14,7 +14,7 @@ function test1_insert() echo "test1 insert" $CH_CLIENT -nmq "insert into test select number, NULL from numbers(3); insert into test select number + 3, number from numbers(3); -insert into test select number + 6, 'str_' || toString(number) from numbers(3); +insert into test select number + 6, ('str_' || toString(number))::Variant(String) from numbers(3); insert into test select number + 9, ('lc_str_' || toString(number))::LowCardinality(String) from numbers(3); insert into test select number + 12, tuple(number, number + 1)::Tuple(a UInt32, b UInt32) from numbers(3); insert into test select number + 15, range(number + 1)::Array(UInt64) from numbers(3);" @@ -40,7 +40,7 @@ function test2_insert() echo "test2 insert" $CH_CLIENT -nmq "insert into test select number, NULL from numbers(3); insert into test select number + 3, number % 2 ? NULL : number from numbers(3); -insert into test select number + 6, number % 2 ? NULL : 'str_' || toString(number) from numbers(3); +insert into test select number + 6, number % 2 ? NULL : ('str_' || toString(number))::Variant(String) from numbers(3); insert into test select number + 9, number % 2 ? CAST(NULL, 'Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))') : CAST(('lc_str_' || toString(number))::LowCardinality(String), 'Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))') from numbers(3); insert into test select number + 12, number % 2 ? CAST(NULL, 'Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))') : CAST(tuple(number, number + 1)::Tuple(a UInt32, b UInt32), 'Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))') from numbers(3); insert into test select number + 15, number % 2 ? 
CAST(NULL, 'Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))') : CAST(range(number + 1)::Array(UInt64), 'Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))') from numbers(3);" @@ -64,7 +64,7 @@ select v.\`Array(UInt64)\`.size0 from test order by id;" function test3_insert() { echo "test3 insert" - $CH_CLIENT -q "insert into test with 'Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))' as type select number, multiIf(number % 6 == 0, CAST(NULL, type), number % 6 == 1, CAST('str_' || toString(number), type), number % 6 == 2, CAST(number, type), number % 6 == 3, CAST(('lc_str_' || toString(number))::LowCardinality(String), type), number % 6 == 4, CAST(tuple(number, number + 1)::Tuple(a UInt32, b UInt32), type), CAST(range(number + 1)::Array(UInt64), type)) as res from numbers(18);" + $CH_CLIENT -q "insert into test with 'Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))' as type select number, multiIf(number % 6 == 0, CAST(NULL, type), number % 6 == 1, CAST(('str_' || toString(number))::Variant(String), type), number % 6 == 2, CAST(number, type), number % 6 == 3, CAST(('lc_str_' || toString(number))::LowCardinality(String), type), number % 6 == 4, CAST(tuple(number, number + 1)::Tuple(a UInt32, b UInt32), type), CAST(range(number + 1)::Array(UInt64), type)) as res from numbers(18);" } function test3_select() diff --git a/tests/queries/0_stateless/02941_variant_type_2.sh b/tests/queries/0_stateless/02941_variant_type_2.sh index d1fa0a777c95..995b622b6bfb 100755 --- a/tests/queries/0_stateless/02941_variant_type_2.sh +++ b/tests/queries/0_stateless/02941_variant_type_2.sh @@ -14,7 +14,7 @@ function test4_insert() echo "test4 insert" $CH_CLIENT -nmq "insert into test select number, NULL from numbers(100000); insert into test select number + 100000, number from numbers(100000); -insert into test select number + 200000, 'str_' || toString(number) from numbers(100000); +insert into test select number + 200000, ('str_' || toString(number))::Variant(String) from numbers(100000); insert into test select number + 300000, ('lc_str_' || toString(number))::LowCardinality(String) from numbers(100000); insert into test select number + 400000, tuple(number, number + 1)::Tuple(a UInt32, b UInt32) from numbers(100000); insert into test select number + 500000, range(number % 20 + 1)::Array(UInt64) from numbers(100000);" diff --git a/tests/queries/0_stateless/02941_variant_type_3.sh b/tests/queries/0_stateless/02941_variant_type_3.sh index a0efead280a3..9fbdf6de8a7a 100755 --- a/tests/queries/0_stateless/02941_variant_type_3.sh +++ b/tests/queries/0_stateless/02941_variant_type_3.sh @@ -15,7 +15,7 @@ function test5_insert() $CH_CLIENT -nmq " insert into test select number, NULL from numbers(200000); insert into test select number + 200000, number % 2 ? NULL : number from numbers(200000); -insert into test select number + 400000, number % 2 ? NULL : 'str_' || toString(number) from numbers(200000); +insert into test select number + 400000, number % 2 ? NULL : ('str_' || toString(number))::Variant(String) from numbers(200000); insert into test select number + 600000, number % 2 ? 
CAST(NULL, 'Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))') : CAST(('lc_str_' || toString(number))::LowCardinality(String), 'Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))') from numbers(200000); insert into test select number + 800000, number % 2 ? CAST(NULL, 'Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))') : CAST(tuple(number, number + 1)::Tuple(a UInt32, b UInt32), 'Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))') from numbers(200000); insert into test select number + 1000000, number % 2 ? CAST(NULL, 'Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))') : CAST(range(number % 20 + 1)::Array(UInt64), 'Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))') from numbers(200000);" diff --git a/tests/queries/0_stateless/02941_variant_type_4.sh b/tests/queries/0_stateless/02941_variant_type_4.sh index 336540d1e793..f6eaf2fcc9a1 100755 --- a/tests/queries/0_stateless/02941_variant_type_4.sh +++ b/tests/queries/0_stateless/02941_variant_type_4.sh @@ -12,7 +12,7 @@ CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --allow_suspic function test6_insert() { echo "test6 insert" - $CH_CLIENT -q "insert into test with 'Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))' as type select number, multiIf(number % 6 == 0, CAST(NULL, type), number % 6 == 1, CAST('str_' || toString(number), type), number % 6 == 2, CAST(number, type), number % 6 == 3, CAST(('lc_str_' || toString(number))::LowCardinality(String), type), number % 6 == 4, CAST(tuple(number, number + 1)::Tuple(a UInt32, b UInt32), type), CAST(range(number % 20 + 1)::Array(UInt64), type)) as res from numbers(1200000);" + $CH_CLIENT -q "insert into test with 'Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))' as type select number, multiIf(number % 6 == 0, CAST(NULL, type), number % 6 == 1, CAST(('str_' || toString(number))::Variant(String), type), number % 6 == 2, CAST(number, type), number % 6 == 3, CAST(('lc_str_' || toString(number))::LowCardinality(String), type), number % 6 == 4, CAST(tuple(number, number + 1)::Tuple(a UInt32, b UInt32), type), CAST(range(number % 20 + 1)::Array(UInt64), type)) as res from numbers(1200000);" } function test6_select() diff --git a/tests/queries/0_stateless/02942_variant_cast.reference b/tests/queries/0_stateless/02942_variant_cast.reference index f3fd7a9ba33b..d300ca655a60 100644 --- a/tests/queries/0_stateless/02942_variant_cast.reference +++ b/tests/queries/0_stateless/02942_variant_cast.reference @@ -6,7 +6,7 @@ \N Hello Hello -NULL +\N Hello Hello \N diff --git a/tests/queries/0_stateless/02943_rmt_alter_metadata_merge_checksum_mismatch.sh b/tests/queries/0_stateless/02943_rmt_alter_metadata_merge_checksum_mismatch.sh index 9c0c872eb069..27950866e816 100755 --- a/tests/queries/0_stateless/02943_rmt_alter_metadata_merge_checksum_mismatch.sh +++ b/tests/queries/0_stateless/02943_rmt_alter_metadata_merge_checksum_mismatch.sh @@ -82,6 +82,8 @@ $CLICKHOUSE_CLIENT -q "optimize table $success_replica final settings optimize_t $CLICKHOUSE_CLIENT -nm --insert_keeper_fault_injection_probability=0 -q " insert into $success_replica (key) values (2); -- part all_2_2_0 + -- Avoid 'Cannot select parts for optimization: Entry for part all_2_2_0 hasn't been read from the replication 
log yet' + system sync replica $success_replica pull; optimize table $success_replica final settings optimize_throw_if_noop=1, alter_sync=1; -- part all_0_2_2_1 system sync replica $failed_replica pull; " diff --git a/tests/queries/0_stateless/02980_dist_insert_readonly_replica.sql.j2 b/tests/queries/0_stateless/02980_dist_insert_readonly_replica.sql.j2 index 5bf40f34f5c4..aba742fa64a9 100644 --- a/tests/queries/0_stateless/02980_dist_insert_readonly_replica.sql.j2 +++ b/tests/queries/0_stateless/02980_dist_insert_readonly_replica.sql.j2 @@ -1,6 +1,7 @@ --- Tags: no-parallel, no-fasttest +-- Tags: no-parallel, no-fasttest, no-shared-merge-tree -- Tag no-parallel - due to static databases -- Tag no-fasttest - S3 is required +-- Tag no-shared-merge-tree - no reliable way to make SMT read-only in stateless test drop database if exists shard_0; drop database if exists shard_1; diff --git a/tests/queries/0_stateless/02990_optimize_uniq_to_count_alias.reference b/tests/queries/0_stateless/02990_optimize_uniq_to_count_alias.reference index 6ed281c757a9..e8183f05f5db 100644 --- a/tests/queries/0_stateless/02990_optimize_uniq_to_count_alias.reference +++ b/tests/queries/0_stateless/02990_optimize_uniq_to_count_alias.reference @@ -1,2 +1,3 @@ 1 1 +1 diff --git a/tests/queries/0_stateless/02990_optimize_uniq_to_count_alias.sql b/tests/queries/0_stateless/02990_optimize_uniq_to_count_alias.sql index 5ba0be399912..54d19264c452 100644 --- a/tests/queries/0_stateless/02990_optimize_uniq_to_count_alias.sql +++ b/tests/queries/0_stateless/02990_optimize_uniq_to_count_alias.sql @@ -34,4 +34,19 @@ FROM ) AS t ) SETTINGS optimize_uniq_to_count=1; +-- https://github.com/ClickHouse/ClickHouse/issues/62298 +DROP TABLE IF EXISTS users; +CREATE TABLE users +( + `id` Int64, + `name` String +) +ENGINE = ReplacingMergeTree +ORDER BY (id, name); + +INSERT INTO users VALUES (1, 'pufit'), (1, 'pufit2'), (1, 'pufit3'); + +SELECT uniqExact(id) FROM ( SELECT id FROM users WHERE id = 1 GROUP BY id, name ); + +DROP TABLE IF EXISTS users; DROP TABLE IF EXISTS tags; diff --git a/tests/queries/0_stateless/03001_backup_matview_after_modify_query.sh b/tests/queries/0_stateless/03001_backup_matview_after_modify_query.sh index d49f1c41c695..f857358a5eac 100755 --- a/tests/queries/0_stateless/03001_backup_matview_after_modify_query.sh +++ b/tests/queries/0_stateless/03001_backup_matview_after_modify_query.sh @@ -1,6 +1,5 @@ #!/usr/bin/env bash -# Tags: no-ordinary-database, no-replicated-database -# Tag no-ordinary-database: TO DO +# Tags: no-replicated-database CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/03001_max_parallel_replicas_zero_value.reference b/tests/queries/0_stateless/03001_max_parallel_replicas_zero_value.reference new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/queries/0_stateless/03001_max_parallel_replicas_zero_value.sql b/tests/queries/0_stateless/03001_max_parallel_replicas_zero_value.sql new file mode 100644 index 000000000000..499486713a60 --- /dev/null +++ b/tests/queries/0_stateless/03001_max_parallel_replicas_zero_value.sql @@ -0,0 +1,5 @@ +drop table if exists test_d; +create table test_d engine=Distributed(test_cluster_two_shard_three_replicas_localhost, system, numbers); +select * from test_d limit 10 settings max_parallel_replicas = 0, prefer_localhost_replica = 0; --{clientError BAD_ARGUMENTS} +drop table test_d; + diff --git 
a/tests/queries/0_stateless/03001_restore_from_old_backup_with_matview_inner_table_metadata.reference b/tests/queries/0_stateless/03001_restore_from_old_backup_with_matview_inner_table_metadata.reference new file mode 100644 index 000000000000..04ceb1934157 --- /dev/null +++ b/tests/queries/0_stateless/03001_restore_from_old_backup_with_matview_inner_table_metadata.reference @@ -0,0 +1,4 @@ +RESTORED +2024-02-22 07:00:00 00 +2024-02-22 07:00:01 11 +2024-02-22 07:00:02 22 diff --git a/tests/queries/0_stateless/03001_restore_from_old_backup_with_matview_inner_table_metadata.sh b/tests/queries/0_stateless/03001_restore_from_old_backup_with_matview_inner_table_metadata.sh new file mode 100755 index 000000000000..3a3d0edc38fd --- /dev/null +++ b/tests/queries/0_stateless/03001_restore_from_old_backup_with_matview_inner_table_metadata.sh @@ -0,0 +1,40 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +# Copies a test predefined backup from "/tests/queries/0_stateless/backups/" folder to the "backups" disk, +# returns the path to the backup relative to that disk. +function install_test_backup() +{ + local test_backup_filename="$1" + local test_backup_path="$CURDIR/backups/${test_backup_filename}" + + local backups_disk_root=$($CLICKHOUSE_CLIENT --query "SELECT path FROM system.disks WHERE name='backups'") + + if [ -z "${backups_disk_root}" ]; then + echo Disk \'${backups_disk_root}\' not found + exit 1 + fi + + local install_path=${backups_disk_root}/${CLICKHOUSE_DATABASE}/${test_backup_filename} + mkdir -p "$(dirname "${install_path}")" + ln -s "${test_backup_path}" "${install_path}" + + echo "${CLICKHOUSE_DATABASE}/${test_backup_filename}" +} + +backup_name="$(install_test_backup old_backup_with_matview_inner_table_metadata.zip)" + +$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS mv" +$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS src" + +db="$CLICKHOUSE_DATABASE" +${CLICKHOUSE_CLIENT} -q "RESTORE DATABASE mydb AS ${db} FROM Disk('backups', '${backup_name}') SETTINGS allow_different_database_def=true" | grep -o "RESTORED" + +${CLICKHOUSE_CLIENT} -q "SELECT toDateTime(timestamp, 'UTC') AS ts, c12 FROM mv ORDER BY ts" + +$CLICKHOUSE_CLIENT --query "DROP TABLE mv" +$CLICKHOUSE_CLIENT --query "DROP TABLE src" diff --git a/tests/queries/0_stateless/03013_ignore_drop_queries_probability.reference b/tests/queries/0_stateless/03013_ignore_drop_queries_probability.reference new file mode 100644 index 000000000000..daaac9e30302 --- /dev/null +++ b/tests/queries/0_stateless/03013_ignore_drop_queries_probability.reference @@ -0,0 +1,2 @@ +42 +42 diff --git a/tests/queries/0_stateless/03013_ignore_drop_queries_probability.sql b/tests/queries/0_stateless/03013_ignore_drop_queries_probability.sql new file mode 100644 index 000000000000..5c7b99987761 --- /dev/null +++ b/tests/queries/0_stateless/03013_ignore_drop_queries_probability.sql @@ -0,0 +1,18 @@ +create table test_memory (number UInt64) engine=Memory; +insert into test_memory select 42; +drop table test_memory settings ignore_drop_queries_probability=1; +select * from test_memory; +drop table test_memory; + +create table test_merge_tree (number UInt64) engine=MergeTree order by number; +insert into test_merge_tree select 42; +drop table test_merge_tree settings ignore_drop_queries_probability=1; +select * from test_merge_tree; +drop table test_merge_tree; + +create table test_join (number UInt64) engine=Join(ALL, LEFT, 
number); +insert into test_join select 42; +drop table test_join settings ignore_drop_queries_probability=1; +select * from test_join; +drop table test_join; + diff --git a/tests/queries/0_stateless/03018_analyzer_distributed_query_with_positional_arguments.reference b/tests/queries/0_stateless/03018_analyzer_distributed_query_with_positional_arguments.reference new file mode 100644 index 000000000000..bb0b1cf658d1 --- /dev/null +++ b/tests/queries/0_stateless/03018_analyzer_distributed_query_with_positional_arguments.reference @@ -0,0 +1,3 @@ +0 +0 +0 diff --git a/tests/queries/0_stateless/03018_analyzer_distributed_query_with_positional_arguments.sql b/tests/queries/0_stateless/03018_analyzer_distributed_query_with_positional_arguments.sql new file mode 100644 index 000000000000..16ba3b155948 --- /dev/null +++ b/tests/queries/0_stateless/03018_analyzer_distributed_query_with_positional_arguments.sql @@ -0,0 +1,7 @@ +select 0 as x +from remote('127.0.0.{1,2}', system.one) +group by x; + +select 0 as x +from remote('127.0.0.{1,2}', system.one) +order by x; diff --git a/tests/queries/0_stateless/03020_long_values_pretty_are_not_cut_if_single.sh b/tests/queries/0_stateless/03020_long_values_pretty_are_not_cut_if_single.sh index fa9e9f6d3e15..b66951d93f61 100755 --- a/tests/queries/0_stateless/03020_long_values_pretty_are_not_cut_if_single.sh +++ b/tests/queries/0_stateless/03020_long_values_pretty_are_not_cut_if_single.sh @@ -9,6 +9,11 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # But cutting it in the result of SHOW CREATE TABLE will be bad for a user. # That's why we control it with the setting `output_format_pretty_max_value_width_apply_for_single_value`. +# Make sure that system.metric_log exists +${CLICKHOUSE_CLIENT} --query "SELECT 1 FORMAT Null" +${CLICKHOUSE_CLIENT} --query "SYSTEM FLUSH LOGS" + + ${CLICKHOUSE_CLIENT} --query "SHOW CREATE TABLE system.metric_log" --format Pretty | grep -P '^COMMENT' ${CLICKHOUSE_CLIENT} --query "SHOW CREATE TABLE system.metric_log" --format PrettyCompact | grep -P '^COMMENT' ${CLICKHOUSE_CLIENT} --query "SHOW CREATE TABLE system.metric_log" --format PrettySpace | grep -P '^COMMENT' diff --git a/tests/queries/0_stateless/03023_group_by_use_nulls_analyzer_crashes.reference b/tests/queries/0_stateless/03023_group_by_use_nulls_analyzer_crashes.reference index 4081b82a8f5f..17a17484a0cb 100644 --- a/tests/queries/0_stateless/03023_group_by_use_nulls_analyzer_crashes.reference +++ b/tests/queries/0_stateless/03023_group_by_use_nulls_analyzer_crashes.reference @@ -60,3 +60,9 @@ (7) (8) (9) +a b +a b +a a +a a + +a a diff --git a/tests/queries/0_stateless/03023_group_by_use_nulls_analyzer_crashes.sql b/tests/queries/0_stateless/03023_group_by_use_nulls_analyzer_crashes.sql index 012da5475817..687101375429 100644 --- a/tests/queries/0_stateless/03023_group_by_use_nulls_analyzer_crashes.sql +++ b/tests/queries/0_stateless/03023_group_by_use_nulls_analyzer_crashes.sql @@ -5,3 +5,19 @@ SELECT tuple(tuple(number)) as x FROM numbers(10) GROUP BY (number, tuple(number select tuple(array(number)) as x FROM numbers(10) GROUP BY number, array(number) WITH ROLLUP order by x; SELECT tuple(number) AS x FROM numbers(10) GROUP BY GROUPING SETS (number) order by x; + +SELECT ignore(toFixedString('Lambda as function parameter', 28), toNullable(28), ignore(8)), sum(marks) FROM system.parts WHERE database = currentDatabase() GROUP BY GROUPING SETS ((2)) FORMAT Null settings optimize_injective_functions_in_group_by=1, optimize_group_by_function_keys=1, 
group_by_use_nulls=1; -- { serverError ILLEGAL_AGGREGATION } + +SELECT toLowCardinality(materialize('a' AS key)), 'b' AS value GROUP BY key WITH CUBE SETTINGS group_by_use_nulls = 1; + +SELECT tuple(tuple(number)) AS x +FROM numbers(10) +GROUP BY (number, (toString(x), number)) + WITH CUBE +SETTINGS group_by_use_nulls = 1 FORMAT Null; + +SELECT tuple(number + 1) AS x FROM numbers(10) GROUP BY number + 1, toString(x) WITH CUBE settings group_by_use_nulls=1 FORMAT Null; + +SELECT tuple(tuple(number)) AS x FROM numbers(10) WHERE toString(toUUID(tuple(number), NULL), x) GROUP BY number, (toString(x), number) WITH CUBE SETTINGS group_by_use_nulls = 1 FORMAT Null; + +SELECT materialize('a'), 'a' AS key GROUP BY key WITH CUBE WITH TOTALS SETTINGS group_by_use_nulls = 1; diff --git a/tests/queries/0_stateless/03031_clickhouse_local_input.reference b/tests/queries/0_stateless/03031_clickhouse_local_input.reference new file mode 100644 index 000000000000..a6feeef100d9 --- /dev/null +++ b/tests/queries/0_stateless/03031_clickhouse_local_input.reference @@ -0,0 +1,7 @@ +# foo +foo +# !foo +# bar +bar +# defaults +bam diff --git a/tests/queries/0_stateless/03031_clickhouse_local_input.sh b/tests/queries/0_stateless/03031_clickhouse_local_input.sh new file mode 100755 index 000000000000..6f59e9b97031 --- /dev/null +++ b/tests/queries/0_stateless/03031_clickhouse_local_input.sh @@ -0,0 +1,20 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +tmp_file="$CUR_DIR/$CLICKHOUSE_DATABASE.txt" +echo '# foo' +$CLICKHOUSE_LOCAL --engine_file_truncate_on_insert=1 -n -q "insert into function file('$tmp_file', 'LineAsString', 'x String') select * from input('x String') format LineAsString" << timeout )); then + echo "Timeout while waiting for operation ${operation_id} to come to status ${expected_status}. The current status is ${current_status}." + exit 1 + fi + sleep 0.1 + done +} + +# Making a backup. +backup_name="Disk('backups', '${CLICKHOUSE_TEST_UNIQUE_NAME}')" +backup_operation_id=$(start_async "BACKUP TABLE tbl TO ${backup_name} ASYNC") +wait_status ${backup_operation_id} "BACKUP_CREATED" + +# Restoring from that backup. +restore_operation_id=$(start_async "RESTORE TABLE tbl AS tbl2 FROM ${backup_name} ASYNC") +wait_status ${restore_operation_id} "RESTORED" + +# Check the result of that restoration. +${CLICKHOUSE_CLIENT} --query "SELECT * FROM tbl2" + +${CLICKHOUSE_CLIENT} -nm --query " +DROP TABLE tbl; +DROP TABLE tbl2; +" diff --git a/tests/queries/0_stateless/03032_dynamically_resize_filesystem_cache_2.reference b/tests/queries/0_stateless/03032_dynamically_resize_filesystem_cache_2.reference new file mode 100644 index 000000000000..fcb49fa99454 --- /dev/null +++ b/tests/queries/0_stateless/03032_dynamically_resize_filesystem_cache_2.reference @@ -0,0 +1,7 @@ +1 +1 +1 +1 +1 +1 +1 diff --git a/tests/queries/0_stateless/03032_dynamically_resize_filesystem_cache_2.sh b/tests/queries/0_stateless/03032_dynamically_resize_filesystem_cache_2.sh new file mode 100755 index 000000000000..79c43048b890 --- /dev/null +++ b/tests/queries/0_stateless/03032_dynamically_resize_filesystem_cache_2.sh @@ -0,0 +1,46 @@ +#!/usr/bin/env bash +# Tags: no-fasttest, no-parallel, no-s3-storage, no-random-settings + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +disk_name="s3_cache" + +$CLICKHOUSE_CLIENT -nm --query " +DROP TABLE IF EXISTS test; +CREATE TABLE test (a String) engine=MergeTree() ORDER BY tuple() SETTINGS disk = '$disk_name'; +INSERT INTO test SELECT randomString(1000); +" + +$CLICKHOUSE_CLIENT --query "SELECT * FROM test FORMAT Null" + +prev_max_size=$($CLICKHOUSE_CLIENT --query "SELECT max_size FROM system.filesystem_cache_settings WHERE cache_name = '$disk_name'") +$CLICKHOUSE_CLIENT --query "SELECT current_size > 0 FROM system.filesystem_cache_settings WHERE cache_name = '$disk_name' FORMAT TabSeparated" + +config_path=/etc/clickhouse-server/config.d/storage_conf.xml +config_path_tmp=$config_path.tmp + +new_max_size=$($CLICKHOUSE_CLIENT --query "SELECT divide(max_size, 2) FROM system.filesystem_cache_settings WHERE cache_name = '$disk_name'") +sed -i "s|$prev_max_size<\/max_size>|$new_max_size<\/max_size>|" $config_path + +# echo $prev_max_size +# echo $new_max_size + +$CLICKHOUSE_CLIENT -nm --query " +set send_logs_level='fatal'; +SYSTEM RELOAD CONFIG" + +$CLICKHOUSE_CLIENT --query "SELECT max_size == $new_max_size FROM system.filesystem_cache_settings WHERE cache_name = '$disk_name' FORMAT TabSeparated" +$CLICKHOUSE_CLIENT --query "SELECT current_size > 0 FROM system.filesystem_cache_settings WHERE cache_name = '$disk_name' FORMAT TabSeparated" +$CLICKHOUSE_CLIENT --query "SELECT current_size <= max_size FROM system.filesystem_cache_settings WHERE cache_name = '$disk_name' FORMAT TabSeparated" + +sed -i "s|$new_max_size<\/max_size>|$prev_max_size<\/max_size>|" $config_path + +$CLICKHOUSE_CLIENT -nm --query " +set send_logs_level='fatal'; +SYSTEM RELOAD CONFIG" + +$CLICKHOUSE_CLIENT --query "SELECT max_size == $prev_max_size FROM system.filesystem_cache_settings WHERE cache_name = '$disk_name' FORMAT TabSeparated" +$CLICKHOUSE_CLIENT --query "SELECT current_size > 0 FROM system.filesystem_cache_settings WHERE cache_name = '$disk_name' FORMAT TabSeparated" +$CLICKHOUSE_CLIENT --query "SELECT current_size <= max_size FROM system.filesystem_cache_settings WHERE cache_name = '$disk_name' FORMAT TabSeparated" diff --git a/tests/queries/0_stateless/03032_storage_memory_modify_settings.reference b/tests/queries/0_stateless/03032_storage_memory_modify_settings.reference new file mode 100644 index 000000000000..ca772aa7fd02 --- /dev/null +++ b/tests/queries/0_stateless/03032_storage_memory_modify_settings.reference @@ -0,0 +1,20 @@ +TESTING MODIFY SMALLER BYTES +17408 +16384 +65536 +TESTING MODIFY SMALLER ROWS +1100 +1000 +500 +TESTING ADD SETTINGS +50 +1000 +1070 +1020 +1100 +TESTING ADD SETTINGS +50 +1000 +1020 +1100 +TESTING INVALID SETTINGS diff --git a/tests/queries/0_stateless/03032_storage_memory_modify_settings.sql b/tests/queries/0_stateless/03032_storage_memory_modify_settings.sql new file mode 100644 index 000000000000..1507107c37fe --- /dev/null +++ b/tests/queries/0_stateless/03032_storage_memory_modify_settings.sql @@ -0,0 +1,76 @@ +SET max_block_size = 65409; -- Default value + +SELECT 'TESTING MODIFY SMALLER BYTES'; +DROP TABLE IF EXISTS memory; +CREATE TABLE memory (i UInt32) ENGINE = Memory SETTINGS min_bytes_to_keep = 8192, max_bytes_to_keep = 32768; + +INSERT INTO memory SELECT * FROM numbers(0, 100); -- 1024 bytes +INSERT INTO memory SELECT * FROM numbers(0, 3000); -- 16384 bytes +SELECT total_bytes FROM system.tables WHERE name = 'memory' and database = currentDatabase(); -- 17408 in total + +ALTER TABLE memory MODIFY SETTING min_bytes_to_keep = 4096, max_bytes_to_keep = 16384; 
+SELECT total_bytes FROM system.tables WHERE name = 'memory' and database = currentDatabase(); -- 16384 in total after deleting + +INSERT INTO memory SELECT * FROM numbers(3000, 10000); -- 65536 bytes +SELECT total_bytes FROM system.tables WHERE name = 'memory' and database = currentDatabase(); + +SELECT 'TESTING MODIFY SMALLER ROWS'; +DROP TABLE IF EXISTS memory; +CREATE TABLE memory (i UInt32) ENGINE = Memory SETTINGS min_rows_to_keep = 200, max_rows_to_keep = 2000; + +INSERT INTO memory SELECT * FROM numbers(0, 100); -- 100 rows +INSERT INTO memory SELECT * FROM numbers(100, 1000); -- 1000 rows +SELECT total_rows FROM system.tables WHERE name = 'memory' and database = currentDatabase(); -- 1100 in total + +ALTER TABLE memory MODIFY SETTING min_rows_to_keep = 100, max_rows_to_keep = 1000; +SELECT total_rows FROM system.tables WHERE name = 'memory' and database = currentDatabase(); -- 1000 in total after deleting + +INSERT INTO memory SELECT * FROM numbers(1000, 500); -- 500 rows +SELECT total_rows FROM system.tables WHERE name = 'memory' and database = currentDatabase(); -- 500 in total after deleting + +SELECT 'TESTING ADD SETTINGS'; +DROP TABLE IF EXISTS memory; +CREATE TABLE memory (i UInt32) ENGINE = Memory; + +INSERT INTO memory SELECT * FROM numbers(0, 50); -- 50 rows +SELECT total_rows FROM system.tables WHERE name = 'memory' and database = currentDatabase(); -- 50 in total + +INSERT INTO memory SELECT * FROM numbers(50, 950); -- 950 rows +SELECT total_rows FROM system.tables WHERE name = 'memory' and database = currentDatabase(); -- 1000 in total + +INSERT INTO memory SELECT * FROM numbers(2000, 70); -- 70 rows +SELECT total_rows FROM system.tables WHERE name = 'memory' and database = currentDatabase(); -- 1070 in total + +ALTER TABLE memory MODIFY SETTING min_rows_to_keep = 100, max_rows_to_keep = 1000; +SELECT total_rows FROM system.tables WHERE name = 'memory' and database = currentDatabase(); -- 1020 in total after deleting + +INSERT INTO memory SELECT * FROM numbers(3000, 1100); -- 1100 rows +SELECT total_rows FROM system.tables WHERE name = 'memory' and database = currentDatabase(); -- 1100 in total after deleting + +SELECT 'TESTING ADD SETTINGS'; +DROP TABLE IF EXISTS memory; +CREATE TABLE memory (i UInt32) ENGINE = Memory; +ALTER TABLE memory MODIFY SETTING min_rows_to_keep = 100, max_rows_to_keep = 1000; + +INSERT INTO memory SELECT * FROM numbers(0, 50); -- 50 rows +SELECT total_rows FROM system.tables WHERE name = 'memory' and database = currentDatabase(); -- 50 in total + +INSERT INTO memory SELECT * FROM numbers(50, 950); -- 950 rows +SELECT total_rows FROM system.tables WHERE name = 'memory' and database = currentDatabase(); -- 1000 in total + +INSERT INTO memory SELECT * FROM numbers(2000, 70); -- 70 rows +SELECT total_rows FROM system.tables WHERE name = 'memory' and database = currentDatabase(); -- 1020 in total after deleting + +INSERT INTO memory SELECT * FROM numbers(3000, 1100); -- 1100 rows +SELECT total_rows FROM system.tables WHERE name = 'memory' and database = currentDatabase(); -- 1100 in total after deleting + +SELECT 'TESTING INVALID SETTINGS'; +DROP TABLE IF EXISTS memory; +CREATE TABLE memory (i UInt32) ENGINE = Memory; +ALTER TABLE memory MODIFY SETTING min_rows_to_keep = 100; -- { serverError 452 } +ALTER TABLE memory MODIFY SETTING min_bytes_to_keep = 100; -- { serverError 452 } +ALTER TABLE memory MODIFY SETTING max_rows_to_keep = 1000; +ALTER TABLE memory MODIFY SETTING max_bytes_to_keep = 1000; + +DROP TABLE memory; + diff --git 
a/tests/queries/0_stateless/03032_string_to_variant_cast.reference b/tests/queries/0_stateless/03032_string_to_variant_cast.reference new file mode 100644 index 000000000000..6531e2206a57 --- /dev/null +++ b/tests/queries/0_stateless/03032_string_to_variant_cast.reference @@ -0,0 +1,13 @@ +42 UInt64 +abc String +\N None +[1,2,3] Array(UInt64) +[1, 2, 3 String +\N None +42 UInt64 +42 UInt64 +42 UInt64 +\N None +42 UInt64 +\N None +\N None diff --git a/tests/queries/0_stateless/03032_string_to_variant_cast.sql b/tests/queries/0_stateless/03032_string_to_variant_cast.sql new file mode 100644 index 000000000000..67a501b96283 --- /dev/null +++ b/tests/queries/0_stateless/03032_string_to_variant_cast.sql @@ -0,0 +1,17 @@ +set allow_experimental_variant_type=1; +select CAST('42', 'Variant(String, UInt64)') as v, variantType(v); +select CAST('abc', 'Variant(String, UInt64)') as v, variantType(v); +select CAST('null', 'Variant(String, UInt64)') as v, variantType(v); +select CAST('[1, 2, 3]', 'Variant(String, Array(UInt64))') as v, variantType(v); +select CAST('[1, 2, 3', 'Variant(String, Array(UInt64))') as v, variantType(v); +select CAST('42', 'Variant(Date)') as v, variantType(v); -- {serverError INCORRECT_DATA} +select accurateCastOrNull('42', 'Variant(Date)') as v, variantType(v); + +select CAST('42'::FixedString(2), 'Variant(String, UInt64)') as v, variantType(v); +select CAST('42'::LowCardinality(String), 'Variant(String, UInt64)') as v, variantType(v); +select CAST('42'::Nullable(String), 'Variant(String, UInt64)') as v, variantType(v); +select CAST(NULL::Nullable(String), 'Variant(String, UInt64)') as v, variantType(v); +select CAST('42'::LowCardinality(Nullable(String)), 'Variant(String, UInt64)') as v, variantType(v); +select CAST(NULL::LowCardinality(Nullable(String)), 'Variant(String, UInt64)') as v, variantType(v); +select CAST(NULL::LowCardinality(Nullable(FixedString(2))), 'Variant(String, UInt64)') as v, variantType(v); + diff --git a/tests/queries/0_stateless/03033_analyzer_merge_engine_filter_push_down.reference b/tests/queries/0_stateless/03033_analyzer_merge_engine_filter_push_down.reference new file mode 100644 index 000000000000..86a000598545 --- /dev/null +++ b/tests/queries/0_stateless/03033_analyzer_merge_engine_filter_push_down.reference @@ -0,0 +1,3 @@ +UInt32 1 +UInt32 2 +UInt32 3 diff --git a/tests/queries/0_stateless/03033_analyzer_merge_engine_filter_push_down.sql b/tests/queries/0_stateless/03033_analyzer_merge_engine_filter_push_down.sql new file mode 100644 index 000000000000..9be1152bbbf3 --- /dev/null +++ b/tests/queries/0_stateless/03033_analyzer_merge_engine_filter_push_down.sql @@ -0,0 +1,8 @@ +set allow_suspicious_low_cardinality_types=1; +drop table if exists test; +create table test (`x` LowCardinality(Nullable(UInt32)), `y` String) engine = MergeTree order by tuple(); +insert into test values (1, 'a'), (2, 'bb'), (3, 'ccc'), (4, 'dddd'); +create table m_table (x UInt32, y String) engine = Merge(currentDatabase(), 'test*'); +select toTypeName(x), x FROM m_table SETTINGS additional_table_filters = {'m_table':'x != 4'}, optimize_move_to_prewhere=1, allow_experimental_analyzer=1; +drop table test; + diff --git a/tests/queries/0_stateless/03033_analyzer_query_parameters.reference b/tests/queries/0_stateless/03033_analyzer_query_parameters.reference new file mode 100644 index 000000000000..6ed281c757a9 --- /dev/null +++ b/tests/queries/0_stateless/03033_analyzer_query_parameters.reference @@ -0,0 +1,2 @@ +1 +1 diff --git 
a/tests/queries/0_stateless/03033_analyzer_query_parameters.sh b/tests/queries/0_stateless/03033_analyzer_query_parameters.sh new file mode 100755 index 000000000000..cf46067df993 --- /dev/null +++ b/tests/queries/0_stateless/03033_analyzer_query_parameters.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +${CLICKHOUSE_LOCAL} --param_rounding 1 --query "SELECT 1 AS x ORDER BY x WITH FILL STEP {rounding:UInt32} SETTINGS allow_experimental_analyzer = 1" +${CLICKHOUSE_LOCAL} --param_rounding 1 --query "SELECT 1 AS x ORDER BY x WITH FILL STEP {rounding:UInt32} SETTINGS allow_experimental_analyzer = 0" diff --git a/tests/queries/0_stateless/03033_analyzer_resolve_from_parent_scope.reference b/tests/queries/0_stateless/03033_analyzer_resolve_from_parent_scope.reference new file mode 100644 index 000000000000..f599e28b8ab0 --- /dev/null +++ b/tests/queries/0_stateless/03033_analyzer_resolve_from_parent_scope.reference @@ -0,0 +1 @@ +10 diff --git a/tests/queries/0_stateless/03033_analyzer_resolve_from_parent_scope.sql b/tests/queries/0_stateless/03033_analyzer_resolve_from_parent_scope.sql new file mode 100644 index 000000000000..22f103c9bd5d --- /dev/null +++ b/tests/queries/0_stateless/03033_analyzer_resolve_from_parent_scope.sql @@ -0,0 +1,27 @@ +CREATE TABLE vecs_Float32 (v Array(Float32)) ENGINE=Memory; +INSERT INTO vecs_Float32 +SELECT v FROM ( + SELECT + number AS n, + [ + rand(n*10), rand(n*10+1), rand(n*10+2), rand(n*10+3), rand(n*10+4), rand(n*10+5), rand(n*10+6), rand(n*10+7), rand(n*10+8), rand(n*10+9), + rand(n*10+10), rand(n*10+11), rand(n*10+12), rand(n*10+13), rand(n*10+14), rand(n*10+15), rand(n*10+16), rand(n*10+17), rand(n*10+18), rand(n*10+19), + rand(n*10+20), rand(n*10+21), rand(n*10+22), rand(n*10+23), rand(n*10+24), rand(n*10+25), rand(n*10+26), rand(n*10+27), rand(n*10+28), rand(n*10+29), + rand(n*10+30), rand(n*10+31), rand(n*10+32), rand(n*10+33), rand(n*10+34), rand(n*10+35), rand(n*10+36), rand(n*10+37), rand(n*10+38), rand(n*10+39), + rand(n*10+40), rand(n*10+41), rand(n*10+42), rand(n*10+43), rand(n*10+44), rand(n*10+45), rand(n*10+46), rand(n*10+47), rand(n*10+48), rand(n*10+49), + rand(n*10+50), rand(n*10+51), rand(n*10+52), rand(n*10+53), rand(n*10+54), rand(n*10+55), rand(n*10+56), rand(n*10+57), rand(n*10+58), rand(n*10+59), + rand(n*10+60), rand(n*10+61), rand(n*10+62), rand(n*10+63), rand(n*10+64), rand(n*10+65), rand(n*10+66), rand(n*10+67), rand(n*10+68), rand(n*10+69), + rand(n*10+70), rand(n*10+71), rand(n*10+72), rand(n*10+73), rand(n*10+74), rand(n*10+75), rand(n*10+76), rand(n*10+77), rand(n*10+78), rand(n*10+79), + rand(n*10+80), rand(n*10+81), rand(n*10+82), rand(n*10+83), rand(n*10+84), rand(n*10+85), rand(n*10+86), rand(n*10+87), rand(n*10+88), rand(n*10+89), + rand(n*10+90), rand(n*10+91), rand(n*10+92), rand(n*10+93), rand(n*10+94), rand(n*10+95), rand(n*10+96), rand(n*10+97), rand(n*10+98), rand(n*10+99), + rand(n*10+100), rand(n*10+101), rand(n*10+102), rand(n*10+103), rand(n*10+104), rand(n*10+105), rand(n*10+106), rand(n*10+107), rand(n*10+108), rand(n*10+109), + rand(n*10+110), rand(n*10+111), rand(n*10+112), rand(n*10+113), rand(n*10+114), rand(n*10+115), rand(n*10+116), rand(n*10+117), rand(n*10+118), rand(n*10+119), + rand(n*10+120), rand(n*10+121), rand(n*10+122), rand(n*10+123), rand(n*10+124), rand(n*10+125), rand(n*10+126), rand(n*10+127), rand(n*10+128), rand(n*10+129), + rand(n*10+130), rand(n*10+131), 
rand(n*10+132), rand(n*10+133), rand(n*10+134), rand(n*10+135), rand(n*10+136), rand(n*10+137), rand(n*10+138), rand(n*10+139), + rand(n*10+140), rand(n*10+141), rand(n*10+142), rand(n*10+143), rand(n*10+144), rand(n*10+145), rand(n*10+146), rand(n*10+147), rand(n*10+148), rand(n*10+149) + ] AS v + FROM system.numbers + LIMIT 10 +); + +WITH (SELECT v FROM vecs_Float32 limit 1) AS a SELECT count(dp) FROM (SELECT dotProduct(a, v) AS dp FROM vecs_Float32); diff --git a/tests/queries/0_stateless/03033_cte_numbers_memory.reference b/tests/queries/0_stateless/03033_cte_numbers_memory.reference new file mode 100644 index 000000000000..8d2470dea442 --- /dev/null +++ b/tests/queries/0_stateless/03033_cte_numbers_memory.reference @@ -0,0 +1,10 @@ +0 +1 +2 +3 +4 +0 +1 +2 +3 +4 diff --git a/tests/queries/0_stateless/03033_cte_numbers_memory.sql b/tests/queries/0_stateless/03033_cte_numbers_memory.sql new file mode 100644 index 000000000000..66b11cbfaa5f --- /dev/null +++ b/tests/queries/0_stateless/03033_cte_numbers_memory.sql @@ -0,0 +1,16 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/61238 +SET allow_experimental_analyzer=1; + +WITH +(SELECT number FROM system.numbers LIMIT 1) as w1, +(SELECT number FROM system.numbers LIMIT 1) as w2, +(SELECT number FROM system.numbers LIMIT 1) as w3, +(SELECT number FROM system.numbers LIMIT 1) as w4, +(SELECT number FROM system.numbers LIMIT 1) as w5, +(SELECT number FROM system.numbers LIMIT 1) as w6 +SELECT number FROM ( + SELECT number FROM system.numbers LIMIT 10 + UNION ALL + SELECT number FROM system.numbers LIMIT 10 +) +WHERE number < 5; diff --git a/tests/queries/0_stateless/03033_distinct_transform_const_columns.reference b/tests/queries/0_stateless/03033_distinct_transform_const_columns.reference new file mode 100644 index 000000000000..d05b1f927f4b --- /dev/null +++ b/tests/queries/0_stateless/03033_distinct_transform_const_columns.reference @@ -0,0 +1 @@ +0 0 diff --git a/tests/queries/0_stateless/03033_distinct_transform_const_columns.sql b/tests/queries/0_stateless/03033_distinct_transform_const_columns.sql new file mode 100644 index 000000000000..41df19ab64e8 --- /dev/null +++ b/tests/queries/0_stateless/03033_distinct_transform_const_columns.sql @@ -0,0 +1 @@ +SELECT DISTINCT COALESCE(COALESCE('') = toNullable('b3'), toUInt128(toNullable(2)), 2, 2, toLowCardinality(2), 2, 2, 2, toUInt128(toNullable(2)), materialize(2), toUInt128(2), 2, 2), COALESCE(COALESCE(COALESCE(materialize(''))) = 'b3', 2, 2, 2, toLowCardinality(2), toUInt128(2), 2, 2, 2, materialize(toUInt256(2)), 2, 2, 2) FROM numbers(100000); diff --git a/tests/queries/0_stateless/03033_hive_text_read_variable_fields.reference b/tests/queries/0_stateless/03033_hive_text_read_variable_fields.reference new file mode 100644 index 000000000000..2e7c474620b8 --- /dev/null +++ b/tests/queries/0_stateless/03033_hive_text_read_variable_fields.reference @@ -0,0 +1,2 @@ +1 3 0 +3 5 9 diff --git a/tests/queries/0_stateless/03033_hive_text_read_variable_fields.sh b/tests/queries/0_stateless/03033_hive_text_read_variable_fields.sh new file mode 100755 index 000000000000..9dba99be7c8a --- /dev/null +++ b/tests/queries/0_stateless/03033_hive_text_read_variable_fields.sh @@ -0,0 +1,13 @@ +#!/usr/bin/env bash +# Tags: no-fasttest +# NOTE: this sh wrapper is required because of shell_config + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -q "drop table if exists test_tbl" +$CLICKHOUSE_CLIENT -q "create table test_tbl (a UInt16, b UInt32, c UInt32) engine=MergeTree order by a" +$CLICKHOUSE_CLIENT -q "insert into test_tbl from infile '$CURDIR/data_hive/fields_number_variable.txt' SETTINGS input_format_hive_text_fields_delimiter=',' FORMAT HIVETEXT" +$CLICKHOUSE_CLIENT -q "select * from test_tbl" +$CLICKHOUSE_CLIENT -q "drop table test_tbl" \ No newline at end of file diff --git a/tests/queries/0_stateless/03033_index_definition_sql_udf_bug.reference b/tests/queries/0_stateless/03033_index_definition_sql_udf_bug.reference new file mode 100644 index 000000000000..5782593a4550 --- /dev/null +++ b/tests/queries/0_stateless/03033_index_definition_sql_udf_bug.reference @@ -0,0 +1 @@ +2 2 2 diff --git a/tests/queries/0_stateless/03033_index_definition_sql_udf_bug.sql b/tests/queries/0_stateless/03033_index_definition_sql_udf_bug.sql new file mode 100644 index 000000000000..84ab1d33c948 --- /dev/null +++ b/tests/queries/0_stateless/03033_index_definition_sql_udf_bug.sql @@ -0,0 +1,21 @@ +-- Tags: no-parallel + +DROP FUNCTION IF EXISTS test_func_1; +CREATE FUNCTION test_func_1 AS (a, b, c) -> ((a + b) + c); + +DROP TABLE IF EXISTS t4_2; +CREATE TABLE t4_2 +( + `col1` Int64 NOT NULL COMMENT 'test', + `col2` Float64 NOT NULL, + `col3` Int64 NOT NULL, + INDEX ind4 test_func_1(col1, col3, col1) TYPE set(51) GRANULARITY 5 +) +ENGINE = MergeTree +ORDER BY col1 +; + +INSERT INTO t4_2 (col1, col2, col3) SELECT number, number, number FROM numbers(10); + +SELECT * FROM t4_2 WHERE test_func_1(col1, col3, col1) = 6 +SETTINGS force_data_skipping_indices = 'ind4'; diff --git a/tests/queries/0_stateless/03033_parts_splitter_bug_and_index_loading.reference b/tests/queries/0_stateless/03033_parts_splitter_bug_and_index_loading.reference new file mode 100644 index 000000000000..08839f6bb296 --- /dev/null +++ b/tests/queries/0_stateless/03033_parts_splitter_bug_and_index_loading.reference @@ -0,0 +1 @@ +200 diff --git a/tests/queries/0_stateless/03033_parts_splitter_bug_and_index_loading.sql b/tests/queries/0_stateless/03033_parts_splitter_bug_and_index_loading.sql new file mode 100644 index 000000000000..25ec1c8fd80e --- /dev/null +++ b/tests/queries/0_stateless/03033_parts_splitter_bug_and_index_loading.sql @@ -0,0 +1,19 @@ +create table t(a UInt32, b UInt32) engine=MergeTree order by (a, b) settings index_granularity=1; + +system stop merges t; + +-- for this part the first columns is useless, so we have to use both +insert into t select 42, number from numbers_mt(100); + +-- for this part the first columns is enough +insert into t select number, number from numbers_mt(100); + +-- force reloading index +detach table t; +attach table t; + +set merge_tree_min_bytes_for_concurrent_read=1, merge_tree_min_rows_for_concurrent_read=1, merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability=1.0, max_threads=4; + +-- the bug happened when we used (a, b) index values for one part and only (a) for another in PartsSplitter. 
even a simple count query is enough, +-- because some granules were assinged to wrong layers and hence not returned from the reading step (because they were filtered out by `FilterSortedStreamByRange`) +select count() from t where not ignore(*); diff --git a/tests/queries/0_stateless/03033_scalars_context_data_race.reference b/tests/queries/0_stateless/03033_scalars_context_data_race.reference new file mode 100644 index 000000000000..f96ac0672183 --- /dev/null +++ b/tests/queries/0_stateless/03033_scalars_context_data_race.reference @@ -0,0 +1 @@ +105 diff --git a/tests/queries/0_stateless/03033_scalars_context_data_race.sql b/tests/queries/0_stateless/03033_scalars_context_data_race.sql new file mode 100644 index 000000000000..8c72bb53c725 --- /dev/null +++ b/tests/queries/0_stateless/03033_scalars_context_data_race.sql @@ -0,0 +1,104 @@ +DROP TABLE IF EXISTS test; +DROP TABLE IF EXISTS test_tmp; +DROP TABLE IF EXISTS dst; +DROP TABLE IF EXISTS view; + +CREATE TABLE test +( + `address` FixedString(20), + `deployer` FixedString(20), + `block_number` UInt256, + `block_hash` FixedString(32), + `block_timestamp` DateTime('UTC'), + `insertion_time` DateTime('UTC') +) +ENGINE = MergeTree +ORDER BY address +SETTINGS index_granularity = 8192; + +CREATE TABLE test_tmp as test; + +CREATE TABLE dst +( + `block_timestamp` AggregateFunction(max, Nullable(DateTime('UTC'))), + `block_hash` AggregateFunction(argMax, Nullable(FixedString(32)), DateTime('UTC')), + `block_number` AggregateFunction(argMax, Nullable(UInt256), DateTime('UTC')), + `deployer` AggregateFunction(argMax, Nullable(FixedString(20)), DateTime('UTC')), + `address` FixedString(20), + `name` AggregateFunction(argMax, Nullable(String), DateTime('UTC')), + `symbol` AggregateFunction(argMax, Nullable(String), DateTime('UTC')), + `decimals` AggregateFunction(argMax, Nullable(UInt8), DateTime('UTC')), + `is_proxy` AggregateFunction(argMax, Nullable(Bool), DateTime('UTC')), + `blacklist_flags` AggregateFunction(argMax, Array(Nullable(String)), DateTime('UTC')), + `whitelist_flags` AggregateFunction(argMax, Array(Nullable(String)), DateTime('UTC')), + `detected_standards` AggregateFunction(argMax, Array(Nullable(String)), DateTime('UTC')), + `amended_type` AggregateFunction(argMax, Nullable(String), DateTime('UTC')), + `comment` AggregateFunction(argMax, Nullable(String), DateTime('UTC')), + `_sources` AggregateFunction(groupUniqArray, String), + `_updated_at` AggregateFunction(max, DateTime('UTC')), + `_active` AggregateFunction(argMax, Bool, DateTime('UTC')) +) +ENGINE = MergeTree +ORDER BY address +SETTINGS index_granularity = 8192; + +CREATE MATERIALIZED VIEW view TO dst +( + `block_timestamp` AggregateFunction(max, Nullable(DateTime('UTC'))), + `block_hash` AggregateFunction(argMax, Nullable(FixedString(32)), DateTime('UTC')), + `block_number` AggregateFunction(argMax, Nullable(UInt256), DateTime('UTC')), + `deployer` AggregateFunction(argMax, Nullable(FixedString(20)), DateTime('UTC')), + `address` FixedString(20), + `name` AggregateFunction(argMax, Nullable(String), DateTime('UTC')), + `symbol` AggregateFunction(argMax, Nullable(String), DateTime('UTC')), + `decimals` AggregateFunction(argMax, Nullable(UInt8), DateTime('UTC')), + `is_proxy` AggregateFunction(argMax, Nullable(Bool), DateTime('UTC')), + `blacklist_flags` AggregateFunction(argMax, Array(Nullable(String)), DateTime('UTC')), + `whitelist_flags` AggregateFunction(argMax, Array(Nullable(String)), DateTime('UTC')), + `detected_standards` AggregateFunction(argMax, 
Array(Nullable(String)), DateTime('UTC')), + `amended_type` AggregateFunction(argMax, Nullable(String), DateTime('UTC')), + `comment` AggregateFunction(argMax, Nullable(String), DateTime('UTC')), + `_sources` AggregateFunction(groupUniqArray, String), + `_updated_at` AggregateFunction(max, DateTime('UTC')), + `_active` AggregateFunction(argMax, Bool, DateTime('UTC')) +) AS +(WITH ( + SELECT toDateTime('1970-01-01 00:00:00') + ) AS default_timestamp +SELECT + maxState(CAST(block_timestamp, 'Nullable(DateTime(\'UTC\'))')) AS block_timestamp, + argMaxState(CAST(block_hash, 'Nullable(FixedString(32))'), insertion_time) AS block_hash, + argMaxState(CAST(block_number, 'Nullable(UInt256)'), insertion_time) AS block_number, + argMaxState(CAST(deployer, 'Nullable(FixedString(20))'), insertion_time) AS deployer, + address, + argMaxState(CAST(NULL, 'Nullable(String)'), CAST(default_timestamp, 'DateTime(\'UTC\')')) AS name, + argMaxState(CAST(NULL, 'Nullable(String)'), CAST(default_timestamp, 'DateTime(\'UTC\')')) AS symbol, + argMaxState(CAST(NULL, 'Nullable(UInt8)'), CAST(default_timestamp, 'DateTime(\'UTC\')')) AS decimals, + argMaxState(CAST(true, 'Nullable(Boolean)'), insertion_time) AS is_proxy, + argMaxState(CAST('[]', 'Array(Nullable(String))'), CAST(default_timestamp, 'DateTime(\'UTC\')')) AS blacklist_flags, + argMaxState(CAST('[]', 'Array(Nullable(String))'), CAST(default_timestamp, 'DateTime(\'UTC\')')) AS whitelist_flags, + argMaxState(CAST('[]', 'Array(Nullable(String))'), CAST(default_timestamp, 'DateTime(\'UTC\')')) AS detected_standards, + argMaxState(CAST(NULL, 'Nullable(String)'), CAST(default_timestamp, 'DateTime(\'UTC\')')) AS amended_type, + argMaxState(CAST(NULL, 'Nullable(String)'), CAST(default_timestamp, 'DateTime(\'UTC\')')) AS comment, + groupUniqArrayState('tokens_proxy_deployments') AS _sources, + maxState(insertion_time) AS _updated_at, + argMaxState(true, CAST(default_timestamp, 'DateTime(\'UTC\')')) AS _active +FROM test +WHERE insertion_time > toDateTime('2024-03-14 11:38:09') +GROUP BY address); + +set max_insert_threads=4; +insert into test_tmp select * from generateRandom() limit 24; +insert into test_tmp select * from generateRandom() limit 25; +insert into test_tmp select * from generateRandom() limit 26; +insert into test_tmp select * from generateRandom() limit 30; + +INSERT INTO test(address, deployer, block_number, block_hash, block_timestamp, insertion_time) SELECT * FROM test_tmp; + +select count() from test; + +DROP TABLE test; +DROP TABLE test_tmp; +DROP TABLE dst; +DROP TABLE view; + diff --git a/tests/queries/0_stateless/03033_tupleIntXYZ_and_tupleModulo.reference b/tests/queries/0_stateless/03033_tupleIntXYZ_and_tupleModulo.reference new file mode 100644 index 000000000000..f0ce16499393 --- /dev/null +++ b/tests/queries/0_stateless/03033_tupleIntXYZ_and_tupleModulo.reference @@ -0,0 +1,9 @@ +(3,2,1) +(2,1,0) +(0,0,0) +(3,2,1) +(2,1,0) +(3,2,1) +(0,0,0) +(0,1,1) +(1,0,1) diff --git a/tests/queries/0_stateless/03033_tupleIntXYZ_and_tupleModulo.sql b/tests/queries/0_stateless/03033_tupleIntXYZ_and_tupleModulo.sql new file mode 100644 index 000000000000..2cb7e726a4bf --- /dev/null +++ b/tests/queries/0_stateless/03033_tupleIntXYZ_and_tupleModulo.sql @@ -0,0 +1,13 @@ +SELECT tupleIntDiv((15, 10, 5), (0, 0, 0)); -- { serverError ILLEGAL_DIVISION } +SELECT tupleIntDiv((15, 10, 5), (5, 5, 5)); +SELECT tupleIntDiv((15, 10, 5), (5.5, 5.5, 5.5)); +SELECT tupleIntDivOrZero((5, 10, 15), (0, 0, 0)); -- no error thrown for zero divisors +SELECT 
tupleIntDivByNumber((15, 10, 5), 0); -- { serverError ILLEGAL_DIVISION } +SELECT tupleIntDivByNumber((15, 10, 5), 5); +SELECT tupleIntDivByNumber((15.2, 10.7, 5.5), 5.8); +SELECT tupleIntDivOrZeroByNumber((15, 10, 5), 5); +SELECT tupleIntDivOrZeroByNumber((15, 10, 5), 0); -- no error thrown for zero divisors +SELECT tupleModulo((15, 10, 5), (0, 3, 2)); -- { serverError ILLEGAL_DIVISION } +SELECT tupleModulo((15, 10, 5), (5, 3, 2)); +SELECT tupleModuloByNumber((15, 10, 5), 0); -- { serverError ILLEGAL_DIVISION } +SELECT tupleModuloByNumber((15, 10, 5), 2); \ No newline at end of file diff --git a/tests/queries/0_stateless/03033_with_fill_interpolate.reference b/tests/queries/0_stateless/03033_with_fill_interpolate.reference new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/queries/0_stateless/03033_with_fill_interpolate.sql b/tests/queries/0_stateless/03033_with_fill_interpolate.sql new file mode 100644 index 000000000000..0ec0050a9221 --- /dev/null +++ b/tests/queries/0_stateless/03033_with_fill_interpolate.sql @@ -0,0 +1,28 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/55794 +SET allow_experimental_analyzer=1; +DROP TABLE IF EXISTS 03033_example_table; + +CREATE TABLE 03033_example_table +( + ColumnA Int64, + ColumnB Int64, + ColumnC Int64 +) +ENGINE = MergeTree() +ORDER BY ColumnA; + +WITH +helper AS ( + SELECT + * + FROM + 03033_example_table + ORDER BY + ColumnA WITH FILL INTERPOLATE ( + ColumnB AS ColumnC, + ColumnC AS ColumnA + ) +) +SELECT ColumnB FROM helper; + +DROP TABLE IF EXISTS 03033_example_table; diff --git a/tests/queries/0_stateless/03034_json_extract_variant.reference b/tests/queries/0_stateless/03034_json_extract_variant.reference new file mode 100644 index 000000000000..8d78d639fd26 --- /dev/null +++ b/tests/queries/0_stateless/03034_json_extract_variant.reference @@ -0,0 +1,5 @@ +42 UInt32 +Hello String +[1,2,3] Array(UInt32) +{'a':42,'b':'Hello','c':[1,2,3]} +[('a',42),('b','Hello'),('c',[1,2,3])] Array(Tuple(String, Variant(Array(UInt32), String, UInt32))) diff --git a/tests/queries/0_stateless/03034_json_extract_variant.sql b/tests/queries/0_stateless/03034_json_extract_variant.sql new file mode 100644 index 000000000000..54d5bed9582c --- /dev/null +++ b/tests/queries/0_stateless/03034_json_extract_variant.sql @@ -0,0 +1,6 @@ +select JSONExtract('{"a" : 42}', 'a', 'Variant(String, UInt32)') as v, variantType(v); +select JSONExtract('{"a" : "Hello"}', 'a', 'Variant(String, UInt32)') as v, variantType(v); +select JSONExtract('{"a" : [1, 2, 3]}', 'a', 'Variant(String, Array(UInt32))') as v, variantType(v); +select JSONExtract('{"obj" : {"a" : 42, "b" : "Hello", "c" : [1,2,3]}}', 'obj', 'Map(String, Variant(UInt32, String, Array(UInt32)))'); +select JSONExtractKeysAndValues('{"a" : 42, "b" : "Hello", "c" : [1,2,3]}', 'Variant(UInt32, String, Array(UInt32))') as v, toTypeName(v); + diff --git a/tests/queries/0_stateless/03034_normalized_ast.reference b/tests/queries/0_stateless/03034_normalized_ast.reference new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/queries/0_stateless/03034_normalized_ast.sql b/tests/queries/0_stateless/03034_normalized_ast.sql new file mode 100644 index 000000000000..385af4e2c34c --- /dev/null +++ b/tests/queries/0_stateless/03034_normalized_ast.sql @@ -0,0 +1,9 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/49472 +SET allow_experimental_analyzer=1; +SELECT + concat(database, table) AS name, + count() +FROM clusterAllReplicas(test_shard_localhost, system.tables) +WHERE 
database=currentDatabase() +GROUP BY name +FORMAT Null; diff --git a/tests/queries/0_stateless/03035_alias_column_bug_distributed.reference b/tests/queries/0_stateless/03035_alias_column_bug_distributed.reference new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/queries/0_stateless/03035_alias_column_bug_distributed.sql b/tests/queries/0_stateless/03035_alias_column_bug_distributed.sql new file mode 100644 index 000000000000..74463743b011 --- /dev/null +++ b/tests/queries/0_stateless/03035_alias_column_bug_distributed.sql @@ -0,0 +1,44 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/44414 +SET allow_experimental_analyzer=1; +DROP TABLE IF EXISTS alias_bug; +DROP TABLE IF EXISTS alias_bug_dist; +CREATE TABLE alias_bug +( + `src` String, + `theAlias` String ALIAS trimBoth(src) +) +ENGINE = MergeTree() +ORDER BY src; + +CREATE TABLE alias_bug_dist +AS alias_bug +ENGINE = Distributed('test_shard_localhost', currentDatabase(), 'alias_bug', rand()); + +INSERT INTO alias_bug VALUES ('SOURCE1'); + +-- OK +SELECT theAlias,CAST(NULL, 'Nullable(String)') AS src FROM alias_bug LIMIT 1 FORMAT Null; + +-- Not OK +SELECT theAlias,CAST(NULL, 'Nullable(String)') AS src FROM alias_bug_dist LIMIT 1 FORMAT Null; + +DROP TABLE IF EXISTS alias_bug; +DROP TABLE IF EXISTS alias_bug_dist; +CREATE TABLE alias_bug +( + `s` String, + `src` String, + `theAlias` String ALIAS trimBoth(src) +) +ENGINE = MergeTree() +ORDER BY src; + +CREATE TABLE alias_bug_dist +AS alias_bug +ENGINE = Distributed('test_shard_localhost', currentDatabase(), 'alias_bug', rand()); + +-- Unknown identifier +SELECT CAST(123, 'String') AS src,theAlias FROM alias_bug_dist LIMIT 1 FORMAT Null; + +DROP TABLE IF EXISTS alias_bug; +DROP TABLE IF EXISTS alias_bug_dist; diff --git a/tests/queries/0_stateless/03035_argMinMax_numeric_non_extreme_bug.reference b/tests/queries/0_stateless/03035_argMinMax_numeric_non_extreme_bug.reference new file mode 100644 index 000000000000..be07c950fea2 --- /dev/null +++ b/tests/queries/0_stateless/03035_argMinMax_numeric_non_extreme_bug.reference @@ -0,0 +1,12 @@ +Row 1: +────── +max(time): 2021-01-01 00:00:59.000 +max(toNullable(time)): 2021-01-01 00:00:59.000 +min(time): 2021-01-01 00:00:00.000 +min(toNullable(time)): 2021-01-01 00:00:00.000 +argMax(value, time): -1 +argMax(value, toNullable(time)): -1 +argMin(value, time): 0 +argMin(value, toNullable(time)): 0 +argMinIf(value, toNullable(time), notEquals(time, '2021-01-01 00:00:00.000')): 1 +argMaxIf(value, toNullable(time), notEquals(time, '2021-01-01 00:00:59.000')): -2 diff --git a/tests/queries/0_stateless/03035_argMinMax_numeric_non_extreme_bug.sql b/tests/queries/0_stateless/03035_argMinMax_numeric_non_extreme_bug.sql new file mode 100644 index 000000000000..deb580b90404 --- /dev/null +++ b/tests/queries/0_stateless/03035_argMinMax_numeric_non_extreme_bug.sql @@ -0,0 +1,26 @@ +CREATE TABLE IF NOT EXISTS test +( + `value` Float64 CODEC(Delta, LZ4), + `uuid` LowCardinality(String), + `time` DateTime64(3, 'UTC') CODEC(DoubleDelta, LZ4) +) +ENGINE = MergeTree() +ORDER BY uuid; + + +INSERT INTO test (uuid, time, value) +VALUES ('a1000000-0000-0000-0000-0000000000a1','2021-01-01 00:00:00.000',0), ('a1000000-0000-0000-0000-0000000000a1','2021-01-01 00:00:09.000',1), ('a1000000-0000-0000-0000-0000000000a1','2021-01-01 00:00:10.000',2), ('a1000000-0000-0000-0000-0000000000a1','2021-01-01 00:00:19.000',3), ('a1000000-0000-0000-0000-0000000000a1','2021-01-01 00:00:20.000',2), ('a1000000-0000-0000-0000-0000000000a1','2021-01-01 
00:00:29.000',1), ('a1000000-0000-0000-0000-0000000000a1','2021-01-01 00:00:30.000',0), ('a1000000-0000-0000-0000-0000000000a1','2021-01-01 00:00:39.000',-1), ('a1000000-0000-0000-0000-0000000000a1','2021-01-01 00:00:40.000',-2), ('a1000000-0000-0000-0000-0000000000a1','2021-01-01 00:00:49.000',-3), ('a1000000-0000-0000-0000-0000000000a1','2021-01-01 00:00:50.000',-2), ('a1000000-0000-0000-0000-0000000000a1','2021-01-01 00:00:59.000',-1), ('a1000000-0000-0000-0000-0000000000a1','2021-01-01 00:01:00.000',0); + +SELECT + max(time), + max(toNullable(time)), + min(time), + min(toNullable(time)), + argMax(value, time), + argMax(value, toNullable(time)), + argMin(value, time), + argMin(value, toNullable(time)), + argMinIf(value, toNullable(time), time != '2021-01-01 00:00:00.000'), + argMaxIf(value, toNullable(time), time != '2021-01-01 00:00:59.000'), +FROM test +WHERE (time >= fromUnixTimestamp64Milli(1609459200000, 'UTC')) AND (time < fromUnixTimestamp64Milli(1609459260000, 'UTC')) FORMAT Vertical; diff --git a/tests/queries/0_stateless/03035_max_insert_threads_support.reference b/tests/queries/0_stateless/03035_max_insert_threads_support.reference new file mode 100644 index 000000000000..d00491fd7e5b --- /dev/null +++ b/tests/queries/0_stateless/03035_max_insert_threads_support.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/03035_max_insert_threads_support.sh b/tests/queries/0_stateless/03035_max_insert_threads_support.sh new file mode 100755 index 000000000000..1e6bfb414d80 --- /dev/null +++ b/tests/queries/0_stateless/03035_max_insert_threads_support.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +DATA_FILE="data_$CLICKHOUSE_TEST_UNIQUE_NAME.csv" + +$CLICKHOUSE_CLIENT --max_insert_threads=4 --query=" + EXPLAIN PIPELINE INSERT INTO FUNCTION file('$DATA_FILE') SELECT * FROM numbers_mt(1000000) ORDER BY number DESC +" | grep -o MaterializingTransform | wc -l + +DATA_FILE_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path from file('$DATA_FILE', 'One')") +rm $DATA_FILE_PATH diff --git a/tests/queries/0_stateless/03035_morton_encode_no_rows.reference b/tests/queries/0_stateless/03035_morton_encode_no_rows.reference new file mode 100644 index 000000000000..dc8bb19a028d --- /dev/null +++ b/tests/queries/0_stateless/03035_morton_encode_no_rows.reference @@ -0,0 +1,2 @@ +4294967286 +4294967286 diff --git a/tests/queries/0_stateless/03035_morton_encode_no_rows.sql b/tests/queries/0_stateless/03035_morton_encode_no_rows.sql new file mode 100644 index 000000000000..2663b1ac2126 --- /dev/null +++ b/tests/queries/0_stateless/03035_morton_encode_no_rows.sql @@ -0,0 +1,2 @@ +SELECT mortonEncode(materialize((1, 1)), 65534, 65533); +SELECT mortonEncode((1, 1), 65534, 65533); diff --git a/tests/queries/0_stateless/03036_join_filter_push_down_equivalent_sets.reference b/tests/queries/0_stateless/03036_join_filter_push_down_equivalent_sets.reference new file mode 100644 index 000000000000..00740e6380f4 --- /dev/null +++ b/tests/queries/0_stateless/03036_join_filter_push_down_equivalent_sets.reference @@ -0,0 +1,710 @@ +-- { echoOn } + +EXPLAIN header = 1, actions = 1 +SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs INNER JOIN test_table_2 AS rhs ON lhs.id = rhs.id +WHERE lhs.id = 5; +Expression ((Project names + (Projection + ))) +Header: id UInt64 + rhs.id UInt64 + value String + rhs.value String +Actions: INPUT : 0 -> __table1.id UInt64 : 0 + 
INPUT : 1 -> __table1.value String : 1 + INPUT : 2 -> __table2.value String : 2 + INPUT : 3 -> __table2.id UInt64 : 3 + ALIAS __table1.id :: 0 -> id UInt64 : 4 + ALIAS __table1.value :: 1 -> value String : 0 + ALIAS __table2.value :: 2 -> rhs.value String : 1 + ALIAS __table2.id :: 3 -> rhs.id UInt64 : 2 +Positions: 4 2 0 1 + Join (JOIN FillRightFirst) + Header: __table1.id UInt64 + __table1.value String + __table2.value String + __table2.id UInt64 + Type: INNER + Strictness: ALL + Algorithm: HashJoin + Clauses: [(__table1.id) = (__table2.id)] + Filter (( + (JOIN actions + Change column names to column identifiers))) + Header: __table1.id UInt64 + __table1.value String + Filter column: equals(__table1.id, 5_UInt8) (removed) + Actions: INPUT : 0 -> id UInt64 : 0 + INPUT : 1 -> value String : 1 + COLUMN Const(UInt8) -> 5_UInt8 UInt8 : 2 + ALIAS id :: 0 -> __table1.id UInt64 : 3 + ALIAS value :: 1 -> __table1.value String : 0 + FUNCTION equals(__table1.id : 3, 5_UInt8 :: 2) -> equals(__table1.id, 5_UInt8) UInt8 : 1 + Positions: 1 3 0 + ReadFromMergeTree (default.test_table_1) + Header: id UInt64 + value String + ReadType: Default + Parts: 1 + Granules: 1 + Filter (( + (JOIN actions + Change column names to column identifiers))) + Header: __table2.id UInt64 + __table2.value String + Filter column: equals(__table2.id, 5_UInt8) (removed) + Actions: INPUT : 0 -> id UInt64 : 0 + INPUT : 1 -> value String : 1 + COLUMN Const(UInt8) -> 5_UInt8 UInt8 : 2 + ALIAS id :: 0 -> __table2.id UInt64 : 3 + ALIAS value :: 1 -> __table2.value String : 0 + FUNCTION equals(__table2.id : 3, 5_UInt8 :: 2) -> equals(__table2.id, 5_UInt8) UInt8 : 1 + Positions: 1 3 0 + ReadFromMergeTree (default.test_table_2) + Header: id UInt64 + value String + ReadType: Default + Parts: 1 + Granules: 1 +SELECT '--'; +-- +SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs INNER JOIN test_table_2 AS rhs ON lhs.id = rhs.id +WHERE lhs.id = 5; +5 5 5 5 +SELECT '--'; +-- +EXPLAIN header = 1, actions = 1 +SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs INNER JOIN test_table_2 AS rhs ON lhs.id = rhs.id +WHERE rhs.id = 5; +Expression ((Project names + (Projection + ))) +Header: id UInt64 + rhs.id UInt64 + value String + rhs.value String +Actions: INPUT : 0 -> __table1.id UInt64 : 0 + INPUT : 1 -> __table1.value String : 1 + INPUT : 2 -> __table2.value String : 2 + INPUT : 3 -> __table2.id UInt64 : 3 + ALIAS __table1.id :: 0 -> id UInt64 : 4 + ALIAS __table1.value :: 1 -> value String : 0 + ALIAS __table2.value :: 2 -> rhs.value String : 1 + ALIAS __table2.id :: 3 -> rhs.id UInt64 : 2 +Positions: 4 2 0 1 + Join (JOIN FillRightFirst) + Header: __table1.id UInt64 + __table1.value String + __table2.value String + __table2.id UInt64 + Type: INNER + Strictness: ALL + Algorithm: HashJoin + Clauses: [(__table1.id) = (__table2.id)] + Filter (( + (JOIN actions + Change column names to column identifiers))) + Header: __table1.id UInt64 + __table1.value String + Filter column: equals(__table1.id, 5_UInt8) (removed) + Actions: INPUT : 0 -> id UInt64 : 0 + INPUT : 1 -> value String : 1 + COLUMN Const(UInt8) -> 5_UInt8 UInt8 : 2 + ALIAS id :: 0 -> __table1.id UInt64 : 3 + ALIAS value :: 1 -> __table1.value String : 0 + FUNCTION equals(__table1.id : 3, 5_UInt8 :: 2) -> equals(__table1.id, 5_UInt8) UInt8 : 1 + Positions: 1 3 0 + ReadFromMergeTree (default.test_table_1) + Header: id UInt64 + value String + ReadType: Default + Parts: 1 + Granules: 1 + Filter (( + (JOIN actions + Change column names to column 
identifiers))) + Header: __table2.id UInt64 + __table2.value String + Filter column: equals(__table2.id, 5_UInt8) (removed) + Actions: INPUT : 0 -> id UInt64 : 0 + INPUT : 1 -> value String : 1 + COLUMN Const(UInt8) -> 5_UInt8 UInt8 : 2 + ALIAS id :: 0 -> __table2.id UInt64 : 3 + ALIAS value :: 1 -> __table2.value String : 0 + FUNCTION equals(__table2.id : 3, 5_UInt8 :: 2) -> equals(__table2.id, 5_UInt8) UInt8 : 1 + Positions: 1 3 0 + ReadFromMergeTree (default.test_table_2) + Header: id UInt64 + value String + ReadType: Default + Parts: 1 + Granules: 1 +SELECT '--'; +-- +SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs INNER JOIN test_table_2 AS rhs ON lhs.id = rhs.id +WHERE rhs.id = 5; +5 5 5 5 +SELECT '--'; +-- +EXPLAIN header = 1, actions = 1 +SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs INNER JOIN test_table_2 AS rhs ON lhs.id = rhs.id +WHERE lhs.id = 5 AND rhs.id = 6; +Expression ((Project names + (Projection + ))) +Header: id UInt64 + rhs.id UInt64 + value String + rhs.value String +Actions: INPUT : 0 -> __table1.id UInt64 : 0 + INPUT : 1 -> __table1.value String : 1 + INPUT : 2 -> __table2.value String : 2 + INPUT : 3 -> __table2.id UInt64 : 3 + ALIAS __table1.id :: 0 -> id UInt64 : 4 + ALIAS __table1.value :: 1 -> value String : 0 + ALIAS __table2.value :: 2 -> rhs.value String : 1 + ALIAS __table2.id :: 3 -> rhs.id UInt64 : 2 +Positions: 4 2 0 1 + Join (JOIN FillRightFirst) + Header: __table1.id UInt64 + __table1.value String + __table2.value String + __table2.id UInt64 + Type: INNER + Strictness: ALL + Algorithm: HashJoin + Clauses: [(__table1.id) = (__table2.id)] + Filter (( + (JOIN actions + Change column names to column identifiers))) + Header: __table1.id UInt64 + __table1.value String + Filter column: and(equals(__table1.id, 5_UInt8), equals(__table1.id, 6_UInt8)) (removed) + Actions: INPUT : 0 -> id UInt64 : 0 + INPUT : 1 -> value String : 1 + COLUMN Const(UInt8) -> 6_UInt8 UInt8 : 2 + COLUMN Const(UInt8) -> 5_UInt8 UInt8 : 3 + ALIAS id :: 0 -> __table1.id UInt64 : 4 + ALIAS value :: 1 -> __table1.value String : 0 + FUNCTION equals(__table1.id : 4, 6_UInt8 :: 2) -> equals(__table1.id, 6_UInt8) UInt8 : 1 + FUNCTION equals(__table1.id : 4, 5_UInt8 :: 3) -> equals(__table1.id, 5_UInt8) UInt8 : 2 + FUNCTION and(equals(__table1.id, 5_UInt8) :: 2, equals(__table1.id, 6_UInt8) :: 1) -> and(equals(__table1.id, 5_UInt8), equals(__table1.id, 6_UInt8)) UInt8 : 3 + Positions: 3 4 0 + ReadFromMergeTree (default.test_table_1) + Header: id UInt64 + value String + ReadType: Default + Parts: 1 + Granules: 1 + Filter (( + (JOIN actions + Change column names to column identifiers))) + Header: __table2.id UInt64 + __table2.value String + Filter column: and(equals(__table2.id, 6_UInt8), equals(__table2.id, 5_UInt8)) (removed) + Actions: INPUT : 0 -> id UInt64 : 0 + INPUT : 1 -> value String : 1 + COLUMN Const(UInt8) -> 5_UInt8 UInt8 : 2 + COLUMN Const(UInt8) -> 6_UInt8 UInt8 : 3 + ALIAS id :: 0 -> __table2.id UInt64 : 4 + ALIAS value :: 1 -> __table2.value String : 0 + FUNCTION equals(__table2.id : 4, 5_UInt8 :: 2) -> equals(__table2.id, 5_UInt8) UInt8 : 1 + FUNCTION equals(__table2.id : 4, 6_UInt8 :: 3) -> equals(__table2.id, 6_UInt8) UInt8 : 2 + FUNCTION and(equals(__table2.id, 6_UInt8) :: 2, equals(__table2.id, 5_UInt8) :: 1) -> and(equals(__table2.id, 6_UInt8), equals(__table2.id, 5_UInt8)) UInt8 : 3 + Positions: 3 4 0 + ReadFromMergeTree (default.test_table_2) + Header: id UInt64 + value String + ReadType: Default + Parts: 1 + Granules: 1 
+SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs INNER JOIN test_table_2 AS rhs ON lhs.id = rhs.id +WHERE lhs.id = 5 AND rhs.id = 6; +SELECT '--'; +-- +EXPLAIN header = 1, actions = 1 +SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs LEFT JOIN test_table_2 AS rhs ON lhs.id = rhs.id +WHERE lhs.id = 5; +Expression ((Project names + (Projection + ))) +Header: id UInt64 + rhs.id UInt64 + value String + rhs.value String +Actions: INPUT : 0 -> __table1.id UInt64 : 0 + INPUT : 1 -> __table1.value String : 1 + INPUT : 2 -> __table2.value String : 2 + INPUT : 3 -> __table2.id UInt64 : 3 + ALIAS __table1.id :: 0 -> id UInt64 : 4 + ALIAS __table1.value :: 1 -> value String : 0 + ALIAS __table2.value :: 2 -> rhs.value String : 1 + ALIAS __table2.id :: 3 -> rhs.id UInt64 : 2 +Positions: 4 2 0 1 + Join (JOIN FillRightFirst) + Header: __table1.id UInt64 + __table1.value String + __table2.value String + __table2.id UInt64 + Type: LEFT + Strictness: ALL + Algorithm: HashJoin + Clauses: [(__table1.id) = (__table2.id)] + Filter (( + (JOIN actions + Change column names to column identifiers))) + Header: __table1.id UInt64 + __table1.value String + Filter column: equals(__table1.id, 5_UInt8) (removed) + Actions: INPUT : 0 -> id UInt64 : 0 + INPUT : 1 -> value String : 1 + COLUMN Const(UInt8) -> 5_UInt8 UInt8 : 2 + ALIAS id :: 0 -> __table1.id UInt64 : 3 + ALIAS value :: 1 -> __table1.value String : 0 + FUNCTION equals(__table1.id : 3, 5_UInt8 :: 2) -> equals(__table1.id, 5_UInt8) UInt8 : 1 + Positions: 1 3 0 + ReadFromMergeTree (default.test_table_1) + Header: id UInt64 + value String + ReadType: Default + Parts: 1 + Granules: 1 + Filter (( + (JOIN actions + Change column names to column identifiers))) + Header: __table2.id UInt64 + __table2.value String + Filter column: equals(__table2.id, 5_UInt8) (removed) + Actions: INPUT : 0 -> id UInt64 : 0 + INPUT : 1 -> value String : 1 + COLUMN Const(UInt8) -> 5_UInt8 UInt8 : 2 + ALIAS id :: 0 -> __table2.id UInt64 : 3 + ALIAS value :: 1 -> __table2.value String : 0 + FUNCTION equals(__table2.id : 3, 5_UInt8 :: 2) -> equals(__table2.id, 5_UInt8) UInt8 : 1 + Positions: 1 3 0 + ReadFromMergeTree (default.test_table_2) + Header: id UInt64 + value String + ReadType: Default + Parts: 1 + Granules: 1 +SELECT '--'; +-- +SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs LEFT JOIN test_table_2 AS rhs ON lhs.id = rhs.id +WHERE lhs.id = 5; +5 5 5 5 +SELECT '--'; +-- +EXPLAIN header = 1, actions = 1 +SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs LEFT JOIN test_table_2 AS rhs ON lhs.id = rhs.id +WHERE rhs.id = 5; +Expression ((Project names + Projection)) +Header: id UInt64 + rhs.id UInt64 + value String + rhs.value String +Actions: INPUT : 0 -> __table1.id UInt64 : 0 + INPUT : 1 -> __table1.value String : 1 + INPUT : 2 -> __table2.value String : 2 + INPUT : 3 -> __table2.id UInt64 : 3 + ALIAS __table1.id :: 0 -> id UInt64 : 4 + ALIAS __table1.value :: 1 -> value String : 0 + ALIAS __table2.value :: 2 -> rhs.value String : 1 + ALIAS __table2.id :: 3 -> rhs.id UInt64 : 2 +Positions: 4 2 0 1 + Filter ((WHERE + DROP unused columns after JOIN)) + Header: __table1.id UInt64 + __table1.value String + __table2.value String + __table2.id UInt64 + Filter column: equals(__table2.id, 5_UInt8) (removed) + Actions: INPUT :: 0 -> __table1.id UInt64 : 0 + INPUT :: 1 -> __table1.value String : 1 + INPUT :: 2 -> __table2.value String : 2 + INPUT : 3 -> __table2.id UInt64 : 3 + COLUMN Const(UInt8) -> 5_UInt8 UInt8 : 
4 + FUNCTION equals(__table2.id : 3, 5_UInt8 :: 4) -> equals(__table2.id, 5_UInt8) UInt8 : 5 + Positions: 5 0 1 2 3 + Join (JOIN FillRightFirst) + Header: __table1.id UInt64 + __table1.value String + __table2.value String + __table2.id UInt64 + Type: LEFT + Strictness: ALL + Algorithm: HashJoin + Clauses: [(__table1.id) = (__table2.id)] + Expression ((JOIN actions + Change column names to column identifiers)) + Header: __table1.id UInt64 + __table1.value String + Actions: INPUT : 0 -> id UInt64 : 0 + INPUT : 1 -> value String : 1 + ALIAS id :: 0 -> __table1.id UInt64 : 2 + ALIAS value :: 1 -> __table1.value String : 0 + Positions: 2 0 + ReadFromMergeTree (default.test_table_1) + Header: id UInt64 + value String + ReadType: Default + Parts: 1 + Granules: 1 + Expression ((JOIN actions + Change column names to column identifiers)) + Header: __table2.id UInt64 + __table2.value String + Actions: INPUT : 0 -> id UInt64 : 0 + INPUT : 1 -> value String : 1 + ALIAS id :: 0 -> __table2.id UInt64 : 2 + ALIAS value :: 1 -> __table2.value String : 0 + Positions: 2 0 + ReadFromMergeTree (default.test_table_2) + Header: id UInt64 + value String + ReadType: Default + Parts: 1 + Granules: 1 +SELECT '--'; +-- +SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs LEFT JOIN test_table_2 AS rhs ON lhs.id = rhs.id +WHERE rhs.id = 5; +5 5 5 5 +SELECT '--'; +-- +EXPLAIN header = 1, actions = 1 +SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs RIGHT JOIN test_table_2 AS rhs ON lhs.id = rhs.id +WHERE lhs.id = 5; +Expression ((Project names + Projection)) +Header: id UInt64 + rhs.id UInt64 + value String + rhs.value String +Actions: INPUT : 0 -> __table1.id UInt64 : 0 + INPUT : 1 -> __table1.value String : 1 + INPUT : 2 -> __table2.value String : 2 + INPUT : 3 -> __table2.id UInt64 : 3 + ALIAS __table1.id :: 0 -> id UInt64 : 4 + ALIAS __table1.value :: 1 -> value String : 0 + ALIAS __table2.value :: 2 -> rhs.value String : 1 + ALIAS __table2.id :: 3 -> rhs.id UInt64 : 2 +Positions: 4 2 0 1 + Filter ((WHERE + DROP unused columns after JOIN)) + Header: __table1.id UInt64 + __table1.value String + __table2.value String + __table2.id UInt64 + Filter column: equals(__table1.id, 5_UInt8) (removed) + Actions: INPUT : 0 -> __table1.id UInt64 : 0 + INPUT :: 1 -> __table1.value String : 1 + INPUT :: 2 -> __table2.value String : 2 + INPUT :: 3 -> __table2.id UInt64 : 3 + COLUMN Const(UInt8) -> 5_UInt8 UInt8 : 4 + FUNCTION equals(__table1.id : 0, 5_UInt8 :: 4) -> equals(__table1.id, 5_UInt8) UInt8 : 5 + Positions: 5 0 1 2 3 + Join (JOIN FillRightFirst) + Header: __table1.id UInt64 + __table1.value String + __table2.value String + __table2.id UInt64 + Type: RIGHT + Strictness: ALL + Algorithm: HashJoin + Clauses: [(__table1.id) = (__table2.id)] + Expression ((JOIN actions + Change column names to column identifiers)) + Header: __table1.id UInt64 + __table1.value String + Actions: INPUT : 0 -> id UInt64 : 0 + INPUT : 1 -> value String : 1 + ALIAS id :: 0 -> __table1.id UInt64 : 2 + ALIAS value :: 1 -> __table1.value String : 0 + Positions: 2 0 + ReadFromMergeTree (default.test_table_1) + Header: id UInt64 + value String + ReadType: Default + Parts: 1 + Granules: 1 + Expression ((JOIN actions + Change column names to column identifiers)) + Header: __table2.id UInt64 + __table2.value String + Actions: INPUT : 0 -> id UInt64 : 0 + INPUT : 1 -> value String : 1 + ALIAS id :: 0 -> __table2.id UInt64 : 2 + ALIAS value :: 1 -> __table2.value String : 0 + Positions: 2 0 + ReadFromMergeTree 
(default.test_table_2) + Header: id UInt64 + value String + ReadType: Default + Parts: 1 + Granules: 1 +SELECT '--'; +-- +SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs RIGHT JOIN test_table_2 AS rhs ON lhs.id = rhs.id +WHERE lhs.id = 5; +5 5 5 5 +SELECT '--'; +-- +EXPLAIN header = 1, actions = 1 +SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs RIGHT JOIN test_table_2 AS rhs ON lhs.id = rhs.id +WHERE rhs.id = 5; +Expression ((Project names + (Projection + ))) +Header: id UInt64 + rhs.id UInt64 + value String + rhs.value String +Actions: INPUT : 0 -> __table1.id UInt64 : 0 + INPUT : 1 -> __table1.value String : 1 + INPUT : 2 -> __table2.value String : 2 + INPUT : 3 -> __table2.id UInt64 : 3 + ALIAS __table1.id :: 0 -> id UInt64 : 4 + ALIAS __table1.value :: 1 -> value String : 0 + ALIAS __table2.value :: 2 -> rhs.value String : 1 + ALIAS __table2.id :: 3 -> rhs.id UInt64 : 2 +Positions: 4 2 0 1 + Join (JOIN FillRightFirst) + Header: __table1.id UInt64 + __table1.value String + __table2.value String + __table2.id UInt64 + Type: RIGHT + Strictness: ALL + Algorithm: HashJoin + Clauses: [(__table1.id) = (__table2.id)] + Filter (( + (JOIN actions + Change column names to column identifiers))) + Header: __table1.id UInt64 + __table1.value String + Filter column: equals(__table1.id, 5_UInt8) (removed) + Actions: INPUT : 0 -> id UInt64 : 0 + INPUT : 1 -> value String : 1 + COLUMN Const(UInt8) -> 5_UInt8 UInt8 : 2 + ALIAS id :: 0 -> __table1.id UInt64 : 3 + ALIAS value :: 1 -> __table1.value String : 0 + FUNCTION equals(__table1.id : 3, 5_UInt8 :: 2) -> equals(__table1.id, 5_UInt8) UInt8 : 1 + Positions: 1 3 0 + ReadFromMergeTree (default.test_table_1) + Header: id UInt64 + value String + ReadType: Default + Parts: 1 + Granules: 1 + Filter (( + (JOIN actions + Change column names to column identifiers))) + Header: __table2.id UInt64 + __table2.value String + Filter column: equals(__table2.id, 5_UInt8) (removed) + Actions: INPUT : 0 -> id UInt64 : 0 + INPUT : 1 -> value String : 1 + COLUMN Const(UInt8) -> 5_UInt8 UInt8 : 2 + ALIAS id :: 0 -> __table2.id UInt64 : 3 + ALIAS value :: 1 -> __table2.value String : 0 + FUNCTION equals(__table2.id : 3, 5_UInt8 :: 2) -> equals(__table2.id, 5_UInt8) UInt8 : 1 + Positions: 1 3 0 + ReadFromMergeTree (default.test_table_2) + Header: id UInt64 + value String + ReadType: Default + Parts: 1 + Granules: 1 +SELECT '--'; +-- +SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs RIGHT JOIN test_table_2 AS rhs ON lhs.id = rhs.id +WHERE rhs.id = 5; +5 5 5 5 +SELECT '--'; +-- +EXPLAIN header = 1, actions = 1 +SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs FULL JOIN test_table_2 AS rhs ON lhs.id = rhs.id +WHERE lhs.id = 5; +Expression ((Project names + Projection)) +Header: id UInt64 + rhs.id UInt64 + value String + rhs.value String +Actions: INPUT : 0 -> __table1.id UInt64 : 0 + INPUT : 1 -> __table1.value String : 1 + INPUT : 2 -> __table2.value String : 2 + INPUT : 3 -> __table2.id UInt64 : 3 + ALIAS __table1.id :: 0 -> id UInt64 : 4 + ALIAS __table1.value :: 1 -> value String : 0 + ALIAS __table2.value :: 2 -> rhs.value String : 1 + ALIAS __table2.id :: 3 -> rhs.id UInt64 : 2 +Positions: 4 2 0 1 + Filter ((WHERE + DROP unused columns after JOIN)) + Header: __table1.id UInt64 + __table1.value String + __table2.value String + __table2.id UInt64 + Filter column: equals(__table1.id, 5_UInt8) (removed) + Actions: INPUT : 0 -> __table1.id UInt64 : 0 + INPUT :: 1 -> __table1.value String : 1 + 
INPUT :: 2 -> __table2.value String : 2 + INPUT :: 3 -> __table2.id UInt64 : 3 + COLUMN Const(UInt8) -> 5_UInt8 UInt8 : 4 + FUNCTION equals(__table1.id : 0, 5_UInt8 :: 4) -> equals(__table1.id, 5_UInt8) UInt8 : 5 + Positions: 5 0 1 2 3 + Join (JOIN FillRightFirst) + Header: __table1.id UInt64 + __table1.value String + __table2.value String + __table2.id UInt64 + Type: FULL + Strictness: ALL + Algorithm: HashJoin + Clauses: [(__table1.id) = (__table2.id)] + Expression ((JOIN actions + Change column names to column identifiers)) + Header: __table1.id UInt64 + __table1.value String + Actions: INPUT : 0 -> id UInt64 : 0 + INPUT : 1 -> value String : 1 + ALIAS id :: 0 -> __table1.id UInt64 : 2 + ALIAS value :: 1 -> __table1.value String : 0 + Positions: 2 0 + ReadFromMergeTree (default.test_table_1) + Header: id UInt64 + value String + ReadType: Default + Parts: 1 + Granules: 1 + Expression ((JOIN actions + Change column names to column identifiers)) + Header: __table2.id UInt64 + __table2.value String + Actions: INPUT : 0 -> id UInt64 : 0 + INPUT : 1 -> value String : 1 + ALIAS id :: 0 -> __table2.id UInt64 : 2 + ALIAS value :: 1 -> __table2.value String : 0 + Positions: 2 0 + ReadFromMergeTree (default.test_table_2) + Header: id UInt64 + value String + ReadType: Default + Parts: 1 + Granules: 1 +SELECT '--'; +-- +SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs FULL JOIN test_table_2 AS rhs ON lhs.id = rhs.id +WHERE lhs.id = 5; +5 5 5 5 +SELECT '--'; +-- +EXPLAIN header = 1, actions = 1 +SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs FULL JOIN test_table_2 AS rhs ON lhs.id = rhs.id +WHERE rhs.id = 5; +Expression ((Project names + Projection)) +Header: id UInt64 + rhs.id UInt64 + value String + rhs.value String +Actions: INPUT : 0 -> __table1.id UInt64 : 0 + INPUT : 1 -> __table1.value String : 1 + INPUT : 2 -> __table2.value String : 2 + INPUT : 3 -> __table2.id UInt64 : 3 + ALIAS __table1.id :: 0 -> id UInt64 : 4 + ALIAS __table1.value :: 1 -> value String : 0 + ALIAS __table2.value :: 2 -> rhs.value String : 1 + ALIAS __table2.id :: 3 -> rhs.id UInt64 : 2 +Positions: 4 2 0 1 + Filter ((WHERE + DROP unused columns after JOIN)) + Header: __table1.id UInt64 + __table1.value String + __table2.value String + __table2.id UInt64 + Filter column: equals(__table2.id, 5_UInt8) (removed) + Actions: INPUT :: 0 -> __table1.id UInt64 : 0 + INPUT :: 1 -> __table1.value String : 1 + INPUT :: 2 -> __table2.value String : 2 + INPUT : 3 -> __table2.id UInt64 : 3 + COLUMN Const(UInt8) -> 5_UInt8 UInt8 : 4 + FUNCTION equals(__table2.id : 3, 5_UInt8 :: 4) -> equals(__table2.id, 5_UInt8) UInt8 : 5 + Positions: 5 0 1 2 3 + Join (JOIN FillRightFirst) + Header: __table1.id UInt64 + __table1.value String + __table2.value String + __table2.id UInt64 + Type: FULL + Strictness: ALL + Algorithm: HashJoin + Clauses: [(__table1.id) = (__table2.id)] + Expression ((JOIN actions + Change column names to column identifiers)) + Header: __table1.id UInt64 + __table1.value String + Actions: INPUT : 0 -> id UInt64 : 0 + INPUT : 1 -> value String : 1 + ALIAS id :: 0 -> __table1.id UInt64 : 2 + ALIAS value :: 1 -> __table1.value String : 0 + Positions: 2 0 + ReadFromMergeTree (default.test_table_1) + Header: id UInt64 + value String + ReadType: Default + Parts: 1 + Granules: 1 + Expression ((JOIN actions + Change column names to column identifiers)) + Header: __table2.id UInt64 + __table2.value String + Actions: INPUT : 0 -> id UInt64 : 0 + INPUT : 1 -> value String : 1 + ALIAS id :: 0 -> 
__table2.id UInt64 : 2 + ALIAS value :: 1 -> __table2.value String : 0 + Positions: 2 0 + ReadFromMergeTree (default.test_table_2) + Header: id UInt64 + value String + ReadType: Default + Parts: 1 + Granules: 1 +SELECT '--'; +-- +SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs FULL JOIN test_table_2 AS rhs ON lhs.id = rhs.id +WHERE rhs.id = 5; +5 5 5 5 +SELECT '--'; +-- +EXPLAIN header = 1, actions = 1 +SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs FULL JOIN test_table_2 AS rhs ON lhs.id = rhs.id +WHERE lhs.id = 5 AND rhs.id = 6; +Expression ((Project names + Projection)) +Header: id UInt64 + rhs.id UInt64 + value String + rhs.value String +Actions: INPUT : 0 -> __table1.id UInt64 : 0 + INPUT : 1 -> __table1.value String : 1 + INPUT : 2 -> __table2.value String : 2 + INPUT : 3 -> __table2.id UInt64 : 3 + ALIAS __table1.id :: 0 -> id UInt64 : 4 + ALIAS __table1.value :: 1 -> value String : 0 + ALIAS __table2.value :: 2 -> rhs.value String : 1 + ALIAS __table2.id :: 3 -> rhs.id UInt64 : 2 +Positions: 4 2 0 1 + Filter ((WHERE + DROP unused columns after JOIN)) + Header: __table1.id UInt64 + __table1.value String + __table2.value String + __table2.id UInt64 + Filter column: and(equals(__table1.id, 5_UInt8), equals(__table2.id, 6_UInt8)) (removed) + Actions: INPUT : 0 -> __table1.id UInt64 : 0 + INPUT :: 1 -> __table1.value String : 1 + INPUT :: 2 -> __table2.value String : 2 + INPUT : 3 -> __table2.id UInt64 : 3 + COLUMN Const(UInt8) -> 5_UInt8 UInt8 : 4 + COLUMN Const(UInt8) -> 6_UInt8 UInt8 : 5 + FUNCTION equals(__table1.id : 0, 5_UInt8 :: 4) -> equals(__table1.id, 5_UInt8) UInt8 : 6 + FUNCTION equals(__table2.id : 3, 6_UInt8 :: 5) -> equals(__table2.id, 6_UInt8) UInt8 : 4 + FUNCTION and(equals(__table1.id, 5_UInt8) :: 6, equals(__table2.id, 6_UInt8) :: 4) -> and(equals(__table1.id, 5_UInt8), equals(__table2.id, 6_UInt8)) UInt8 : 5 + Positions: 5 0 1 2 3 + Join (JOIN FillRightFirst) + Header: __table1.id UInt64 + __table1.value String + __table2.value String + __table2.id UInt64 + Type: FULL + Strictness: ALL + Algorithm: HashJoin + Clauses: [(__table1.id) = (__table2.id)] + Expression ((JOIN actions + Change column names to column identifiers)) + Header: __table1.id UInt64 + __table1.value String + Actions: INPUT : 0 -> id UInt64 : 0 + INPUT : 1 -> value String : 1 + ALIAS id :: 0 -> __table1.id UInt64 : 2 + ALIAS value :: 1 -> __table1.value String : 0 + Positions: 2 0 + ReadFromMergeTree (default.test_table_1) + Header: id UInt64 + value String + ReadType: Default + Parts: 1 + Granules: 1 + Expression ((JOIN actions + Change column names to column identifiers)) + Header: __table2.id UInt64 + __table2.value String + Actions: INPUT : 0 -> id UInt64 : 0 + INPUT : 1 -> value String : 1 + ALIAS id :: 0 -> __table2.id UInt64 : 2 + ALIAS value :: 1 -> __table2.value String : 0 + Positions: 2 0 + ReadFromMergeTree (default.test_table_2) + Header: id UInt64 + value String + ReadType: Default + Parts: 1 + Granules: 1 +SELECT '--'; +-- +SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs FULL JOIN test_table_2 AS rhs ON lhs.id = rhs.id +WHERE lhs.id = 5 AND rhs.id = 6; diff --git a/tests/queries/0_stateless/03036_join_filter_push_down_equivalent_sets.sql b/tests/queries/0_stateless/03036_join_filter_push_down_equivalent_sets.sql new file mode 100644 index 000000000000..9627b55e6337 --- /dev/null +++ b/tests/queries/0_stateless/03036_join_filter_push_down_equivalent_sets.sql @@ -0,0 +1,131 @@ +SET allow_experimental_analyzer = 1; +SET 
optimize_move_to_prewhere = 0; + +DROP TABLE IF EXISTS test_table_1; +CREATE TABLE test_table_1 +( + id UInt64, + value String +) ENGINE=MergeTree ORDER BY id; + +CREATE TABLE test_table_2 +( + id UInt64, + value String +) ENGINE=MergeTree ORDER BY id; + +INSERT INTO test_table_1 SELECT number, number FROM numbers(10); +INSERT INTO test_table_2 SELECT number, number FROM numbers(10); + +-- { echoOn } + +EXPLAIN header = 1, actions = 1 +SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs INNER JOIN test_table_2 AS rhs ON lhs.id = rhs.id +WHERE lhs.id = 5; + +SELECT '--'; + +SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs INNER JOIN test_table_2 AS rhs ON lhs.id = rhs.id +WHERE lhs.id = 5; + +SELECT '--'; + +EXPLAIN header = 1, actions = 1 +SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs INNER JOIN test_table_2 AS rhs ON lhs.id = rhs.id +WHERE rhs.id = 5; + +SELECT '--'; + +SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs INNER JOIN test_table_2 AS rhs ON lhs.id = rhs.id +WHERE rhs.id = 5; + +SELECT '--'; + +EXPLAIN header = 1, actions = 1 +SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs INNER JOIN test_table_2 AS rhs ON lhs.id = rhs.id +WHERE lhs.id = 5 AND rhs.id = 6; + +SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs INNER JOIN test_table_2 AS rhs ON lhs.id = rhs.id +WHERE lhs.id = 5 AND rhs.id = 6; + +SELECT '--'; + +EXPLAIN header = 1, actions = 1 +SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs LEFT JOIN test_table_2 AS rhs ON lhs.id = rhs.id +WHERE lhs.id = 5; + +SELECT '--'; + +SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs LEFT JOIN test_table_2 AS rhs ON lhs.id = rhs.id +WHERE lhs.id = 5; + +SELECT '--'; + +EXPLAIN header = 1, actions = 1 +SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs LEFT JOIN test_table_2 AS rhs ON lhs.id = rhs.id +WHERE rhs.id = 5; + +SELECT '--'; + +SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs LEFT JOIN test_table_2 AS rhs ON lhs.id = rhs.id +WHERE rhs.id = 5; + +SELECT '--'; + +EXPLAIN header = 1, actions = 1 +SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs RIGHT JOIN test_table_2 AS rhs ON lhs.id = rhs.id +WHERE lhs.id = 5; + +SELECT '--'; + +SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs RIGHT JOIN test_table_2 AS rhs ON lhs.id = rhs.id +WHERE lhs.id = 5; + +SELECT '--'; + +EXPLAIN header = 1, actions = 1 +SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs RIGHT JOIN test_table_2 AS rhs ON lhs.id = rhs.id +WHERE rhs.id = 5; + +SELECT '--'; + +SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs RIGHT JOIN test_table_2 AS rhs ON lhs.id = rhs.id +WHERE rhs.id = 5; + +SELECT '--'; + +EXPLAIN header = 1, actions = 1 +SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs FULL JOIN test_table_2 AS rhs ON lhs.id = rhs.id +WHERE lhs.id = 5; + +SELECT '--'; + +SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs FULL JOIN test_table_2 AS rhs ON lhs.id = rhs.id +WHERE lhs.id = 5; + +SELECT '--'; + +EXPLAIN header = 1, actions = 1 +SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs FULL JOIN test_table_2 AS rhs ON lhs.id = rhs.id +WHERE rhs.id = 5; + +SELECT '--'; + +SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs FULL JOIN test_table_2 AS rhs ON lhs.id = rhs.id +WHERE rhs.id = 5; + +SELECT '--'; + +EXPLAIN header = 
1, actions = 1 +SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs FULL JOIN test_table_2 AS rhs ON lhs.id = rhs.id +WHERE lhs.id = 5 AND rhs.id = 6; + +SELECT '--'; + +SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs FULL JOIN test_table_2 AS rhs ON lhs.id = rhs.id +WHERE lhs.id = 5 AND rhs.id = 6; + +-- { echoOff } + +DROP TABLE test_table_1; +DROP TABLE test_table_2; diff --git a/tests/queries/0_stateless/03036_parquet_arrow_nullable.reference b/tests/queries/0_stateless/03036_parquet_arrow_nullable.reference new file mode 100644 index 000000000000..8820bb7cb9f7 --- /dev/null +++ b/tests/queries/0_stateless/03036_parquet_arrow_nullable.reference @@ -0,0 +1,40 @@ +Parquet +a UInt64 +a_nullable Nullable(UInt64) +Arrow +a UInt64 +a_nullable Nullable(UInt64) +Parquet +b Array(Nullable(UInt64)) +b_nullable Array(Nullable(UInt64)) +Arrow +b Array(Nullable(UInt64)) +b_nullable Array(Nullable(UInt64)) +Parquet +c Tuple(\n a UInt64,\n b String) +c_nullable Tuple(\n a Nullable(UInt64),\n b Nullable(String)) +Arrow +c Tuple(\n a UInt64,\n b String) +c_nullable Tuple(\n a Nullable(UInt64),\n b Nullable(String)) +Parquet +d Tuple(\n a UInt64,\n b Tuple(\n a UInt64,\n b String),\n d_nullable Tuple(\n a UInt64,\n b Tuple(\n a Nullable(UInt64),\n b Nullable(String)))) +Arrow +d Tuple(\n a UInt64,\n b Tuple(\n a UInt64,\n b String),\n d_nullable Tuple(\n a UInt64,\n b Tuple(\n a Nullable(UInt64),\n b Nullable(String)))) +Parquet +e Map(UInt64, Nullable(String)) +e_nullable Map(UInt64, Nullable(String)) +Arrow +e Map(UInt64, Nullable(String)) +e_nullable Map(UInt64, Nullable(String)) +Parquet +f Map(UInt64, Map(UInt64, Nullable(String))) +f_nullables Map(UInt64, Map(UInt64, Nullable(String))) +Arrow +f Map(UInt64, Map(UInt64, Nullable(String))) +f_nullables Map(UInt64, Map(UInt64, Nullable(String))) +Parquet +g String +g_nullable Nullable(String) +Arrow +g LowCardinality(String) +g_nullable LowCardinality(String) diff --git a/tests/queries/0_stateless/03036_parquet_arrow_nullable.sh b/tests/queries/0_stateless/03036_parquet_arrow_nullable.sh new file mode 100755 index 000000000000..bdd641e2b943 --- /dev/null +++ b/tests/queries/0_stateless/03036_parquet_arrow_nullable.sh @@ -0,0 +1,63 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + + +DATA_FILE=$CLICKHOUSE_TEST_UNIQUE_NAME.data + +formats="Parquet Arrow" + +for format in $formats +do + echo $format + $CLICKHOUSE_LOCAL -q "select * from generateRandom('a UInt64, a_nullable Nullable(UInt64)', 42) limit 10 format $format" > $DATA_FILE + $CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE') SETTINGS schema_inference_make_columns_nullable = 0" +done + +for format in $formats +do + echo $format + $CLICKHOUSE_LOCAL -q "select * from generateRandom('b Array(UInt64), b_nullable Array(Nullable(UInt64))', 42) limit 10 format $format" > $DATA_FILE + $CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE') SETTINGS schema_inference_make_columns_nullable = 0" +done + +for format in $formats +do + echo $format + $CLICKHOUSE_LOCAL -q "select * from generateRandom('c Tuple(a UInt64, b String), c_nullable Tuple(a Nullable(UInt64), b Nullable(String))', 42) limit 10 format $format" > $DATA_FILE + $CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE') SETTINGS schema_inference_make_columns_nullable = 0" +done + +for format in $formats +do + echo $format + $CLICKHOUSE_LOCAL -q "select * from generateRandom('d Tuple(a UInt64, b Tuple(a UInt64, b String), d_nullable Tuple(a UInt64, b Tuple(a Nullable(UInt64), b Nullable(String))))', 42) limit 10 format $format" > $DATA_FILE + $CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE') SETTINGS schema_inference_make_columns_nullable = 0" +done + +for format in $formats +do + echo $format + $CLICKHOUSE_LOCAL -q "select * from generateRandom('e Map(UInt64, String), e_nullable Map(UInt64, Nullable(String))', 42) limit 10 format $format" > $DATA_FILE + $CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE') SETTINGS schema_inference_make_columns_nullable = 0" +done + +for format in $formats +do + echo $format + $CLICKHOUSE_LOCAL -q "select * from generateRandom('f Map(UInt64, Map(UInt64, String)), f_nullables Map(UInt64, Map(UInt64, Nullable(String)))', 42) limit 10 format $format" > $DATA_FILE + $CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE') SETTINGS schema_inference_make_columns_nullable = 0" +done + +for format in $formats +do + echo $format + $CLICKHOUSE_LOCAL -q "select * from generateRandom('g LowCardinality(String), g_nullable LowCardinality(Nullable(String))', 42) limit 10 settings output_format_arrow_low_cardinality_as_dictionary=1, allow_suspicious_low_cardinality_types=1 format $format" > $DATA_FILE + $CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE') SETTINGS schema_inference_make_columns_nullable = 0" +done + +rm $DATA_FILE + diff --git a/tests/queries/0_stateless/03036_prewhere_lambda_function.reference b/tests/queries/0_stateless/03036_prewhere_lambda_function.reference new file mode 100644 index 000000000000..2599763b762f --- /dev/null +++ b/tests/queries/0_stateless/03036_prewhere_lambda_function.reference @@ -0,0 +1 @@ +[4,5,6] diff --git a/tests/queries/0_stateless/03036_prewhere_lambda_function.sql b/tests/queries/0_stateless/03036_prewhere_lambda_function.sql new file mode 100644 index 000000000000..8b9ebb775a37 --- /dev/null +++ b/tests/queries/0_stateless/03036_prewhere_lambda_function.sql @@ -0,0 +1,7 @@ +DROP TABLE IF EXISTS t; +CREATE TABLE t (A Array(Int64)) Engine = MergeTree ORDER BY tuple(); +INSERT INTO t VALUES ([1,2,3]), ([4,5,6]), ([7,8,9]); + +SELECT * FROM t PREWHERE arrayExists(x -> x = 5, A); + +DROP TABLE t; diff --git a/tests/queries/0_stateless/03036_udf_user_defined_directory_in_client.reference b/tests/queries/0_stateless/03036_udf_user_defined_directory_in_client.reference new file mode 100644 index 000000000000..251d054748a5 
--- /dev/null +++ b/tests/queries/0_stateless/03036_udf_user_defined_directory_in_client.reference @@ -0,0 +1 @@ +Unknown function diff --git a/tests/queries/0_stateless/03036_udf_user_defined_directory_in_client.sh b/tests/queries/0_stateless/03036_udf_user_defined_directory_in_client.sh new file mode 100755 index 000000000000..e0a145d84566 --- /dev/null +++ b/tests/queries/0_stateless/03036_udf_user_defined_directory_in_client.sh @@ -0,0 +1,15 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS test" +${CLICKHOUSE_CLIENT} --query "CREATE TABLE test (s String) ENGINE = Memory" + +# Calling an unknown function should not lead to creation of a 'user_defined' directory in the current directory +${CLICKHOUSE_CLIENT} --query "INSERT INTO test VALUES (xyz('abc'))" 2>&1 | grep -o -F 'Unknown function' + +ls -ld user_defined 2> /dev/null + +${CLICKHOUSE_CLIENT} --query "DROP TABLE test" diff --git a/tests/queries/0_stateless/03036_with_numbers.reference b/tests/queries/0_stateless/03036_with_numbers.reference new file mode 100644 index 000000000000..7b36cc96f5ec --- /dev/null +++ b/tests/queries/0_stateless/03036_with_numbers.reference @@ -0,0 +1,20 @@ +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 diff --git a/tests/queries/0_stateless/03036_with_numbers.sql b/tests/queries/0_stateless/03036_with_numbers.sql new file mode 100644 index 000000000000..3463ce826e2d --- /dev/null +++ b/tests/queries/0_stateless/03036_with_numbers.sql @@ -0,0 +1,9 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/13843 +SET allow_experimental_analyzer=1; +WITH 10 AS n +SELECT * +FROM numbers(n); + +WITH cast(10, 'UInt64') AS n +SELECT * +FROM numbers(n); diff --git a/tests/queries/0_stateless/03037_dot_product_overflow.reference b/tests/queries/0_stateless/03037_dot_product_overflow.reference new file mode 100644 index 000000000000..573541ac9702 --- /dev/null +++ b/tests/queries/0_stateless/03037_dot_product_overflow.reference @@ -0,0 +1 @@ +0 diff --git a/tests/queries/0_stateless/03037_dot_product_overflow.sql b/tests/queries/0_stateless/03037_dot_product_overflow.sql new file mode 100644 index 000000000000..94d5eba62552 --- /dev/null +++ b/tests/queries/0_stateless/03037_dot_product_overflow.sql @@ -0,0 +1,2 @@ +select ignore(dotProduct(materialize([9223372036854775807, 1]), materialize([-3, 1]))); + diff --git a/tests/queries/0_stateless/03037_union_view.reference b/tests/queries/0_stateless/03037_union_view.reference new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/queries/0_stateless/03037_union_view.sql b/tests/queries/0_stateless/03037_union_view.sql new file mode 100644 index 000000000000..3ea81b829bab --- /dev/null +++ b/tests/queries/0_stateless/03037_union_view.sql @@ -0,0 +1,31 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/55803 +SET allow_experimental_analyzer=1; +DROP TABLE IF EXISTS broken_table; +DROP TABLE IF EXISTS broken_view; + +CREATE TABLE broken_table +( + start DateTime64(6), + end DateTime64(6), +) +ENGINE = ReplacingMergeTree(start) +ORDER BY (start); + +CREATE VIEW broken_view as +SELECT + t.start as start, + t.end as end, + cast(datediff('second', t.start, t.end) as float) as total_sec +FROM broken_table t FINAL +UNION ALL +SELECT + null as start, + null as end, + null as total_sec; + +SELECT v.start, v.total_sec +FROM broken_view v FINAL +WHERE v.start IS NOT NULL; + +DROP TABLE IF 
EXISTS broken_table; +DROP TABLE IF EXISTS broken_view; \ No newline at end of file diff --git a/tests/queries/0_stateless/03037_zero_step_in_numbers_table_function.reference b/tests/queries/0_stateless/03037_zero_step_in_numbers_table_function.reference new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/queries/0_stateless/03037_zero_step_in_numbers_table_function.sql b/tests/queries/0_stateless/03037_zero_step_in_numbers_table_function.sql new file mode 100644 index 000000000000..08fafd6ddfa8 --- /dev/null +++ b/tests/queries/0_stateless/03037_zero_step_in_numbers_table_function.sql @@ -0,0 +1,2 @@ +select * from numbers(1, 10, 0); -- {serverError BAD_ARGUMENTS} + diff --git a/tests/queries/0_stateless/03038_ambiguous_column.reference b/tests/queries/0_stateless/03038_ambiguous_column.reference new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/queries/0_stateless/03038_ambiguous_column.sql b/tests/queries/0_stateless/03038_ambiguous_column.sql new file mode 100644 index 000000000000..9df3cd9bc9bd --- /dev/null +++ b/tests/queries/0_stateless/03038_ambiguous_column.sql @@ -0,0 +1,42 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/48308 +SET allow_experimental_analyzer=1; +DROP TABLE IF EXISTS 03038_table; + +CREATE TABLE 03038_table +( + `time` DateTime +) +ENGINE = MergeTree +ORDER BY time; + +SELECT * +FROM +( + SELECT + toUInt64(time) AS time, + toHour(03038_table.time) + FROM 03038_table +) +ORDER BY time ASC; + +WITH subquery AS ( + SELECT + toUInt64(time) AS time, + toHour(03038_table.time) + FROM 03038_table +) +SELECT * +FROM subquery +ORDER BY subquery.time ASC; + +SELECT * +FROM +( + SELECT + toUInt64(time) AS time, + toHour(03038_table.time) AS hour + FROM 03038_table +) +ORDER BY time ASC, hour; + +DROP TABLE IF EXISTS 03038_table; diff --git a/tests/queries/0_stateless/03039_unknown_identifier_window_function.reference b/tests/queries/0_stateless/03039_unknown_identifier_window_function.reference new file mode 100644 index 000000000000..405da28a5798 --- /dev/null +++ b/tests/queries/0_stateless/03039_unknown_identifier_window_function.reference @@ -0,0 +1,40 @@ +0 10 +1 10 +2 10 +3 10 +4 10 +5 10 +6 10 +7 10 +8 10 +9 10 +0 10 +1 10 +2 10 +3 10 +4 10 +5 10 +6 10 +7 10 +8 10 +9 10 +0 10 0 +1 10 1 +2 10 2 +3 10 3 +4 10 4 +5 10 5 +6 10 6 +7 10 7 +8 10 8 +9 10 9 +0 10 +1 10 +2 10 +3 10 +4 10 +5 10 +6 10 +7 10 +8 10 +9 10 diff --git a/tests/queries/0_stateless/03039_unknown_identifier_window_function.sql b/tests/queries/0_stateless/03039_unknown_identifier_window_function.sql new file mode 100644 index 000000000000..640d217d2f96 --- /dev/null +++ b/tests/queries/0_stateless/03039_unknown_identifier_window_function.sql @@ -0,0 +1,35 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/45535 +SET allow_experimental_analyzer=1; + +SELECT + *, + count() OVER () AS c +FROM numbers(10) +ORDER BY toString(number); + + +WITH + toString(number) as str +SELECT + *, + count() OVER () AS c +FROM numbers(10) +ORDER BY str; + +SELECT + *, + count() OVER () AS c, + toString(number) as str +FROM numbers(10) +ORDER BY str; + + +WITH + test AS ( + SELECT + *, + count() OVER () AS c + FROM numbers(10) + ) +SELECT * FROM test +ORDER BY toString(number); diff --git a/tests/queries/0_stateless/03040_alias_column_join.reference b/tests/queries/0_stateless/03040_alias_column_join.reference new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/queries/0_stateless/03040_alias_column_join.sql 
b/tests/queries/0_stateless/03040_alias_column_join.sql new file mode 100644 index 000000000000..54f579c0feb1 --- /dev/null +++ b/tests/queries/0_stateless/03040_alias_column_join.sql @@ -0,0 +1,14 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/44365 +SET allow_experimental_analyzer=1; +DROP TABLE IF EXISTS 03040_test; + +CREATE TABLE 03040_test +( + id UInt64, + val String alias 'value: '||toString(id) +) ENGINE = MergeTree +ORDER BY tuple(); + +SELECT val FROM 03040_test t GROUP BY val; + +DROP TABLE IF EXISTS 03040_test; diff --git a/tests/queries/0_stateless/03040_array_sum_and_join.reference b/tests/queries/0_stateless/03040_array_sum_and_join.reference new file mode 100644 index 000000000000..d81fd9a2f69c --- /dev/null +++ b/tests/queries/0_stateless/03040_array_sum_and_join.reference @@ -0,0 +1,5 @@ +79 name1 42.7027027027027 +62 name2 33.513513513513516 +44 name3 23.783783783783786 +[[1,2],[1,2]] +[(3,[1,2]),(4,[1,2])] diff --git a/tests/queries/0_stateless/03040_array_sum_and_join.sql b/tests/queries/0_stateless/03040_array_sum_and_join.sql new file mode 100644 index 000000000000..9aeddc9f7652 --- /dev/null +++ b/tests/queries/0_stateless/03040_array_sum_and_join.sql @@ -0,0 +1,27 @@ +SET allow_experimental_analyzer=1; + +select t.1 as cnt, + t.2 as name, + t.3 as percent +from ( + select arrayJoin(result) as t + from ( + select [ + (79, 'name1'), + (62, 'name2'), + (44, 'name3') + ] as data, + arraySum(arrayMap(t -> t.1, data)) as total, + arrayMap(t -> + tuple(t.1, t.2, + multiIf(total = 0, 0, t.1 > 0 and t.1 < 10, -1.0, + (toFloat32(t.1) / toFloat32(total)) * 100) + ), + data + ) as result + ) + ); + +SELECT arrayMap(x -> arrayMap(x -> (x.1), [(1, 1), (2, 2)]), [(3, 3), (4, 4)]); + +SELECT arrayMap(x -> (x.1, arrayMap(x -> (x.1), [(1, 1), (2, 2)])), [(3, 3), (4, 4)]); diff --git a/tests/queries/0_stateless/03041_analyzer_gigachad_join.reference b/tests/queries/0_stateless/03041_analyzer_gigachad_join.reference new file mode 100644 index 000000000000..a859a6005123 --- /dev/null +++ b/tests/queries/0_stateless/03041_analyzer_gigachad_join.reference @@ -0,0 +1 @@ +123456789 111 222 diff --git a/tests/queries/0_stateless/03041_analyzer_gigachad_join.sql b/tests/queries/0_stateless/03041_analyzer_gigachad_join.sql new file mode 100644 index 000000000000..7906e65f8b8b --- /dev/null +++ b/tests/queries/0_stateless/03041_analyzer_gigachad_join.sql @@ -0,0 +1,15 @@ +SET allow_experimental_analyzer=1; +CREATE TABLE IF NOT EXISTS first engine = MergeTree PARTITION BY (inn, toYYYYMM(received)) ORDER BY (inn, sessionId) +AS SELECT now() AS received, '123456789' AS inn, '42' AS sessionId; + +CREATE TABLE IF NOT EXISTS second engine = MergeTree PARTITION BY (inn, toYYYYMM(received)) ORDER BY (inn, sessionId) +AS SELECT now() AS received, '123456789' AS inn, '42' AS sessionId, '111' AS serial, '222' AS reg; + +SELECT alias_first.inn, arrayFirst(t -> isNotNull(t), regInfo.1), arrayFirst(t -> isNotNull(t), regInfo.2) + FROM first AS alias_first + INNER JOIN ( + SELECT alias_second.inn, alias_second.sessionId, groupArray((serial, reg)) AS regInfo + FROM second AS alias_second + GROUP BY inn, sessionId + ) AS resp ON (alias_first.inn = resp.inn) AND (alias_first.sessionId = resp.sessionId) +WHERE if('123456789' IS NOT NULL, alias_first.inn = '123456789', 1) diff --git a/tests/queries/0_stateless/03041_select_with_query_result.reference b/tests/queries/0_stateless/03041_select_with_query_result.reference new file mode 100644 index 000000000000..e69de29bb2d1 diff --git 
a/tests/queries/0_stateless/03041_select_with_query_result.sql b/tests/queries/0_stateless/03041_select_with_query_result.sql new file mode 100644 index 000000000000..061223b43e13 --- /dev/null +++ b/tests/queries/0_stateless/03041_select_with_query_result.sql @@ -0,0 +1,42 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/44153 +SET allow_experimental_analyzer=1; +DROP TABLE IF EXISTS parent; +DROP TABLE IF EXISTS join_table_1; +DROP TABLE IF EXISTS join_table_2; + +CREATE TABLE parent( + a_id Int64, + b_id Int64, + c_id Int64, + created_at Int64 +) +ENGINE=MergeTree() +ORDER BY (a_id, b_id, c_id, created_at); + +CREATE TABLE join_table_1( + a_id Int64, + b_id Int64 +) +ENGINE=MergeTree() +ORDER BY (a_id, b_id); + +CREATE TABLE join_table_2( + c_id Int64, + created_at Int64 +) +ENGINE=MergeTree() +ORDER BY (c_id, created_at); + +WITH with_table as ( + SELECT p.a_id, p.b_id, p.c_id FROM parent p + LEFT JOIN join_table_1 jt1 ON jt1.a_id = p.a_id AND jt1.b_id = p.b_id + LEFT JOIN join_table_2 jt2 ON jt2.c_id = p.c_id + WHERE + p.a_id = 0 AND (jt2.c_id = 0 OR p.created_at = 0) +) +SELECT p.a_id, p.b_id, COUNT(*) as f_count FROM with_table +GROUP BY p.a_id, p.b_id; + +DROP TABLE IF EXISTS parent; +DROP TABLE IF EXISTS join_table_1; +DROP TABLE IF EXISTS join_table_2; diff --git a/tests/queries/0_stateless/03042_analyzer_alias_join.reference b/tests/queries/0_stateless/03042_analyzer_alias_join.reference new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/queries/0_stateless/03042_analyzer_alias_join.sql b/tests/queries/0_stateless/03042_analyzer_alias_join.sql new file mode 100644 index 000000000000..dac3b6a4983e --- /dev/null +++ b/tests/queries/0_stateless/03042_analyzer_alias_join.sql @@ -0,0 +1,21 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/14978 +SET allow_experimental_analyzer=1; +CREATE TABLE test1(id UInt64, t1value UInt64) ENGINE=MergeTree ORDER BY tuple(); +CREATE TABLE test2(id UInt64, t2value String) ENGINE=MergeTree ORDER BY tuple(); + +SELECT NULL AS t2value +FROM test1 t1 +LEFT JOIN ( + SELECT id, t2value FROM test2 +) t2 +ON t1.id=t2.id +WHERE t2.t2value='test'; + +-- workaround should work too +SELECT NULL AS _svalue +FROM test1 t1 +LEFT JOIN ( + SELECT id, t2value FROM test2 +) t2 +ON t1.id=t2.id +WHERE t2.t2value='test'; diff --git a/tests/queries/0_stateless/03042_not_found_column_c1.reference b/tests/queries/0_stateless/03042_not_found_column_c1.reference new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/queries/0_stateless/03042_not_found_column_c1.sql b/tests/queries/0_stateless/03042_not_found_column_c1.sql new file mode 100644 index 000000000000..b4dce2af4895 --- /dev/null +++ b/tests/queries/0_stateless/03042_not_found_column_c1.sql @@ -0,0 +1,9 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/42399 +SET allow_experimental_analyzer=1; + +CREATE TABLE IF NOT EXISTS t0 (c0 Int32) ENGINE = Memory() ; +CREATE TABLE t1 (c0 Int32, c1 Int32, c2 Int32) ENGINE = Memory() ; +CREATE TABLE t2 (c0 String, c1 String MATERIALIZED (c2), c2 Int32) ENGINE = Memory() ; +CREATE TABLE t3 (c0 String, c1 String, c2 String) ENGINE = Log() ; +CREATE TABLE IF NOT EXISTS t4 (c0 Int32) ENGINE = Log() ; +SELECT t3.c1, t3.c2, t1.c1, t1.c0, t2.c2, t0.c0, t1.c2, t2.c1, t4.c0 FROM t3, t0, t1, t2, t4; diff --git a/tests/queries/0_stateless/03043_group_array_result_is_expected.reference b/tests/queries/0_stateless/03043_group_array_result_is_expected.reference new file mode 100644 index 000000000000..d43aa556dce1 --- /dev/null +++ 
b/tests/queries/0_stateless/03043_group_array_result_is_expected.reference @@ -0,0 +1 @@ +['2021-07-01','2021-07-02','2021-07-03','2021-07-04','2021-07-05','2021-07-06','2021-07-07','2021-07-08','2021-07-09','2021-07-10','2021-07-11','2021-07-12','2021-07-13','2021-07-14','2021-07-15','2021-07-16','2021-07-17','2021-07-18','2021-07-19','2021-07-20','2021-07-21','2021-07-22','2021-07-23','2021-07-24','2021-07-25','2021-07-26','2021-07-27','2021-07-28','2021-07-29'] 29 diff --git a/tests/queries/0_stateless/03043_group_array_result_is_expected.sql b/tests/queries/0_stateless/03043_group_array_result_is_expected.sql new file mode 100644 index 000000000000..5311927ae3cf --- /dev/null +++ b/tests/queries/0_stateless/03043_group_array_result_is_expected.sql @@ -0,0 +1,45 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/27115 +SET allow_experimental_analyzer=1; +drop table if exists fill_ex; + +create table fill_ex ( + eventDate Date , + storeId String +) +engine = ReplacingMergeTree() +partition by toYYYYMM(eventDate) +order by (storeId,eventDate); + +insert into fill_ex (eventDate,storeId) values ('2021-07-16','s') ('2021-07-17','ee'); + +select + groupArray(key) as keys, + count() as c +from + ( + select + *, + eventDate as key + from + ( + select + eventDate + from + ( + select + eventDate + from + fill_ex final + where + eventDate >= toDate('2021-07-01') + and eventDate= 2019; + +DROP TABLE IF EXISTS ab_12_aaa; +DROP TABLE IF EXISTS ab_12_bbb; diff --git a/tests/queries/0_stateless/03049_analyzer_group_by_alias.reference b/tests/queries/0_stateless/03049_analyzer_group_by_alias.reference new file mode 100644 index 000000000000..dac5487d4455 --- /dev/null +++ b/tests/queries/0_stateless/03049_analyzer_group_by_alias.reference @@ -0,0 +1,7 @@ +1 5 +2 4 +1 +2 +1 +1 +2 diff --git a/tests/queries/0_stateless/03049_analyzer_group_by_alias.sql b/tests/queries/0_stateless/03049_analyzer_group_by_alias.sql new file mode 100644 index 000000000000..d25babe6788b --- /dev/null +++ b/tests/queries/0_stateless/03049_analyzer_group_by_alias.sql @@ -0,0 +1,23 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/7520 +SET allow_experimental_analyzer=1; +CREATE TABLE test (`a` UInt32, `b` UInt32) ENGINE = Memory; + +INSERT INTO test VALUES (1,2), (1,3), (2,4); + +-- 1 5 +-- 2 4 + +WITH + a as key +SELECT + a as k1, + sum(b) as k2 +FROM + test +GROUP BY + key +ORDER BY k1, k2; + +WITH a as key SELECT key as k1 FROM test GROUP BY key ORDER BY key; + +WITH a as key SELECT key as k1 FROM test ORDER BY key; diff --git a/tests/queries/0_stateless/03049_unknown_identifier_materialized_column.reference b/tests/queries/0_stateless/03049_unknown_identifier_materialized_column.reference new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/queries/0_stateless/03049_unknown_identifier_materialized_column.sql b/tests/queries/0_stateless/03049_unknown_identifier_materialized_column.sql new file mode 100644 index 000000000000..938f270b9e4c --- /dev/null +++ b/tests/queries/0_stateless/03049_unknown_identifier_materialized_column.sql @@ -0,0 +1,14 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/54317 +SET allow_experimental_analyzer=1; +DROP DATABASE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}; + +CREATE DATABASE {CLICKHOUSE_DATABASE:Identifier}; +USE {CLICKHOUSE_DATABASE:Identifier}; + +CREATE TABLE l (y String) Engine Memory; +CREATE TABLE r (d Date, y String, ty UInt16 MATERIALIZED toYear(d)) Engine Memory; +select * from l L left join r R on L.y = R.y where R.ty >= 2019; +select * from l 
left join r on l.y = r.y where r.ty >= 2019; +select * from {CLICKHOUSE_DATABASE:Identifier}.l left join {CLICKHOUSE_DATABASE:Identifier}.r on l.y = r.y where r.ty >= 2019; + +DROP DATABASE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}; diff --git a/tests/queries/0_stateless/03050_select_one_one_one.reference b/tests/queries/0_stateless/03050_select_one_one_one.reference new file mode 100644 index 000000000000..85e6138dc5d6 --- /dev/null +++ b/tests/queries/0_stateless/03050_select_one_one_one.reference @@ -0,0 +1,2 @@ +1 1 1 +1 1 1 diff --git a/tests/queries/0_stateless/03050_select_one_one_one.sql b/tests/queries/0_stateless/03050_select_one_one_one.sql new file mode 100644 index 000000000000..28a55e0c4715 --- /dev/null +++ b/tests/queries/0_stateless/03050_select_one_one_one.sql @@ -0,0 +1,4 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/36973 +SET allow_experimental_analyzer=1; +SELECT 1, 1, 1; +SELECT * FROM (SELECT 1, 1, 1); diff --git a/tests/queries/0_stateless/03051_many_ctes.reference b/tests/queries/0_stateless/03051_many_ctes.reference new file mode 100644 index 000000000000..487b1165348b --- /dev/null +++ b/tests/queries/0_stateless/03051_many_ctes.reference @@ -0,0 +1,4 @@ +2 +2 +2 +2 diff --git a/tests/queries/0_stateless/03051_many_ctes.sql b/tests/queries/0_stateless/03051_many_ctes.sql new file mode 100644 index 000000000000..d4e613bd279e --- /dev/null +++ b/tests/queries/0_stateless/03051_many_ctes.sql @@ -0,0 +1,6 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/40955 +SET allow_experimental_analyzer=1; +WITH toInt64(2) AS new_x SELECT new_x AS x FROM (SELECT 1 AS x) t; +WITH toInt64(2) AS new_x SELECT * replace(new_x as x) FROM (SELECT 1 AS x) t; +SELECT 2 AS x FROM (SELECT 1 AS x) t; +SELECT * replace(2 as x) FROM (SELECT 1 AS x) t; diff --git a/tests/queries/0_stateless/03052_query_hash_includes_aliases.reference b/tests/queries/0_stateless/03052_query_hash_includes_aliases.reference new file mode 100644 index 000000000000..570d7be9c4bf --- /dev/null +++ b/tests/queries/0_stateless/03052_query_hash_includes_aliases.reference @@ -0,0 +1,2 @@ +(1,1) (1,0) +(3,4) (3,11) diff --git a/tests/queries/0_stateless/03052_query_hash_includes_aliases.sql b/tests/queries/0_stateless/03052_query_hash_includes_aliases.sql new file mode 100644 index 000000000000..24e9ab0f36e9 --- /dev/null +++ b/tests/queries/0_stateless/03052_query_hash_includes_aliases.sql @@ -0,0 +1,32 @@ +-- https://github.com/ClickHouse/ClickHouse/pull/40065 +SET allow_experimental_analyzer=1; + +SELECT +( + SELECT + 1 AS number, + number + FROM numbers(1) +) AS s, +( + SELECT + 1, + number + FROM numbers(1) +) AS s2; + +SELECT +( + SELECT + 1 + 2 AS number, + 1 + number AS b + FROM system.numbers + LIMIT 10, 1 +), +( + SELECT + 1 + 2 AS number2, + 1 + number AS b + FROM system.numbers + LIMIT 10, 1 +); diff --git a/tests/queries/0_stateless/03053_analyzer_join_alias.reference b/tests/queries/0_stateless/03053_analyzer_join_alias.reference new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/queries/0_stateless/03053_analyzer_join_alias.sql b/tests/queries/0_stateless/03053_analyzer_join_alias.sql new file mode 100644 index 000000000000..894b8af7c6f4 --- /dev/null +++ b/tests/queries/0_stateless/03053_analyzer_join_alias.sql @@ -0,0 +1,44 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/23104 +SET allow_experimental_analyzer=1; +DROP DATABASE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}; +CREATE DATABASE {CLICKHOUSE_DATABASE:Identifier}; + +CREATE TABLE 
{CLICKHOUSE_DATABASE:Identifier}.base +( +`id` UInt64, +`id2` UInt64, +`d` UInt64, +`value` UInt64 +) +ENGINE=MergeTree() +PARTITION BY d +ORDER BY (id,id2,d); + +CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.derived1 +( + `id1` UInt64, + `d1` UInt64, + `value1` UInt64 +) +ENGINE = MergeTree() +PARTITION BY d1 +ORDER BY (id1, d1); + +CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.derived2 +( + `id2` UInt64, + `d2` UInt64, + `value2` UInt64 +) +ENGINE = MergeTree() +PARTITION BY d2 +ORDER BY (id2, d2); + +SELECT + base.id AS `base.id`, + derived2.id2 AS `derived2.id2`, + derived2.value2 AS `derived2.value2`, + derived1.value1 AS `derived1.value1` +FROM {CLICKHOUSE_DATABASE:Identifier}.base AS base +LEFT JOIN {CLICKHOUSE_DATABASE:Identifier}.derived2 AS derived2 ON base.id2 = derived2.id2 +LEFT JOIN {CLICKHOUSE_DATABASE:Identifier}.derived1 AS derived1 ON base.id = derived1.id1; diff --git a/tests/queries/0_stateless/03054_analyzer_join_alias.reference b/tests/queries/0_stateless/03054_analyzer_join_alias.reference new file mode 100644 index 000000000000..f599e28b8ab0 --- /dev/null +++ b/tests/queries/0_stateless/03054_analyzer_join_alias.reference @@ -0,0 +1 @@ +10 diff --git a/tests/queries/0_stateless/03054_analyzer_join_alias.sql b/tests/queries/0_stateless/03054_analyzer_join_alias.sql new file mode 100644 index 000000000000..e124aa33a9b3 --- /dev/null +++ b/tests/queries/0_stateless/03054_analyzer_join_alias.sql @@ -0,0 +1,13 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/21584 +SET allow_experimental_analyzer=1; +SELECT count() +FROM +( + SELECT number AS key_1 + FROM numbers(15) +) AS x +ALL INNER JOIN +( + SELECT number AS key_1 + FROM numbers(10) +) AS z ON key_1 = z.key_1; diff --git a/tests/queries/0_stateless/03055_analyzer_subquery_group_array.reference b/tests/queries/0_stateless/03055_analyzer_subquery_group_array.reference new file mode 100644 index 000000000000..d05b1f927f4b --- /dev/null +++ b/tests/queries/0_stateless/03055_analyzer_subquery_group_array.reference @@ -0,0 +1 @@ +0 0 diff --git a/tests/queries/0_stateless/03055_analyzer_subquery_group_array.sql b/tests/queries/0_stateless/03055_analyzer_subquery_group_array.sql new file mode 100644 index 000000000000..25b6dcb3564a --- /dev/null +++ b/tests/queries/0_stateless/03055_analyzer_subquery_group_array.sql @@ -0,0 +1,6 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/23344 +SET allow_experimental_analyzer=1; +SELECT logTrace(repeat('Hello', 100)), ignore(*) +FROM ( + SELECT ignore((SELECT groupArrayState(([number], [number])) FROM numbers(19000))) +) diff --git a/tests/queries/0_stateless/03056_analyzer_double_subquery_alias.reference b/tests/queries/0_stateless/03056_analyzer_double_subquery_alias.reference new file mode 100644 index 000000000000..72749c905a31 --- /dev/null +++ b/tests/queries/0_stateless/03056_analyzer_double_subquery_alias.reference @@ -0,0 +1 @@ +1 1 1 diff --git a/tests/queries/0_stateless/03056_analyzer_double_subquery_alias.sql b/tests/queries/0_stateless/03056_analyzer_double_subquery_alias.sql new file mode 100644 index 000000000000..de471c1a0911 --- /dev/null +++ b/tests/queries/0_stateless/03056_analyzer_double_subquery_alias.sql @@ -0,0 +1,17 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/22627 +SET allow_experimental_analyzer=1; +WITH + x AS + ( + SELECT 1 AS a + ), + xx AS + ( + SELECT * + FROM x + , x AS x1 + , x AS x2 + ) +SELECT * +FROM xx +WHERE a = 1; diff --git a/tests/queries/0_stateless/03057_analyzer_subquery_alias_join.reference 
b/tests/queries/0_stateless/03057_analyzer_subquery_alias_join.reference new file mode 100644 index 000000000000..3e43e6addc00 --- /dev/null +++ b/tests/queries/0_stateless/03057_analyzer_subquery_alias_join.reference @@ -0,0 +1 @@ +1000 100000 diff --git a/tests/queries/0_stateless/03057_analyzer_subquery_alias_join.sql b/tests/queries/0_stateless/03057_analyzer_subquery_alias_join.sql new file mode 100644 index 000000000000..2217af327fa2 --- /dev/null +++ b/tests/queries/0_stateless/03057_analyzer_subquery_alias_join.sql @@ -0,0 +1,13 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/10276 +SET allow_experimental_analyzer=1; +SELECT + sum(x.n) as n, + sum(z.n) as n2 +FROM +( + SELECT 1000 AS n,1 as id +) AS x +join (select 10000 as n,1 as id) as y +on x.id = y.id +left join (select 100000 as n,1 as id) as z +on x.id = z.id; diff --git a/tests/queries/0_stateless/03058_analyzer_ambiguous_columns.reference b/tests/queries/0_stateless/03058_analyzer_ambiguous_columns.reference new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/queries/0_stateless/03058_analyzer_ambiguous_columns.sql b/tests/queries/0_stateless/03058_analyzer_ambiguous_columns.sql new file mode 100644 index 000000000000..3cce77f02403 --- /dev/null +++ b/tests/queries/0_stateless/03058_analyzer_ambiguous_columns.sql @@ -0,0 +1,26 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/4567 +SET allow_experimental_analyzer=1; +DROP TABLE IF EXISTS fact; +DROP TABLE IF EXISTS animals; +DROP TABLE IF EXISTS colors; + +create table fact(id Int64, animal_key Int64, color_key Int64) Engine = MergeTree order by tuple(); +insert into fact values (1,1,1),(2,2,2); + +create table animals(animal_key UInt64, animal_name String) Engine = MergeTree order by tuple(); +insert into animals values (0, 'unknown'); + +create table colors(color_key UInt64, color_name String) Engine = MergeTree order by tuple(); +insert into colors values (0, 'unknown'); + + +select id, animal_name, a.animal_key, color_name, color_key +from fact a + left join (select toInt64(animal_key) animal_key, animal_name from animals) b on (a.animal_key = b.animal_key) + left join (select toInt64(color_key) color_key, color_name from colors) c on (a.color_key = c.color_key); -- { serverError AMBIGUOUS_IDENTIFIER } + +select id, animal_name, animal_key, color_name, color_key +from fact a + left join (select toInt64(animal_key) animal_key, animal_name from animals) b on (a.animal_key = b.animal_key) + left join (select toInt64(color_key) color_key, color_name from colors) c on (a.color_key = c.color_key); -- { serverError AMBIGUOUS_IDENTIFIER } + diff --git a/tests/queries/0_stateless/03059_analyzer_join_engine_missing_column.reference b/tests/queries/0_stateless/03059_analyzer_join_engine_missing_column.reference new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/queries/0_stateless/03059_analyzer_join_engine_missing_column.sql b/tests/queries/0_stateless/03059_analyzer_join_engine_missing_column.sql new file mode 100644 index 000000000000..27782462075e --- /dev/null +++ b/tests/queries/0_stateless/03059_analyzer_join_engine_missing_column.sql @@ -0,0 +1,9 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/17710 +SET allow_experimental_analyzer=1; +CREATE TABLE id_val(id UInt32, val UInt32) ENGINE = Memory; +CREATE TABLE id_val_join0(id UInt32, val UInt8) ENGINE = Join(ANY, LEFT, id) SETTINGS join_use_nulls = 0; +CREATE TABLE id_val_join1(id UInt32, val UInt8) ENGINE = Join(ANY, LEFT, id) SETTINGS join_use_nulls = 1; + 
+SELECT * FROM id_val ANY LEFT JOIN id_val_join0 USING (id) SETTINGS join_use_nulls = 0; + +SELECT * FROM id_val ANY LEFT JOIN id_val_join1 USING (id) SETTINGS join_use_nulls = 1; diff --git a/tests/queries/0_stateless/03060_analyzer_regular_view_alias.reference b/tests/queries/0_stateless/03060_analyzer_regular_view_alias.reference new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/queries/0_stateless/03060_analyzer_regular_view_alias.sql b/tests/queries/0_stateless/03060_analyzer_regular_view_alias.sql new file mode 100644 index 000000000000..f8cd8690ee50 --- /dev/null +++ b/tests/queries/0_stateless/03060_analyzer_regular_view_alias.sql @@ -0,0 +1,16 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/11068 +SET allow_experimental_analyzer=1; +create table vt(datetime_value DateTime, value Float64) Engine=Memory; + +create view computed_datum_hours as +SELECT + toStartOfHour(b.datetime_value) AS datetime_desc, + sum(b.value) AS value +FROM vt AS b +GROUP BY toStartOfHour(b.datetime_value); + +SELECT + toStartOfHour(b.datetime_value) AS datetime_desc, + sum(b.value) AS value +FROM vt AS b +GROUP BY toStartOfHour(b.datetime_value); diff --git a/tests/queries/0_stateless/03061_analyzer_alias_as_right_key_in_join.reference b/tests/queries/0_stateless/03061_analyzer_alias_as_right_key_in_join.reference new file mode 100644 index 000000000000..af98bcd63975 --- /dev/null +++ b/tests/queries/0_stateless/03061_analyzer_alias_as_right_key_in_join.reference @@ -0,0 +1,10 @@ +0 0 +1 1 +2 2 +3 3 +4 4 +5 5 +6 6 +7 7 +8 8 +9 9 diff --git a/tests/queries/0_stateless/03061_analyzer_alias_as_right_key_in_join.sql b/tests/queries/0_stateless/03061_analyzer_alias_as_right_key_in_join.sql new file mode 100644 index 000000000000..6fee6d1f73d3 --- /dev/null +++ b/tests/queries/0_stateless/03061_analyzer_alias_as_right_key_in_join.sql @@ -0,0 +1,8 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/24395 +SET allow_experimental_analyzer=1; +CREATE TABLE xxxx_yyy (key UInt32, key_b ALIAS key) ENGINE=MergeTree() ORDER BY key; +INSERT INTO xxxx_yyy SELECT number FROM numbers(10); + +SELECT * +FROM xxxx_yyy AS a +INNER JOIN xxxx_yyy AS b ON a.key = b.key_b; diff --git a/tests/queries/0_stateless/03062_analyzer_join_engine_missing_column.reference b/tests/queries/0_stateless/03062_analyzer_join_engine_missing_column.reference new file mode 100644 index 000000000000..d496ccad6b62 --- /dev/null +++ b/tests/queries/0_stateless/03062_analyzer_join_engine_missing_column.reference @@ -0,0 +1,2 @@ +abc 0 0 0 1 +abc 0 0 0 1 diff --git a/tests/queries/0_stateless/03062_analyzer_join_engine_missing_column.sql b/tests/queries/0_stateless/03062_analyzer_join_engine_missing_column.sql new file mode 100644 index 000000000000..9748175e4d4a --- /dev/null +++ b/tests/queries/0_stateless/03062_analyzer_join_engine_missing_column.sql @@ -0,0 +1,13 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/23416 +SET allow_experimental_analyzer=1; +create table test (TOPIC String, PARTITION UInt64, OFFSET UInt64, ID UInt64) ENGINE ReplicatedMergeTree('/clickhouse/tables/{database}/test_03062', 'r2') ORDER BY (TOPIC, PARTITION, OFFSET); + +create table test_join (TOPIC String, PARTITION UInt64, OFFSET UInt64) ENGINE = Join(ANY, LEFT, `TOPIC`, `PARTITION`) SETTINGS join_any_take_last_row = 1; + +insert into test values('abc',0,0,0); + +insert into test_join values('abc',0,1); + +select *, joinGet('test_join', 'OFFSET', TOPIC, PARTITION) from test; + +select * from test any left join test_join using (TOPIC, 
PARTITION); diff --git a/tests/queries/0_stateless/03063_analyzer_multi_join_wrong_table_specifier.reference b/tests/queries/0_stateless/03063_analyzer_multi_join_wrong_table_specifier.reference new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/queries/0_stateless/03063_analyzer_multi_join_wrong_table_specifier.sql b/tests/queries/0_stateless/03063_analyzer_multi_join_wrong_table_specifier.sql new file mode 100644 index 000000000000..7eab1fa846a6 --- /dev/null +++ b/tests/queries/0_stateless/03063_analyzer_multi_join_wrong_table_specifier.sql @@ -0,0 +1,17 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/23162 +SET allow_experimental_analyzer=1; +CREATE TABLE t1 ( k Int64, x Int64) ENGINE = Memory; + +CREATE TABLE t2( x Int64 ) ENGINE = Memory; + +create table s (k Int64, d DateTime) Engine=Memory; + +SELECT * FROM t1 +INNER JOIN s ON t1.k = s.k +INNER JOIN t2 ON t2.x = t1.x +WHERE (t1.d >= now()); -- { serverError UNKNOWN_IDENTIFIER } + +SELECT * FROM t1 +INNER JOIN s ON t1.k = s.k +WHERE (t1.d >= now()); -- { serverError UNKNOWN_IDENTIFIER } + diff --git a/tests/queries/0_stateless/03064_analyzer_named_subqueries.reference b/tests/queries/0_stateless/03064_analyzer_named_subqueries.reference new file mode 100644 index 000000000000..556d825db42a --- /dev/null +++ b/tests/queries/0_stateless/03064_analyzer_named_subqueries.reference @@ -0,0 +1 @@ +2 1 diff --git a/tests/queries/0_stateless/03064_analyzer_named_subqueries.sql b/tests/queries/0_stateless/03064_analyzer_named_subqueries.sql new file mode 100644 index 000000000000..59ebb9d9af3c --- /dev/null +++ b/tests/queries/0_stateless/03064_analyzer_named_subqueries.sql @@ -0,0 +1,6 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/25655 +SET allow_experimental_analyzer=1; +SELECT + sum(t.b) / 1 a, + sum(t.a) +FROM ( SELECT 1 a, 2 b ) t; diff --git a/tests/queries/0_stateless/03065_analyzer_cross_join_and_array_join.reference b/tests/queries/0_stateless/03065_analyzer_cross_join_and_array_join.reference new file mode 100644 index 000000000000..594a6a2deeb9 --- /dev/null +++ b/tests/queries/0_stateless/03065_analyzer_cross_join_and_array_join.reference @@ -0,0 +1,2 @@ +1 3 +2 4 diff --git a/tests/queries/0_stateless/03065_analyzer_cross_join_and_array_join.sql b/tests/queries/0_stateless/03065_analyzer_cross_join_and_array_join.sql new file mode 100644 index 000000000000..7e6befe181ef --- /dev/null +++ b/tests/queries/0_stateless/03065_analyzer_cross_join_and_array_join.sql @@ -0,0 +1,3 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/11757 +SET allow_experimental_analyzer=1; +select * from (select [1, 2] a) aa cross join (select [3, 4] b) bb array join aa.a, bb.b; diff --git a/tests/queries/0_stateless/03066_analyzer_global_with_statement.reference b/tests/queries/0_stateless/03066_analyzer_global_with_statement.reference new file mode 100644 index 000000000000..d00491fd7e5b --- /dev/null +++ b/tests/queries/0_stateless/03066_analyzer_global_with_statement.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/03066_analyzer_global_with_statement.sql b/tests/queries/0_stateless/03066_analyzer_global_with_statement.sql new file mode 100644 index 000000000000..8983be242c38 --- /dev/null +++ b/tests/queries/0_stateless/03066_analyzer_global_with_statement.sql @@ -0,0 +1,8 @@ +SET allow_experimental_analyzer=1; +WITH 0 AS test +SELECT * +FROM +( + SELECT 1 AS test +) +SETTINGS enable_global_with_statement = 1 diff --git a/tests/queries/0_stateless/03067_analyzer_complex_alias_join.reference 
b/tests/queries/0_stateless/03067_analyzer_complex_alias_join.reference new file mode 100644 index 000000000000..6192a595f1e5 --- /dev/null +++ b/tests/queries/0_stateless/03067_analyzer_complex_alias_join.reference @@ -0,0 +1 @@ +key \N diff --git a/tests/queries/0_stateless/03067_analyzer_complex_alias_join.sql b/tests/queries/0_stateless/03067_analyzer_complex_alias_join.sql new file mode 100644 index 000000000000..052a9eaf734b --- /dev/null +++ b/tests/queries/0_stateless/03067_analyzer_complex_alias_join.sql @@ -0,0 +1,10 @@ +SET allow_experimental_analyzer=1; +with d as (select 'key'::Varchar(255) c, 'x'::Varchar(255) s) +SELECT r1, c as r2 +FROM ( + SELECT t as s, c as r1 + FROM ( SELECT 'y'::Varchar(255) as t, 'x'::Varchar(255) as s) t1 + LEFT JOIN d USING (s) + ) t2 +LEFT JOIN d using (s) +SETTINGS join_use_nulls=1; diff --git a/tests/queries/0_stateless/03068_analyzer_distributed_join.reference b/tests/queries/0_stateless/03068_analyzer_distributed_join.reference new file mode 100644 index 000000000000..1444d39d9578 --- /dev/null +++ b/tests/queries/0_stateless/03068_analyzer_distributed_join.reference @@ -0,0 +1,2 @@ +localhost 9000 0 0 0 +localhost 9000 0 0 0 diff --git a/tests/queries/0_stateless/03068_analyzer_distributed_join.sql b/tests/queries/0_stateless/03068_analyzer_distributed_join.sql new file mode 100644 index 000000000000..542380feb7c5 --- /dev/null +++ b/tests/queries/0_stateless/03068_analyzer_distributed_join.sql @@ -0,0 +1,58 @@ +-- Tags: no-replicated-database +-- Closes: https://github.com/ClickHouse/ClickHouse/issues/6571 + +SET allow_experimental_analyzer=1; +CREATE TABLE LINEITEM_shard ON CLUSTER test_shard_localhost +( + L_ORDERKEY UInt64, + L_COMMITDATE UInt32, + L_RECEIPTDATE UInt32 +) +ENGINE = MergeTree() +ORDER BY L_ORDERKEY; + +CREATE TABLE LINEITEM AS LINEITEM_shard +ENGINE = Distributed('test_shard_localhost', currentDatabase(), LINEITEM_shard, rand()); + +CREATE TABLE ORDERS_shard ON CLUSTER test_shard_localhost +( + O_ORDERKEY UInt64, + O_ORDERPRIORITY UInt32 +) +ENGINE = MergeTree() +ORDER BY O_ORDERKEY; + +CREATE TABLE ORDERS AS ORDERS_shard +ENGINE = Distributed('test_shard_localhost', currentDatabase(), ORDERS_shard, rand()); + +SET joined_subquery_requires_alias=0; + +select + O_ORDERPRIORITY, + count(*) as order_count +from ORDERS JOIN ( + select L_ORDERKEY + from + LINEITEM_shard + group by L_ORDERKEY + having any(L_COMMITDATE < L_RECEIPTDATE) +) on O_ORDERKEY=L_ORDERKEY +group by O_ORDERPRIORITY +order by O_ORDERPRIORITY +limit 1; + +SET joined_subquery_requires_alias=1; + +select + O_ORDERPRIORITY, + count(*) as order_count +from ORDERS JOIN ( + select L_ORDERKEY + from + LINEITEM_shard + group by L_ORDERKEY + having any(L_COMMITDATE < L_RECEIPTDATE) +) AS x on O_ORDERKEY=L_ORDERKEY +group by O_ORDERPRIORITY +order by O_ORDERPRIORITY +limit 1; diff --git a/tests/queries/0_stateless/03069_analyzer_with_alias_in_array_join.reference b/tests/queries/0_stateless/03069_analyzer_with_alias_in_array_join.reference new file mode 100644 index 000000000000..1191247b6d9a --- /dev/null +++ b/tests/queries/0_stateless/03069_analyzer_with_alias_in_array_join.reference @@ -0,0 +1,2 @@ +1 +2 diff --git a/tests/queries/0_stateless/03069_analyzer_with_alias_in_array_join.sql b/tests/queries/0_stateless/03069_analyzer_with_alias_in_array_join.sql new file mode 100644 index 000000000000..09d2985fe601 --- /dev/null +++ b/tests/queries/0_stateless/03069_analyzer_with_alias_in_array_join.sql @@ -0,0 +1,6 @@ +-- 
https://github.com/ClickHouse/ClickHouse/issues/4432 +SET allow_experimental_analyzer=1; +WITH [1, 2] AS zz +SELECT x +FROM system.one +ARRAY JOIN zz AS x diff --git a/tests/queries/0_stateless/03070_analyzer_CTE_scalar_as_numbers.reference b/tests/queries/0_stateless/03070_analyzer_CTE_scalar_as_numbers.reference new file mode 100644 index 000000000000..595b4d6b5b85 --- /dev/null +++ b/tests/queries/0_stateless/03070_analyzer_CTE_scalar_as_numbers.reference @@ -0,0 +1,25 @@ +0 25 +1 25 +2 25 +3 25 +4 25 +5 25 +6 25 +7 25 +8 25 +9 25 +10 25 +11 25 +12 25 +13 25 +14 25 +15 25 +16 25 +17 25 +18 25 +19 25 +20 25 +21 25 +22 25 +23 25 +24 25 diff --git a/tests/queries/0_stateless/03070_analyzer_CTE_scalar_as_numbers.sql b/tests/queries/0_stateless/03070_analyzer_CTE_scalar_as_numbers.sql new file mode 100644 index 000000000000..7aadab2ca736 --- /dev/null +++ b/tests/queries/0_stateless/03070_analyzer_CTE_scalar_as_numbers.sql @@ -0,0 +1,6 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/8259 +SET allow_experimental_analyzer=1; +with + (select 25) as something +select *, something +from numbers(toUInt64(assumeNotNull(something))); diff --git a/tests/queries/0_stateless/03071_analyzer_array_join_forbid_non_existing_columns.reference b/tests/queries/0_stateless/03071_analyzer_array_join_forbid_non_existing_columns.reference new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/queries/0_stateless/03071_analyzer_array_join_forbid_non_existing_columns.sql b/tests/queries/0_stateless/03071_analyzer_array_join_forbid_non_existing_columns.sql new file mode 100644 index 000000000000..e2eb758d6495 --- /dev/null +++ b/tests/queries/0_stateless/03071_analyzer_array_join_forbid_non_existing_columns.sql @@ -0,0 +1,12 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/9233 +SET allow_experimental_analyzer=1; +SELECT * +FROM +( + SELECT + [1, 2, 3] AS x, + [4, 5, 6] AS y +) +ARRAY JOIN + x, + Y; -- { serverError UNKNOWN_IDENTIFIER } diff --git a/tests/queries/0_stateless/03072_analyzer_missing_columns_from_subquery.reference b/tests/queries/0_stateless/03072_analyzer_missing_columns_from_subquery.reference new file mode 100644 index 000000000000..573541ac9702 --- /dev/null +++ b/tests/queries/0_stateless/03072_analyzer_missing_columns_from_subquery.reference @@ -0,0 +1 @@ +0 diff --git a/tests/queries/0_stateless/03072_analyzer_missing_columns_from_subquery.sql b/tests/queries/0_stateless/03072_analyzer_missing_columns_from_subquery.sql new file mode 100644 index 000000000000..e2846033913f --- /dev/null +++ b/tests/queries/0_stateless/03072_analyzer_missing_columns_from_subquery.sql @@ -0,0 +1,3 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/14699 +SET allow_experimental_analyzer=1; +select * from (select number from numbers(1)) where not ignore(*); diff --git a/tests/queries/0_stateless/03073_analyzer_alias_as_column_name.reference b/tests/queries/0_stateless/03073_analyzer_alias_as_column_name.reference new file mode 100644 index 000000000000..ca6280d2dbf4 --- /dev/null +++ b/tests/queries/0_stateless/03073_analyzer_alias_as_column_name.reference @@ -0,0 +1,2 @@ +1 1997-02-01 +2 1997-02-01 diff --git a/tests/queries/0_stateless/03073_analyzer_alias_as_column_name.sql b/tests/queries/0_stateless/03073_analyzer_alias_as_column_name.sql new file mode 100644 index 000000000000..5599324c62b1 --- /dev/null +++ b/tests/queries/0_stateless/03073_analyzer_alias_as_column_name.sql @@ -0,0 +1,9 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/27068 +SET 
allow_experimental_analyzer=1; +CREATE TABLE test ( id String, create_time DateTime ) ENGINE = MergeTree ORDER BY id; + +insert into test values(1,'1970-02-01 00:00:00'); +insert into test values(2,'1970-02-01 00:00:00'); +insert into test values(3,'1970-03-01 00:00:00'); + +select id,'1997-02-01' as create_time from test where test.create_time='1970-02-01 00:00:00' ORDER BY id diff --git a/tests/queries/0_stateless/03074_analyzer_alias_column_in_view.reference b/tests/queries/0_stateless/03074_analyzer_alias_column_in_view.reference new file mode 100644 index 000000000000..aa47d0d46d47 --- /dev/null +++ b/tests/queries/0_stateless/03074_analyzer_alias_column_in_view.reference @@ -0,0 +1,2 @@ +0 +0 diff --git a/tests/queries/0_stateless/03074_analyzer_alias_column_in_view.sql b/tests/queries/0_stateless/03074_analyzer_alias_column_in_view.sql new file mode 100644 index 000000000000..4df5f6f48e6f --- /dev/null +++ b/tests/queries/0_stateless/03074_analyzer_alias_column_in_view.sql @@ -0,0 +1,7 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/28687 +SET allow_experimental_analyzer=1; +create view alias (dummy int, n alias dummy) as select * from system.one; + +select n from alias; + +select * from alias where n=0; diff --git a/tests/queries/0_stateless/03075_analyzer_subquery_alias.reference b/tests/queries/0_stateless/03075_analyzer_subquery_alias.reference new file mode 100644 index 000000000000..556d825db42a --- /dev/null +++ b/tests/queries/0_stateless/03075_analyzer_subquery_alias.reference @@ -0,0 +1 @@ +2 1 diff --git a/tests/queries/0_stateless/03075_analyzer_subquery_alias.sql b/tests/queries/0_stateless/03075_analyzer_subquery_alias.sql new file mode 100644 index 000000000000..416815e761b5 --- /dev/null +++ b/tests/queries/0_stateless/03075_analyzer_subquery_alias.sql @@ -0,0 +1,11 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/28777 +SET allow_experimental_analyzer=1; +SELECT + sum(q0.a2) AS a1, + sum(q0.a1) AS a9 +FROM +( + SELECT + 1 AS a1, + 2 AS a2 +) AS q0; diff --git a/tests/queries/0_stateless/03076_analyzer_multiple_joins_alias.reference b/tests/queries/0_stateless/03076_analyzer_multiple_joins_alias.reference new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/queries/0_stateless/03076_analyzer_multiple_joins_alias.sql b/tests/queries/0_stateless/03076_analyzer_multiple_joins_alias.sql new file mode 100644 index 000000000000..7ac9fe6b4464 --- /dev/null +++ b/tests/queries/0_stateless/03076_analyzer_multiple_joins_alias.sql @@ -0,0 +1,52 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/29734 +SET allow_experimental_analyzer=1; +SELECT * +FROM +( + SELECT 1 AS x +) AS a +INNER JOIN +( + SELECT + 1 AS x, + 2 AS y +) AS b ON (a.x = b.x) AND (a.y = b.y); -- { serverError UNKNOWN_IDENTIFIER } + + + +SELECT * +FROM +( + SELECT 1 AS x +) AS a +INNER JOIN +( + SELECT + 1 AS x, + 2 AS y +) AS b ON (a.x = b.x) AND (a.y = b.y) +INNER JOIN +( + SELECT 3 AS x +) AS c ON a.x = c.x; -- { serverError UNKNOWN_IDENTIFIER } + + +SELECT * +FROM +( + SELECT number AS x + FROM numbers(10) +) AS a +INNER JOIN +( + SELECT + number AS x, + number AS y + FROM numbers(10) +) AS b ON (a.x = b.x) AND (a.y = b.y) +INNER JOIN +( + SELECT number AS x + FROM numbers(10) +) AS c ON a.x = c.x; -- { serverError UNKNOWN_IDENTIFIER } + diff --git a/tests/queries/0_stateless/03077_analyzer_multi_scalar_subquery_aliases.reference b/tests/queries/0_stateless/03077_analyzer_multi_scalar_subquery_aliases.reference new file mode 100644 index 000000000000..6b134f711d46 --- 
/dev/null +++ b/tests/queries/0_stateless/03077_analyzer_multi_scalar_subquery_aliases.reference @@ -0,0 +1,21 @@ +9 9 9 9 +9 9 9 9 +9 9 9 9 +9 9 9 9 +9 9 9 9 +9 9 9 9 +9 9 9 9 +9 9 9 9 +9 9 9 9 +9 9 9 9 +1 +9 9 9 9 +9 9 9 9 +9 9 9 9 +9 9 9 9 +9 9 9 9 +9 9 9 9 +9 9 9 9 +9 9 9 9 +9 9 9 9 +9 9 9 9 diff --git a/tests/queries/0_stateless/03077_analyzer_multi_scalar_subquery_aliases.sql b/tests/queries/0_stateless/03077_analyzer_multi_scalar_subquery_aliases.sql new file mode 100644 index 000000000000..5a181023c57b --- /dev/null +++ b/tests/queries/0_stateless/03077_analyzer_multi_scalar_subquery_aliases.sql @@ -0,0 +1,23 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/33825 +SET allow_experimental_analyzer=1; +CREATE TABLE t1 (i Int64, j Int64) ENGINE = Memory; +INSERT INTO t1 SELECT number, number FROM system.numbers LIMIT 10; +SELECT + (SELECT max(i) FROM t1) as i, + (SELECT max(i) FROM t1) as j, + (SELECT max(i) FROM t1) as k, + (SELECT max(i) FROM t1) as l +FROM t1; + +SELECT 1; + +WITH ( + SELECT max(i) + FROM t1 + ) AS value +SELECT + value AS i, + value AS j, + value AS k, + value AS l +FROM t1; diff --git a/tests/queries/0_stateless/03078_analyzer_multi_scalar_subquery_aliases.reference b/tests/queries/0_stateless/03078_analyzer_multi_scalar_subquery_aliases.reference new file mode 100644 index 000000000000..b2c49b655d40 --- /dev/null +++ b/tests/queries/0_stateless/03078_analyzer_multi_scalar_subquery_aliases.reference @@ -0,0 +1,21 @@ +111111111111 +111111111111 +111111111111 +111111111111 +111111111111 +111111111111 +111111111111 +111111111111 +111111111111 +111111111111 +1 +2222222222 +2222222222 +2222222222 +2222222222 +2222222222 +2222222222 +2222222222 +2222222222 +2222222222 +2222222222 diff --git a/tests/queries/0_stateless/03078_analyzer_multi_scalar_subquery_aliases.sql b/tests/queries/0_stateless/03078_analyzer_multi_scalar_subquery_aliases.sql new file mode 100644 index 000000000000..d91a9ed106dd --- /dev/null +++ b/tests/queries/0_stateless/03078_analyzer_multi_scalar_subquery_aliases.sql @@ -0,0 +1,17 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/33825 +SET allow_experimental_analyzer=1; +CREATE TABLE t2 (first_column Int64, second_column Int64) ENGINE = Memory; +INSERT INTO t2 SELECT number, number FROM system.numbers LIMIT 10; + + +SELECT ( + SELECT 111111111111 + ) AS first_column +FROM t2; + +SELECT 1; + +SELECT ( + SELECT 2222222222 + ) AS second_column +FROM t2; diff --git a/tests/queries/0_stateless/03079_analyzer_numeric_literals_as_column_names.reference b/tests/queries/0_stateless/03079_analyzer_numeric_literals_as_column_names.reference new file mode 100644 index 000000000000..faff07c519f8 --- /dev/null +++ b/tests/queries/0_stateless/03079_analyzer_numeric_literals_as_column_names.reference @@ -0,0 +1,2 @@ +\N 1 +str diff --git a/tests/queries/0_stateless/03079_analyzer_numeric_literals_as_column_names.sql b/tests/queries/0_stateless/03079_analyzer_numeric_literals_as_column_names.sql new file mode 100644 index 000000000000..955d3b49a004 --- /dev/null +++ b/tests/queries/0_stateless/03079_analyzer_numeric_literals_as_column_names.sql @@ -0,0 +1,13 @@ +SET allow_experimental_analyzer=1; +CREATE TABLE testdata (`1` String) ENGINE=MergeTree ORDER BY tuple(); +INSERT INTO testdata VALUES ('testdata'); + +SELECT * +FROM ( + SELECT if(isValidUTF8(`1`), NULL, 'error!') AS error_message, + if(error_message IS NULL, 1, 0) AS valid + FROM testdata +) +WHERE valid; + +select * from (select 'str' as `1`) where 1; diff --git 
a/tests/queries/0_stateless/03080_analyzer_prefer_column_name_to_alias__virtual_columns.reference b/tests/queries/0_stateless/03080_analyzer_prefer_column_name_to_alias__virtual_columns.reference new file mode 100644 index 000000000000..ccf161abe8d6 --- /dev/null +++ b/tests/queries/0_stateless/03080_analyzer_prefer_column_name_to_alias__virtual_columns.reference @@ -0,0 +1,3 @@ +0 0 +0 0 + 0 0 diff --git a/tests/queries/0_stateless/03080_analyzer_prefer_column_name_to_alias__virtual_columns.sql b/tests/queries/0_stateless/03080_analyzer_prefer_column_name_to_alias__virtual_columns.sql new file mode 100644 index 000000000000..01ab868f9eab --- /dev/null +++ b/tests/queries/0_stateless/03080_analyzer_prefer_column_name_to_alias__virtual_columns.sql @@ -0,0 +1,28 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/35652 +SET allow_experimental_analyzer=1; +CREATE TABLE test ( + id UInt64 +) +ENGINE = MergeTree() +SAMPLE BY intHash32(id) +ORDER BY intHash32(id); + +SELECT + any(id), + any(id) AS id +FROM test +SETTINGS prefer_column_name_to_alias = 1; + +SELECT + any(_sample_factor), + any(_sample_factor) AS _sample_factor +FROM test +SETTINGS prefer_column_name_to_alias = 1; + +SELECT + any(_partition_id), + any(_sample_factor), + any(_partition_id) AS _partition_id, + any(_sample_factor) AS _sample_factor +FROM test +SETTINGS prefer_column_name_to_alias = 1; diff --git a/tests/queries/0_stateless/03080_incorrect_join_with_merge.reference b/tests/queries/0_stateless/03080_incorrect_join_with_merge.reference new file mode 100644 index 000000000000..51993f072d58 --- /dev/null +++ b/tests/queries/0_stateless/03080_incorrect_join_with_merge.reference @@ -0,0 +1,2 @@ +2 +2 diff --git a/tests/queries/0_stateless/03080_incorrect_join_with_merge.sql b/tests/queries/0_stateless/03080_incorrect_join_with_merge.sql new file mode 100644 index 000000000000..7682e6ce8667 --- /dev/null +++ b/tests/queries/0_stateless/03080_incorrect_join_with_merge.sql @@ -0,0 +1,68 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/29838 +SET allow_experimental_analyzer=1; +SET distributed_foreground_insert=1; + +CREATE TABLE first_table_lr +( + id String, + id2 String +) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/test_03080/alter', 'r1') +ORDER BY id; + + +CREATE TABLE first_table +( + id String, + id2 String +) +ENGINE = Distributed('test_shard_localhost', currentDatabase(), 'first_table_lr'); + + +CREATE TABLE second_table_lr +( + id String, + id2 String +) ENGINE = MergeTree() +ORDER BY id; + +CREATE TABLE second_table +( + id String, + id2 String +) +ENGINE = Distributed('test_shard_localhost', currentDatabase(), 'second_table_lr'); + +INSERT INTO first_table VALUES ('1', '2'), ('3', '4'); +INSERT INTO second_table VALUES ('1', '2'), ('3', '4'); + +CREATE TABLE two_tables +( + id String, + id2 String +) +ENGINE = Merge(currentDatabase(), '^(first_table)$'); + +SELECT + count() +FROM first_table as s +GLOBAL ANY JOIN second_table as f USING (id) +WHERE + f.id2 GLOBAL IN ( + SELECT + id2 + FROM second_table + GROUP BY id2 + ); + +SELECT + count() +FROM two_tables as s +GLOBAL ANY JOIN second_table as f USING (id) +WHERE + f.id2 GLOBAL IN ( + SELECT + id2 + FROM second_table + GROUP BY id2 + ); diff --git a/tests/queries/0_stateless/03081_analyzer_agg_func_CTE.reference b/tests/queries/0_stateless/03081_analyzer_agg_func_CTE.reference new file mode 100644 index 000000000000..bf1b89262365 --- /dev/null +++ b/tests/queries/0_stateless/03081_analyzer_agg_func_CTE.reference @@ -0,0 +1,2 @@ 
+2020-01-01 +9 diff --git a/tests/queries/0_stateless/03081_analyzer_agg_func_CTE.sql b/tests/queries/0_stateless/03081_analyzer_agg_func_CTE.sql new file mode 100644 index 000000000000..e6a540dc5df7 --- /dev/null +++ b/tests/queries/0_stateless/03081_analyzer_agg_func_CTE.sql @@ -0,0 +1,19 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/36189 +SET allow_experimental_analyzer=1; +CREATE TABLE test +( + `dt` Date, + `text` String +) +ENGINE = MergeTree +ORDER BY dt; + +insert into test values ('2020-01-01', 'text1'), ('2019-01-01', 'text2'), ('1900-01-01', 'text3'); + +WITH max(dt) AS maxDt +SELECT maxDt +FROM test; + +WITH max(number) AS maxDt +SELECT maxDt +FROM numbers(10); diff --git a/tests/queries/0_stateless/03082_analyzer_left_join_correct_column.reference b/tests/queries/0_stateless/03082_analyzer_left_join_correct_column.reference new file mode 100644 index 000000000000..ad3f3d53ab58 --- /dev/null +++ b/tests/queries/0_stateless/03082_analyzer_left_join_correct_column.reference @@ -0,0 +1,2 @@ +pk1 2 +pk1 2 diff --git a/tests/queries/0_stateless/03082_analyzer_left_join_correct_column.sql b/tests/queries/0_stateless/03082_analyzer_left_join_correct_column.sql new file mode 100644 index 000000000000..8f17248ed0d6 --- /dev/null +++ b/tests/queries/0_stateless/03082_analyzer_left_join_correct_column.sql @@ -0,0 +1,31 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/39634 +SET allow_experimental_analyzer=1; +CREATE TABLE test1 +( + `pk` String, + `x.y` Decimal(18, 4) +) +ENGINE = MergeTree() +ORDER BY (pk); + +CREATE TABLE test2 +( + `pk` String, + `x.y` Decimal(18, 4) +) +ENGINE = MergeTree() +ORDER BY (pk); + +INSERT INTO test1 SELECT 'pk1', 1; + +INSERT INTO test2 SELECT 'pk1', 2; + +SELECT t1.pk, t2.x.y +FROM test1 t1 +LEFT JOIN test2 t2 + on t1.pk = t2.pk; + +SELECT t1.pk, t2.`x.y` +FROM test1 t1 +LEFT JOIN test2 t2 + on t1.pk = t2.pk; diff --git a/tests/queries/0_stateless/03084_analyzer_join_column_alias.reference b/tests/queries/0_stateless/03084_analyzer_join_column_alias.reference new file mode 100644 index 000000000000..acbb84063471 --- /dev/null +++ b/tests/queries/0_stateless/03084_analyzer_join_column_alias.reference @@ -0,0 +1 @@ +2023-01-01 diff --git a/tests/queries/0_stateless/03084_analyzer_join_column_alias.sql b/tests/queries/0_stateless/03084_analyzer_join_column_alias.sql new file mode 100644 index 000000000000..930726898b5e --- /dev/null +++ b/tests/queries/0_stateless/03084_analyzer_join_column_alias.sql @@ -0,0 +1,24 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/47432 +SET allow_experimental_analyzer=1; +create table t1 +engine = MergeTree() +order by tuple() +as +select 1 as user_id, 2 as level; + + +create table t2 +engine = MergeTree() +order by tuple() +as +select 1 as user_id, 'website' as event_source, '2023-01-01 00:00:00'::DateTime as timestamp; + + +alter table t2 +add column date Date alias toDate(timestamp); + +SELECT + any(t2.date) as any_val +FROM t1 AS t1 +LEFT JOIN t2 as t2 + ON (t1.user_id = t2.user_id); diff --git a/tests/queries/0_stateless/03085_analyzer_alias_column_group_by.reference b/tests/queries/0_stateless/03085_analyzer_alias_column_group_by.reference new file mode 100644 index 000000000000..804f12662b87 --- /dev/null +++ b/tests/queries/0_stateless/03085_analyzer_alias_column_group_by.reference @@ -0,0 +1 @@ +String 1 diff --git a/tests/queries/0_stateless/03085_analyzer_alias_column_group_by.sql b/tests/queries/0_stateless/03085_analyzer_alias_column_group_by.sql new file mode 100644 index 
000000000000..fd67194b08b5 --- /dev/null +++ b/tests/queries/0_stateless/03085_analyzer_alias_column_group_by.sql @@ -0,0 +1,5 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/54910 +SET allow_experimental_analyzer=1; +SELECT toTypeName(stat_standard_id) AS stat_standard_id_1, count(1) AS value +FROM ( SELECT 'string value' AS stat_standard_id ) +GROUP BY stat_standard_id_1 LIMIT 1 diff --git a/tests/queries/0_stateless/03086_analyzer_window_func_part_of_group_by.reference b/tests/queries/0_stateless/03086_analyzer_window_func_part_of_group_by.reference new file mode 100644 index 000000000000..c8b2d51ae531 --- /dev/null +++ b/tests/queries/0_stateless/03086_analyzer_window_func_part_of_group_by.reference @@ -0,0 +1,2 @@ +1 2 +2 2 diff --git a/tests/queries/0_stateless/03086_analyzer_window_func_part_of_group_by.sql b/tests/queries/0_stateless/03086_analyzer_window_func_part_of_group_by.sql new file mode 100644 index 000000000000..31747328d1fc --- /dev/null +++ b/tests/queries/0_stateless/03086_analyzer_window_func_part_of_group_by.sql @@ -0,0 +1,13 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/57321 +SET allow_experimental_analyzer=1; +SELECT + ver, + max(ver) OVER () AS ver_max +FROM +( + SELECT 1 AS ver + UNION ALL + SELECT 2 AS ver +) +GROUP BY ver +ORDER BY ver; diff --git a/tests/queries/0_stateless/03087_analyzer_subquery_with_alias.reference b/tests/queries/0_stateless/03087_analyzer_subquery_with_alias.reference new file mode 100644 index 000000000000..7660873d1031 --- /dev/null +++ b/tests/queries/0_stateless/03087_analyzer_subquery_with_alias.reference @@ -0,0 +1 @@ +[1] diff --git a/tests/queries/0_stateless/03087_analyzer_subquery_with_alias.sql b/tests/queries/0_stateless/03087_analyzer_subquery_with_alias.sql new file mode 100644 index 000000000000..6546e50c99e0 --- /dev/null +++ b/tests/queries/0_stateless/03087_analyzer_subquery_with_alias.sql @@ -0,0 +1,16 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/59154 +SET allow_experimental_analyzer=1; +SELECT * +FROM +( + WITH + assumeNotNull(( + SELECT 0.9 + )) AS TUNING, + ELEMENT_QUERY AS + ( + SELECT quantiles(TUNING)(1) + ) + SELECT * + FROM ELEMENT_QUERY +); diff --git a/tests/queries/0_stateless/03088_analyzer_ambiguous_column_multi_call.reference b/tests/queries/0_stateless/03088_analyzer_ambiguous_column_multi_call.reference new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/queries/0_stateless/03088_analyzer_ambiguous_column_multi_call.sql b/tests/queries/0_stateless/03088_analyzer_ambiguous_column_multi_call.sql new file mode 100644 index 000000000000..e6f1ed81f91b --- /dev/null +++ b/tests/queries/0_stateless/03088_analyzer_ambiguous_column_multi_call.sql @@ -0,0 +1,13 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/61014 +SET allow_experimental_analyzer=1; + +DROP DATABASE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}; +create database {CLICKHOUSE_DATABASE:Identifier}; + +create table {CLICKHOUSE_DATABASE:Identifier}.a (i int) engine = Log(); + +select + {CLICKHOUSE_DATABASE:Identifier}.a.i +from + {CLICKHOUSE_DATABASE:Identifier}.a, + {CLICKHOUSE_DATABASE:Identifier}.a as x; diff --git a/tests/queries/0_stateless/03089_analyzer_alias_replacement.reference b/tests/queries/0_stateless/03089_analyzer_alias_replacement.reference new file mode 100644 index 000000000000..2f1b638ff548 --- /dev/null +++ b/tests/queries/0_stateless/03089_analyzer_alias_replacement.reference @@ -0,0 +1,2 @@ +1 +4 diff --git a/tests/queries/0_stateless/03089_analyzer_alias_replacement.sql 
b/tests/queries/0_stateless/03089_analyzer_alias_replacement.sql new file mode 100644 index 000000000000..069da5fdd65b --- /dev/null +++ b/tests/queries/0_stateless/03089_analyzer_alias_replacement.sql @@ -0,0 +1,9 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/61950 +SET allow_experimental_analyzer=1; + +with dummy + 1 as dummy select dummy from system.one; + +WITH dummy + 3 AS dummy +SELECT dummy + 1 AS y +FROM system.one +SETTINGS enable_global_with_statement = 1; diff --git a/tests/queries/0_stateless/03090_analyzer_multiple_using_statements.reference b/tests/queries/0_stateless/03090_analyzer_multiple_using_statements.reference new file mode 100644 index 000000000000..573541ac9702 --- /dev/null +++ b/tests/queries/0_stateless/03090_analyzer_multiple_using_statements.reference @@ -0,0 +1 @@ +0 diff --git a/tests/queries/0_stateless/03090_analyzer_multiple_using_statements.sql b/tests/queries/0_stateless/03090_analyzer_multiple_using_statements.sql new file mode 100644 index 000000000000..c35f33782ff2 --- /dev/null +++ b/tests/queries/0_stateless/03090_analyzer_multiple_using_statements.sql @@ -0,0 +1,17 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/55647 +SET allow_experimental_analyzer=1; + +SELECT +* +FROM ( + SELECT * + FROM system.one +) a +JOIN ( + SELECT * + FROM system.one +) b USING dummy +JOIN ( + SELECT * + FROM system.one +) c USING dummy diff --git a/tests/queries/0_stateless/03091_analyzer_same_table_name_in_different_databases.reference b/tests/queries/0_stateless/03091_analyzer_same_table_name_in_different_databases.reference new file mode 100644 index 000000000000..ce45f6636b2e --- /dev/null +++ b/tests/queries/0_stateless/03091_analyzer_same_table_name_in_different_databases.reference @@ -0,0 +1,4 @@ +1 0 + +using asterisk 1 0 +using field name 1 0 diff --git a/tests/queries/0_stateless/03091_analyzer_same_table_name_in_different_databases.sql b/tests/queries/0_stateless/03091_analyzer_same_table_name_in_different_databases.sql new file mode 100644 index 000000000000..599275c66e86 --- /dev/null +++ b/tests/queries/0_stateless/03091_analyzer_same_table_name_in_different_databases.sql @@ -0,0 +1,31 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/61947 +SET allow_experimental_analyzer=1; + +DROP DATABASE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}; +DROP DATABASE IF EXISTS {CLICKHOUSE_DATABASE_1:Identifier}; + +CREATE DATABASE {CLICKHOUSE_DATABASE:Identifier}; +CREATE DATABASE {CLICKHOUSE_DATABASE_1:Identifier}; +CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.`1-1` (field Int8) ENGINE = Memory; +CREATE TABLE {CLICKHOUSE_DATABASE_1:Identifier}.`1-1` (field Int8) ENGINE = Memory; +CREATE TABLE {CLICKHOUSE_DATABASE_1:Identifier}.`2-1` (field Int8) ENGINE = Memory; + +INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.`1-1` VALUES (1); + +SELECT * +FROM {CLICKHOUSE_DATABASE:Identifier}.`1-1` +LEFT JOIN {CLICKHOUSE_DATABASE_1:Identifier}.`1-1` ON {CLICKHOUSE_DATABASE:Identifier}.`1-1`.field = {CLICKHOUSE_DATABASE_1:Identifier}.`1-1`.field; + +SELECT ''; + +SELECT * FROM +( +SELECT 'using asterisk', {CLICKHOUSE_DATABASE:Identifier}.`1-1`.*, {CLICKHOUSE_DATABASE_1:Identifier}.`1-1`.* +FROM {CLICKHOUSE_DATABASE:Identifier}.`1-1` +LEFT JOIN {CLICKHOUSE_DATABASE_1:Identifier}.`1-1` USING field +UNION ALL +SELECT 'using field name', {CLICKHOUSE_DATABASE:Identifier}.`1-1`.field, {CLICKHOUSE_DATABASE_1:Identifier}.`1-1`.field +FROM {CLICKHOUSE_DATABASE:Identifier}.`1-1` +LEFT JOIN {CLICKHOUSE_DATABASE_1:Identifier}.`1-1` USING field +) +ORDER BY ALL; diff --git 
a/tests/queries/0_stateless/03092_analyzer_same_table_name_in_different_databases.reference b/tests/queries/0_stateless/03092_analyzer_same_table_name_in_different_databases.reference new file mode 100644 index 000000000000..d00491fd7e5b --- /dev/null +++ b/tests/queries/0_stateless/03092_analyzer_same_table_name_in_different_databases.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/03092_analyzer_same_table_name_in_different_databases.sql b/tests/queries/0_stateless/03092_analyzer_same_table_name_in_different_databases.sql new file mode 100644 index 000000000000..10d18324c3c4 --- /dev/null +++ b/tests/queries/0_stateless/03092_analyzer_same_table_name_in_different_databases.sql @@ -0,0 +1,18 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/61947 +SET allow_experimental_analyzer=1; + +DROP DATABASE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}; +DROP DATABASE IF EXISTS {CLICKHOUSE_DATABASE_1:Identifier}; + +CREATE DATABASE {CLICKHOUSE_DATABASE:Identifier}; +CREATE DATABASE {CLICKHOUSE_DATABASE_1:Identifier}; +CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.`1-1` (field Int8) ENGINE = Memory; +CREATE TABLE {CLICKHOUSE_DATABASE_1:Identifier}.`2-1` (field Int8) ENGINE = Memory; +CREATE TABLE {CLICKHOUSE_DATABASE_1:Identifier}.`3-1` (field Int8) ENGINE = Memory; + +INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.`1-1` VALUES (1); + +SELECT {CLICKHOUSE_DATABASE:Identifier}.`1-1`.* +FROM {CLICKHOUSE_DATABASE:Identifier}.`1-1` +LEFT JOIN {CLICKHOUSE_DATABASE_1:Identifier}.`2-1` ON {CLICKHOUSE_DATABASE:Identifier}.`1-1`.field = {CLICKHOUSE_DATABASE_1:Identifier}.`2-1`.field +LEFT JOIN {CLICKHOUSE_DATABASE_1:Identifier}.`3-1` ON {CLICKHOUSE_DATABASE_1:Identifier}.`2-1`.field = {CLICKHOUSE_DATABASE_1:Identifier}.`3-1`.field; diff --git a/tests/queries/0_stateless/03093_bug37909_query_does_not_finish.reference b/tests/queries/0_stateless/03093_bug37909_query_does_not_finish.reference new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/queries/0_stateless/03093_bug37909_query_does_not_finish.sql b/tests/queries/0_stateless/03093_bug37909_query_does_not_finish.sql new file mode 100644 index 000000000000..463922c4e29a --- /dev/null +++ b/tests/queries/0_stateless/03093_bug37909_query_does_not_finish.sql @@ -0,0 +1,77 @@ +-- Bug 37909 + +SELECT + v_date AS vDate, + round(sum(v_share)) AS v_sum +FROM +( + WITH + ( + SELECT rand() % 10000 + ) AS dummy_1, + ( + SELECT rand() % 10000 + ) AS dummy_2, + ( + SELECT rand() % 10000 + ) AS dummy_3, + _v AS + ( + SELECT + xxHash64(rand()) % 100000 AS d_id, + toDate(parseDateTimeBestEffort('2022-01-01') + (rand() % 2600000)) AS v_date + FROM numbers(1000000) + ORDER BY d_id ASC + ), + _i AS + ( + SELECT xxHash64(rand()) % 40000 AS d_id + FROM numbers(1000000) + ), + not_i AS + ( + SELECT + NULL AS v_date, + d_id, + 0 AS v_share + FROM _i + LIMIT 100 + ) + SELECT * + FROM + ( + SELECT + d_id, + v_date, + v_share + FROM not_i + UNION ALL + SELECT + d_id, + v_date, + 1 AS v_share + FROM + ( + SELECT + d_id, + arrayJoin(groupArray(v_date)) AS v_date + FROM + ( + SELECT + v_date, + d_id + FROM _v + UNION ALL + SELECT + NULL AS v_date, + d_id + FROM _i + ) + GROUP BY d_id + ) + ) + WHERE (v_date >= '2022-05-08') AND (v_date <= '2022-06-07') +) +/* WHERE (v_date >= '2022-05-08') AND (v_date <= '2022-06-07') placing condition has same effect */ +GROUP BY vDate +ORDER BY vDate ASC diff --git a/tests/queries/0_stateless/03093_filter_push_down_crash.reference b/tests/queries/0_stateless/03093_filter_push_down_crash.reference new file mode 
100644 index 000000000000..bf98540f4b3d --- /dev/null +++ b/tests/queries/0_stateless/03093_filter_push_down_crash.reference @@ -0,0 +1,5 @@ +1 \N 1 +1 \N 1 +1 \N 1 +1 \N 1 +1 \N 1 diff --git a/tests/queries/0_stateless/03093_filter_push_down_crash.sql.j2 b/tests/queries/0_stateless/03093_filter_push_down_crash.sql.j2 new file mode 100644 index 000000000000..2cbbd89ca0cb --- /dev/null +++ b/tests/queries/0_stateless/03093_filter_push_down_crash.sql.j2 @@ -0,0 +1,11 @@ +{% for join_algorithm in ['default', 'full_sorting_merge', 'hash', 'partial_merge', 'grace_hash'] -%} + +SET join_algorithm = '{{ join_algorithm }}'; + +SELECT * +FROM (SELECT 1 AS key) AS t1 +JOIN (SELECT NULL, 1 AS key) AS t2 +ON t1.key = t2.key +WHERE t1.key ORDER BY key; + +{% endfor -%} diff --git a/tests/queries/0_stateless/backups/old_backup_with_matview_inner_table_metadata.zip b/tests/queries/0_stateless/backups/old_backup_with_matview_inner_table_metadata.zip new file mode 100644 index 000000000000..a2476da7dedf Binary files /dev/null and b/tests/queries/0_stateless/backups/old_backup_with_matview_inner_table_metadata.zip differ diff --git a/tests/queries/0_stateless/data_hive/fields_number_variable.txt b/tests/queries/0_stateless/data_hive/fields_number_variable.txt new file mode 100644 index 000000000000..b4e037978b90 --- /dev/null +++ b/tests/queries/0_stateless/data_hive/fields_number_variable.txt @@ -0,0 +1,2 @@ +1,3 +3,5,9 \ No newline at end of file diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 8aa2a463c477..9f7776f5201d 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -29,6 +29,13 @@ Alexey AnyEvent AppleClang Approximative +arrayDotProduct +arrayEnumerateDenseRanked +arrayEnumerateUniqRanked +arrayFirstOrNull +arrayLastOrNull +arrayPartialShuffle +arrayShuffle ArrayJoin ArrowStream AsyncInsertCacheSize @@ -176,6 +183,8 @@ CompiledExpressionCacheCount ComplexKeyCache ComplexKeyDirect ComplexKeyHashed +Composable +composable Config ConnectionDetails Const @@ -452,6 +461,9 @@ Khanna KittenHouse Klickhouse Kolmogorov +Konstantin +kostik +kostikConsistentHash Korzeniewski Kubernetes LDAP @@ -554,6 +566,17 @@ Mongodb mortonDecode mortonEncode MsgPack +multiSearchAllPositionsCaseInsensitive +multiSearchAllPositionsCaseInsensitiveUTF +multiSearchAnyCaseInsensitive +multiSearchAnyCaseInsensitiveUTF +multiSearchAnyUTF +multiSearchFirstIndexCaseInsensitive +multiSearchFirstIndexCaseInsensitiveUTF +multiSearchFirstIndexUTF +multiSearchFirstPositionCaseInsensitive +multiSearchFirstPositionCaseInsensitiveUTF +multiSearchFirstPositionUTF MultiPolygon Multiline Multiqueries @@ -655,6 +678,7 @@ OTLP OUTFILE ObjectId Observability +Oblakov Octonica Ok OnTime @@ -675,6 +699,7 @@ PCRE PRCP PREWHERE PROCESSLIST +PROXYv PSUN PagerDuty ParallelFormattingOutputFormatThreads @@ -860,6 +885,7 @@ Soundex SpanKind Spearman's SquaredDistance +SquaredNorm StartTLS StartTime StartupSystemTables @@ -989,6 +1015,7 @@ UncompressedCacheBytes UncompressedCacheCells UnidirectionalEdgeIsValid UniqThetaSketch +unshuffled Updatable Uppercased Uptime @@ -1935,6 +1962,7 @@ mmap mmapped modularization moduloOrZero +moduli mongodb monthName moscow @@ -2646,6 +2674,12 @@ tupleMultiplyByNumber tupleNegate tuplePlus tupleToNameValuePairs +tupleIntDiv +tupleIntDivByNumber +tupleIntDivOrZero +tupleIntDivOrZeroByNumber +tupleModulo +tupleModuloByNumber turbostat txt typename @@ -2760,6 +2794,7 @@ 
wordShingleSimHashUTF wordshingleMinHash writability wrt +wyHash xcode xeus xkcd diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index ca1a23a99db4..060a0107c1e3 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -1,3 +1,4 @@ +v24.3.2.23-lts 2024-04-03 v24.3.1.2672-lts 2024-03-27 v24.2.2.71-stable 2024-03-15 v24.2.1.2248-stable 2024-02-29 diff --git a/utils/postprocess-traces/postprocess-traces.pl b/utils/postprocess-traces/postprocess-traces.pl index 3e50f64d864e..1c198908580c 100755 --- a/utils/postprocess-traces/postprocess-traces.pl +++ b/utils/postprocess-traces/postprocess-traces.pl @@ -13,9 +13,9 @@ sub process_stacktrace my $group = \$grouped_stacks; for my $frame (reverse @current_stack) { + $group = \$$group->{children}{$frame}; $$group->{count} ||= 0; ++$$group->{count}; - $group = \$$group->{children}{$frame}; } @current_stack = (); @@ -47,7 +47,7 @@ sub print_group for my $key (sort { $group->{children}{$b}{count} <=> $group->{children}{$a}{count} } keys %{$group->{children}}) { - my $count = $group->{count}; + my $count = $group->{children}{$key}{count}; print(('| ' x $level) . $count . (' ' x (5 - (length $count))) . $key . "\n"); print_group($group->{children}{$key}, $level + 1); }