From ee2ec2f1c29144053dd8a4543a885f929aec6dc5 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 2 Apr 2024 13:39:58 +0000 Subject: [PATCH 01/90] Remove double-conversion submodule --- .gitmodules | 3 --- contrib/double-conversion | 1 - 2 files changed, 4 deletions(-) delete mode 160000 contrib/double-conversion diff --git a/.gitmodules b/.gitmodules index a618104f3642..b6a5c5824b8b 100644 --- a/.gitmodules +++ b/.gitmodules @@ -22,9 +22,6 @@ [submodule "contrib/capnproto"] path = contrib/capnproto url = https://github.com/ClickHouse/capnproto -[submodule "contrib/double-conversion"] - path = contrib/double-conversion - url = https://github.com/google/double-conversion [submodule "contrib/re2"] path = contrib/re2 url = https://github.com/google/re2 diff --git a/contrib/double-conversion b/contrib/double-conversion deleted file mode 160000 index cf2f0f3d547d..000000000000 --- a/contrib/double-conversion +++ /dev/null @@ -1 +0,0 @@ -Subproject commit cf2f0f3d547dc73b4612028a155b80536902ba02 From 732c215a27c02d66bea48c27c8bab6ebfbf4b5a4 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 2 Apr 2024 13:40:40 +0000 Subject: [PATCH 02/90] Add ClickHouse double-conversion submodule --- .gitmodules | 3 +++ contrib/double-conversion | 1 + 2 files changed, 4 insertions(+) create mode 160000 contrib/double-conversion diff --git a/.gitmodules b/.gitmodules index b6a5c5824b8b..6c8de2fce645 100644 --- a/.gitmodules +++ b/.gitmodules @@ -366,3 +366,6 @@ [submodule "contrib/idna"] path = contrib/idna url = https://github.com/ada-url/idna.git +[submodule "contrib/double-conversion"] + path = contrib/double-conversion + url = https://github.com/ClickHouse/double-conversion.git diff --git a/contrib/double-conversion b/contrib/double-conversion new file mode 160000 index 000000000000..cf2f0f3d547d --- /dev/null +++ b/contrib/double-conversion @@ -0,0 +1 @@ +Subproject commit cf2f0f3d547dc73b4612028a155b80536902ba02 From 7d87adc91ac3941deb0fa94dbe6d8237c35434ad Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 2 Apr 2024 13:51:36 +0000 Subject: [PATCH 03/90] Upgrade double-conversion to 3.3.0 --- contrib/double-conversion-cmake/CMakeLists.txt | 17 +++++++++-------- src/IO/ReadHelpers.h | 1 - src/IO/readFloatText.h | 1 - 3 files changed, 9 insertions(+), 10 deletions(-) diff --git a/contrib/double-conversion-cmake/CMakeLists.txt b/contrib/double-conversion-cmake/CMakeLists.txt index dc5b1719abfe..4bea86985a1a 100644 --- a/contrib/double-conversion-cmake/CMakeLists.txt +++ b/contrib/double-conversion-cmake/CMakeLists.txt @@ -1,14 +1,15 @@ SET(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/double-conversion") add_library(_double-conversion -"${LIBRARY_DIR}/double-conversion/bignum.cc" -"${LIBRARY_DIR}/double-conversion/bignum-dtoa.cc" -"${LIBRARY_DIR}/double-conversion/cached-powers.cc" -"${LIBRARY_DIR}/double-conversion/diy-fp.cc" -"${LIBRARY_DIR}/double-conversion/double-conversion.cc" -"${LIBRARY_DIR}/double-conversion/fast-dtoa.cc" -"${LIBRARY_DIR}/double-conversion/fixed-dtoa.cc" -"${LIBRARY_DIR}/double-conversion/strtod.cc") + "${LIBRARY_DIR}/double-conversion/bignum-dtoa.cc" + "${LIBRARY_DIR}/double-conversion/bignum.cc" + "${LIBRARY_DIR}/double-conversion/cached-powers.cc" + "${LIBRARY_DIR}/double-conversion/double-to-string.cc" + "${LIBRARY_DIR}/double-conversion/fast-dtoa.cc" + "${LIBRARY_DIR}/double-conversion/fixed-dtoa.cc" + "${LIBRARY_DIR}/double-conversion/string-to-double.cc" + "${LIBRARY_DIR}/double-conversion/strtod.cc" +) target_include_directories(_double-conversion SYSTEM 
BEFORE PUBLIC "${LIBRARY_DIR}") diff --git a/src/IO/ReadHelpers.h b/src/IO/ReadHelpers.h index ca568c469b40..36831fd11711 100644 --- a/src/IO/ReadHelpers.h +++ b/src/IO/ReadHelpers.h @@ -41,7 +41,6 @@ #include #include -#include static constexpr auto DEFAULT_MAX_STRING_SIZE = 1_GiB; diff --git a/src/IO/readFloatText.h b/src/IO/readFloatText.h index 597f0a06fb96..d1652784cc2a 100644 --- a/src/IO/readFloatText.h +++ b/src/IO/readFloatText.h @@ -4,7 +4,6 @@ #include #include #include -#include #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wunneeded-internal-declaration" From 60392f7b9d1ab445090844c3448d97262581ed49 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Tue, 2 Apr 2024 16:24:35 +0200 Subject: [PATCH 04/90] Add test for #35215 --- .../0_stateless/03033_virtual_column_override.reference | 1 + tests/queries/0_stateless/03033_virtual_column_override.sql | 3 +++ 2 files changed, 4 insertions(+) create mode 100644 tests/queries/0_stateless/03033_virtual_column_override.reference create mode 100644 tests/queries/0_stateless/03033_virtual_column_override.sql diff --git a/tests/queries/0_stateless/03033_virtual_column_override.reference b/tests/queries/0_stateless/03033_virtual_column_override.reference new file mode 100644 index 000000000000..d00491fd7e5b --- /dev/null +++ b/tests/queries/0_stateless/03033_virtual_column_override.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/03033_virtual_column_override.sql b/tests/queries/0_stateless/03033_virtual_column_override.sql new file mode 100644 index 000000000000..49258bbb5337 --- /dev/null +++ b/tests/queries/0_stateless/03033_virtual_column_override.sql @@ -0,0 +1,3 @@ +DROP TABLE IF EXISTS override_test; +CREATE TABLE override_test (_part UInt32) ENGINE = MergeTree ORDER BY tuple() AS SELECT 1; +SELECT _part FROM override_test; From 427ad784e8a07c204492006e47446d0c3deff76d Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 2 Apr 2024 16:04:24 +0000 Subject: [PATCH 05/90] Actually bump the submodule --- contrib/double-conversion | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/double-conversion b/contrib/double-conversion index cf2f0f3d547d..4f7a25d8ced8 160000 --- a/contrib/double-conversion +++ b/contrib/double-conversion @@ -1 +1 @@ -Subproject commit cf2f0f3d547dc73b4612028a155b80536902ba02 +Subproject commit 4f7a25d8ced8c7cf6eee6fd09d6788eaa23c9afe From 98ac8031e09eb45ac63b51f467b99f73fc8accaa Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Wed, 3 Apr 2024 19:49:23 +0200 Subject: [PATCH 06/90] add fault injection --- docker/test/stress/run.sh | 1 + programs/server/Server.cpp | 4 +++ src/Common/ThreadPool.cpp | 30 +++++++++++++++++++ src/Common/ThreadPool.h | 15 ++++++++++ src/Core/ServerSettings.h | 1 + .../cannot_allocate_thread_injection.xml | 3 ++ tests/config/install.sh | 6 ++++ 7 files changed, 60 insertions(+) create mode 100644 tests/config/config.d/cannot_allocate_thread_injection.xml diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh index 6c6caf872e9b..81cc61c90bc4 100644 --- a/docker/test/stress/run.sh +++ b/docker/test/stress/run.sh @@ -215,6 +215,7 @@ stop_server export USE_S3_STORAGE_FOR_MERGE_TREE=1 export RANDOMIZE_OBJECT_KEY_TYPE=1 export ZOOKEEPER_FAULT_INJECTION=1 +export THREAD_POOL_FAULT_INJECTION=1 configure # But we still need default disk because some tables loaded only into it diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 450e1696c115..071847c34582 100644 --- 
a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -1569,6 +1569,8 @@ try new_server_settings.http_connections_store_limit, }); + CannotAllocateThreadFaultInjector::setFaultProbability(new_server_settings.cannot_allocate_thread_fault_injection_probability); + ProfileEvents::increment(ProfileEvents::MainConfigLoads); /// Must be the last. @@ -2058,6 +2060,8 @@ try startup_watch.stop(); ProfileEvents::increment(ProfileEvents::ServerStartupMilliseconds, startup_watch.elapsedMilliseconds()); + CannotAllocateThreadFaultInjector::setFaultProbability(server_settings.cannot_allocate_thread_fault_injection_probability); + try { global_context->startClusterDiscovery(); diff --git a/src/Common/ThreadPool.cpp b/src/Common/ThreadPool.cpp index 3c2e6228421e..1adf716be24c 100644 --- a/src/Common/ThreadPool.cpp +++ b/src/Common/ThreadPool.cpp @@ -202,6 +202,9 @@ ReturnType ThreadPoolImpl::scheduleImpl(Job job, Priority priority, std: /// Check if there are enough threads to process job. if (threads.size() < std::min(max_threads, scheduled_jobs + 1)) { + if (CannotAllocateThreadFaultInjector::injectFault()) + return on_error("fault injected"); + try { threads.emplace_front(); @@ -541,3 +544,30 @@ void GlobalThreadPool::shutdown() the_instance->finalize(); } } + +CannotAllocateThreadFaultInjector & CannotAllocateThreadFaultInjector::instance() +{ + static CannotAllocateThreadFaultInjector ins; + return ins; +} + +void CannotAllocateThreadFaultInjector::setFaultProbability(double probability) +{ + auto & ins = instance(); + std::lock_guard lock(ins.mutex); + ins.enabled = 0 < probability && probability <= 1; + if (ins.enabled) + ins.random.emplace(probability); + else + ins.random.reset(); +} + +bool CannotAllocateThreadFaultInjector::injectFault() +{ + auto & ins = instance(); + if (!ins.enabled.load(std::memory_order_relaxed)) + return false; + + std::lock_guard lock(ins.mutex); + return ins.random && (*ins.random)(ins.rndgen); +} diff --git a/src/Common/ThreadPool.h b/src/Common/ThreadPool.h index 31e4eabf63b8..191a8f6271d7 100644 --- a/src/Common/ThreadPool.h +++ b/src/Common/ThreadPool.h @@ -10,8 +10,10 @@ #include #include #include +#include #include +#include #include #include @@ -324,3 +326,16 @@ using ThreadFromGlobalPool = ThreadFromGlobalPoolImpl; /// To make sure the tracing context is correctly propagated, we explicitly disable context propagation(including initialization and de-initialization) at underlying worker level. /// using ThreadPool = ThreadPoolImpl; + +/// Enables fault injections globally for all thread pools +class CannotAllocateThreadFaultInjector +{ + std::atomic_bool enabled = false; + std::mutex mutex; + pcg64_fast rndgen; + std::optional random; + static CannotAllocateThreadFaultInjector & instance(); +public: + static void setFaultProbability(double probability); + static bool injectFault(); +}; diff --git a/src/Core/ServerSettings.h b/src/Core/ServerSettings.h index 6608a35a5a2c..8fbf4749d499 100644 --- a/src/Core/ServerSettings.h +++ b/src/Core/ServerSettings.h @@ -41,6 +41,7 @@ namespace DB M(UInt64, max_backup_bandwidth_for_server, 0, "The maximum read speed in bytes per second for all backups on server. 
Zero means unlimited.", 0) \ M(UInt64, restore_threads, 16, "The maximum number of threads to execute RESTORE requests.", 0) \ M(Bool, shutdown_wait_backups_and_restores, true, "If set to true ClickHouse will wait for running backups and restores to finish before shutdown.", 0) \ + M(Double, cannot_allocate_thread_fault_injection_probability, 0, "For testing purposes.", 0) \ M(Int32, max_connections, 1024, "Max server connections.", 0) \ M(UInt32, asynchronous_metrics_update_period_s, 1, "Period in seconds for updating asynchronous metrics.", 0) \ M(UInt32, asynchronous_heavy_metrics_update_period_s, 120, "Period in seconds for updating heavy asynchronous metrics.", 0) \ diff --git a/tests/config/config.d/cannot_allocate_thread_injection.xml b/tests/config/config.d/cannot_allocate_thread_injection.xml new file mode 100644 index 000000000000..42bc0589b115 --- /dev/null +++ b/tests/config/config.d/cannot_allocate_thread_injection.xml @@ -0,0 +1,3 @@ + + 0.01 + diff --git a/tests/config/install.sh b/tests/config/install.sh index 652d25a0a35b..5da64bf4e696 100755 --- a/tests/config/install.sh +++ b/tests/config/install.sh @@ -132,6 +132,12 @@ else ln -sf $SRC_PATH/config.d/zookeeper.xml $DEST_SERVER_PATH/config.d/ fi +if [[ -n "$THREAD_POOL_FAULT_INJECTION" ]] && [[ "$THREAD_POOL_FAULT_INJECTION" -eq 1 ]]; then + ln -sf $SRC_PATH/config.d/cannot_allocate_thread_injection.xml $DEST_SERVER_PATH/config.d/ +else + rm -f $DEST_SERVER_PATH/config.d/cannot_allocate_thread_injection.xml ||: +fi + # We randomize creating the snapshot on exit for Keeper to test out using older snapshots value=$(($RANDOM % 2)) sed --follow-symlinks -i "s|[01]|$value|" $DEST_SERVER_PATH/config.d/keeper_port.xml From c53b20a77070841289c018c66ae806cc74db832e Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Wed, 3 Apr 2024 20:57:12 +0200 Subject: [PATCH 07/90] fix --- programs/server/Server.cpp | 3 ++- src/Common/AsyncLoader.cpp | 1 + src/Common/ThreadPool.cpp | 15 +++++++++++++++ src/Common/ThreadPool.h | 5 +++++ src/Storages/MergeTree/MergeTreeData.cpp | 2 ++ ...1947_multiple_pipe_read_sample_data_ZbApel.tsv | 7 +++++++ tests/clickhouse-test | 1 + 7 files changed, 33 insertions(+), 1 deletion(-) create mode 100644 tests/01947_multiple_pipe_read_sample_data_ZbApel.tsv diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 071847c34582..f918826130fb 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -1569,7 +1569,8 @@ try new_server_settings.http_connections_store_limit, }); - CannotAllocateThreadFaultInjector::setFaultProbability(new_server_settings.cannot_allocate_thread_fault_injection_probability); + if (global_context->isServerCompletelyStarted()) + CannotAllocateThreadFaultInjector::setFaultProbability(new_server_settings.cannot_allocate_thread_fault_injection_probability); ProfileEvents::increment(ProfileEvents::MainConfigLoads); diff --git a/src/Common/AsyncLoader.cpp b/src/Common/AsyncLoader.cpp index 3bec30893b9c..9607333b9f74 100644 --- a/src/Common/AsyncLoader.cpp +++ b/src/Common/AsyncLoader.cpp @@ -873,6 +873,7 @@ void AsyncLoader::spawn(Pool & pool, std::unique_lock & lock) ALLOW_ALLOCATIONS_IN_SCOPE; if (log_events) LOG_DEBUG(log, "Spawn loader worker #{} in {}", pool.workers, pool.name); + auto blocker = CannotAllocateThreadFaultInjector::blockFaultInjections(); pool.thread_pool->scheduleOrThrowOnError([this, &pool] { worker(pool); }); }); } diff --git a/src/Common/ThreadPool.cpp b/src/Common/ThreadPool.cpp index 1adf716be24c..0877f8aa55e1 100644 --- 
a/src/Common/ThreadPool.cpp +++ b/src/Common/ThreadPool.cpp @@ -568,6 +568,21 @@ bool CannotAllocateThreadFaultInjector::injectFault() if (!ins.enabled.load(std::memory_order_relaxed)) return false; + if (ins.block_fault_injections) + return false; + std::lock_guard lock(ins.mutex); return ins.random && (*ins.random)(ins.rndgen); } + +thread_local bool CannotAllocateThreadFaultInjector::block_fault_injections = false; + +scope_guard CannotAllocateThreadFaultInjector::blockFaultInjections() +{ + auto & ins = instance(); + if (!ins.enabled.load(std::memory_order_relaxed)) + return {}; + + ins.block_fault_injections = true; + return [&ins](){ ins.block_fault_injections = false; }; +} diff --git a/src/Common/ThreadPool.h b/src/Common/ThreadPool.h index 191a8f6271d7..7591832bbff9 100644 --- a/src/Common/ThreadPool.h +++ b/src/Common/ThreadPool.h @@ -334,8 +334,13 @@ class CannotAllocateThreadFaultInjector std::mutex mutex; pcg64_fast rndgen; std::optional random; + + static thread_local bool block_fault_injections; + static CannotAllocateThreadFaultInjector & instance(); public: static void setFaultProbability(double probability); static bool injectFault(); + + static scope_guard blockFaultInjections(); }; diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 8faed72b198d..2db360f91838 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -1909,6 +1909,8 @@ try auto runner = threadPoolCallbackRunner(getOutdatedPartsLoadingThreadPool().get(), "OutdatedParts"); std::vector> parts_futures; + auto blocker = CannotAllocateThreadFaultInjector::blockFaultInjections(); + while (true) { ThreadFuzzer::maybeInjectSleep(); diff --git a/tests/01947_multiple_pipe_read_sample_data_ZbApel.tsv b/tests/01947_multiple_pipe_read_sample_data_ZbApel.tsv new file mode 100644 index 000000000000..ab35653b8ddd --- /dev/null +++ b/tests/01947_multiple_pipe_read_sample_data_ZbApel.tsv @@ -0,0 +1,7 @@ +0 BBB +1 BBB +2 BBB +3 BBB +4 AAA +5 BBB +6 AAA diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 624512058bcf..e461942114ba 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -907,6 +907,7 @@ class MergeTreeSettingsRandomizer: 1, 32 * 1024 * 1024 ), "cache_populated_by_fetch": lambda: random.randint(0, 1), + "concurrent_part_removal_threshold": threshold_generator(0.2, 0.3, 0, 100) } @staticmethod From ea16070117b2535d180ec5dc1d6edffa0b77b767 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 3 Apr 2024 19:17:46 +0000 Subject: [PATCH 08/90] Automatic style fix --- tests/clickhouse-test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index e461942114ba..9cfd087bd672 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -907,7 +907,7 @@ class MergeTreeSettingsRandomizer: 1, 32 * 1024 * 1024 ), "cache_populated_by_fetch": lambda: random.randint(0, 1), - "concurrent_part_removal_threshold": threshold_generator(0.2, 0.3, 0, 100) + "concurrent_part_removal_threshold": threshold_generator(0.2, 0.3, 0, 100), } @staticmethod From 724f9b8a3eb3a72845928e5ec2d21018cf0cc62c Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 4 Apr 2024 20:14:33 +0200 Subject: [PATCH 09/90] Update ReadBufferFromPocoSocket.cpp --- src/IO/ReadBufferFromPocoSocket.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/IO/ReadBufferFromPocoSocket.cpp b/src/IO/ReadBufferFromPocoSocket.cpp index 
d399721d0603..26cdee4140c8 100644 --- a/src/IO/ReadBufferFromPocoSocket.cpp +++ b/src/IO/ReadBufferFromPocoSocket.cpp @@ -83,21 +83,21 @@ bool ReadBufferFromPocoSocket::nextImpl() } catch (const Poco::Net::NetException & e) { - throw NetException(ErrorCodes::NETWORK_ERROR, "{}, while reading from socket ({})", e.displayText(), peer_address.toString()); + throw NetException(ErrorCodes::NETWORK_ERROR, "{}, while reading from socket (peer: {}, local: {})", e.displayText(), peer_address.toString(), socket.address().toString()); } catch (const Poco::TimeoutException &) { - throw NetException(ErrorCodes::SOCKET_TIMEOUT, "Timeout exceeded while reading from socket ({}, {} ms)", - peer_address.toString(), + throw NetException(ErrorCodes::SOCKET_TIMEOUT, "Timeout exceeded while reading from socket (peer: {}, local: {}, {} ms)", + peer_address.toString(), socket.address().toString(), socket.impl()->getReceiveTimeout().totalMilliseconds()); } catch (const Poco::IOException & e) { - throw NetException(ErrorCodes::NETWORK_ERROR, "{}, while reading from socket ({})", e.displayText(), peer_address.toString()); + throw NetException(ErrorCodes::NETWORK_ERROR, "{}, while reading from socket (peer: {}, local: {})", e.displayText(), peer_address.toString(), socket.address().toString()); } if (bytes_read < 0) - throw NetException(ErrorCodes::CANNOT_READ_FROM_SOCKET, "Cannot read from socket ({})", peer_address.toString()); + throw NetException(ErrorCodes::CANNOT_READ_FROM_SOCKET, "Cannot read from socket (peer: {}, local: {})", peer_address.toString(), socket.address().toString()); if (read_event != ProfileEvents::end()) ProfileEvents::increment(read_event, bytes_read); From 5a897bc43e6fbf83ad8143400adf6944c04616dc Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 4 Apr 2024 20:16:26 +0200 Subject: [PATCH 10/90] Update Connection.cpp --- src/Client/Connection.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/Client/Connection.cpp b/src/Client/Connection.cpp index 180942e6b838..e5ac7ad66b9d 100644 --- a/src/Client/Connection.cpp +++ b/src/Client/Connection.cpp @@ -195,6 +195,7 @@ void Connection::connect(const ConnectionTimeouts & timeouts) out = std::make_shared(*socket); out->setAsyncCallback(async_callback); connected = true; + setDescription(); sendHello(); receiveHello(timeouts.handshake_timeout); @@ -1225,6 +1226,12 @@ void Connection::setDescription() if (host != ip_address) description += ", " + ip_address; } + + if (const auto * socket_ = getSocket()) + { + description += ", local address: "; + description += socket_->address().toString(); + } } From 5db9fbed52c8c3f31b202fdd68d2d0117541d31e Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 4 Apr 2024 22:32:57 +0200 Subject: [PATCH 11/90] cancel tasks on exception --- src/Backups/BackupEntriesCollector.cpp | 15 +- src/Backups/BackupIO_AzureBlobStorage.cpp | 8 +- src/Backups/BackupIO_S3.cpp | 10 +- src/Backups/RestorerFromBackup.cpp | 2 +- src/Common/ThreadPool.cpp | 6 +- src/Common/ThreadPoolTaskTracker.cpp | 4 +- src/Common/ThreadPoolTaskTracker.h | 6 +- src/Common/threadPoolCallbackRunner.h | 135 +++++++++++++- src/Databases/DatabaseReplicated.cpp | 15 +- src/Disks/IO/ThreadPoolReader.cpp | 2 +- src/Disks/IO/ThreadPoolRemoteFSReader.cpp | 2 +- .../IO/WriteBufferFromAzureBlobStorage.cpp | 2 +- .../IO/WriteBufferFromAzureBlobStorage.h | 2 +- .../ObjectStorageIteratorAsync.h | 4 +- .../ObjectStorages/S3/S3ObjectStorage.cpp | 8 +- src/Formats/FormatFactory.cpp | 2 +- .../copyAzureBlobStorageFile.cpp | 8 +- 
.../copyAzureBlobStorageFile.h | 4 +- src/IO/ParallelReadBuffer.cpp | 4 +- src/IO/ParallelReadBuffer.h | 6 +- src/IO/S3/copyS3File.cpp | 12 +- src/IO/S3/copyS3File.h | 4 +- src/IO/WriteBufferFromS3.cpp | 2 +- src/IO/WriteBufferFromS3.h | 2 +- src/IO/tests/gtest_writebuffer_s3.cpp | 4 +- src/Interpreters/AsynchronousInsertQueue.cpp | 17 +- src/Storages/MergeTree/MergeTreeData.cpp | 171 +++++++----------- .../MergeTree/MergeTreeMarksLoader.cpp | 2 +- .../MergeTree/MergeTreePrefetchedReadPool.cpp | 2 +- src/Storages/MergeTree/MergeTreeSource.cpp | 4 +- src/Storages/StorageAzureBlob.cpp | 2 +- src/Storages/StorageAzureBlob.h | 2 +- src/Storages/StorageBuffer.cpp | 9 +- src/Storages/StorageDistributed.cpp | 39 ++-- src/Storages/StorageS3.cpp | 10 +- src/Storages/StorageS3.h | 2 +- src/Storages/StorageURL.cpp | 1 - .../System/StorageSystemDetachedParts.cpp | 24 +-- ..._multiple_pipe_read_sample_data_ZbApel.tsv | 7 - tests/clickhouse-test | 1 + 40 files changed, 303 insertions(+), 259 deletions(-) delete mode 100644 tests/01947_multiple_pipe_read_sample_data_ZbApel.tsv diff --git a/src/Backups/BackupEntriesCollector.cpp b/src/Backups/BackupEntriesCollector.cpp index c71ce195388c..016190535a96 100644 --- a/src/Backups/BackupEntriesCollector.cpp +++ b/src/Backups/BackupEntriesCollector.cpp @@ -786,20 +786,15 @@ void BackupEntriesCollector::makeBackupEntriesForTablesData() if (backup_settings.structure_only) return; - std::vector> futures; + ThreadPoolCallbackRunnerLocal runner(threadpool, "BackupCollect"); for (const auto & table_name : table_infos | boost::adaptors::map_keys) { - futures.push_back(scheduleFromThreadPool([&]() + runner([&]() { makeBackupEntriesForTableData(table_name); - }, threadpool, "BackupCollect")); - } - /// Wait for all tasks. - for (auto & future : futures) - future.wait(); - /// Make sure there is no exception. - for (auto & future : futures) - future.get(); + }); + } + runner.waitForAllToFinishAndRethrowFirstError(); } void BackupEntriesCollector::makeBackupEntriesForTableData(const QualifiedTableName & table_name) diff --git a/src/Backups/BackupIO_AzureBlobStorage.cpp b/src/Backups/BackupIO_AzureBlobStorage.cpp index b9b208e321cf..fac6e441e5af 100644 --- a/src/Backups/BackupIO_AzureBlobStorage.cpp +++ b/src/Backups/BackupIO_AzureBlobStorage.cpp @@ -121,7 +121,7 @@ void BackupReaderAzureBlobStorage::copyFileToDisk(const String & path_in_backup, /* dest_path */ blob_path[0], settings, read_settings, - threadPoolCallbackRunner(getBackupsIOThreadPool().get(), "BackupRDAzure"), + threadPoolCallbackRunnerUnsafe(getBackupsIOThreadPool().get(), "BackupRDAzure"), /* for_disk_azure_blob_storage= */ true); return file_size; @@ -178,7 +178,7 @@ void BackupWriterAzureBlobStorage::copyFileFromDisk(const String & path_in_backu fs::path(configuration.blob_path) / path_in_backup, settings, read_settings, - threadPoolCallbackRunner(getBackupsIOThreadPool().get(), "BackupWRAzure")); + threadPoolCallbackRunnerUnsafe(getBackupsIOThreadPool().get(), "BackupWRAzure")); return; /// copied! 
} } @@ -201,14 +201,14 @@ void BackupWriterAzureBlobStorage::copyFile(const String & destination, const St /* dest_path */ destination, settings, read_settings, - threadPoolCallbackRunner(getBackupsIOThreadPool().get(), "BackupWRAzure"), + threadPoolCallbackRunnerUnsafe(getBackupsIOThreadPool().get(), "BackupWRAzure"), /* for_disk_azure_blob_storage= */ true); } void BackupWriterAzureBlobStorage::copyDataToFile(const String & path_in_backup, const CreateReadBufferFunction & create_read_buffer, UInt64 start_pos, UInt64 length) { copyDataToAzureBlobStorageFile(create_read_buffer, start_pos, length, client, configuration.container, path_in_backup, settings, - threadPoolCallbackRunner(getBackupsIOThreadPool().get(), "BackupWRAzure")); + threadPoolCallbackRunnerUnsafe(getBackupsIOThreadPool().get(), "BackupWRAzure")); } BackupWriterAzureBlobStorage::~BackupWriterAzureBlobStorage() = default; diff --git a/src/Backups/BackupIO_S3.cpp b/src/Backups/BackupIO_S3.cpp index 2063af2061cc..70e50e443c1b 100644 --- a/src/Backups/BackupIO_S3.cpp +++ b/src/Backups/BackupIO_S3.cpp @@ -191,7 +191,7 @@ void BackupReaderS3::copyFileToDisk(const String & path_in_backup, size_t file_s read_settings, blob_storage_log, object_attributes, - threadPoolCallbackRunner(getBackupsIOThreadPool().get(), "BackupReaderS3"), + threadPoolCallbackRunnerUnsafe(getBackupsIOThreadPool().get(), "BackupReaderS3"), /* for_disk_s3= */ true); return file_size; @@ -259,7 +259,7 @@ void BackupWriterS3::copyFileFromDisk(const String & path_in_backup, DiskPtr src read_settings, blob_storage_log, {}, - threadPoolCallbackRunner(getBackupsIOThreadPool().get(), "BackupWriterS3")); + threadPoolCallbackRunnerUnsafe(getBackupsIOThreadPool().get(), "BackupWriterS3")); return; /// copied! } } @@ -283,14 +283,14 @@ void BackupWriterS3::copyFile(const String & destination, const String & source, read_settings, blob_storage_log, {}, - threadPoolCallbackRunner(getBackupsIOThreadPool().get(), "BackupWriterS3")); + threadPoolCallbackRunnerUnsafe(getBackupsIOThreadPool().get(), "BackupWriterS3")); } void BackupWriterS3::copyDataToFile(const String & path_in_backup, const CreateReadBufferFunction & create_read_buffer, UInt64 start_pos, UInt64 length) { copyDataToS3File(create_read_buffer, start_pos, length, client, s3_uri.bucket, fs::path(s3_uri.key) / path_in_backup, s3_settings.request_settings, blob_storage_log, {}, - threadPoolCallbackRunner(getBackupsIOThreadPool().get(), "BackupWriterS3")); + threadPoolCallbackRunnerUnsafe(getBackupsIOThreadPool().get(), "BackupWriterS3")); } BackupWriterS3::~BackupWriterS3() = default; @@ -325,7 +325,7 @@ std::unique_ptr BackupWriterS3::writeFile(const String & file_name) s3_settings.request_settings, blob_storage_log, std::nullopt, - threadPoolCallbackRunner(getBackupsIOThreadPool().get(), "BackupWriterS3"), + threadPoolCallbackRunnerUnsafe(getBackupsIOThreadPool().get(), "BackupWriterS3"), write_settings); } diff --git a/src/Backups/RestorerFromBackup.cpp b/src/Backups/RestorerFromBackup.cpp index ed1d5b8a103d..6504444feff2 100644 --- a/src/Backups/RestorerFromBackup.cpp +++ b/src/Backups/RestorerFromBackup.cpp @@ -231,7 +231,7 @@ void RestorerFromBackup::schedule(std::function && task_, const char * t checkIsQueryCancelled(); - auto future = scheduleFromThreadPool( + auto future = scheduleFromThreadPoolUnsafe( [this, task = std::move(task_)]() mutable { if (exception_caught) diff --git a/src/Common/ThreadPool.cpp b/src/Common/ThreadPool.cpp index 0877f8aa55e1..b778362f4904 100644 --- a/src/Common/ThreadPool.cpp 
+++ b/src/Common/ThreadPool.cpp @@ -183,6 +183,9 @@ ReturnType ThreadPoolImpl::scheduleImpl(Job job, Priority priority, std: { std::unique_lock lock(mutex); + if (CannotAllocateThreadFaultInjector::injectFault()) + return on_error("fault injected"); + auto pred = [this] { return !queue_size || scheduled_jobs < queue_size || shutdown; }; if (wait_microseconds) /// Check for optional. Condition is true if the optional is set and the value is zero. @@ -202,9 +205,6 @@ ReturnType ThreadPoolImpl::scheduleImpl(Job job, Priority priority, std: /// Check if there are enough threads to process job. if (threads.size() < std::min(max_threads, scheduled_jobs + 1)) { - if (CannotAllocateThreadFaultInjector::injectFault()) - return on_error("fault injected"); - try { threads.emplace_front(); diff --git a/src/Common/ThreadPoolTaskTracker.cpp b/src/Common/ThreadPoolTaskTracker.cpp index 10207eb62967..61d34801f7a5 100644 --- a/src/Common/ThreadPoolTaskTracker.cpp +++ b/src/Common/ThreadPoolTaskTracker.cpp @@ -10,7 +10,7 @@ namespace ProfileEvents namespace DB { -TaskTracker::TaskTracker(ThreadPoolCallbackRunner scheduler_, size_t max_tasks_inflight_, LogSeriesLimiterPtr limitedLog_) +TaskTracker::TaskTracker(ThreadPoolCallbackRunnerUnsafe scheduler_, size_t max_tasks_inflight_, LogSeriesLimiterPtr limitedLog_) : is_async(bool(scheduler_)) , scheduler(scheduler_ ? std::move(scheduler_) : syncRunner()) , max_tasks_inflight(max_tasks_inflight_) @@ -22,7 +22,7 @@ TaskTracker::~TaskTracker() safeWaitAll(); } -ThreadPoolCallbackRunner TaskTracker::syncRunner() +ThreadPoolCallbackRunnerUnsafe TaskTracker::syncRunner() { return [](Callback && callback, int64_t) mutable -> std::future { diff --git a/src/Common/ThreadPoolTaskTracker.h b/src/Common/ThreadPoolTaskTracker.h index 72591648d304..84bc3344fe34 100644 --- a/src/Common/ThreadPoolTaskTracker.h +++ b/src/Common/ThreadPoolTaskTracker.h @@ -23,10 +23,10 @@ class TaskTracker public: using Callback = std::function; - TaskTracker(ThreadPoolCallbackRunner scheduler_, size_t max_tasks_inflight_, LogSeriesLimiterPtr limitedLog_); + TaskTracker(ThreadPoolCallbackRunnerUnsafe scheduler_, size_t max_tasks_inflight_, LogSeriesLimiterPtr limitedLog_); ~TaskTracker(); - static ThreadPoolCallbackRunner syncRunner(); + static ThreadPoolCallbackRunnerUnsafe syncRunner(); bool isAsync() const; @@ -50,7 +50,7 @@ class TaskTracker void collectFinishedFutures(bool propagate_exceptions) TSA_REQUIRES(mutex); const bool is_async; - ThreadPoolCallbackRunner scheduler; + ThreadPoolCallbackRunnerUnsafe scheduler; const size_t max_tasks_inflight; using FutureList = std::list>; diff --git a/src/Common/threadPoolCallbackRunner.h b/src/Common/threadPoolCallbackRunner.h index 6f7892ae4bb4..cec07bbd8922 100644 --- a/src/Common/threadPoolCallbackRunner.h +++ b/src/Common/threadPoolCallbackRunner.h @@ -11,11 +11,16 @@ namespace DB /// High-order function to run callbacks (functions with 'void()' signature) somewhere asynchronously. template > -using ThreadPoolCallbackRunner = std::function(Callback &&, Priority)>; +using ThreadPoolCallbackRunnerUnsafe = std::function(Callback &&, Priority)>; + +/// NOTE When using ThreadPoolCallbackRunnerUnsafe you MUST ensure that all async tasks are finished +/// before any objects they may use are destroyed. +/// A common mistake is capturing some local objects in a lambda and passing it to the runner. +/// In case of exception, these local objects will be destroyed before scheduled tasks are finished. 
/// Creates CallbackRunner that runs every callback with 'pool->scheduleOrThrowOnError()'. template > -ThreadPoolCallbackRunner threadPoolCallbackRunner(ThreadPool & pool, const std::string & thread_name) +ThreadPoolCallbackRunnerUnsafe threadPoolCallbackRunnerUnsafe(ThreadPool & pool, const std::string & thread_name) { return [my_pool = &pool, thread_group = CurrentThread::getGroup(), thread_name](Callback && callback, Priority priority) mutable -> std::future { @@ -54,10 +59,132 @@ ThreadPoolCallbackRunner threadPoolCallbackRunner(ThreadPool & } template -std::future scheduleFromThreadPool(T && task, ThreadPool & pool, const std::string & thread_name, Priority priority = {}) +std::future scheduleFromThreadPoolUnsafe(T && task, ThreadPool & pool, const std::string & thread_name, Priority priority = {}) { - auto schedule = threadPoolCallbackRunner(pool, thread_name); + auto schedule = threadPoolCallbackRunnerUnsafe(pool, thread_name); return schedule(std::move(task), priority); /// NOLINT } +/// NOTE It's still not completely safe. +/// When creating a runner on the stack, you MUST make sure that it's created (and destroyed) before local objects captured by the task lambda. + +template > +class ThreadPoolCallbackRunnerLocal +{ + ThreadPool & pool; + std::string thread_name; + + enum TaskState + { + SCHEDULED = 0, + RUNNING = 1, + FINISHED = 2, + CANCELLED = 3, + }; + + struct Task + { + std::future future; + std::atomic state = SCHEDULED; + }; + + /// NOTE It will leak for a global object with long lifetime + std::vector> tasks; + + void cancelScheduledTasks() + { + for (auto & task : tasks) + { + TaskState expected = SCHEDULED; + task->state.compare_exchange_strong(expected, CANCELLED); + } + } + +public: + ThreadPoolCallbackRunnerLocal(ThreadPool & pool_, const std::string & thread_name_) + : pool(pool_) + , thread_name(thread_name_) + { + } + + ~ThreadPoolCallbackRunnerLocal() + { + cancelScheduledTasks(); + waitForAllToFinish(); + } + + void operator() (Callback && callback, Priority priority = {}) + { + auto & task = tasks.emplace_back(std::make_shared()); + + auto task_func = std::make_shared>( + [task, thread_group = CurrentThread::getGroup(), my_thread_name = thread_name, my_callback = std::move(callback)]() mutable -> Result + { + TaskState expected = SCHEDULED; + if (!task->state.compare_exchange_strong(expected, RUNNING)) + { + if (expected == CANCELLED) + return; + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected state {} when running a task in {}", expected, my_thread_name); + } + + SCOPE_EXIT_SAFE( + { + expected = RUNNING; + if (!task->state.compare_exchange_strong(expected, FINISHED)) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected state {} when finishing a task in {}", expected, my_thread_name); + }); + + if (thread_group) + CurrentThread::attachToGroup(thread_group); + + SCOPE_EXIT_SAFE( + { + { + /// Release all captured resources before detaching thread group + /// Releasing has to use proper memory tracker which has been set here before callback + + [[maybe_unused]] auto tmp = std::move(my_callback); + } + + if (thread_group) + CurrentThread::detachFromGroupIfNotDetached(); + }); + + setThreadName(my_thread_name.data()); + + return my_callback(); + }); + + task->future = task_func->get_future(); + + /// ThreadPool is using "bigger is higher priority" instead of "smaller is more priority". + /// Note: calling method scheduleOrThrowOnError is intentional, because we don't want to throw exceptions + /// in critical places where this callback runner is used (e.g. 
loading or deletion of parts) + pool.scheduleOrThrowOnError([my_task = std::move(task_func)]{ (*my_task)(); }, priority); + } + + void waitForAllToFinish() + { + for (const auto & task : tasks) + { + TaskState state = task->state; + /// It can be cancelled only when waiting in dtor + if (state == CANCELLED) + continue; + task->future.wait(); + } + } + + void waitForAllToFinishAndRethrowFirstError() + { + waitForAllToFinish(); + for (auto & task : tasks) + task->future.get(); + + tasks.clear(); + } + +}; + } diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp index 59b3e52e139e..80281d5d2bcb 100644 --- a/src/Databases/DatabaseReplicated.cpp +++ b/src/Databases/DatabaseReplicated.cpp @@ -1098,8 +1098,7 @@ void DatabaseReplicated::recoverLostReplica(const ZooKeeperPtr & current_zookeep auto allow_concurrent_table_creation = getContext()->getServerSettings().max_database_replicated_create_table_thread_pool_size > 1; auto tables_to_create_by_level = tables_dependencies.getTablesSplitByDependencyLevel(); - auto create_tables_runner = threadPoolCallbackRunner(getDatabaseReplicatedCreateTablesThreadPool().get(), "CreateTables"); - std::vector> create_table_futures; + ThreadPoolCallbackRunnerLocal runner(getDatabaseReplicatedCreateTablesThreadPool().get(), "CreateTables"); for (const auto & tables_to_create : tables_to_create_by_level) { @@ -1131,20 +1130,12 @@ void DatabaseReplicated::recoverLostReplica(const ZooKeeperPtr & current_zookeep }; if (allow_concurrent_table_creation) - create_table_futures.push_back(create_tables_runner(task, Priority{0})); + runner(std::move(task)); else task(); } - /// First wait for all tasks to finish. - for (auto & future : create_table_futures) - future.wait(); - - /// Now rethrow the first exception if any. 
- for (auto & future : create_table_futures) - future.get(); - - create_table_futures.clear(); + runner.waitForAllToFinishAndRethrowFirstError(); } LOG_INFO(log, "All tables are created successfully"); diff --git a/src/Disks/IO/ThreadPoolReader.cpp b/src/Disks/IO/ThreadPoolReader.cpp index bb295643726c..4713e20ccc87 100644 --- a/src/Disks/IO/ThreadPoolReader.cpp +++ b/src/Disks/IO/ThreadPoolReader.cpp @@ -203,7 +203,7 @@ std::future ThreadPoolReader::submit(Request reques ProfileEvents::increment(ProfileEvents::ThreadPoolReaderPageCacheMiss); - auto schedule = threadPoolCallbackRunner(*pool, "ThreadPoolRead"); + auto schedule = threadPoolCallbackRunnerUnsafe(*pool, "ThreadPoolRead"); return schedule([request, fd]() -> Result { diff --git a/src/Disks/IO/ThreadPoolRemoteFSReader.cpp b/src/Disks/IO/ThreadPoolRemoteFSReader.cpp index 590fc4c4656b..2df087e941f9 100644 --- a/src/Disks/IO/ThreadPoolRemoteFSReader.cpp +++ b/src/Disks/IO/ThreadPoolRemoteFSReader.cpp @@ -106,7 +106,7 @@ std::future ThreadPoolRemoteFSReader::submit(Reques } ProfileEventTimeIncrement elapsed(ProfileEvents::ThreadpoolReaderSubmit); - return scheduleFromThreadPool( + return scheduleFromThreadPoolUnsafe( [request, this]() -> Result { return execute(request, /*seek_performed=*/true); }, *pool, "VFSRead", request.priority); } diff --git a/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp b/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp index 05b93dd1fa34..d407ec59394c 100644 --- a/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp +++ b/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp @@ -44,7 +44,7 @@ WriteBufferFromAzureBlobStorage::WriteBufferFromAzureBlobStorage( size_t buf_size_, const WriteSettings & write_settings_, std::shared_ptr settings_, - ThreadPoolCallbackRunner schedule_) + ThreadPoolCallbackRunnerUnsafe schedule_) : WriteBufferFromFileBase(buf_size_, nullptr, 0) , log(getLogger("WriteBufferFromAzureBlobStorage")) , buffer_allocation_policy(createBufferAllocationPolicy(*settings_)) diff --git a/src/Disks/IO/WriteBufferFromAzureBlobStorage.h b/src/Disks/IO/WriteBufferFromAzureBlobStorage.h index 6e10c07b255b..0989eb7bfb0a 100644 --- a/src/Disks/IO/WriteBufferFromAzureBlobStorage.h +++ b/src/Disks/IO/WriteBufferFromAzureBlobStorage.h @@ -36,7 +36,7 @@ class WriteBufferFromAzureBlobStorage : public WriteBufferFromFileBase size_t buf_size_, const WriteSettings & write_settings_, std::shared_ptr settings_, - ThreadPoolCallbackRunner schedule_ = {}); + ThreadPoolCallbackRunnerUnsafe schedule_ = {}); ~WriteBufferFromAzureBlobStorage() override; diff --git a/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.h b/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.h index 5f63e5f6e8a5..7fdb02bdfe2b 100644 --- a/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.h +++ b/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.h @@ -19,7 +19,7 @@ class IObjectStorageIteratorAsync : public IObjectStorageIterator CurrentMetrics::Metric threads_scheduled_metric, const std::string & thread_name) : list_objects_pool(threads_metric, threads_active_metric, threads_scheduled_metric, 1) - , list_objects_scheduler(threadPoolCallbackRunner(list_objects_pool, thread_name)) + , list_objects_scheduler(threadPoolCallbackRunnerUnsafe(list_objects_pool, thread_name)) { } @@ -53,7 +53,7 @@ class IObjectStorageIteratorAsync : public IObjectStorageIterator mutable std::recursive_mutex mutex; ThreadPool list_objects_pool; - ThreadPoolCallbackRunner list_objects_scheduler; + ThreadPoolCallbackRunnerUnsafe list_objects_scheduler; std::future 
outcome_future; RelativePathsWithMetadata current_batch; RelativePathsWithMetadata::iterator current_batch_iterator; diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index b343b73f7bd6..77dd93395ba2 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -247,9 +247,9 @@ std::unique_ptr S3ObjectStorage::writeObject( /// NOLIN throw Exception(ErrorCodes::BAD_ARGUMENTS, "S3 doesn't support append to files"); auto settings_ptr = s3_settings.get(); - ThreadPoolCallbackRunner scheduler; + ThreadPoolCallbackRunnerUnsafe scheduler; if (write_settings.s3_allow_parallel_part_upload) - scheduler = threadPoolCallbackRunner(getThreadPoolWriter(), "VFSWrite"); + scheduler = threadPoolCallbackRunnerUnsafe(getThreadPoolWriter(), "VFSWrite"); auto blob_storage_log = BlobStorageLogWriter::create(disk_name); @@ -461,7 +461,7 @@ void S3ObjectStorage::copyObjectToAnotherObjectStorage( // NOLINT auto current_client = dest_s3->client.get(); auto settings_ptr = s3_settings.get(); auto size = S3::getObjectSize(*current_client, uri.bucket, object_from.remote_path, {}, settings_ptr->request_settings, /* for_disk_s3= */ true); - auto scheduler = threadPoolCallbackRunner(getThreadPoolWriter(), "S3ObjStor_copy"); + auto scheduler = threadPoolCallbackRunnerUnsafe(getThreadPoolWriter(), "S3ObjStor_copy"); try { copyS3File( current_client, @@ -503,7 +503,7 @@ void S3ObjectStorage::copyObject( // NOLINT auto current_client = client.get(); auto settings_ptr = s3_settings.get(); auto size = S3::getObjectSize(*current_client, uri.bucket, object_from.remote_path, {}, settings_ptr->request_settings, /* for_disk_s3= */ true); - auto scheduler = threadPoolCallbackRunner(getThreadPoolWriter(), "S3ObjStor_copy"); + auto scheduler = threadPoolCallbackRunnerUnsafe(getThreadPoolWriter(), "S3ObjStor_copy"); copyS3File(current_client, uri.bucket, object_from.remote_path, diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 8cbb1b9e5639..3dccd8c6b319 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -437,7 +437,7 @@ std::unique_ptr FormatFactory::wrapReadBufferIfNeeded( settings.max_download_buffer_size); res = wrapInParallelReadBufferIfSupported( - buf, threadPoolCallbackRunner(getIOThreadPool().get(), "ParallelRead"), + buf, threadPoolCallbackRunnerUnsafe(getIOThreadPool().get(), "ParallelRead"), max_download_threads, settings.max_download_buffer_size, file_size); } diff --git a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp index 4714c7959278..ef8c01f4b5ec 100644 --- a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp +++ b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp @@ -45,7 +45,7 @@ namespace const String & dest_container_for_logging_, const String & dest_blob_, std::shared_ptr settings_, - ThreadPoolCallbackRunner schedule_, + ThreadPoolCallbackRunnerUnsafe schedule_, bool for_disk_azure_blob_storage_, const Poco::Logger * log_) : create_read_buffer(create_read_buffer_) @@ -72,7 +72,7 @@ namespace const String & dest_container_for_logging; const String & dest_blob; std::shared_ptr settings; - ThreadPoolCallbackRunner schedule; + ThreadPoolCallbackRunnerUnsafe schedule; bool for_disk_azure_blob_storage; const Poco::Logger * log; size_t max_single_part_upload_size; @@ -269,7 +269,7 @@ void copyDataToAzureBlobStorageFile( const String & dest_container_for_logging, const String & 
dest_blob, std::shared_ptr settings, - ThreadPoolCallbackRunner schedule, + ThreadPoolCallbackRunnerUnsafe schedule, bool for_disk_azure_blob_storage) { UploadHelper helper{create_read_buffer, dest_client, offset, size, dest_container_for_logging, dest_blob, settings, schedule, for_disk_azure_blob_storage, &Poco::Logger::get("copyDataToAzureBlobStorageFile")}; @@ -288,7 +288,7 @@ void copyAzureBlobStorageFile( const String & dest_blob, std::shared_ptr settings, const ReadSettings & read_settings, - ThreadPoolCallbackRunner schedule, + ThreadPoolCallbackRunnerUnsafe schedule, bool for_disk_azure_blob_storage) { diff --git a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.h b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.h index 1433f8d18ba0..170a3d7f6aae 100644 --- a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.h +++ b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.h @@ -31,7 +31,7 @@ void copyAzureBlobStorageFile( const String & dest_blob, std::shared_ptr settings, const ReadSettings & read_settings, - ThreadPoolCallbackRunner schedule_ = {}, + ThreadPoolCallbackRunnerUnsafe schedule_ = {}, bool for_disk_azure_blob_storage = false); @@ -48,7 +48,7 @@ void copyDataToAzureBlobStorageFile( const String & dest_container_for_logging, const String & dest_blob, std::shared_ptr settings, - ThreadPoolCallbackRunner schedule_ = {}, + ThreadPoolCallbackRunnerUnsafe schedule_ = {}, bool for_disk_azure_blob_storage = false); } diff --git a/src/IO/ParallelReadBuffer.cpp b/src/IO/ParallelReadBuffer.cpp index cdeb8a186351..5718830db645 100644 --- a/src/IO/ParallelReadBuffer.cpp +++ b/src/IO/ParallelReadBuffer.cpp @@ -42,7 +42,7 @@ struct ParallelReadBuffer::ReadWorker }; ParallelReadBuffer::ParallelReadBuffer( - SeekableReadBuffer & input_, ThreadPoolCallbackRunner schedule_, size_t max_working_readers_, size_t range_step_, size_t file_size_) + SeekableReadBuffer & input_, ThreadPoolCallbackRunnerUnsafe schedule_, size_t max_working_readers_, size_t range_step_, size_t file_size_) : SeekableReadBuffer(nullptr, 0) , max_working_readers(max_working_readers_) , schedule(std::move(schedule_)) @@ -293,7 +293,7 @@ void ParallelReadBuffer::finishAndWait() } std::unique_ptr wrapInParallelReadBufferIfSupported( - ReadBuffer & buf, ThreadPoolCallbackRunner schedule, size_t max_working_readers, + ReadBuffer & buf, ThreadPoolCallbackRunnerUnsafe schedule, size_t max_working_readers, size_t range_step, size_t file_size) { auto * seekable = dynamic_cast(&buf); diff --git a/src/IO/ParallelReadBuffer.h b/src/IO/ParallelReadBuffer.h index daac11903995..cfeec2b3677d 100644 --- a/src/IO/ParallelReadBuffer.h +++ b/src/IO/ParallelReadBuffer.h @@ -28,7 +28,7 @@ class ParallelReadBuffer : public SeekableReadBuffer, public WithFileSize bool nextImpl() override; public: - ParallelReadBuffer(SeekableReadBuffer & input, ThreadPoolCallbackRunner schedule_, size_t max_working_readers, size_t range_step_, size_t file_size); + ParallelReadBuffer(SeekableReadBuffer & input, ThreadPoolCallbackRunnerUnsafe schedule_, size_t max_working_readers, size_t range_step_, size_t file_size); ~ParallelReadBuffer() override { finishAndWait(); } @@ -63,7 +63,7 @@ class ParallelReadBuffer : public SeekableReadBuffer, public WithFileSize size_t max_working_readers; std::atomic_size_t active_working_readers{0}; - ThreadPoolCallbackRunner schedule; + ThreadPoolCallbackRunnerUnsafe schedule; SeekableReadBuffer & input; size_t file_size; @@ -94,7 +94,7 @@ class ParallelReadBuffer : public SeekableReadBuffer, public WithFileSize /// If `buf` is a 
SeekableReadBuffer with supportsReadAt() == true, creates a ParallelReadBuffer /// from it. Otherwise returns nullptr; std::unique_ptr wrapInParallelReadBufferIfSupported( - ReadBuffer & buf, ThreadPoolCallbackRunner schedule, size_t max_working_readers, + ReadBuffer & buf, ThreadPoolCallbackRunnerUnsafe schedule, size_t max_working_readers, size_t range_step, size_t file_size); } diff --git a/src/IO/S3/copyS3File.cpp b/src/IO/S3/copyS3File.cpp index b780c1fc08f6..3b1f25ed9949 100644 --- a/src/IO/S3/copyS3File.cpp +++ b/src/IO/S3/copyS3File.cpp @@ -58,7 +58,7 @@ namespace const String & dest_key_, const S3Settings::RequestSettings & request_settings_, const std::optional> & object_metadata_, - ThreadPoolCallbackRunner schedule_, + ThreadPoolCallbackRunnerUnsafe schedule_, bool for_disk_s3_, BlobStorageLogWriterPtr blob_storage_log_, const LoggerPtr log_) @@ -84,7 +84,7 @@ namespace const S3Settings::RequestSettings & request_settings; const S3Settings::RequestSettings::PartUploadSettings & upload_settings; const std::optional> & object_metadata; - ThreadPoolCallbackRunner schedule; + ThreadPoolCallbackRunnerUnsafe schedule; bool for_disk_s3; BlobStorageLogWriterPtr blob_storage_log; const LoggerPtr log; @@ -467,7 +467,7 @@ namespace const String & dest_key_, const S3Settings::RequestSettings & request_settings_, const std::optional> & object_metadata_, - ThreadPoolCallbackRunner schedule_, + ThreadPoolCallbackRunnerUnsafe schedule_, bool for_disk_s3_, BlobStorageLogWriterPtr blob_storage_log_) : UploadHelper(client_ptr_, dest_bucket_, dest_key_, request_settings_, object_metadata_, schedule_, for_disk_s3_, blob_storage_log_, getLogger("copyDataToS3File")) @@ -650,7 +650,7 @@ namespace const S3Settings::RequestSettings & request_settings_, const ReadSettings & read_settings_, const std::optional> & object_metadata_, - ThreadPoolCallbackRunner schedule_, + ThreadPoolCallbackRunnerUnsafe schedule_, bool for_disk_s3_, BlobStorageLogWriterPtr blob_storage_log_) : UploadHelper(client_ptr_, dest_bucket_, dest_key_, request_settings_, object_metadata_, schedule_, for_disk_s3_, blob_storage_log_, getLogger("copyS3File")) @@ -856,7 +856,7 @@ void copyDataToS3File( const S3Settings::RequestSettings & settings, BlobStorageLogWriterPtr blob_storage_log, const std::optional> & object_metadata, - ThreadPoolCallbackRunner schedule, + ThreadPoolCallbackRunnerUnsafe schedule, bool for_disk_s3) { CopyDataToFileHelper helper{create_read_buffer, offset, size, dest_s3_client, dest_bucket, dest_key, settings, object_metadata, schedule, for_disk_s3, blob_storage_log}; @@ -876,7 +876,7 @@ void copyS3File( const ReadSettings & read_settings, BlobStorageLogWriterPtr blob_storage_log, const std::optional> & object_metadata, - ThreadPoolCallbackRunner schedule, + ThreadPoolCallbackRunnerUnsafe schedule, bool for_disk_s3) { if (settings.allow_native_copy) diff --git a/src/IO/S3/copyS3File.h b/src/IO/S3/copyS3File.h index 5eb6f702473a..d5da4d260b15 100644 --- a/src/IO/S3/copyS3File.h +++ b/src/IO/S3/copyS3File.h @@ -42,7 +42,7 @@ void copyS3File( const ReadSettings & read_settings, BlobStorageLogWriterPtr blob_storage_log, const std::optional> & object_metadata = std::nullopt, - ThreadPoolCallbackRunner schedule_ = {}, + ThreadPoolCallbackRunnerUnsafe schedule_ = {}, bool for_disk_s3 = false); /// Copies data from any seekable source to S3. 
@@ -60,7 +60,7 @@ void copyDataToS3File( const S3Settings::RequestSettings & settings, BlobStorageLogWriterPtr blob_storage_log, const std::optional> & object_metadata = std::nullopt, - ThreadPoolCallbackRunner schedule_ = {}, + ThreadPoolCallbackRunnerUnsafe schedule_ = {}, bool for_disk_s3 = false); } diff --git a/src/IO/WriteBufferFromS3.cpp b/src/IO/WriteBufferFromS3.cpp index 5e898dec9b84..e41867ce225b 100644 --- a/src/IO/WriteBufferFromS3.cpp +++ b/src/IO/WriteBufferFromS3.cpp @@ -94,7 +94,7 @@ WriteBufferFromS3::WriteBufferFromS3( const S3Settings::RequestSettings & request_settings_, BlobStorageLogWriterPtr blob_log_, std::optional> object_metadata_, - ThreadPoolCallbackRunner schedule_, + ThreadPoolCallbackRunnerUnsafe schedule_, const WriteSettings & write_settings_) : WriteBufferFromFileBase(buf_size_, nullptr, 0) , bucket(bucket_) diff --git a/src/IO/WriteBufferFromS3.h b/src/IO/WriteBufferFromS3.h index e7a06f251158..1df559b252ce 100644 --- a/src/IO/WriteBufferFromS3.h +++ b/src/IO/WriteBufferFromS3.h @@ -41,7 +41,7 @@ class WriteBufferFromS3 final : public WriteBufferFromFileBase const S3Settings::RequestSettings & request_settings_, BlobStorageLogWriterPtr blob_log_, std::optional> object_metadata_ = std::nullopt, - ThreadPoolCallbackRunner schedule_ = {}, + ThreadPoolCallbackRunnerUnsafe schedule_ = {}, const WriteSettings & write_settings_ = {}); ~WriteBufferFromS3() override; diff --git a/src/IO/tests/gtest_writebuffer_s3.cpp b/src/IO/tests/gtest_writebuffer_s3.cpp index d9cb486c09e4..447b72ed7c6e 100644 --- a/src/IO/tests/gtest_writebuffer_s3.cpp +++ b/src/IO/tests/gtest_writebuffer_s3.cpp @@ -452,7 +452,7 @@ struct UploadPartFailIngection: InjectionModel struct BaseSyncPolicy { virtual ~BaseSyncPolicy() = default; - virtual DB::ThreadPoolCallbackRunner getScheduler() { return {}; } + virtual DB::ThreadPoolCallbackRunnerUnsafe getScheduler() { return {}; } virtual void execute(size_t) {} virtual void setAutoExecute(bool) {} @@ -465,7 +465,7 @@ struct SimpleAsyncTasks : BaseSyncPolicy bool auto_execute = false; std::deque> queue; - DB::ThreadPoolCallbackRunner getScheduler() override + DB::ThreadPoolCallbackRunnerUnsafe getScheduler() override { return [this] (std::function && operation, size_t /*priority*/) { diff --git a/src/Interpreters/AsynchronousInsertQueue.cpp b/src/Interpreters/AsynchronousInsertQueue.cpp index c05d1b8f979b..fbbfaa5f7522 100644 --- a/src/Interpreters/AsynchronousInsertQueue.cpp +++ b/src/Interpreters/AsynchronousInsertQueue.cpp @@ -281,10 +281,19 @@ void AsynchronousInsertQueue::scheduleDataProcessingJob( /// Wrap 'unique_ptr' with 'shared_ptr' to make this /// lambda copyable and allow to save it to the thread pool. - pool.scheduleOrThrowOnError( - [this, key, global_context, shard_num, my_data = std::make_shared(std::move(data))]() mutable - { processData(key, std::move(*my_data), std::move(global_context), flush_time_history_per_queue_shard[shard_num]); }, - priority); + auto data_shared = std::make_shared(std::move(data)); + try + { + pool.scheduleOrThrowOnError( + [this, key, global_context, shard_num, my_data = data_shared]() mutable + { processData(key, std::move(*my_data), std::move(global_context), flush_time_history_per_queue_shard[shard_num]); }, + priority); + } + catch (...) 
+ { + for (auto & entry : (**data_shared).entries) + entry->finish(std::current_exception()); + } } void AsynchronousInsertQueue::preprocessInsertQuery(const ASTPtr & query, const ContextPtr & query_context) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 2db360f91838..6d6bbddfb6a9 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -1502,20 +1502,6 @@ MergeTreeData::LoadPartResult MergeTreeData::loadDataPartWithRetries( UNREACHABLE(); } -/// Wait for all tasks to finish and rethrow the first exception if any. -/// The tasks access local variables of the caller function, so we can't just rethrow the first exception until all other tasks are finished. -void waitForAllToFinishAndRethrowFirstError(std::vector> & futures) -{ - /// First wait for all tasks to finish. - for (auto & future : futures) - future.wait(); - - /// Now rethrow the first exception if any. - for (auto & future : futures) - future.get(); - - futures.clear(); -} std::vector MergeTreeData::loadDataPartsFromDisk(PartLoadingTreeNodes & parts_to_load) { @@ -1526,83 +1512,67 @@ std::vector MergeTreeData::loadDataPartsFromDisk( /// Shuffle all the parts randomly to possible speed up loading them from JBOD. std::shuffle(parts_to_load.begin(), parts_to_load.end(), thread_local_rng); - auto runner = threadPoolCallbackRunner(getActivePartsLoadingThreadPool().get(), "ActiveParts"); - std::vector> parts_futures; - std::mutex part_select_mutex; std::mutex part_loading_mutex; std::vector loaded_parts; - try + ThreadPoolCallbackRunnerLocal runner(getActivePartsLoadingThreadPool().get(), "ActiveParts"); + while (true) { - while (true) + bool are_parts_to_load_empty = false; { - bool are_parts_to_load_empty = false; - { - std::lock_guard lock(part_select_mutex); - are_parts_to_load_empty = parts_to_load.empty(); - } + std::lock_guard lock(part_select_mutex); + are_parts_to_load_empty = parts_to_load.empty(); + } - if (are_parts_to_load_empty) - { - /// Wait for all scheduled tasks. - waitForAllToFinishAndRethrowFirstError(parts_futures); + if (are_parts_to_load_empty) + { + /// Wait for all scheduled tasks. + runner.waitForAllToFinishAndRethrowFirstError(); - /// At this point it is possible, that some other parts appeared in the queue for processing (parts_to_load), - /// because we added them from inside the pool. - /// So we need to recheck it. - } + /// At this point it is possible, that some other parts appeared in the queue for processing (parts_to_load), + /// because we added them from inside the pool. + /// So we need to recheck it. + } - PartLoadingTree::NodePtr current_part; - { - std::lock_guard lock(part_select_mutex); - if (parts_to_load.empty()) - break; + PartLoadingTree::NodePtr current_part; + { + std::lock_guard lock(part_select_mutex); + if (parts_to_load.empty()) + break; - current_part = parts_to_load.back(); - parts_to_load.pop_back(); - } + current_part = parts_to_load.back(); + parts_to_load.pop_back(); + } - parts_futures.push_back(runner( - [&, part = std::move(current_part)]() + runner( + [&, part = std::move(current_part)]() + { + /// Pass a separate mutex to guard the set of parts, because this lambda + /// is called concurrently but with already locked @data_parts_mutex. 
+ auto res = loadDataPartWithRetries( + part->info, part->name, part->disk, + DataPartState::Active, part_loading_mutex, loading_parts_initial_backoff_ms, + loading_parts_max_backoff_ms, loading_parts_max_tries); + + part->is_loaded = true; + bool is_active_part = res.part->getState() == DataPartState::Active; + + /// If part is broken or duplicate or should be removed according to transaction + /// and it has any covered parts then try to load them to replace this part. + if (!is_active_part && !part->children.empty()) { - /// Pass a separate mutex to guard the set of parts, because this lambda - /// is called concurrently but with already locked @data_parts_mutex. - auto res = loadDataPartWithRetries( - part->info, part->name, part->disk, - DataPartState::Active, part_loading_mutex, loading_parts_initial_backoff_ms, - loading_parts_max_backoff_ms, loading_parts_max_tries); - - part->is_loaded = true; - bool is_active_part = res.part->getState() == DataPartState::Active; - - /// If part is broken or duplicate or should be removed according to transaction - /// and it has any covered parts then try to load them to replace this part. - if (!is_active_part && !part->children.empty()) - { - std::lock_guard lock{part_select_mutex}; - for (const auto & [_, node] : part->children) - parts_to_load.push_back(node); - } - - { - std::lock_guard lock(part_loading_mutex); - loaded_parts.push_back(std::move(res)); - } - }, Priority{0})); - } - } - catch (...) - { - /// Wait for all scheduled tasks - /// A future becomes invalid after .get() call - /// + .wait() method is used not to throw any exception here. - for (auto & future: parts_futures) - if (future.valid()) - future.wait(); + std::lock_guard lock{part_select_mutex}; + for (const auto & [_, node] : part->children) + parts_to_load.push_back(node); + } - throw; + { + std::lock_guard lock(part_loading_mutex); + loaded_parts.push_back(std::move(res)); + } + }, Priority{0}); } return loaded_parts; @@ -1691,11 +1661,9 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks, std::optional(getActivePartsLoadingThreadPool().get(), "ActiveParts"); std::vector parts_to_load_by_disk(disks.size()); - std::vector> disks_futures; - disks_futures.reserve(disks.size()); + ThreadPoolCallbackRunnerLocal runner(getActivePartsLoadingThreadPool().get(), "ActiveParts"); for (size_t i = 0; i < disks.size(); ++i) { @@ -1705,7 +1673,7 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks, std::optionaliterateDirectory(relative_data_path); it->isValid(); it->next()) { @@ -1717,11 +1685,11 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks, std::optionalname(), format_version)) disk_parts.emplace_back(*part_info, it->name(), disk_ptr); } - }, Priority{0})); + }, Priority{0}); } /// For iteration to be completed - waitForAllToFinishAndRethrowFirstError(disks_futures); + runner.waitForAllToFinishAndRethrowFirstError(); PartLoadingTree::PartLoadingInfos parts_to_load; for (auto & disk_parts : parts_to_load_by_disk) @@ -1906,11 +1874,10 @@ try std::atomic_size_t num_loaded_parts = 0; - auto runner = threadPoolCallbackRunner(getOutdatedPartsLoadingThreadPool().get(), "OutdatedParts"); - std::vector> parts_futures; - auto blocker = CannotAllocateThreadFaultInjector::blockFaultInjections(); + ThreadPoolCallbackRunnerLocal runner(getOutdatedPartsLoadingThreadPool().get(), "OutdatedParts"); + while (true) { ThreadFuzzer::maybeInjectSleep(); @@ -1923,7 +1890,7 @@ try { /// Wait for every scheduled task /// In case of any exception it will be re-thrown and 
server will be terminated. - waitForAllToFinishAndRethrowFirstError(parts_futures); + runner.waitForAllToFinishAndRethrowFirstError(); LOG_DEBUG(log, "Stopped loading outdated data parts because task was canceled. " @@ -1938,7 +1905,7 @@ try outdated_unloaded_data_parts.pop_back(); } - parts_futures.push_back(runner([&, my_part = part]() + runner([&, my_part = part]() { auto res = loadDataPartWithRetries( my_part->info, my_part->name, my_part->disk, @@ -1955,12 +1922,10 @@ try res.part->remove(); else preparePartForRemoval(res.part); - }, Priority{})); + }, Priority{}); } - /// Wait for every scheduled task - for (auto & future : parts_futures) - future.get(); + runner.waitForAllToFinishAndRethrowFirstError(); LOG_DEBUG(log, "Loaded {} outdated data parts {}", num_loaded_parts, is_async ? "asynchronously" : "synchronously"); @@ -2449,7 +2414,6 @@ void MergeTreeData::clearPartsFromFilesystemImpl(const DataPartsVector & parts_t /// Parallel parts removal. std::mutex part_names_mutex; - auto runner = threadPoolCallbackRunner(getPartsCleaningThreadPool().get(), "PartsCleaning"); /// This flag disallow straightforward concurrent parts removal. It's required only in case /// when we have parts on zero-copy disk + at least some of them were mutated. @@ -2469,12 +2433,11 @@ void MergeTreeData::clearPartsFromFilesystemImpl(const DataPartsVector & parts_t LOG_DEBUG( log, "Removing {} parts from filesystem (concurrently): Parts: [{}]", parts_to_remove.size(), fmt::join(parts_to_remove, ", ")); - std::vector> parts_to_remove_futures; - parts_to_remove_futures.reserve(parts_to_remove.size()); + ThreadPoolCallbackRunnerLocal runner(getPartsCleaningThreadPool().get(), "PartsCleaning"); for (const DataPartPtr & part : parts_to_remove) { - parts_to_remove_futures.push_back(runner([&part, &part_names_mutex, part_names_succeed, thread_group = CurrentThread::getGroup()] + runner([&part, &part_names_mutex, part_names_succeed, thread_group = CurrentThread::getGroup()] { asMutableDeletingPart(part)->remove(); if (part_names_succeed) @@ -2482,10 +2445,10 @@ void MergeTreeData::clearPartsFromFilesystemImpl(const DataPartsVector & parts_t std::lock_guard lock(part_names_mutex); part_names_succeed->insert(part->name); } - }, Priority{0})); + }, Priority{0}); } - waitForAllToFinishAndRethrowFirstError(parts_to_remove_futures); + runner.waitForAllToFinishAndRethrowFirstError(); return; } @@ -2557,13 +2520,13 @@ void MergeTreeData::clearPartsFromFilesystemImpl(const DataPartsVector & parts_t return independent_ranges; }; - std::vector> part_removal_futures; + ThreadPoolCallbackRunnerLocal runner(getPartsCleaningThreadPool().get(), "PartsCleaning"); - auto schedule_parts_removal = [this, &runner, &part_names_mutex, part_names_succeed, &part_removal_futures]( + auto schedule_parts_removal = [this, &runner, &part_names_mutex, part_names_succeed]( const MergeTreePartInfo & range, DataPartsVector && parts_in_range) { /// Below, range should be captured by copy to avoid use-after-scope on exception from pool - part_removal_futures.push_back(runner( + runner( [this, range, &part_names_mutex, part_names_succeed, batch = std::move(parts_in_range)] { LOG_TRACE(log, "Removing {} parts in blocks range {}", batch.size(), range.getPartNameForLogs()); @@ -2577,7 +2540,7 @@ void MergeTreeData::clearPartsFromFilesystemImpl(const DataPartsVector & parts_t part_names_succeed->insert(part->name); } } - }, Priority{0})); + }, Priority{0}); }; RemovalRanges independent_ranges = split_into_independent_ranges(parts_to_remove, /* split_times 
*/ 0); @@ -2641,7 +2604,7 @@ void MergeTreeData::clearPartsFromFilesystemImpl(const DataPartsVector & parts_t independent_ranges = split_into_independent_ranges(excluded_parts, /* split_times */ 0); - waitForAllToFinishAndRethrowFirstError(part_removal_futures); + runner.waitForAllToFinishAndRethrowFirstError(); for (size_t i = 0; i < independent_ranges.infos.size(); ++i) { @@ -2650,7 +2613,7 @@ void MergeTreeData::clearPartsFromFilesystemImpl(const DataPartsVector & parts_t schedule_parts_removal(range, std::move(parts_in_range)); } - waitForAllToFinishAndRethrowFirstError(part_removal_futures); + runner.waitForAllToFinishAndRethrowFirstError(); if (parts_to_remove.size() != sum_of_ranges + excluded_parts.size()) throw Exception(ErrorCodes::LOGICAL_ERROR, diff --git a/src/Storages/MergeTree/MergeTreeMarksLoader.cpp b/src/Storages/MergeTree/MergeTreeMarksLoader.cpp index 6798f97e4942..28d706096644 100644 --- a/src/Storages/MergeTree/MergeTreeMarksLoader.cpp +++ b/src/Storages/MergeTree/MergeTreeMarksLoader.cpp @@ -239,7 +239,7 @@ MarkCache::MappedPtr MergeTreeMarksLoader::loadMarksSync() std::future MergeTreeMarksLoader::loadMarksAsync() { - return scheduleFromThreadPool( + return scheduleFromThreadPoolUnsafe( [this]() -> MarkCache::MappedPtr { ProfileEvents::increment(ProfileEvents::BackgroundLoadingMarksTasks); diff --git a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp index c19b4ddd8a23..6d2875b8d9f0 100644 --- a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp +++ b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp @@ -154,7 +154,7 @@ std::future MergeTreePrefetchedReadPool::createPrefetchedFuture(IMergeTree reader->prefetchBeginOfRange(priority); }; - return scheduleFromThreadPool(std::move(task), prefetch_threadpool, "ReadPrepare", priority); + return scheduleFromThreadPoolUnsafe(std::move(task), prefetch_threadpool, "ReadPrepare", priority); } void MergeTreePrefetchedReadPool::createPrefetchedReadersForTask(ThreadTask & task) diff --git a/src/Storages/MergeTree/MergeTreeSource.cpp b/src/Storages/MergeTree/MergeTreeSource.cpp index e1d1d0951e40..02b4768f5f26 100644 --- a/src/Storages/MergeTree/MergeTreeSource.cpp +++ b/src/Storages/MergeTree/MergeTreeSource.cpp @@ -105,7 +105,7 @@ struct MergeTreeSource::AsyncReadingState AsyncReadingState() { control = std::make_shared(); - callback_runner = threadPoolCallbackRunner(getIOThreadPool().get(), "MergeTreeRead"); + callback_runner = threadPoolCallbackRunnerUnsafe(getIOThreadPool().get(), "MergeTreeRead"); } ~AsyncReadingState() @@ -128,7 +128,7 @@ struct MergeTreeSource::AsyncReadingState } private: - ThreadPoolCallbackRunner callback_runner; + ThreadPoolCallbackRunnerUnsafe callback_runner; std::shared_ptr control; }; #endif diff --git a/src/Storages/StorageAzureBlob.cpp b/src/Storages/StorageAzureBlob.cpp index 306a5eac8e59..8f18426c8513 100644 --- a/src/Storages/StorageAzureBlob.cpp +++ b/src/Storages/StorageAzureBlob.cpp @@ -1190,7 +1190,7 @@ StorageAzureBlobSource::StorageAzureBlobSource( , file_iterator(file_iterator_) , need_only_count(need_only_count_) , create_reader_pool(CurrentMetrics::ObjectStorageAzureThreads, CurrentMetrics::ObjectStorageAzureThreadsActive, CurrentMetrics::ObjectStorageAzureThreadsScheduled, 1) - , create_reader_scheduler(threadPoolCallbackRunner(create_reader_pool, "AzureReader")) + , create_reader_scheduler(threadPoolCallbackRunnerUnsafe(create_reader_pool, "AzureReader")) { reader = createReader(); if (reader) diff --git 
a/src/Storages/StorageAzureBlob.h b/src/Storages/StorageAzureBlob.h index 3f1ba33f6366..5b0d8802657d 100644 --- a/src/Storages/StorageAzureBlob.h +++ b/src/Storages/StorageAzureBlob.h @@ -330,7 +330,7 @@ class StorageAzureBlobSource : public ISource, WithContext LoggerPtr log = getLogger("StorageAzureBlobSource"); ThreadPool create_reader_pool; - ThreadPoolCallbackRunner create_reader_scheduler; + ThreadPoolCallbackRunnerUnsafe create_reader_scheduler; std::future reader_future; /// Recreate ReadBuffer and Pipeline for each file. diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp index dbcd47c57451..5a2815a30f37 100644 --- a/src/Storages/StorageBuffer.cpp +++ b/src/Storages/StorageBuffer.cpp @@ -830,23 +830,22 @@ bool StorageBuffer::checkThresholdsImpl(bool direct, size_t rows, size_t bytes, void StorageBuffer::flushAllBuffers(bool check_thresholds) { + ThreadPoolCallbackRunnerLocal runner(*flush_pool, "BufferFlush"); for (auto & buf : buffers) { if (flush_pool) { - scheduleFromThreadPool([&] () + runner([&]() { flushBuffer(buf, check_thresholds, false); - }, *flush_pool, "BufferFlush"); + }); } else { flushBuffer(buf, check_thresholds, false); } } - - if (flush_pool) - flush_pool->wait(); + runner.waitForAllToFinishAndRethrowFirstError(); } diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 12c2ad331ad0..69d3cf3ad3b4 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -283,17 +283,6 @@ size_t getClusterQueriedNodes(const Settings & settings, const ClusterPtr & clus return (num_remote_shards + num_local_shards) * settings.max_parallel_replicas; } -template -void waitFutures(F & futures) -{ - for (auto & future : futures) - future.wait(); - /// Make sure there is no exception. - for (auto & future : futures) - future.get(); - futures.clear(); -} - } /// For destruction of std::unique_ptr of type that is incomplete in class definition. @@ -1296,31 +1285,27 @@ void StorageDistributed::initializeFromDisk() /// Make initialization for large number of disks parallel. 
ThreadPool pool(CurrentMetrics::StorageDistributedThreads, CurrentMetrics::StorageDistributedThreadsActive, CurrentMetrics::StorageDistributedThreadsScheduled, disks.size()); - std::vector> futures; + ThreadPoolCallbackRunnerLocal runner(pool, "DistInit"); for (const DiskPtr & disk : disks) { - auto future = scheduleFromThreadPool([this, disk_to_init = disk] + runner([this, disk_to_init = disk] { initializeDirectoryQueuesForDisk(disk_to_init); - }, pool, "DistInit"); - futures.push_back(std::move(future)); + }); } - waitFutures(futures); - pool.wait(); + runner.waitForAllToFinishAndRethrowFirstError(); const auto & paths = getDataPaths(); std::vector last_increment(paths.size()); for (size_t i = 0; i < paths.size(); ++i) { - auto future = scheduleFromThreadPool([&paths, &last_increment, i] + runner([&paths, &last_increment, i] { last_increment[i] = getMaximumFileNumber(paths[i]); - }, pool, "DistInit"); - futures.push_back(std::move(future)); + }); } - waitFutures(futures); - pool.wait(); + runner.waitForAllToFinishAndRethrowFirstError(); for (const auto inc : last_increment) { @@ -1760,19 +1745,17 @@ void StorageDistributed::flushClusterNodesAllDataImpl(ContextPtr local_context, Stopwatch watch; ThreadPool pool(CurrentMetrics::StorageDistributedThreads, CurrentMetrics::StorageDistributedThreadsActive, CurrentMetrics::StorageDistributedThreadsScheduled, directory_queues.size()); - std::vector> futures; + ThreadPoolCallbackRunnerLocal runner(pool, "DistFlush"); for (const auto & node : directory_queues) { - auto future = scheduleFromThreadPool([node_to_flush = node, &settings_changes] + runner([node_to_flush = node, &settings_changes] { node_to_flush->flushAllData(settings_changes); - }, pool, "DistFlush"); - futures.push_back(std::move(future)); + }); } - waitFutures(futures); - pool.wait(); + runner.waitForAllToFinishAndRethrowFirstError(); LOG_INFO(log, "Pending INSERT blocks flushed, took {} ms.", watch.elapsedMilliseconds()); } diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index 2d3aef312bf9..9e49ce6f2dee 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -204,7 +204,7 @@ class StorageS3Source::DisclosedGlobIterator::Impl : WithContext , read_keys(read_keys_) , request_settings(request_settings_) , list_objects_pool(CurrentMetrics::StorageS3Threads, CurrentMetrics::StorageS3ThreadsActive, CurrentMetrics::StorageS3ThreadsScheduled, 1) - , list_objects_scheduler(threadPoolCallbackRunner(list_objects_pool, "ListObjects")) + , list_objects_scheduler(threadPoolCallbackRunnerUnsafe(list_objects_pool, "ListObjects")) , file_progress_callback(file_progress_callback_) { if (globbed_uri.bucket.find_first_of("*?{") != globbed_uri.bucket.npos) @@ -413,7 +413,7 @@ class StorageS3Source::DisclosedGlobIterator::Impl : WithContext S3Settings::RequestSettings request_settings; ThreadPool list_objects_pool; - ThreadPoolCallbackRunner list_objects_scheduler; + ThreadPoolCallbackRunnerUnsafe list_objects_scheduler; std::future outcome_future; std::function file_progress_callback; }; @@ -527,7 +527,7 @@ StorageS3Source::ReadTaskIterator::ReadTaskIterator( : callback(callback_) { ThreadPool pool(CurrentMetrics::StorageS3Threads, CurrentMetrics::StorageS3ThreadsActive, CurrentMetrics::StorageS3ThreadsScheduled, max_threads_count); - auto pool_scheduler = threadPoolCallbackRunner(pool, "S3ReadTaskItr"); + auto pool_scheduler = threadPoolCallbackRunnerUnsafe(pool, "S3ReadTaskItr"); std::vector> keys; keys.reserve(max_threads_count); @@ -598,7 +598,7 @@ 
StorageS3Source::StorageS3Source( , max_parsing_threads(max_parsing_threads_) , need_only_count(need_only_count_) , create_reader_pool(CurrentMetrics::StorageS3Threads, CurrentMetrics::StorageS3ThreadsActive, CurrentMetrics::StorageS3ThreadsScheduled, 1) - , create_reader_scheduler(threadPoolCallbackRunner(create_reader_pool, "CreateS3Reader")) + , create_reader_scheduler(threadPoolCallbackRunnerUnsafe(create_reader_pool, "CreateS3Reader")) { } @@ -875,7 +875,7 @@ class StorageS3Sink : public SinkToStorage configuration_.request_settings, std::move(blob_log), std::nullopt, - threadPoolCallbackRunner(getIOThreadPool().get(), "S3ParallelWrite"), + threadPoolCallbackRunnerUnsafe(getIOThreadPool().get(), "S3ParallelWrite"), context->getWriteSettings()), compression_method, static_cast(settings.output_format_compression_level), diff --git a/src/Storages/StorageS3.h b/src/Storages/StorageS3.h index 19cbfaa6f08d..c8ab28fb20ed 100644 --- a/src/Storages/StorageS3.h +++ b/src/Storages/StorageS3.h @@ -241,7 +241,7 @@ class StorageS3Source : public SourceWithKeyCondition, WithContext LoggerPtr log = getLogger("StorageS3Source"); ThreadPool create_reader_pool; - ThreadPoolCallbackRunner create_reader_scheduler; + ThreadPoolCallbackRunnerUnsafe create_reader_scheduler; std::future reader_future; std::atomic initialized{false}; diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index cc46cc8f8dcf..511ccbdef781 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -5,7 +5,6 @@ #include #include -#include #include #include #include diff --git a/src/Storages/System/StorageSystemDetachedParts.cpp b/src/Storages/System/StorageSystemDetachedParts.cpp index ebcd8d63a52a..31d566ef8b6a 100644 --- a/src/Storages/System/StorageSystemDetachedParts.cpp +++ b/src/Storages/System/StorageSystemDetachedParts.cpp @@ -162,19 +162,9 @@ class DetachedPartsSource : public ISource worker_state.tasks.push_back({part.disk, relative_path, &parts_sizes.at(p_id - begin)}); } - std::vector> futures; - SCOPE_EXIT_SAFE({ - /// Cancel all workers - worker_state.next_task.store(worker_state.tasks.size()); - /// Exceptions are not propagated - for (auto & future : futures) - if (future.valid()) - future.wait(); - futures.clear(); - }); - auto max_thread_to_run = std::max(size_t(1), std::min(support_threads, worker_state.tasks.size() / 10)); - futures.reserve(max_thread_to_run); + + ThreadPoolCallbackRunnerLocal runner(getIOThreadPool().get(), "DP_BytesOnDisk"); for (size_t i = 0; i < max_thread_to_run; ++i) { @@ -191,16 +181,10 @@ class DetachedPartsSource : public ISource } }; - futures.push_back( - scheduleFromThreadPool( - std::move(worker), - getIOThreadPool().get(), - "DP_BytesOnDisk")); + runner(std::move(worker)); } - /// Exceptions are propagated - for (auto & future : futures) - future.get(); + runner.waitForAllToFinishAndRethrowFirstError(); } void generateRows(MutableColumns & new_columns, size_t max_rows) diff --git a/tests/01947_multiple_pipe_read_sample_data_ZbApel.tsv b/tests/01947_multiple_pipe_read_sample_data_ZbApel.tsv deleted file mode 100644 index ab35653b8ddd..000000000000 --- a/tests/01947_multiple_pipe_read_sample_data_ZbApel.tsv +++ /dev/null @@ -1,7 +0,0 @@ -0 BBB -1 BBB -2 BBB -3 BBB -4 AAA -5 BBB -6 AAA diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 9cfd087bd672..d1132a26ea8c 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -908,6 +908,7 @@ class MergeTreeSettingsRandomizer: ), "cache_populated_by_fetch": lambda: random.randint(0, 
1), "concurrent_part_removal_threshold": threshold_generator(0.2, 0.3, 0, 100), + "old_parts_lifetime": threshold_generator(0.2, 0.3, 0, 8 * 60), } @staticmethod From 578c4cfb9deeb023075695804f01ee100106115b Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Fri, 5 Apr 2024 00:25:18 +0200 Subject: [PATCH 12/90] fix --- src/Common/threadPoolCallbackRunner.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/Common/threadPoolCallbackRunner.h b/src/Common/threadPoolCallbackRunner.h index cec07bbd8922..ef22f9038d80 100644 --- a/src/Common/threadPoolCallbackRunner.h +++ b/src/Common/threadPoolCallbackRunner.h @@ -9,6 +9,11 @@ namespace DB { +namespace ErrorCodes +{ +extern const int LOGICAL_ERROR; +} + /// High-order function to run callbacks (functions with 'void()' signature) somewhere asynchronously. template > using ThreadPoolCallbackRunnerUnsafe = std::function(Callback &&, Priority)>; @@ -172,7 +177,8 @@ class ThreadPoolCallbackRunnerLocal /// It can be cancelled only when waiting in dtor if (state == CANCELLED) continue; - task->future.wait(); + if (task->future.valid()) + task->future.wait(); } } From 53a3ad609aa60402b26547b295a12768aec9416c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 7 Apr 2024 21:32:40 +0200 Subject: [PATCH 13/90] Fix trash in documentation --- docs/en/operations/system-tables/asynchronous_metric_log.md | 2 +- src/Common/AsynchronousMetrics.cpp | 2 +- src/Common/AsynchronousMetrics.h | 4 ++-- src/Interpreters/SystemLog.cpp | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/en/operations/system-tables/asynchronous_metric_log.md b/docs/en/operations/system-tables/asynchronous_metric_log.md index e63ab65ba074..e0d3254fe736 100644 --- a/docs/en/operations/system-tables/asynchronous_metric_log.md +++ b/docs/en/operations/system-tables/asynchronous_metric_log.md @@ -3,7 +3,7 @@ slug: /en/operations/system-tables/asynchronous_metric_log --- # asynchronous_metric_log -Contains the historical values for `system.asynchronous_metrics`, which are saved once per minute. Enabled by default. +Contains the historical values for `system.asynchronous_metrics`, which are saved once per time interval (one second by default). Enabled by default. Columns: diff --git a/src/Common/AsynchronousMetrics.cpp b/src/Common/AsynchronousMetrics.cpp index ab54b180fbfc..6b26f65deab1 100644 --- a/src/Common/AsynchronousMetrics.cpp +++ b/src/Common/AsynchronousMetrics.cpp @@ -56,7 +56,7 @@ static std::unique_ptr openFileIfExists(const std::stri AsynchronousMetrics::AsynchronousMetrics( - int update_period_seconds, + unsigned update_period_seconds, const ProtocolServerMetricsFunc & protocol_server_metrics_func_) : update_period(update_period_seconds) , log(getLogger("AsynchronousMetrics")) diff --git a/src/Common/AsynchronousMetrics.h b/src/Common/AsynchronousMetrics.h index 4b3d28e80c54..b62529a08e71 100644 --- a/src/Common/AsynchronousMetrics.h +++ b/src/Common/AsynchronousMetrics.h @@ -44,7 +44,7 @@ struct ProtocolServerMetrics size_t current_threads; }; -/** Periodically (by default, each minute, starting at 30 seconds offset) +/** Periodically (by default, each second) * calculates and updates some metrics, * that are not updated automatically (so, need to be asynchronously calculated). 
* @@ -64,7 +64,7 @@ class AsynchronousMetrics using ProtocolServerMetricsFunc = std::function()>; AsynchronousMetrics( - int update_period_seconds, + unsigned update_period_seconds, const ProtocolServerMetricsFunc & protocol_server_metrics_func_); virtual ~AsynchronousMetrics(); diff --git a/src/Interpreters/SystemLog.cpp b/src/Interpreters/SystemLog.cpp index db73fe038c04..3af8761ff8eb 100644 --- a/src/Interpreters/SystemLog.cpp +++ b/src/Interpreters/SystemLog.cpp @@ -291,7 +291,7 @@ SystemLogs::SystemLogs(ContextPtr global_context, const Poco::Util::AbstractConf global_context, "system", "filesystem_read_prefetches_log", config, "filesystem_read_prefetches_log", "Contains a history of all prefetches done during reading from MergeTables backed by a remote filesystem."); asynchronous_metric_log = createSystemLog( global_context, "system", "asynchronous_metric_log", config, - "asynchronous_metric_log", "Contains the historical values for system.asynchronous_metrics, which are saved once per minute."); + "asynchronous_metric_log", "Contains the historical values for system.asynchronous_metrics, once per time interval (one second by default)."); opentelemetry_span_log = createSystemLog( global_context, "system", "opentelemetry_span_log", config, "opentelemetry_span_log", "Contains information about trace spans for executed queries."); From b6aff78bf55d98b9b511dd80c7abd8396c9c5a4a Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Mon, 8 Apr 2024 16:13:46 +0200 Subject: [PATCH 14/90] fix --- src/Storages/StorageBuffer.cpp | 11 +++++++---- tests/clickhouse-test | 2 +- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp index 5a2815a30f37..97a459a5e72e 100644 --- a/src/Storages/StorageBuffer.cpp +++ b/src/Storages/StorageBuffer.cpp @@ -830,12 +830,14 @@ bool StorageBuffer::checkThresholdsImpl(bool direct, size_t rows, size_t bytes, void StorageBuffer::flushAllBuffers(bool check_thresholds) { - ThreadPoolCallbackRunnerLocal runner(*flush_pool, "BufferFlush"); + std::optional> runner; + if (flush_pool) + runner.emplace(*flush_pool, "BufferFlush"); for (auto & buf : buffers) { - if (flush_pool) + if (runner) { - runner([&]() + (*runner)([&]() { flushBuffer(buf, check_thresholds, false); }); @@ -845,7 +847,8 @@ void StorageBuffer::flushAllBuffers(bool check_thresholds) flushBuffer(buf, check_thresholds, false); } } - runner.waitForAllToFinishAndRethrowFirstError(); + if (runner) + runner->waitForAllToFinishAndRethrowFirstError(); } diff --git a/tests/clickhouse-test b/tests/clickhouse-test index d1132a26ea8c..eee6eb0c18dd 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -908,7 +908,7 @@ class MergeTreeSettingsRandomizer: ), "cache_populated_by_fetch": lambda: random.randint(0, 1), "concurrent_part_removal_threshold": threshold_generator(0.2, 0.3, 0, 100), - "old_parts_lifetime": threshold_generator(0.2, 0.3, 0, 8 * 60), + "old_parts_lifetime": threshold_generator(0.2, 0.3, 30, 8 * 60), } @staticmethod From 33c0ac5cc6e1befca39f54a2fccfaf8f6eb8ed0c Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Tue, 9 Apr 2024 15:50:15 +0200 Subject: [PATCH 15/90] Fix backup restore path for AzureBlobStorage --- src/Backups/BackupIO_AzureBlobStorage.cpp | 89 +++---------------- .../test.py | 71 +++++++++++++-- 2 files changed, 76 insertions(+), 84 deletions(-) diff --git a/src/Backups/BackupIO_AzureBlobStorage.cpp b/src/Backups/BackupIO_AzureBlobStorage.cpp index b9b208e321cf..2eb5233bd1b3 100644 --- 
a/src/Backups/BackupIO_AzureBlobStorage.cpp +++ b/src/Backups/BackupIO_AzureBlobStorage.cpp @@ -50,44 +50,20 @@ BackupReaderAzureBlobStorage::~BackupReaderAzureBlobStorage() = default; bool BackupReaderAzureBlobStorage::fileExists(const String & file_name) { - String key; - if (startsWith(file_name, ".")) - { - key= configuration.blob_path + file_name; - } - else - { - key = file_name; - } + String key = fs::path(configuration.blob_path) / file_name; return object_storage->exists(StoredObject(key)); } UInt64 BackupReaderAzureBlobStorage::getFileSize(const String & file_name) { - String key; - if (startsWith(file_name, ".")) - { - key= configuration.blob_path + file_name; - } - else - { - key = file_name; - } + String key = fs::path(configuration.blob_path) / file_name; ObjectMetadata object_metadata = object_storage->getObjectMetadata(key); return object_metadata.size_bytes; } std::unique_ptr BackupReaderAzureBlobStorage::readFile(const String & file_name) { - String key; - if (startsWith(file_name, ".")) - { - key= configuration.blob_path + file_name; - } - else - { - key = file_name; - } + String key = fs::path(configuration.blob_path) / file_name; return std::make_unique( client, key, read_settings, settings->max_single_read_retries, settings->max_single_download_retries); @@ -194,7 +170,7 @@ void BackupWriterAzureBlobStorage::copyFile(const String & destination, const St client, client, configuration.container, - fs::path(source), + fs::path(configuration.blob_path)/ source, 0, size, /* dest_container */ configuration.container, @@ -207,7 +183,7 @@ void BackupWriterAzureBlobStorage::copyFile(const String & destination, const St void BackupWriterAzureBlobStorage::copyDataToFile(const String & path_in_backup, const CreateReadBufferFunction & create_read_buffer, UInt64 start_pos, UInt64 length) { - copyDataToAzureBlobStorageFile(create_read_buffer, start_pos, length, client, configuration.container, path_in_backup, settings, + copyDataToAzureBlobStorageFile(create_read_buffer, start_pos, length, client, configuration.container, fs::path(configuration.blob_path) / path_in_backup, settings, threadPoolCallbackRunner(getBackupsIOThreadPool().get(), "BackupWRAzure")); } @@ -215,29 +191,13 @@ BackupWriterAzureBlobStorage::~BackupWriterAzureBlobStorage() = default; bool BackupWriterAzureBlobStorage::fileExists(const String & file_name) { - String key; - if (startsWith(file_name, ".")) - { - key= configuration.blob_path + file_name; - } - else - { - key = file_name; - } + String key = fs::path(configuration.blob_path) / file_name; return object_storage->exists(StoredObject(key)); } UInt64 BackupWriterAzureBlobStorage::getFileSize(const String & file_name) { - String key; - if (startsWith(file_name, ".")) - { - key= configuration.blob_path + file_name; - } - else - { - key = file_name; - } + String key = fs::path(configuration.blob_path) / file_name; RelativePathsWithMetadata children; object_storage->listObjects(key,children,/*max_keys*/0); if (children.empty()) @@ -247,16 +207,7 @@ UInt64 BackupWriterAzureBlobStorage::getFileSize(const String & file_name) std::unique_ptr BackupWriterAzureBlobStorage::readFile(const String & file_name, size_t /*expected_file_size*/) { - String key; - if (startsWith(file_name, ".")) - { - key= configuration.blob_path + file_name; - } - else - { - key = file_name; - } - + String key = fs::path(configuration.blob_path) / file_name; return std::make_unique( client, key, read_settings, settings->max_single_read_retries, settings->max_single_download_retries); @@ 
-264,15 +215,7 @@ std::unique_ptr BackupWriterAzureBlobStorage::readFile(const String std::unique_ptr BackupWriterAzureBlobStorage::writeFile(const String & file_name) { - String key; - if (startsWith(file_name, ".")) - { - key= configuration.blob_path + file_name; - } - else - { - key = file_name; - } + String key = fs::path(configuration.blob_path) / file_name; return std::make_unique( client, key, @@ -283,15 +226,7 @@ std::unique_ptr BackupWriterAzureBlobStorage::writeFile(const Strin void BackupWriterAzureBlobStorage::removeFile(const String & file_name) { - String key; - if (startsWith(file_name, ".")) - { - key= configuration.blob_path + file_name; - } - else - { - key = file_name; - } + String key = fs::path(configuration.blob_path) / file_name; StoredObject object(key); object_storage->removeObjectIfExists(object); } @@ -300,7 +235,7 @@ void BackupWriterAzureBlobStorage::removeFiles(const Strings & file_names) { StoredObjects objects; for (const auto & file_name : file_names) - objects.emplace_back(file_name); + objects.emplace_back(fs::path(configuration.blob_path) / file_name); object_storage->removeObjectsIfExist(objects); @@ -310,7 +245,7 @@ void BackupWriterAzureBlobStorage::removeFilesBatch(const Strings & file_names) { StoredObjects objects; for (const auto & file_name : file_names) - objects.emplace_back(file_name); + objects.emplace_back(fs::path(configuration.blob_path) / file_name); object_storage->removeObjectsIfExist(objects); } diff --git a/tests/integration/test_backup_restore_azure_blob_storage/test.py b/tests/integration/test_backup_restore_azure_blob_storage/test.py index a7c7b4395604..09a7f12bea16 100644 --- a/tests/integration/test_backup_restore_azure_blob_storage/test.py +++ b/tests/integration/test_backup_restore_azure_blob_storage/test.py @@ -41,6 +41,38 @@ def generate_cluster_def(port): Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw== + + + + azure_blob_storage + http://azurite1:{port}/devstoreaccount1 + cont + false + devstoreaccount1 + Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw== + 100000 + 100000 + 10 + 10 + + + local + / + + + + + +
+                        <disk>blob_storage_disk</disk>
+                    </main>
+                    <external>
+                        <disk>hdd</disk>
+                    </external>
+                </volumes>
+            </blob_storage_policy>
+        </policies>
+    </storage_configuration>
""" ) @@ -169,12 +201,12 @@ def test_backup_restore(cluster): print(get_azure_file_content("test_simple_write_c.csv", port)) assert get_azure_file_content("test_simple_write_c.csv", port) == '1,"a"\n' - backup_destination = f"AzureBlobStorage('{cluster.env_variables['AZURITE_CONNECTION_STRING']}', 'cont', 'test_simple_write_c_backup.csv')" + backup_destination = f"AzureBlobStorage('{cluster.env_variables['AZURITE_CONNECTION_STRING']}', 'cont', 'test_simple_write_c_backup')" azure_query( node, f"BACKUP TABLE test_simple_write_connection_string TO {backup_destination}", ) - print(get_azure_file_content("test_simple_write_c_backup.csv.backup", port)) + print(get_azure_file_content("test_simple_write_c_backup/.backup", port)) azure_query( node, f"RESTORE TABLE test_simple_write_connection_string AS test_simple_write_connection_string_restored FROM {backup_destination};", @@ -195,7 +227,7 @@ def test_backup_restore_diff_container(cluster): azure_query( node, f"INSERT INTO test_simple_write_connection_string_cont1 VALUES (1, 'a')" ) - backup_destination = f"AzureBlobStorage('{cluster.env_variables['AZURITE_CONNECTION_STRING']}', 'cont1', 'test_simple_write_c_backup_cont1.csv')" + backup_destination = f"AzureBlobStorage('{cluster.env_variables['AZURITE_CONNECTION_STRING']}', 'cont1', 'test_simple_write_c_backup_cont1')" azure_query( node, f"BACKUP TABLE test_simple_write_connection_string_cont1 TO {backup_destination}", @@ -224,13 +256,13 @@ def test_backup_restore_with_named_collection_azure_conf1(cluster): assert get_azure_file_content("test_simple_write.csv", port) == '1,"a"\n' backup_destination = ( - f"AzureBlobStorage(azure_conf1, 'test_simple_write_nc_backup.csv')" + f"AzureBlobStorage(azure_conf1, 'test_simple_write_nc_backup')" ) azure_query( node, f"BACKUP TABLE test_write_connection_string TO {backup_destination}", ) - print(get_azure_file_content("test_simple_write_nc_backup.csv.backup", port)) + print(get_azure_file_content("test_simple_write_nc_backup/.backup", port)) azure_query( node, f"RESTORE TABLE test_write_connection_string AS test_write_connection_string_restored FROM {backup_destination};", @@ -253,13 +285,13 @@ def test_backup_restore_with_named_collection_azure_conf2(cluster): assert get_azure_file_content("test_simple_write_2.csv", port) == '1,"a"\n' backup_destination = ( - f"AzureBlobStorage(azure_conf2, 'test_simple_write_nc_backup_2.csv')" + f"AzureBlobStorage(azure_conf2, 'test_simple_write_nc_backup_2')" ) azure_query( node, f"BACKUP TABLE test_write_connection_string_2 TO {backup_destination}", ) - print(get_azure_file_content("test_simple_write_nc_backup_2.csv.backup", port)) + print(get_azure_file_content("test_simple_write_nc_backup_2/.backup", port)) azure_query( node, f"RESTORE TABLE test_write_connection_string_2 AS test_write_connection_string_restored_2 FROM {backup_destination};", @@ -268,3 +300,28 @@ def test_backup_restore_with_named_collection_azure_conf2(cluster): azure_query(node, f"SELECT * from test_write_connection_string_restored_2") == "1\ta\n" ) + +def test_backup_restore_on_merge_tree(cluster): + node = cluster.instances["node"] + port = cluster.env_variables["AZURITE_PORT"] + azure_query( + node, + f"CREATE TABLE test_simple_merge_tree(key UInt64, data String) Engine = MergeTree() ORDER BY tuple() SETTINGS storage_policy='blob_storage_policy'", + ) + azure_query( + node, f"INSERT INTO test_simple_merge_tree VALUES (1, 'a')" + ) + + backup_destination = f"AzureBlobStorage('{cluster.env_variables['AZURITE_CONNECTION_STRING']}', 'cont', 
'test_simple_merge_tree_backup')" + azure_query( + node, + f"BACKUP TABLE test_simple_merge_tree TO {backup_destination}", + ) + azure_query( + node, + f"RESTORE TABLE test_simple_merge_tree AS test_simple_merge_tree_restored FROM {backup_destination};", + ) + assert ( + azure_query(node, f"SELECT * from test_simple_merge_tree_restored") + == "1\ta\n" + ) From 950d1dfb86fc24250f4a172091861ded2a95155d Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 9 Apr 2024 14:02:35 +0000 Subject: [PATCH 16/90] Automatic style fix --- .../test_backup_restore_azure_blob_storage/test.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/tests/integration/test_backup_restore_azure_blob_storage/test.py b/tests/integration/test_backup_restore_azure_blob_storage/test.py index 09a7f12bea16..b3e8b65b5dcf 100644 --- a/tests/integration/test_backup_restore_azure_blob_storage/test.py +++ b/tests/integration/test_backup_restore_azure_blob_storage/test.py @@ -255,9 +255,7 @@ def test_backup_restore_with_named_collection_azure_conf1(cluster): print(get_azure_file_content("test_simple_write.csv", port)) assert get_azure_file_content("test_simple_write.csv", port) == '1,"a"\n' - backup_destination = ( - f"AzureBlobStorage(azure_conf1, 'test_simple_write_nc_backup')" - ) + backup_destination = f"AzureBlobStorage(azure_conf1, 'test_simple_write_nc_backup')" azure_query( node, f"BACKUP TABLE test_write_connection_string TO {backup_destination}", @@ -301,6 +299,7 @@ def test_backup_restore_with_named_collection_azure_conf2(cluster): == "1\ta\n" ) + def test_backup_restore_on_merge_tree(cluster): node = cluster.instances["node"] port = cluster.env_variables["AZURITE_PORT"] @@ -308,9 +307,7 @@ def test_backup_restore_on_merge_tree(cluster): node, f"CREATE TABLE test_simple_merge_tree(key UInt64, data String) Engine = MergeTree() ORDER BY tuple() SETTINGS storage_policy='blob_storage_policy'", ) - azure_query( - node, f"INSERT INTO test_simple_merge_tree VALUES (1, 'a')" - ) + azure_query(node, f"INSERT INTO test_simple_merge_tree VALUES (1, 'a')") backup_destination = f"AzureBlobStorage('{cluster.env_variables['AZURITE_CONNECTION_STRING']}', 'cont', 'test_simple_merge_tree_backup')" azure_query( @@ -322,6 +319,5 @@ def test_backup_restore_on_merge_tree(cluster): f"RESTORE TABLE test_simple_merge_tree AS test_simple_merge_tree_restored FROM {backup_destination};", ) assert ( - azure_query(node, f"SELECT * from test_simple_merge_tree_restored") - == "1\ta\n" + azure_query(node, f"SELECT * from test_simple_merge_tree_restored") == "1\ta\n" ) From c5eda195750246669e833f8a9640b7afdcb7397e Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Tue, 9 Apr 2024 14:33:06 +0000 Subject: [PATCH 17/90] impl --- .../Transforms/SquashingChunksTransform.cpp | 14 ++++++++++++++ .../00182_simple_squashing_transform_bug.reference | 2 ++ .../00182_simple_squashing_transform_bug.sql | 6 ++++++ 3 files changed, 22 insertions(+) create mode 100644 tests/queries/1_stateful/00182_simple_squashing_transform_bug.reference create mode 100644 tests/queries/1_stateful/00182_simple_squashing_transform_bug.sql diff --git a/src/Processors/Transforms/SquashingChunksTransform.cpp b/src/Processors/Transforms/SquashingChunksTransform.cpp index 7de9538e435c..62c86a274535 100644 --- a/src/Processors/Transforms/SquashingChunksTransform.cpp +++ b/src/Processors/Transforms/SquashingChunksTransform.cpp @@ -64,8 +64,22 @@ void SimpleSquashingChunksTransform::transform(Chunk & chunk) } else { + if (chunk.hasRows()) + throw 
Exception(ErrorCodes::LOGICAL_ERROR, "Chunk expected to be empty, otherwise it will be lost"); + auto block = squashing.add({}); chunk.setColumns(block.getColumns(), block.rows()); + + /// ISimpleTransform keeps output chunk (result of transform() execution) for some time and push it in the output port within subsequent prepare() call. + /// Because of our custom prepare() implementation we have to take care of both places where data could be buffered: `output_data` and `squashing`. + if (output_data.chunk.hasRows()) + { + auto res = std::move(output_data.chunk); + output_data.chunk.clear(); + if (chunk.hasRows()) + res.append(chunk); + chunk = std::move(res); + } } } diff --git a/tests/queries/1_stateful/00182_simple_squashing_transform_bug.reference b/tests/queries/1_stateful/00182_simple_squashing_transform_bug.reference new file mode 100644 index 000000000000..9c49da1ab8ad --- /dev/null +++ b/tests/queries/1_stateful/00182_simple_squashing_transform_bug.reference @@ -0,0 +1,2 @@ +17747796 +17747796 diff --git a/tests/queries/1_stateful/00182_simple_squashing_transform_bug.sql b/tests/queries/1_stateful/00182_simple_squashing_transform_bug.sql new file mode 100644 index 000000000000..e73de4b33fb9 --- /dev/null +++ b/tests/queries/1_stateful/00182_simple_squashing_transform_bug.sql @@ -0,0 +1,6 @@ +-- Tags: global + +set allow_prefetched_read_pool_for_remote_filesystem=0, merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability=0, max_threads=2, max_block_size=65387; + +SELECT sum(UserID GLOBAL IN (SELECT UserID FROM remote('127.0.0.{1,2}', test.hits))) FROM remote('127.0.0.{1,2}', test.hits); +SELECT sum(UserID GLOBAL IN (SELECT UserID FROM test.hits)) FROM remote('127.0.0.{1,2}', test.hits); From eb9ed4161c7ea732611d579e2c99176e117244e4 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Tue, 9 Apr 2024 15:41:15 +0000 Subject: [PATCH 18/90] fix style --- src/Processors/Transforms/SquashingChunksTransform.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/Processors/Transforms/SquashingChunksTransform.cpp b/src/Processors/Transforms/SquashingChunksTransform.cpp index 62c86a274535..67cf22c7d4de 100644 --- a/src/Processors/Transforms/SquashingChunksTransform.cpp +++ b/src/Processors/Transforms/SquashingChunksTransform.cpp @@ -3,6 +3,11 @@ namespace DB { +namespace ErrorCodes +{ +extern const int LOGICAL_ERROR; +} + SquashingChunksTransform::SquashingChunksTransform( const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes) : ExceptionKeepingTransform(header, header, false) From 9d4f1d890eea467706b0272e987a5896f2c795d1 Mon Sep 17 00:00:00 2001 From: Joshua Hildred Date: Tue, 2 Apr 2024 05:24:16 -0700 Subject: [PATCH 19/90] Add an optimization that removes redundant equality checks on boolean functions. 
This fixes a bug in which the primary index is not used for queries like SELECT * FROM WHERE in () = 1 --- .../Passes/LogicalExpressionOptimizerPass.cpp | 76 +++++++++++++++++ .../Passes/LogicalExpressionOptimizerPass.h | 12 +++ .../03032_redundant_equals.reference | 23 +++++ .../0_stateless/03032_redundant_equals.sql | 83 +++++++++++++++++++ 4 files changed, 194 insertions(+) create mode 100644 tests/queries/0_stateless/03032_redundant_equals.reference create mode 100644 tests/queries/0_stateless/03032_redundant_equals.sql diff --git a/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp b/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp index 5f08bb9035e6..546959c4d9c0 100644 --- a/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp +++ b/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp @@ -19,6 +19,19 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } +using namespace std::literals; +static constexpr std::array boolean_functions{ + "equals"sv, "notEquals"sv, "less"sv, "greaterOrEquals"sv, "greater"sv, "lessOrEquals"sv, "in"sv, "notIn"sv, + "globalIn"sv, "globalNotIn"sv, "nullIn"sv, "notNullIn"sv, "globalNullIn"sv, "globalNullNotIn"sv, "isNull"sv, "isNotNull"sv, + "like"sv, "notLike"sv, "ilike"sv, "notILike"sv, "empty"sv, "notEmpty"sv, "not"sv, "and"sv, + "or"sv}; + +static bool isBooleanFunction(const String & func_name) +{ + return std::any_of( + boolean_functions.begin(), boolean_functions.end(), [&](const auto boolean_func) { return func_name == boolean_func; }); +} + /// Visitor that optimizes logical expressions _only_ in JOIN ON section class JoinOnLogicalExpressionOptimizerVisitor : public InDepthQueryTreeVisitorWithContext { @@ -253,6 +266,12 @@ class LogicalExpressionOptimizerVisitor : public InDepthQueryTreeVisitorWithCont tryOptimizeAndEqualsNotEqualsChain(node); return; } + + if (function_node->getFunctionName() == "equals") + { + tryOptimizeOutRedundantEquals(node); + return; + } } private: @@ -552,6 +571,63 @@ class LogicalExpressionOptimizerVisitor : public InDepthQueryTreeVisitorWithCont function_node.getArguments().getNodes() = std::move(or_operands); function_node.resolveAsFunction(or_function_resolver); } + + void tryOptimizeOutRedundantEquals(QueryTreeNodePtr & node) + { + auto & function_node = node->as(); + assert(function_node.getFunctionName() == "equals"); + + bool lhs_const; + bool maybe_invert; + + const ConstantNode * constant; + const FunctionNode * child_function; + + const auto function_arguments = function_node.getArguments().getNodes(); + if (function_arguments.size() != 2) + return; + + const auto & lhs = function_arguments[0]; + const auto & rhs = function_arguments[1]; + + if ((constant = lhs->as())) + lhs_const = true; + else if ((constant = rhs->as())) + lhs_const = false; + else + return; + + UInt64 val; + if (!constant->getValue().tryGet(val)) + return; + + if (val == 1) + maybe_invert = false; + else if (val == 0) + maybe_invert = true; + else + return; + + if (lhs_const) + child_function = rhs->as(); + else + child_function = lhs->as(); + + if (!child_function || !isBooleanFunction(child_function->getFunctionName())) + return; + if (maybe_invert) + { + auto not_resolver = FunctionFactory::instance().get("not", getContext()); + const auto not_node = std::make_shared("not"); + auto & arguments = not_node->getArguments().getNodes(); + arguments.reserve(1); + arguments.push_back(lhs_const ? 
rhs : lhs); + not_node->resolveAsFunction(not_resolver->build(not_node->getArgumentColumns())); + node = not_node; + } + else + node = lhs_const ? rhs : lhs; + } }; void LogicalExpressionOptimizerPass::run(QueryTreeNodePtr & query_tree_node, ContextPtr context) diff --git a/src/Analyzer/Passes/LogicalExpressionOptimizerPass.h b/src/Analyzer/Passes/LogicalExpressionOptimizerPass.h index 7f8853232696..e3d9cf8a370a 100644 --- a/src/Analyzer/Passes/LogicalExpressionOptimizerPass.h +++ b/src/Analyzer/Passes/LogicalExpressionOptimizerPass.h @@ -96,6 +96,18 @@ namespace DB * * SELECT * FROM t1 JOIN t2 ON a <=> b * ------------------------------- + * + * 7. Remove redundant equality checks on boolean functions. + * - these requndant checks cause the primary index to not be used when if the query involves any primary key columns + * ------------------------------- + * SELECT * FROM t1 WHERE a IN (n) = 1 + * SELECT * FROM t1 WHERE a IN (n) = 0 + * + * will be transformed into + * + * SELECT * FROM t1 WHERE a IN (n) + * SELECT * FROM t1 WHERE NOT a IN (n) + * ------------------------------- */ class LogicalExpressionOptimizerPass final : public IQueryTreePass diff --git a/tests/queries/0_stateless/03032_redundant_equals.reference b/tests/queries/0_stateless/03032_redundant_equals.reference new file mode 100644 index 000000000000..d477c98b6048 --- /dev/null +++ b/tests/queries/0_stateless/03032_redundant_equals.reference @@ -0,0 +1,23 @@ +100 +100 +100 +100 +100 +100 +0 +0 +0 +1 +100 +101 +100 +101 +100 +101 +100 +1 +1 +1 +1 +1 +1 diff --git a/tests/queries/0_stateless/03032_redundant_equals.sql b/tests/queries/0_stateless/03032_redundant_equals.sql new file mode 100644 index 000000000000..afb9c8878661 --- /dev/null +++ b/tests/queries/0_stateless/03032_redundant_equals.sql @@ -0,0 +1,83 @@ +DROP TABLE IF EXISTS test_table; + +CREATE TABLE test_table +( + k UInt64, +) +ENGINE = MergeTree +ORDER BY k; + +INSERT INTO test_table SELECT number FROM numbers(10000000); + +SELECT * FROM test_table WHERE k in (100) = 1; +SELECT * FROM test_table WHERE k = (100) = 1; +SELECT * FROM test_table WHERE k not in (100) = 0; +SELECT * FROM test_table WHERE k != (100) = 0; +SELECT * FROM test_table WHERE 1 = (k = 100); +SELECT * FROM test_table WHERE 0 = (k not in (100)); +SELECT * FROM test_table WHERE k < 1 = 1; +SELECT * FROM test_table WHERE k >= 1 = 0; +SELECT * FROM test_table WHERE k > 1 = 0; +SELECT * FROM test_table WHERE ((k not in (101) = 0) OR (k in (100) = 1)) = 1; +SELECT * FROM test_table WHERE (NOT ((k not in (100) = 0) OR (k in (100) = 1))) = 0; +SELECT * FROM test_table WHERE (NOT ((k in (101) = 0) OR (k in (100) = 1))) = 1; +SELECT * FROM test_table WHERE ((k not in (101) = 0) OR (k in (100) = 1)) = 1; +SELECT * FROM test_table WHERE ((k not in (99) = 1) AND (k in (100) = 1)) = 1; + +SELECT count() +FROM +( + EXPLAIN PLAN indexes=1 + SELECT * FROM test_table WHERE k in (100) = 1 +) +WHERE + explain LIKE '%Granules: 1/%'; + +SELECT count() +FROM +( + EXPLAIN PLAN indexes=1 + SELECT * FROM test_table WHERE k >= 1 = 0 +) +WHERE + explain LIKE '%Granules: 1/%'; + +SELECT count() +FROM +( + EXPLAIN PLAN indexes=1 + SELECT * FROM test_table WHERE k not in (100) = 0 +) +WHERE + explain LIKE '%Granules: 1/%'; + +SELECT count() +FROM +( + EXPLAIN PLAN indexes=1 + SELECT * FROM test_table WHERE k > 1 = 0 +) +WHERE + explain LIKE '%Granules: 1/%'; + +SELECT count() +FROM +( + EXPLAIN PLAN indexes=1 + SELECT * FROM test_table WHERE (NOT ((k not in (100) = 0) OR (k in (100) = 1))) = 0 +) +WHERE + explain LIKE 
'%Granules: 1/%'; + + +SELECT count() +FROM +( + EXPLAIN PLAN indexes=1 + SELECT * FROM test_table WHERE (NOT ((k in (101) = 0) OR (k in (100) = 1))) = 1 +) +WHERE + explain LIKE '%Granules: 1/%'; + + +DROP TABLE test_table; From 626b7b12538fb1bb938620710718d9c6273fb44a Mon Sep 17 00:00:00 2001 From: Joshua Hildred Date: Tue, 9 Apr 2024 14:12:13 -0700 Subject: [PATCH 20/90] Fix style --- .../Passes/LogicalExpressionOptimizerPass.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/Analyzer/Passes/LogicalExpressionOptimizerPass.h b/src/Analyzer/Passes/LogicalExpressionOptimizerPass.h index e3d9cf8a370a..5f109993f3f1 100644 --- a/src/Analyzer/Passes/LogicalExpressionOptimizerPass.h +++ b/src/Analyzer/Passes/LogicalExpressionOptimizerPass.h @@ -98,15 +98,15 @@ namespace DB * ------------------------------- * * 7. Remove redundant equality checks on boolean functions. - * - these requndant checks cause the primary index to not be used when if the query involves any primary key columns + * - these requndant checks cause the primary index to not be used when if the query involves any primary key columns * ------------------------------- - * SELECT * FROM t1 WHERE a IN (n) = 1 + * SELECT * FROM t1 WHERE a IN (n) = 1 * SELECT * FROM t1 WHERE a IN (n) = 0 - * - * will be transformed into - * - * SELECT * FROM t1 WHERE a IN (n) - * SELECT * FROM t1 WHERE NOT a IN (n) + * + * will be transformed into + * + * SELECT * FROM t1 WHERE a IN (n) + * SELECT * FROM t1 WHERE NOT a IN (n) * ------------------------------- */ From dedc25fd341abacd6c9d8719aabb8feb1e824518 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Tue, 9 Apr 2024 23:43:40 +0200 Subject: [PATCH 21/90] fix --- src/Client/Connection.cpp | 16 ++++++++++------ src/Client/Connection.h | 3 ++- src/Client/IServerConnection.h | 2 +- src/Client/LocalConnection.h | 2 +- 4 files changed, 14 insertions(+), 9 deletions(-) diff --git a/src/Client/Connection.cpp b/src/Client/Connection.cpp index e5ac7ad66b9d..f791a77a261d 100644 --- a/src/Client/Connection.cpp +++ b/src/Client/Connection.cpp @@ -214,7 +214,7 @@ void Connection::connect(const ConnectionTimeouts & timeouts) DNSResolver::instance().removeHostFromCache(host); /// Add server address to exception. Exception will preserve stack trace. - e.addMessage("({})", getDescription()); + e.addMessage("({})", getDescription(/*with_extra*/ true)); throw; } catch (Poco::Net::NetException & e) @@ -225,7 +225,7 @@ void Connection::connect(const ConnectionTimeouts & timeouts) DNSResolver::instance().removeHostFromCache(host); /// Add server address to exception. Also Exception will remember new stack trace. It's a pity that more precise exception type is lost. 
- throw NetException(ErrorCodes::NETWORK_ERROR, "{} ({})", e.displayText(), getDescription()); + throw NetException(ErrorCodes::NETWORK_ERROR, "{} ({})", e.displayText(), getDescription(/*with_extra*/ true)); } catch (Poco::TimeoutException & e) { @@ -241,7 +241,7 @@ void Connection::connect(const ConnectionTimeouts & timeouts) ErrorCodes::SOCKET_TIMEOUT, "{} ({}, connection timeout {} ms)", e.displayText(), - getDescription(), + getDescription(/*with_extra*/ true), connection_timeout.totalMilliseconds()); } } @@ -473,8 +473,10 @@ const String & Connection::getDefaultDatabase() const return default_database; } -const String & Connection::getDescription() const +const String & Connection::getDescription(bool with_extra) const { + if (with_extra) + return full_description; return description; } @@ -1227,10 +1229,12 @@ void Connection::setDescription() description += ", " + ip_address; } + full_description = description; + if (const auto * socket_ = getSocket()) { - description += ", local address: "; - description += socket_->address().toString(); + full_description += ", local address: "; + full_description += socket_->address().toString(); } } diff --git a/src/Client/Connection.h b/src/Client/Connection.h index 5d0411027a1c..20c66caa7448 100644 --- a/src/Client/Connection.h +++ b/src/Client/Connection.h @@ -89,7 +89,7 @@ class Connection : public IServerConnection const String & getServerDisplayName(const ConnectionTimeouts & timeouts) override; /// For log and exception messages. - const String & getDescription() const override; + const String & getDescription(bool with_extra = false) const override; const String & getHost() const; UInt16 getPort() const; const String & getDefaultDatabase() const; @@ -187,6 +187,7 @@ class Connection : public IServerConnection /// For messages in log and in exceptions. String description; + String full_description; void setDescription(); /// Returns resolved address if it was resolved. 
diff --git a/src/Client/IServerConnection.h b/src/Client/IServerConnection.h index a0c029c79fb0..724afa95d7a6 100644 --- a/src/Client/IServerConnection.h +++ b/src/Client/IServerConnection.h @@ -88,7 +88,7 @@ class IServerConnection : boost::noncopyable virtual const String & getServerTimezone(const ConnectionTimeouts & timeouts) = 0; virtual const String & getServerDisplayName(const ConnectionTimeouts & timeouts) = 0; - virtual const String & getDescription() const = 0; + virtual const String & getDescription(bool with_extra = false) const = 0; virtual std::vector> getPasswordComplexityRules() const = 0; diff --git a/src/Client/LocalConnection.h b/src/Client/LocalConnection.h index 9c2d0a81d8d5..6218fbe341f4 100644 --- a/src/Client/LocalConnection.h +++ b/src/Client/LocalConnection.h @@ -90,7 +90,7 @@ class LocalConnection : public IServerConnection, WithContext const String & getServerTimezone(const ConnectionTimeouts & timeouts) override; const String & getServerDisplayName(const ConnectionTimeouts & timeouts) override; - const String & getDescription() const override { return description; } + const String & getDescription([[maybe_unused]] bool with_extra = false) const override { return description; } std::vector> getPasswordComplexityRules() const override { return {}; } From 52635d2b8fb35bf8c2b69822ca47c9b672d7d8e4 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Tue, 9 Apr 2024 17:03:15 +0000 Subject: [PATCH 22/90] add profile events for azure disk --- src/Common/ProfileEvents.cpp | 24 ++++-- .../IO/ReadBufferFromAzureBlobStorage.cpp | 26 +++++- .../AzureBlobStorage/AzureObjectStorage.cpp | 80 ++++++++++++------- .../AzureBlobStorage/AzureObjectStorage.h | 3 + .../ObjectStorages/S3/S3ObjectStorage.cpp | 3 + 5 files changed, 97 insertions(+), 39 deletions(-) diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index 33ccb4e9f025..23eed53509ee 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -403,13 +403,6 @@ The server successfully detected this situation and will download merged part fr M(S3PutObject, "Number of S3 API PutObject calls.") \ M(S3GetObject, "Number of S3 API GetObject calls.") \ \ - M(AzureUploadPart, "Number of Azure blob storage API UploadPart calls") \ - M(DiskAzureUploadPart, "Number of Disk Azure blob storage API UploadPart calls") \ - M(AzureCopyObject, "Number of Azure blob storage API CopyObject calls") \ - M(DiskAzureCopyObject, "Number of Disk Azure blob storage API CopyObject calls") \ - M(AzureDeleteObjects, "Number of Azure blob storage API DeleteObject(s) calls.") \ - M(AzureListObjects, "Number of Azure blob storage API ListObjects calls.") \ - \ M(DiskS3DeleteObjects, "Number of DiskS3 API DeleteObject(s) calls.") \ M(DiskS3CopyObject, "Number of DiskS3 API CopyObject calls.") \ M(DiskS3ListObjects, "Number of DiskS3 API ListObjects calls.") \ @@ -441,6 +434,23 @@ The server successfully detected this situation and will download merged part fr M(WriteBufferFromS3WaitInflightLimitMicroseconds, "Time spent on waiting while some of the current requests are done when its number reached the limit defined by s3_max_inflight_parts_for_one_file.") \ M(QueryMemoryLimitExceeded, "Number of times when memory limit exceeded for query.") \ \ + M(AzureGetObject, "Number of Azure API GetObject calls.") \ + M(AzureUploadPart, "Number of Azure blob storage API UploadPart calls") \ + M(AzureCopyObject, "Number of Azure blob storage API CopyObject calls") \ + M(AzureDeleteObjects, "Number of Azure blob storage API 
DeleteObject(s) calls.") \ + M(AzureListObjects, "Number of Azure blob storage API ListObjects calls.") \ + \ + M(DiskAzureGetObject, "Number of Disk Azure API GetObject calls.") \ + M(DiskAzureUploadPart, "Number of Disk Azure blob storage API UploadPart calls") \ + M(DiskAzureCopyObject, "Number of Disk Azure blob storage API CopyObject calls") \ + M(DiskAzureListObjects, "Number of Disk Azure blob storage API ListObjects calls.") \ + M(DiskAzureDeleteObjects, "Number of Azure blob storage API DeleteObject(s) calls.") \ + \ + M(ReadBufferFromAzureMicroseconds, "Time spent on reading from Azure.") \ + M(ReadBufferFromAzureInitMicroseconds, "Time spent initializing connection to Azure.") \ + M(ReadBufferFromAzureBytes, "Bytes read from Azure.") \ + M(ReadBufferFromAzureRequestsErrors, "Number of exceptions while reading from Azure") \ + \ M(CachedReadBufferReadFromCacheHits, "Number of times the read from filesystem cache hit the cache.") \ M(CachedReadBufferReadFromCacheMisses, "Number of times the read from filesystem cache miss the cache.") \ M(CachedReadBufferReadFromSourceMicroseconds, "Time reading from filesystem cache source (from remote filesystem, etc)") \ diff --git a/src/Disks/IO/ReadBufferFromAzureBlobStorage.cpp b/src/Disks/IO/ReadBufferFromAzureBlobStorage.cpp index 5947b742339e..48b40f8f8c68 100644 --- a/src/Disks/IO/ReadBufferFromAzureBlobStorage.cpp +++ b/src/Disks/IO/ReadBufferFromAzureBlobStorage.cpp @@ -3,6 +3,7 @@ #if USE_AZURE_BLOB_STORAGE #include +#include #include #include #include @@ -14,6 +15,12 @@ namespace ProfileEvents { extern const Event RemoteReadThrottlerBytes; extern const Event RemoteReadThrottlerSleepMicroseconds; + extern const Event ReadBufferFromAzureMicroseconds; + extern const Event ReadBufferFromAzureBytes; + extern const Event ReadBufferFromAzureRequestsErrors; + extern const Event AzureGetObject; + extern const Event DiskAzureGetObject; + extern const Event ReadBufferFromAzureInitMicroseconds; } namespace DB @@ -67,7 +74,6 @@ void ReadBufferFromAzureBlobStorage::setReadUntilEnd() initialized = false; } } - } void ReadBufferFromAzureBlobStorage::setReadUntilPosition(size_t position) @@ -103,7 +109,9 @@ bool ReadBufferFromAzureBlobStorage::nextImpl() auto handle_exception = [&, this](const auto & e, size_t i) { + ProfileEvents::increment(ProfileEvents::ReadBufferFromAzureRequestsErrors); LOG_DEBUG(log, "Exception caught during Azure Read for file {} at attempt {}/{}: {}", path, i + 1, max_single_read_retries, e.Message); + if (i + 1 == max_single_read_retries) throw; @@ -115,6 +123,7 @@ bool ReadBufferFromAzureBlobStorage::nextImpl() for (size_t i = 0; i < max_single_read_retries; ++i) { + ProfileEventTimeIncrement watch(ProfileEvents::ReadBufferFromAzureMicroseconds); try { bytes_read = data_stream->ReadToCount(reinterpret_cast(data_ptr), to_read_bytes); @@ -131,6 +140,7 @@ bool ReadBufferFromAzureBlobStorage::nextImpl() if (bytes_read == 0) return false; + ProfileEvents::increment(ProfileEvents::ReadBufferFromAzureBytes, bytes_read); BufferBase::set(data_ptr, bytes_read, 0); offset += bytes_read; @@ -215,7 +225,9 @@ void ReadBufferFromAzureBlobStorage::initialize() auto handle_exception = [&, this](const auto & e, size_t i) { + ProfileEvents::increment(ProfileEvents::ReadBufferFromAzureRequestsErrors); LOG_DEBUG(log, "Exception caught during Azure Download for file {} at offset {} at attempt {}/{}: {}", path, offset, i + 1, max_single_download_retries, e.Message); + if (i + 1 == max_single_download_retries) throw; @@ -225,8 +237,14 @@ void 
ReadBufferFromAzureBlobStorage::initialize() for (size_t i = 0; i < max_single_download_retries; ++i) { + ProfileEventTimeIncrement watch(ProfileEvents::ReadBufferFromAzureInitMicroseconds); + try { + ProfileEvents::increment(ProfileEvents::AzureGetObject); + if (read_settings.for_object_storage) + ProfileEvents::increment(ProfileEvents::DiskAzureGetObject); + auto download_response = blob_client->Download(download_options); data_stream = std::move(download_response.Value.BodyStream); break; @@ -266,6 +284,8 @@ size_t ReadBufferFromAzureBlobStorage::readBigAt(char * to, size_t n, size_t ran for (size_t i = 0; i < max_single_download_retries && n > 0; ++i) { size_t bytes_copied = 0; + ProfileEventTimeIncrement watch(ProfileEvents::ReadBufferFromAzureMicroseconds); + try { Azure::Storage::Blobs::DownloadBlobOptions download_options; @@ -282,7 +302,9 @@ size_t ReadBufferFromAzureBlobStorage::readBigAt(char * to, size_t n, size_t ran } catch (const Azure::Core::RequestFailedException & e) { + ProfileEvents::increment(ProfileEvents::ReadBufferFromAzureRequestsErrors); LOG_DEBUG(log, "Exception caught during Azure Download for file {} at offset {} at attempt {}/{}: {}", path, offset, i + 1, max_single_download_retries, e.Message); + if (i + 1 == max_single_download_retries) throw; @@ -290,6 +312,8 @@ size_t ReadBufferFromAzureBlobStorage::readBigAt(char * to, size_t n, size_t ran sleep_time_with_backoff_milliseconds *= 2; } + ProfileEvents::increment(ProfileEvents::ReadBufferFromAzureBytes, bytes_copied); + range_begin += bytes_copied; to += bytes_copied; n -= bytes_copied; diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp index e0614613c3fb..fb3a35301c0b 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp @@ -22,6 +22,14 @@ namespace CurrentMetrics extern const Metric ObjectStorageAzureThreadsScheduled; } +namespace ProfileEvents +{ + extern const Event AzureListObjects; + extern const Event DiskAzureListObjects; + extern const Event AzureDeleteObjects; + extern const Event DiskAzureDeleteObjects; +} + namespace DB { @@ -58,6 +66,9 @@ class AzureIteratorAsync final : public IObjectStorageIteratorAsync private: bool getBatchAndCheckNext(RelativePathsWithMetadata & batch) override { + ProfileEvents::increment(ProfileEvents::AzureListObjects); + ProfileEvents::increment(ProfileEvents::DiskAzureListObjects); + batch.clear(); auto outcome = client->ListBlobs(options); auto blob_list_response = client->ListBlobs(options); @@ -116,6 +127,9 @@ bool AzureObjectStorage::exists(const StoredObject & object) const options.Prefix = object.remote_path; options.PageSizeHint = 1; + ProfileEvents::increment(ProfileEvents::AzureListObjects); + ProfileEvents::increment(ProfileEvents::DiskAzureListObjects); + auto blobs_list_response = client_ptr->ListBlobs(options); auto blobs_list = blobs_list_response.Blobs; @@ -147,10 +161,14 @@ void AzureObjectStorage::listObjects(const std::string & path, RelativePathsWith options.PageSizeHint = max_keys; else options.PageSizeHint = settings.get()->list_object_keys_size; + Azure::Storage::Blobs::ListBlobsPagedResponse blob_list_response; while (true) { + ProfileEvents::increment(ProfileEvents::AzureListObjects); + ProfileEvents::increment(ProfileEvents::DiskAzureListObjects); + blob_list_response = client_ptr->ListBlobs(options); auto blobs_list = blob_list_response.Blobs; @@ -270,59 
+288,59 @@ std::unique_ptr AzureObjectStorage::writeObject( /// NO settings.get()); } -/// Remove file. Throws exception if file doesn't exists or it's a directory. -void AzureObjectStorage::removeObject(const StoredObject & object) +void AzureObjectStorage::removeObjectImpl(const StoredObject & object, const SharedAzureClientPtr & client_ptr, bool if_exists) { + ProfileEvents::increment(ProfileEvents::AzureDeleteObjects); + ProfileEvents::increment(ProfileEvents::DiskAzureDeleteObjects); + const auto & path = object.remote_path; LOG_TEST(log, "Removing single object: {}", path); - auto client_ptr = client.get(); - auto delete_info = client_ptr->DeleteBlob(path); - if (!delete_info.Value.Deleted) - throw Exception( - ErrorCodes::AZURE_BLOB_STORAGE_ERROR, "Failed to delete file (path: {}) in AzureBlob Storage, reason: {}", - path, delete_info.RawResponse ? delete_info.RawResponse->GetReasonPhrase() : "Unknown"); -} -void AzureObjectStorage::removeObjects(const StoredObjects & objects) -{ - auto client_ptr = client.get(); - for (const auto & object : objects) + try { - LOG_TEST(log, "Removing object: {} (total: {})", object.remote_path, objects.size()); - auto delete_info = client_ptr->DeleteBlob(object.remote_path); - if (!delete_info.Value.Deleted) + auto delete_info = client_ptr->DeleteBlob(path); + if (!if_exists && !delete_info.Value.Deleted) throw Exception( ErrorCodes::AZURE_BLOB_STORAGE_ERROR, "Failed to delete file (path: {}) in AzureBlob Storage, reason: {}", - object.remote_path, delete_info.RawResponse ? delete_info.RawResponse->GetReasonPhrase() : "Unknown"); - } -} - -void AzureObjectStorage::removeObjectIfExists(const StoredObject & object) -{ - auto client_ptr = client.get(); - try - { - LOG_TEST(log, "Removing single object: {}", object.remote_path); - auto delete_info = client_ptr->DeleteBlob(object.remote_path); + path, delete_info.RawResponse ? delete_info.RawResponse->GetReasonPhrase() : "Unknown"); } catch (const Azure::Storage::StorageException & e) { + if (!if_exists) + throw; + /// If object doesn't exist... if (e.StatusCode == Azure::Core::Http::HttpStatusCode::NotFound) return; + tryLogCurrentException(__PRETTY_FUNCTION__); throw; } } -void AzureObjectStorage::removeObjectsIfExist(const StoredObjects & objects) +/// Remove file. Throws exception if file doesn't exists or it's a directory. 
+void AzureObjectStorage::removeObject(const StoredObject & object) +{ + removeObjectImpl(object, client.get(), false); +} + +void AzureObjectStorage::removeObjects(const StoredObjects & objects) { auto client_ptr = client.get(); for (const auto & object : objects) - { - removeObjectIfExists(object); - } + removeObjectImpl(object, client_ptr, false); +} +void AzureObjectStorage::removeObjectIfExists(const StoredObject & object) +{ + removeObjectImpl(object, client.get(), true); +} + +void AzureObjectStorage::removeObjectsIfExist(const StoredObjects & objects) +{ + auto client_ptr = client.get(); + for (const auto & object : objects) + removeObjectImpl(object, client_ptr, true); } diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h index b05fc7afc96c..f52ab803012b 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h @@ -164,6 +164,9 @@ class AzureObjectStorage : public IObjectStorage } private: + using SharedAzureClientPtr = std::shared_ptr; + void removeObjectImpl(const StoredObject & object, const SharedAzureClientPtr & client_ptr, bool if_exists); + const String name; /// client used to access the files in the Blob Storage cloud MultiVersion client; diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index b343b73f7bd6..c4737f1a5ae4 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -120,6 +120,7 @@ class S3IteratorAsync final : public IObjectStorageIteratorAsync bool getBatchAndCheckNext(RelativePathsWithMetadata & batch) override { ProfileEvents::increment(ProfileEvents::S3ListObjects); + ProfileEvents::increment(ProfileEvents::DiskS3ListObjects); bool result = false; auto outcome = client->ListObjectsV2(request); @@ -292,6 +293,7 @@ void S3ObjectStorage::listObjects(const std::string & path, RelativePathsWithMet { ProfileEvents::increment(ProfileEvents::S3ListObjects); ProfileEvents::increment(ProfileEvents::DiskS3ListObjects); + outcome = client.get()->ListObjectsV2(request); throwIfError(outcome); @@ -325,6 +327,7 @@ void S3ObjectStorage::removeObjectImpl(const StoredObject & object, bool if_exis { ProfileEvents::increment(ProfileEvents::S3DeleteObjects); ProfileEvents::increment(ProfileEvents::DiskS3DeleteObjects); + S3::DeleteObjectRequest request; request.SetBucket(uri.bucket); request.SetKey(object.remote_path); From aba3bbaeb63d31bdfef02bfd0d734dc6f35a9409 Mon Sep 17 00:00:00 2001 From: Konstantin Bogdanov Date: Thu, 4 Apr 2024 21:41:32 +0200 Subject: [PATCH 23/90] Replace Tcl version with Python --- ...1676_clickhouse_client_autocomplete.python | 121 +++++++++++++++ .../01676_clickhouse_client_autocomplete.sh | 138 +----------------- 2 files changed, 123 insertions(+), 136 deletions(-) create mode 100644 tests/queries/0_stateless/01676_clickhouse_client_autocomplete.python diff --git a/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.python b/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.python new file mode 100644 index 000000000000..02198eb77c33 --- /dev/null +++ b/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.python @@ -0,0 +1,121 @@ +import pty +import os +import shlex +import time +import multiprocessing + +COMPLETION_TIMEOUT_SECONDS = 10 + + +def run_with_timeout(func, args, timeout): + process = 
multiprocessing.Process(target=func, args=args) + process.start() + process.join(timeout) + + if process.is_alive(): + process.terminate() + print("Timeout") + + +def test_completion(program, argv, comp_word): + comp_begin = comp_word[:-3] + + shell_pid, master = pty.fork() + if shell_pid == 0: + os.execv(program, argv) + else: + try: + output = os.read(master, 4096).decode() + while not ":)" in output: + output += os.read(master, 4096).decode() + + os.write(master, b"SET " + bytes(comp_begin.encode())) + output = os.read(master, 4096).decode() + while not comp_begin in output: + output += os.read(master, 4096).decode() + + time.sleep(0.15) + os.write(master, b"\t") + + output = os.read(master, 4096).decode() + # fail fast if there is a bell character in the output, + # meaning no concise completion is found + if "\x07" in output: + print(f"{comp_word}: FAIL") + return + + while not comp_word in output: + output += os.read(master, 4096).decode() + + print(f"{comp_word}: OK") + finally: + os.close(master) + + +client_compwords_positive = [ + # system.functions + "concatAssumeInjective", + # system.table_engines + "ReplacingMergeTree", + # system.formats + "JSONEachRow", + # system.table_functions + "clusterAllReplicas", + # system.data_type_families + "SimpleAggregateFunction", + # system.settings + "max_concurrent_queries_for_all_users", + # system.clusters + "test_shard_localhost", + # system.macros + "default_path_test", + # system.storage_policies, egh not uniq + "default", + # system.aggregate_function_combinators + "uniqCombined64ForEach", + # FIXME: one may add separate case for suggestion_limit + # system.databases + "system", + # system.tables + "aggregate_function_combinators", + # system.columns + "primary_key_bytes_in_memory_allocated", + # system.dictionaries + # FIXME: none + "definitely_broken_and_should_fail", +] + +local_compwords_positive = [ + # system.functions + "concatAssumeInjective", + # system.table_engines + "ReplacingMergeTree", + # system.formats + "JSONEachRow", + # system.table_functions + "clusterAllReplicas", + # system.data_type_families + "SimpleAggregateFunction", +] + + +if __name__ == "__main__": + print("# clickhouse-client") + clickhouse_client = os.environ["CLICKHOUSE_CLIENT"] + args = shlex.split(clickhouse_client) + [ + run_with_timeout( + test_completion, [args[0], args, comp_word], COMPLETION_TIMEOUT_SECONDS + ) + for comp_word in client_compwords_positive + ] + + print("# clickhouse-local") + clickhouse_local = os.environ["CLICKHOUSE_LOCAL"] + args = shlex.split(clickhouse_local) + [ + run_with_timeout( + test_completion, [args[0], args, comp_word], COMPLETION_TIMEOUT_SECONDS + ) + for comp_word in local_compwords_positive + ] diff --git a/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.sh b/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.sh index ebd6490077e4..88f18a5bb018 100755 --- a/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.sh +++ b/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.sh @@ -1,142 +1,8 @@ #!/usr/bin/env bash -# Tags: long, no-ubsan +# Tags: long CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh -SCRIPT_PATH="$CURDIR/$CLICKHOUSE_TEST_UNIQUE_NAME.generated-expect" - -# NOTE: database = $CLICKHOUSE_DATABASE is superfluous - -function test_completion_word() -{ - local w=$1 && shift - - local w_len=${#w} - local compword_begin=${w:0:$((w_len-3))} - local compword_end=${w:$((w_len-3))} - - # NOTE: - # - here and below you should escape variables of the expect. - # - you should not use "expect <<..." since in this case timeout/eof will - # not work (I guess due to attached stdin) - - # TODO: get build sanitizer and debug/release info to dynamically change test - # like here timeout 120 seconds is too big for release build - # but ok for sanitizer builds - cat > "$SCRIPT_PATH" << EOF -# NOTE: log will be appended -exp_internal -f $CLICKHOUSE_TMP/$(basename "${BASH_SOURCE[0]}").debuglog 0 - -# NOTE: when expect have EOF on stdin it also closes stdout, so let's reopen it -# again for logging -set stdout_channel [open "/dev/stdout" w] - -log_user 0 -set timeout 120 -match_max 100000 -expect_after { - # Do not ignore eof from expect - -i \$any_spawn_id eof { exp_continue } - # A default timeout action is to do nothing, change it to fail - -i \$any_spawn_id timeout { exit 1 } -} - -spawn bash -c "$*" -expect ":) " - -# Make a query -send -- "SET $compword_begin" -expect "SET $compword_begin" - -# Wait for suggestions to load, they are loaded in background -set is_done 0 -set timeout 1 -while {\$is_done == 0} { - send -- "\\t" - expect { - "$compword_begin$compword_end" { - puts \$stdout_channel "$compword_begin$compword_end: OK" - set is_done 1 - } - default { - sleep 1 - } - } -} - -close \$stdout_channel - -send -- "\\3\\4" -expect eof -EOF - - # NOTE: run expect under timeout since there is while loop that is not - # limited with timeout. - # - # NOTE: cat is required to serialize stdout for expect (without this pipe - # it will reopen the file again, and the output will be mixed). - timeout 2m expect -f "$SCRIPT_PATH" | cat -} - -# last 3 bytes will be completed, -# so take this in mind when you will update the list. 
-client_compwords_positive=( - # system.functions - concatAssumeInjective - # system.table_engines - ReplacingMergeTree - # system.formats - JSONEachRow - # system.table_functions - clusterAllReplicas - # system.data_type_families - SimpleAggregateFunction - # system.settings - max_concurrent_queries_for_all_users - # system.clusters - test_shard_localhost - # system.macros - default_path_test - # system.storage_policies, egh not uniq - default - # system.aggregate_function_combinators - uniqCombined64ForEach - - # FIXME: one may add separate case for suggestion_limit - # system.databases - system - # system.tables - aggregate_function_combinators - # system.columns - primary_key_bytes_in_memory_allocated - # system.dictionaries - # FIXME: none -) - -local_compwords_positive=( - # system.functions - concatAssumeInjective - # system.table_engines - ReplacingMergeTree - # system.formats - JSONEachRow - # system.table_functions - clusterAllReplicas - # system.data_type_families - SimpleAggregateFunction -) - -echo "# clickhouse-client" -for w in "${client_compwords_positive[@]}"; do - test_completion_word "$w" "$CLICKHOUSE_CLIENT" -done -echo "# clickhouse-local" -for w in "${local_compwords_positive[@]}"; do - test_completion_word "$w" "$CLICKHOUSE_LOCAL" -done - -rm -f "${SCRIPT_PATH:?}" - -exit 0 +python3 "$CURDIR"/01676_clickhouse_client_autocomplete.python From c8598bdb5448af64bf61aab78c89096dfa4a42ec Mon Sep 17 00:00:00 2001 From: Konstantin Bogdanov Date: Mon, 8 Apr 2024 20:54:28 +0200 Subject: [PATCH 24/90] Add a flag to load suggestion data synchronously --- programs/client/Client.cpp | 1 + src/Client/ClientBase.cpp | 7 +++++-- src/Client/ClientBase.h | 1 + src/Client/Suggest.cpp | 9 ++++++--- src/Client/Suggest.h | 2 +- .../01676_clickhouse_client_autocomplete.python | 4 +++- 6 files changed, 17 insertions(+), 7 deletions(-) diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index 72cad1dac076..e27a4f0f529b 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -482,6 +482,7 @@ void Client::connect() server_version = toString(server_version_major) + "." + toString(server_version_minor) + "." + toString(server_version_patch); load_suggestions = is_interactive && (server_revision >= Suggest::MIN_SERVER_REVISION) && !config().getBool("disable_suggestion", false); + wait_for_suggestions_to_load = config().getBool("wait_for_suggestions_to_load", false); if (server_display_name = connection->getServerDisplayName(connection_parameters.timeouts); server_display_name.empty()) server_display_name = config().getString("host", "localhost"); diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index db910de07f31..7ad8383d4605 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -2474,9 +2474,9 @@ void ClientBase::runInteractive() { /// Load suggestion data from the server. 
if (global_context->getApplicationType() == Context::ApplicationType::CLIENT) - suggest->load(global_context, connection_parameters, config().getInt("suggestion_limit")); + suggest->load(global_context, connection_parameters, config().getInt("suggestion_limit"), wait_for_suggestions_to_load); else if (global_context->getApplicationType() == Context::ApplicationType::LOCAL) - suggest->load(global_context, connection_parameters, config().getInt("suggestion_limit")); + suggest->load(global_context, connection_parameters, config().getInt("suggestion_limit"), wait_for_suggestions_to_load); } if (home_path.empty()) @@ -2972,6 +2972,7 @@ void ClientBase::init(int argc, char ** argv) ("progress", po::value()->implicit_value(ProgressOption::TTY, "tty")->default_value(ProgressOption::DEFAULT, "default"), "Print progress of queries execution - to TTY: tty|on|1|true|yes; to STDERR non-interactive mode: err; OFF: off|0|false|no; DEFAULT - interactive to TTY, non-interactive is off") ("disable_suggestion,A", "Disable loading suggestion data. Note that suggestion data is loaded asynchronously through a second connection to ClickHouse server. Also it is reasonable to disable suggestion if you want to paste a query with TAB characters. Shorthand option -A is for those who get used to mysql client.") + ("wait_for_suggestions_to_load", "Load suggestion data synchronously.") ("time,t", "print query execution time to stderr in non-interactive mode (for benchmarks)") ("echo", "in batch mode, print query before execution") @@ -3101,6 +3102,8 @@ void ClientBase::init(int argc, char ** argv) config().setBool("echo", true); if (options.count("disable_suggestion")) config().setBool("disable_suggestion", true); + if (options.count("wait_for_suggestions_to_load")) + config().setBool("wait_for_suggestions_to_load", true); if (options.count("suggestion_limit")) config().setInt("suggestion_limit", options["suggestion_limit"].as()); if (options.count("highlight")) diff --git a/src/Client/ClientBase.h b/src/Client/ClientBase.h index 9ec87ababfc9..dc5c65530462 100644 --- a/src/Client/ClientBase.h +++ b/src/Client/ClientBase.h @@ -209,6 +209,7 @@ class ClientBase : public Poco::Util::Application, public IHints<2> std::optional suggest; bool load_suggestions = false; + bool wait_for_suggestions_to_load = false; std::vector queries; /// Queries passed via '--query' std::vector queries_files; /// If not empty, queries will be read from these files diff --git a/src/Client/Suggest.cpp b/src/Client/Suggest.cpp index 03df582de10a..f63dbc64d271 100644 --- a/src/Client/Suggest.cpp +++ b/src/Client/Suggest.cpp @@ -110,7 +110,7 @@ static String getLoadSuggestionQuery(Int32 suggestion_limit, bool basic_suggesti } template -void Suggest::load(ContextPtr context, const ConnectionParameters & connection_parameters, Int32 suggestion_limit) +void Suggest::load(ContextPtr context, const ConnectionParameters & connection_parameters, Int32 suggestion_limit, bool wait_for_load) { loading_thread = std::thread([my_context = Context::createCopy(context), connection_parameters, suggestion_limit, this] { @@ -152,6 +152,9 @@ void Suggest::load(ContextPtr context, const ConnectionParameters & connection_p /// Note that keyword suggestions are available even if we cannot load data from server.
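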
}); + + if (wait_for_load) + loading_thread.join(); } void Suggest::load(IServerConnection & connection, @@ -228,8 +231,8 @@ void Suggest::fillWordsFromBlock(const Block & block) } template -void Suggest::load(ContextPtr context, const ConnectionParameters & connection_parameters, Int32 suggestion_limit); +void Suggest::load(ContextPtr context, const ConnectionParameters & connection_parameters, Int32 suggestion_limit, bool wait_for_load); template -void Suggest::load(ContextPtr context, const ConnectionParameters & connection_parameters, Int32 suggestion_limit); +void Suggest::load(ContextPtr context, const ConnectionParameters & connection_parameters, Int32 suggestion_limit, bool wait_for_load); } diff --git a/src/Client/Suggest.h b/src/Client/Suggest.h index 5cecdc4501b0..aac8a73f7020 100644 --- a/src/Client/Suggest.h +++ b/src/Client/Suggest.h @@ -27,7 +27,7 @@ class Suggest : public LineReader::Suggest, boost::noncopyable /// Load suggestions for clickhouse-client. template - void load(ContextPtr context, const ConnectionParameters & connection_parameters, Int32 suggestion_limit); + void load(ContextPtr context, const ConnectionParameters & connection_parameters, Int32 suggestion_limit, bool wait_for_load); void load(IServerConnection & connection, const ConnectionTimeouts & timeouts, diff --git a/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.python b/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.python index 02198eb77c33..5433a8d4199e 100644 --- a/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.python +++ b/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.python @@ -34,7 +34,7 @@ def test_completion(program, argv, comp_word): while not comp_begin in output: output += os.read(master, 4096).decode() - time.sleep(0.15) + time.sleep(0.25) os.write(master, b"\t") output = os.read(master, 4096).decode() @@ -103,6 +103,7 @@ if __name__ == "__main__": print("# clickhouse-client") clickhouse_client = os.environ["CLICKHOUSE_CLIENT"] args = shlex.split(clickhouse_client) + args.append("--wait_for_suggestions_to_load") [ run_with_timeout( test_completion, [args[0], args, comp_word], COMPLETION_TIMEOUT_SECONDS @@ -112,6 +113,7 @@ if __name__ == "__main__": print("# clickhouse-local") clickhouse_local = os.environ["CLICKHOUSE_LOCAL"] + args.append("--wait_for_suggestions_to_load") args = shlex.split(clickhouse_local) [ run_with_timeout( From afb52b6369e94b1143d83fc8fc41575eb3289b10 Mon Sep 17 00:00:00 2001 From: Konstantin Bogdanov Date: Mon, 8 Apr 2024 23:58:53 +0200 Subject: [PATCH 25/90] Undo breaking statement --- .../0_stateless/01676_clickhouse_client_autocomplete.python | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.python b/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.python index 5433a8d4199e..b4380dc71473 100644 --- a/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.python +++ b/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.python @@ -82,7 +82,6 @@ client_compwords_positive = [ "primary_key_bytes_in_memory_allocated", # system.dictionaries # FIXME: none - "definitely_broken_and_should_fail", ] local_compwords_positive = [ From 414b0289310f8830b26f5f59bd74cc60c3b2d8bb Mon Sep 17 00:00:00 2001 From: Konstantin Bogdanov Date: Tue, 9 Apr 2024 21:51:27 +0200 Subject: [PATCH 26/90] Support synchronous completions in local server too --- programs/local/LocalServer.cpp | 3 +++ 
.../0_stateless/01676_clickhouse_client_autocomplete.python | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 72920fbd8559..8f5afdb90223 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -560,6 +560,7 @@ void LocalServer::processConfig() const std::string clickhouse_dialect{"clickhouse"}; load_suggestions = (is_interactive || delayed_interactive) && !config().getBool("disable_suggestion", false) && config().getString("dialect", clickhouse_dialect) == clickhouse_dialect; + wait_for_suggestions_to_load = config().getBool("wait_for_suggestions_to_load", false); auto logging = (config().has("logger.console") || config().has("logger.level") @@ -835,6 +836,8 @@ void LocalServer::processOptions(const OptionsDescription &, const CommandLineOp config().setString("logger.level", options["logger.level"].as()); if (options.count("send_logs_level")) config().setString("send_logs_level", options["send_logs_level"].as()); + if (options.count("wait_for_suggestions_to_load")) + config().setBool("wait_for_suggestions_to_load", true); } void LocalServer::readArguments(int argc, char ** argv, Arguments & common_arguments, std::vector &, std::vector &) diff --git a/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.python b/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.python index b4380dc71473..e62c35cd17d3 100644 --- a/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.python +++ b/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.python @@ -112,8 +112,8 @@ if __name__ == "__main__": print("# clickhouse-local") clickhouse_local = os.environ["CLICKHOUSE_LOCAL"] - args.append("--wait_for_suggestions_to_load") args = shlex.split(clickhouse_local) + args.append("--wait_for_suggestions_to_load") [ run_with_timeout( test_completion, [args[0], args, comp_word], COMPLETION_TIMEOUT_SECONDS From fb0c28a5b31154903d43c3ef48033cb9c29509a2 Mon Sep 17 00:00:00 2001 From: Konstantin Bogdanov Date: Wed, 10 Apr 2024 09:18:23 +0200 Subject: [PATCH 27/90] Add debug logging --- ...1676_clickhouse_client_autocomplete.python | 33 ++++++++++++++----- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.python b/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.python index e62c35cd17d3..9f0354ff961e 100644 --- a/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.python +++ b/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.python @@ -4,8 +4,8 @@ import shlex import time import multiprocessing -COMPLETION_TIMEOUT_SECONDS = 10 - +COMPLETION_TIMEOUT_SECONDS = 30 +DEBUG_LOG = os.path.join(os.environ["CLICKHOUSE_TMP"], os.path.basename(os.path.abspath(__file__)) + ".debuglog") def run_with_timeout(func, args, timeout): process = multiprocessing.Process(target=func, args=args) @@ -25,19 +25,31 @@ def test_completion(program, argv, comp_word): os.execv(program, argv) else: try: - output = os.read(master, 4096).decode() + debug_log_fd = open(DEBUG_LOG, "a") + + output_b = os.read(master, 4096) + output = output_b.decode() + debug_log_fd.write(repr(output_b) + "\n") while not ":)" in output: - output += os.read(master, 4096).decode() + output_b = os.read(master, 4096) + output += output_b.decode() + debug_log_fd.write(repr(output_b) + "\n") os.write(master, b"SET " + bytes(comp_begin.encode())) - output = os.read(master, 4096).decode() + output_b = 
os.read(master, 4096) + output = output_b.decode() + debug_log_fd.write(repr(output_b) + "\n") while not comp_begin in output: - output += os.read(master, 4096).decode() + output_b = os.read(master, 4096) + output += output_b.decode() + debug_log_fd.write(repr(output_b) + "\n") - time.sleep(0.25) + time.sleep(0.01) os.write(master, b"\t") - output = os.read(master, 4096).decode() + output_b = os.read(master, 4096) + output = output_b.decode() + debug_log_fd.write(repr(output_b) + "\n") # fail fast if there is a bell character in the output, # meaning no concise completion is found if "\x07" in output: @@ -45,11 +57,14 @@ def test_completion(program, argv, comp_word): return while not comp_word in output: - output += os.read(master, 4096).decode() + output_b = os.read(master, 4096) + output += output_b.decode() + debug_log_fd.write(repr(output_b) + "\n") print(f"{comp_word}: OK") finally: os.close(master) + debug_log_fd.close() client_compwords_positive = [ From 216a3e2eeb911b63c733bd6b5d22e41cb0f5b491 Mon Sep 17 00:00:00 2001 From: Konstantin Bogdanov Date: Wed, 10 Apr 2024 10:39:22 +0200 Subject: [PATCH 28/90] Reformat --- .../0_stateless/01676_clickhouse_client_autocomplete.python | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.python b/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.python index 9f0354ff961e..7bb9209f55ce 100644 --- a/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.python +++ b/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.python @@ -5,7 +5,11 @@ import time import multiprocessing COMPLETION_TIMEOUT_SECONDS = 30 -DEBUG_LOG = os.path.join(os.environ["CLICKHOUSE_TMP"], os.path.basename(os.path.abspath(__file__)) + ".debuglog") +DEBUG_LOG = os.path.join( + os.environ["CLICKHOUSE_TMP"], + os.path.basename(os.path.abspath(__file__)) + ".debuglog", +) + def run_with_timeout(func, args, timeout): process = multiprocessing.Process(target=func, args=args) From 11d2fbcf49208f8a0ade5fcc911dbf20d5e127ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Wed, 10 Apr 2024 18:17:29 +0200 Subject: [PATCH 29/90] Don't use virtual columns to filter if they have been overwritten --- .../optimizeUseAggregateProjection.cpp | 5 ++--- .../QueryPlan/ReadFromMergeTree.cpp | 3 ++- src/Storages/MergeTree/MergeTreeData.cpp | 21 ++++++++++++------- src/Storages/MergeTree/MergeTreeData.h | 5 +++-- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 5 +++-- .../MergeTree/MergeTreeDataSelectExecutor.h | 1 + ...virtual_column_override_group_by.reference | 1 + ...03093_virtual_column_override_group_by.sql | 2 ++ 8 files changed, 28 insertions(+), 15 deletions(-) create mode 100644 tests/queries/0_stateless/03093_virtual_column_override_group_by.reference create mode 100644 tests/queries/0_stateless/03093_virtual_column_override_group_by.sql diff --git a/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp b/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp index b40fea47b3cc..64111602458f 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp @@ -432,13 +432,12 @@ AggregateProjectionCandidates getAggregateProjectionCandidates( { const auto & keys = aggregating.getParams().keys; const auto & aggregates = aggregating.getParams().aggregates; - Block key_virtual_columns = 
reading.getMergeTreeData().getHeaderWithVirtualsForFilter(); + const auto metadata = reading.getStorageMetadata(); + Block key_virtual_columns = reading.getMergeTreeData().getHeaderWithVirtualsForFilter(metadata); AggregateProjectionCandidates candidates; const auto & parts = reading.getParts(); - - const auto metadata = reading.getStorageMetadata(); ContextPtr context = reading.getContext(); const auto & projections = metadata->projections; diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index bee42c3dddec..6bdd060513c9 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -1415,7 +1415,8 @@ static void buildIndexes( indexes->partition_pruner.emplace(metadata_snapshot, filter_actions_dag, context, false /* strict */); } - indexes->part_values = MergeTreeDataSelectExecutor::filterPartsByVirtualColumns(data, parts, filter_actions_dag, context); + indexes->part_values + = MergeTreeDataSelectExecutor::filterPartsByVirtualColumns(metadata_snapshot, data, parts, filter_actions_dag, context); MergeTreeDataSelectExecutor::buildKeyConditionFromPartOffset(indexes->part_offset_condition, filter_actions_dag, context); indexes->use_skip_indexes = settings.use_skip_indexes; diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 5d4c3ab078e5..a948d80396a7 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -1031,19 +1031,26 @@ void MergeTreeData::MergingParams::check(const StorageInMemoryMetadata & metadat const Names MergeTreeData::virtuals_useful_for_filter = {"_part", "_partition_id", "_part_uuid", "_partition_value", "_part_data_version"}; -Block MergeTreeData::getHeaderWithVirtualsForFilter() const +Block MergeTreeData::getHeaderWithVirtualsForFilter(const StorageMetadataPtr & metadata) const { + const auto columns = metadata->getColumns().getAllPhysical(); Block header; auto virtuals_desc = getVirtualsPtr(); for (const auto & name : virtuals_useful_for_filter) + { + if (columns.contains(name)) + continue; if (auto column = virtuals_desc->tryGet(name)) header.insert({column->type->createColumn(), column->type, name}); + } + return header; } -Block MergeTreeData::getBlockWithVirtualsForFilter(const MergeTreeData::DataPartsVector & parts, bool ignore_empty) const +Block MergeTreeData::getBlockWithVirtualsForFilter( + const StorageMetadataPtr & metadata, const MergeTreeData::DataPartsVector & parts, bool ignore_empty) const { - auto block = getHeaderWithVirtualsForFilter(); + auto block = getHeaderWithVirtualsForFilter(metadata); for (const auto & part_or_projection : parts) { @@ -1072,7 +1079,7 @@ std::optional MergeTreeData::totalRowsByPartitionPredicateImpl( return 0; auto metadata_snapshot = getInMemoryMetadataPtr(); - auto virtual_columns_block = getBlockWithVirtualsForFilter({parts[0]}); + auto virtual_columns_block = getBlockWithVirtualsForFilter(metadata_snapshot, {parts[0]}); auto filter_dag = VirtualColumnUtils::splitFilterDagForAllowedInputs(filter_actions_dag->getOutputs().at(0), nullptr); if (!filter_dag) @@ -1091,7 +1098,7 @@ std::optional MergeTreeData::totalRowsByPartitionPredicateImpl( std::unordered_set part_values; if (valid) { - virtual_columns_block = getBlockWithVirtualsForFilter(parts); + virtual_columns_block = getBlockWithVirtualsForFilter(metadata_snapshot, parts); VirtualColumnUtils::filterBlockWithDAG(filter_dag, virtual_columns_block, local_context); 
part_values = VirtualColumnUtils::extractSingleValueFromBlock(virtual_columns_block, "_part"); if (part_values.empty()) @@ -6694,11 +6701,11 @@ Block MergeTreeData::getMinMaxCountProjectionBlock( }; Block virtual_columns_block; - auto virtual_block = getHeaderWithVirtualsForFilter(); + auto virtual_block = getHeaderWithVirtualsForFilter(metadata_snapshot); bool has_virtual_column = std::any_of(required_columns.begin(), required_columns.end(), [&](const auto & name) { return virtual_block.has(name); }); if (has_virtual_column || filter_dag) { - virtual_columns_block = getBlockWithVirtualsForFilter(parts, /*ignore_empty=*/ true); + virtual_columns_block = getBlockWithVirtualsForFilter(metadata_snapshot, parts, /*ignore_empty=*/true); if (virtual_columns_block.rows() == 0) return {}; } diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index d21f87c337ef..85537ce4a24a 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -990,10 +990,11 @@ class MergeTreeData : public IStorage, public WithMutableContext static const Names virtuals_useful_for_filter; /// Construct a sample block of virtual columns. - Block getHeaderWithVirtualsForFilter() const; + Block getHeaderWithVirtualsForFilter(const StorageMetadataPtr & metadata) const; /// Construct a block consisting only of possible virtual columns for part pruning. - Block getBlockWithVirtualsForFilter(const MergeTreeData::DataPartsVector & parts, bool ignore_empty = false) const; + Block getBlockWithVirtualsForFilter( + const StorageMetadataPtr & metadata, const MergeTreeData::DataPartsVector & parts, bool ignore_empty = false) const; /// In merge tree we do inserts with several steps. One of them: /// X. write part to temporary directory with some temp name diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index bcc936c57396..345872efddf9 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -473,6 +473,7 @@ void MergeTreeDataSelectExecutor::buildKeyConditionFromPartOffset( } std::optional> MergeTreeDataSelectExecutor::filterPartsByVirtualColumns( + const StorageMetadataPtr & metadata_snapshot, const MergeTreeData & data, const MergeTreeData::DataPartsVector & parts, const ActionsDAGPtr & filter_dag, @@ -481,12 +482,12 @@ std::optional> MergeTreeDataSelectExecutor::filterPar if (!filter_dag) return {}; - auto sample = data.getHeaderWithVirtualsForFilter(); + auto sample = data.getHeaderWithVirtualsForFilter(metadata_snapshot); auto dag = VirtualColumnUtils::splitFilterDagForAllowedInputs(filter_dag->getOutputs().at(0), &sample); if (!dag) return {}; - auto virtual_columns_block = data.getBlockWithVirtualsForFilter(parts); + auto virtual_columns_block = data.getBlockWithVirtualsForFilter(metadata_snapshot, parts); VirtualColumnUtils::filterBlockWithDAG(dag, virtual_columns_block, context); return VirtualColumnUtils::extractSingleValueFromBlock(virtual_columns_block, "_part"); } diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h index b1afd7e66683..ecccd6d55e39 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h @@ -166,6 +166,7 @@ class MergeTreeDataSelectExecutor /// Example: SELECT count() FROM table WHERE _part = 'part_name' /// If expression found, return a set with 
allowed part names (std::nullopt otherwise). static std::optional> filterPartsByVirtualColumns( + const StorageMetadataPtr & metadata_snapshot, const MergeTreeData & data, const MergeTreeData::DataPartsVector & parts, const ActionsDAGPtr & filter_dag, diff --git a/tests/queries/0_stateless/03093_virtual_column_override_group_by.reference b/tests/queries/0_stateless/03093_virtual_column_override_group_by.reference new file mode 100644 index 000000000000..d00491fd7e5b --- /dev/null +++ b/tests/queries/0_stateless/03093_virtual_column_override_group_by.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/03093_virtual_column_override_group_by.sql b/tests/queries/0_stateless/03093_virtual_column_override_group_by.sql new file mode 100644 index 000000000000..168d38a15b5a --- /dev/null +++ b/tests/queries/0_stateless/03093_virtual_column_override_group_by.sql @@ -0,0 +1,2 @@ +CREATE TABLE override_test__fuzz_45 (`_part` Float32) ENGINE = MergeTree ORDER BY tuple() AS SELECT 1; +SELECT _part FROM override_test__fuzz_45 GROUP BY materialize(6), 1; From 12569cc5fe880f9a25728158884db1ac2af00472 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Wed, 10 Apr 2024 18:18:47 +0200 Subject: [PATCH 30/90] Don't allow the fuzzer to change allow_experimental_analyzer --- docker/test/fuzzer/query-fuzzer-tweaks-users.xml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/docker/test/fuzzer/query-fuzzer-tweaks-users.xml b/docker/test/fuzzer/query-fuzzer-tweaks-users.xml index 023f257253a4..c31d2fd7f397 100644 --- a/docker/test/fuzzer/query-fuzzer-tweaks-users.xml +++ b/docker/test/fuzzer/query-fuzzer-tweaks-users.xml @@ -26,6 +26,11 @@ 200 + + + + + From f06dca1a5093a605a39b099da42fd5d59b387973 Mon Sep 17 00:00:00 2001 From: Murat Khairulin Date: Fri, 5 Apr 2024 14:53:32 +0500 Subject: [PATCH 31/90] Fix primary key in materialized view --- src/Storages/StorageMaterializedView.cpp | 6 ++++ .../03035_materialized_primary_key.reference | 3 ++ .../03035_materialized_primary_key.sql | 28 +++++++++++++++++++ 3 files changed, 37 insertions(+) create mode 100644 tests/queries/0_stateless/03035_materialized_primary_key.reference create mode 100644 tests/queries/0_stateless/03035_materialized_primary_key.sql diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index 344b5dfce9b7..9e98b9830555 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -91,6 +91,12 @@ StorageMaterializedView::StorageMaterializedView( { StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns(columns_); + auto storage_def = query.storage; + if (storage_def && storage_def->primary_key) + storage_metadata.primary_key = KeyDescription::getKeyFromAST(storage_def->primary_key->ptr(), + storage_metadata.columns, + local_context->getGlobalContext()); + if (query.sql_security) storage_metadata.setSQLSecurity(query.sql_security->as()); diff --git a/tests/queries/0_stateless/03035_materialized_primary_key.reference b/tests/queries/0_stateless/03035_materialized_primary_key.reference new file mode 100644 index 000000000000..4ee050c1d92c --- /dev/null +++ b/tests/queries/0_stateless/03035_materialized_primary_key.reference @@ -0,0 +1,3 @@ +test id +test_mv +test_mv_pk value diff --git a/tests/queries/0_stateless/03035_materialized_primary_key.sql b/tests/queries/0_stateless/03035_materialized_primary_key.sql new file mode 100644 index 000000000000..961b61851c3a --- /dev/null +++ 
b/tests/queries/0_stateless/03035_materialized_primary_key.sql @@ -0,0 +1,28 @@ +DROP TABLE IF EXISTS test; +CREATE TABLE test +( + id UInt64, + value String +) ENGINE=MergeTree ORDER BY id; + +INSERT INTO test VALUES (1, 'Alice'), (2, 'Bob'); + +DROP VIEW IF EXISTS test_mv; +CREATE MATERIALIZED VIEW test_mv +( + id UInt64, + value String +) ENGINE=MergeTree +ORDER BY id AS SELECT id, value FROM test; + +DROP VIEW IF EXISTS test_mv_pk; +CREATE MATERIALIZED VIEW test_mv_pk +( + value String, + id UInt64 +) ENGINE=MergeTree PRIMARY KEY value +POPULATE AS SELECT value, id FROM test; + +SELECT name, primary_key +FROM system.tables +WHERE name LIKE 'test%'; \ No newline at end of file From 1938184273e972328ab494b39e3d6a6cf06ab391 Mon Sep 17 00:00:00 2001 From: Murat Khairulin Date: Fri, 5 Apr 2024 23:56:23 +0500 Subject: [PATCH 32/90] Fix for style check --- tests/queries/0_stateless/03035_materialized_primary_key.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/03035_materialized_primary_key.sql b/tests/queries/0_stateless/03035_materialized_primary_key.sql index 961b61851c3a..928aebc340b3 100644 --- a/tests/queries/0_stateless/03035_materialized_primary_key.sql +++ b/tests/queries/0_stateless/03035_materialized_primary_key.sql @@ -25,4 +25,4 @@ POPULATE AS SELECT value, id FROM test; SELECT name, primary_key FROM system.tables -WHERE name LIKE 'test%'; \ No newline at end of file +WHERE database = currentDatabase() AND name LIKE 'test%'; \ No newline at end of file From 9783ae2a82bfa68e6e41cd74ef21dd9798365bf0 Mon Sep 17 00:00:00 2001 From: Murat Khairulin Date: Sun, 7 Apr 2024 22:27:10 +0500 Subject: [PATCH 33/90] Fix style --- src/Storages/StorageMaterializedView.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index 9e98b9830555..696865dfa2f6 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -91,7 +91,7 @@ StorageMaterializedView::StorageMaterializedView( { StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns(columns_); - auto storage_def = query.storage; + auto * storage_def = query.storage; if (storage_def && storage_def->primary_key) storage_metadata.primary_key = KeyDescription::getKeyFromAST(storage_def->primary_key->ptr(), storage_metadata.columns, From dab3f55bdbe622a05eb7eae2b45f1bce159696d9 Mon Sep 17 00:00:00 2001 From: Murat Khairulin Date: Wed, 10 Apr 2024 01:20:39 +0500 Subject: [PATCH 34/90] Restart ci From 73db78fe43f5b6097e724a1f91965ba1bded92e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Wed, 10 Apr 2024 19:13:03 +0200 Subject: [PATCH 35/90] Add test for #26674 --- .../03093_analyzer_column_alias.reference | 1 + .../03093_analyzer_column_alias.sql | 21 +++++++++++++++++++ 2 files changed, 22 insertions(+) create mode 100644 tests/queries/0_stateless/03093_analyzer_column_alias.reference create mode 100644 tests/queries/0_stateless/03093_analyzer_column_alias.sql diff --git a/tests/queries/0_stateless/03093_analyzer_column_alias.reference b/tests/queries/0_stateless/03093_analyzer_column_alias.reference new file mode 100644 index 000000000000..4d9ef9832ddf --- /dev/null +++ b/tests/queries/0_stateless/03093_analyzer_column_alias.reference @@ -0,0 +1 @@ +1 0 10 9 diff --git a/tests/queries/0_stateless/03093_analyzer_column_alias.sql b/tests/queries/0_stateless/03093_analyzer_column_alias.sql new file mode 100644 index 
000000000000..9ff0f78ba245 --- /dev/null +++ b/tests/queries/0_stateless/03093_analyzer_column_alias.sql @@ -0,0 +1,21 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/26674 +SET allow_experimental_analyzer = true; + +SELECT + Carrier, + sum(toFloat64(C3)) AS C1, + sum(toFloat64(C1)) AS C2, + sum(toFloat64(C2)) AS C3 +FROM + ( + SELECT + 1 AS Carrier, + count(CAST(1, 'Nullable(Int32)')) AS C1, + max(number) AS C2, + min(number) AS C3 + FROM numbers(10) + GROUP BY Carrier + ) AS ITBL +GROUP BY Carrier +LIMIT 1000001 +SETTINGS prefer_column_name_to_alias=1; From 958d36eecbe9f1177ba4bc032cb2b3c5e5ec0c81 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Wed, 10 Apr 2024 19:31:12 +0200 Subject: [PATCH 36/90] Typo --- src/Processors/Formats/Impl/ProtobufListInputFormat.cpp | 4 ++-- src/Processors/Formats/Impl/ProtobufListInputFormat.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Processors/Formats/Impl/ProtobufListInputFormat.cpp b/src/Processors/Formats/Impl/ProtobufListInputFormat.cpp index 2382b3cf27ac..c643ae060d62 100644 --- a/src/Processors/Formats/Impl/ProtobufListInputFormat.cpp +++ b/src/Processors/Formats/Impl/ProtobufListInputFormat.cpp @@ -86,7 +86,7 @@ size_t ProtobufListInputFormat::countRows(size_t max_block_size) ProtobufListSchemaReader::ProtobufListSchemaReader(const FormatSettings & format_settings) : schema_info( format_settings.schema.format_schema, "Protobuf", true, format_settings.schema.is_server, format_settings.schema.format_schema_path) - , skip_unsopported_fields(format_settings.protobuf.skip_fields_with_unsupported_types_in_schema_inference) + , skip_unsupported_fields(format_settings.protobuf.skip_fields_with_unsupported_types_in_schema_inference) , google_protos_path(format_settings.protobuf.google_protos_path) { } @@ -95,7 +95,7 @@ NamesAndTypesList ProtobufListSchemaReader::readSchema() { const auto * message_descriptor = ProtobufSchemas::instance().getMessageTypeForFormatSchema(schema_info, ProtobufSchemas::WithEnvelope::Yes, google_protos_path); - return protobufSchemaToCHSchema(message_descriptor, skip_unsopported_fields); + return protobufSchemaToCHSchema(message_descriptor, skip_unsupported_fields); } void registerInputFormatProtobufList(FormatFactory & factory) diff --git a/src/Processors/Formats/Impl/ProtobufListInputFormat.h b/src/Processors/Formats/Impl/ProtobufListInputFormat.h index 947696bba820..8305af285063 100644 --- a/src/Processors/Formats/Impl/ProtobufListInputFormat.h +++ b/src/Processors/Formats/Impl/ProtobufListInputFormat.h @@ -56,7 +56,7 @@ class ProtobufListSchemaReader : public IExternalSchemaReader private: const FormatSchemaInfo schema_info; - bool skip_unsopported_fields; + bool skip_unsupported_fields; const String google_protos_path; }; From d6260e984cb1261d28ed7f2d77031839b4977b5a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Wed, 10 Apr 2024 19:46:52 +0200 Subject: [PATCH 37/90] Avoid crash when reading protobuf with recursive types --- src/Formats/ProtobufSerializer.cpp | 31 +++++++++++++++++-- .../03094_recursive_type_proto.reference | 1 + .../0_stateless/03094_recursive_type_proto.sh | 8 +++++ .../format_schemas/03094_recursive_type.proto | 17 ++++++++++ 4 files changed, 54 insertions(+), 3 deletions(-) create mode 100644 tests/queries/0_stateless/03094_recursive_type_proto.reference create mode 100755 tests/queries/0_stateless/03094_recursive_type_proto.sh create mode 100644 tests/queries/0_stateless/format_schemas/03094_recursive_type.proto diff 
--git a/src/Formats/ProtobufSerializer.cpp b/src/Formats/ProtobufSerializer.cpp index f2f1d985cc9c..744cea9f4dc4 100644 --- a/src/Formats/ProtobufSerializer.cpp +++ b/src/Formats/ProtobufSerializer.cpp @@ -3721,8 +3721,23 @@ namespace return std::make_shared>(std::move(values)); } - std::optional getNameAndDataTypeFromField(const google::protobuf::FieldDescriptor * field_descriptor, bool skip_unsupported_fields, bool allow_repeat = true) + std::optional getNameAndDataTypeFromField( + const google::protobuf::FieldDescriptor * field_descriptor, bool skip_unsupported_fields, bool allow_repeat); + + std::optional getNameAndDataTypeFromFieldRecursive( + const google::protobuf::FieldDescriptor * field_descriptor, + bool skip_unsupported_fields, + bool allow_repeat, + std::unordered_set & pending_resolution) { + if (pending_resolution.contains(field_descriptor)) + { + if (skip_unsupported_fields) + return std::nullopt; + throw Exception(ErrorCodes::BAD_ARGUMENTS, "ClickHouse doesn't support type recursion ({})", field_descriptor->full_name()); + } + pending_resolution.emplace(field_descriptor); + if (allow_repeat && field_descriptor->is_map()) { auto name_and_type = getNameAndDataTypeFromField(field_descriptor, skip_unsupported_fields, false); @@ -3804,7 +3819,8 @@ namespace else if (message_descriptor->field_count() == 1) { const auto * nested_field_descriptor = message_descriptor->field(0); - auto nested_name_and_type = getNameAndDataTypeFromField(nested_field_descriptor, skip_unsupported_fields); + auto nested_name_and_type + = getNameAndDataTypeFromFieldRecursive(nested_field_descriptor, skip_unsupported_fields, true, pending_resolution); if (!nested_name_and_type) return std::nullopt; return NameAndTypePair{field_descriptor->name() + "_" + nested_name_and_type->name, nested_name_and_type->type}; @@ -3815,7 +3831,8 @@ namespace Strings nested_names; for (int i = 0; i != message_descriptor->field_count(); ++i) { - auto nested_name_and_type = getNameAndDataTypeFromField(message_descriptor->field(i), skip_unsupported_fields); + auto nested_name_and_type = getNameAndDataTypeFromFieldRecursive( + message_descriptor->field(i), skip_unsupported_fields, true, pending_resolution); if (!nested_name_and_type) continue; nested_types.push_back(nested_name_and_type->type); @@ -3831,6 +3848,14 @@ namespace UNREACHABLE(); } + + std::optional getNameAndDataTypeFromField( + const google::protobuf::FieldDescriptor * field_descriptor, bool skip_unsupported_fields, bool allow_repeat = true) + { + /// Keep track of the fields that are pending resolution to avoid recursive types, which are unsupported + std::unordered_set pending_resolution{}; + return getNameAndDataTypeFromFieldRecursive(field_descriptor, skip_unsupported_fields, allow_repeat, pending_resolution); + } } std::unique_ptr ProtobufSerializer::create( diff --git a/tests/queries/0_stateless/03094_recursive_type_proto.reference b/tests/queries/0_stateless/03094_recursive_type_proto.reference new file mode 100644 index 000000000000..d00491fd7e5b --- /dev/null +++ b/tests/queries/0_stateless/03094_recursive_type_proto.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/03094_recursive_type_proto.sh b/tests/queries/0_stateless/03094_recursive_type_proto.sh new file mode 100755 index 000000000000..6fa374f98d50 --- /dev/null +++ b/tests/queries/0_stateless/03094_recursive_type_proto.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +SCHEMADIR=$CURDIR/format_schemas +$CLICKHOUSE_LOCAL -q "DESCRIBE TABLE file('nonexist', 'Protobuf') SETTINGS format_schema='$SCHEMADIR/03094_recursive_type.proto:Struct'" |& grep -c CANNOT_PARSE_PROTOBUF_SCHEMA diff --git a/tests/queries/0_stateless/format_schemas/03094_recursive_type.proto b/tests/queries/0_stateless/format_schemas/03094_recursive_type.proto new file mode 100644 index 000000000000..97b2c9480a1f --- /dev/null +++ b/tests/queries/0_stateless/format_schemas/03094_recursive_type.proto @@ -0,0 +1,17 @@ +syntax = "proto3"; + +message Struct { + map fields = 1; +} + +message Value { + // The kind of value. + oneof kind { + string string_value = 1; + ListValue list_value = 2; + } +} + +message ListValue { + repeated Value values = 1; +} From dfa7a9704ad1ed34d5893234bc1bf852d3390ca2 Mon Sep 17 00:00:00 2001 From: Joshua Hildred Date: Wed, 10 Apr 2024 11:47:10 -0700 Subject: [PATCH 38/90] Fix an isssue with constants being wrapped in nullables --- .../Passes/LogicalExpressionOptimizerPass.cpp | 15 +++++++++++++++ .../0_stateless/03032_redundant_equals.reference | 2 ++ .../0_stateless/03032_redundant_equals.sql | 2 ++ 3 files changed, 19 insertions(+) diff --git a/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp b/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp index 546959c4d9c0..ee0ddf24233e 100644 --- a/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp +++ b/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp @@ -274,7 +274,18 @@ class LogicalExpressionOptimizerVisitor : public InDepthQueryTreeVisitorWithCont } } + void leaveImpl(QueryTreeNodePtr & node) + { + if (!need_rerun_resolve) + return; + + if (auto * function_node = node->as()) + rerunFunctionResolve(function_node, getContext()); + } + private: + bool need_rerun_resolve = false; + void tryOptimizeAndEqualsNotEqualsChain(QueryTreeNodePtr & node) { auto & function_node = node->as(); @@ -615,6 +626,10 @@ class LogicalExpressionOptimizerVisitor : public InDepthQueryTreeVisitorWithCont if (!child_function || !isBooleanFunction(child_function->getFunctionName())) return; + + if (function_node.getResultType()->isNullable() && !child_function->getResultType()->isNullable()) + need_rerun_resolve = true; + if (maybe_invert) { auto not_resolver = FunctionFactory::instance().get("not", getContext()); diff --git a/tests/queries/0_stateless/03032_redundant_equals.reference b/tests/queries/0_stateless/03032_redundant_equals.reference index d477c98b6048..09f4d8e3646a 100644 --- a/tests/queries/0_stateless/03032_redundant_equals.reference +++ b/tests/queries/0_stateless/03032_redundant_equals.reference @@ -15,6 +15,8 @@ 100 101 100 +100 +101 1 1 1 diff --git a/tests/queries/0_stateless/03032_redundant_equals.sql b/tests/queries/0_stateless/03032_redundant_equals.sql index afb9c8878661..427845d9c12e 100644 --- a/tests/queries/0_stateless/03032_redundant_equals.sql +++ b/tests/queries/0_stateless/03032_redundant_equals.sql @@ -23,6 +23,8 @@ SELECT * FROM test_table WHERE (NOT ((k not in (100) = 0) OR (k in (100) = 1))) SELECT * FROM test_table WHERE (NOT ((k in (101) = 0) OR (k in (100) = 1))) = 1; SELECT * FROM test_table WHERE ((k not in (101) = 0) OR (k in (100) = 1)) = 1; SELECT * FROM test_table WHERE ((k not in (99) = 1) AND (k in (100) = 1)) = 1; +SELECT * FROM test_table WHERE ((k not in (101) = toNullable(0)) OR (k in (100) = toNullable(1))) = toNullable(1); + SELECT count() FROM From 70438f7e6e2596f40c0b4a2502e27afc49bc778a Mon Sep 17 00:00:00 2001 From: Joshua Hildred Date: 
Wed, 10 Apr 2024 12:11:12 -0700 Subject: [PATCH 39/90] Update tests to set allow_experimental_analyzer --- tests/queries/0_stateless/03032_redundant_equals.sql | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/queries/0_stateless/03032_redundant_equals.sql b/tests/queries/0_stateless/03032_redundant_equals.sql index 427845d9c12e..ae0b9651e12c 100644 --- a/tests/queries/0_stateless/03032_redundant_equals.sql +++ b/tests/queries/0_stateless/03032_redundant_equals.sql @@ -9,6 +9,8 @@ ORDER BY k; INSERT INTO test_table SELECT number FROM numbers(10000000); +SET allow_experimental_analyzer = 1; + SELECT * FROM test_table WHERE k in (100) = 1; SELECT * FROM test_table WHERE k = (100) = 1; SELECT * FROM test_table WHERE k not in (100) = 0; From 4f38bf4f6b6566a7e746d0f2c72967027692a016 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 10 Apr 2024 21:39:02 +0200 Subject: [PATCH 40/90] Revert "Revert "Rich syntax highlighting in the client"" --- src/Client/ClientBase.cpp | 8 +- src/Client/ClientBaseHelpers.cpp | 162 ++++++++++-------- src/Parsers/ASTOrderByElement.cpp | 1 - src/Parsers/CommonParsers.h | 2 + src/Parsers/ExpressionElementParsers.cpp | 4 +- src/Parsers/ExpressionElementParsers.h | 13 +- src/Parsers/ExpressionListParsers.cpp | 83 +++++---- src/Parsers/IParser.cpp | 23 +++ src/Parsers/IParser.h | 39 +++++ src/Parsers/IParserBase.cpp | 19 +- src/Parsers/ParserInsertQuery.cpp | 4 +- src/Parsers/parseDatabaseAndTableName.cpp | 15 -- src/Parsers/parseQuery.cpp | 58 ++++--- src/Parsers/parseQuery.h | 5 + ..._autocomplete_word_break_characters.expect | 2 +- ...01565_query_loop_after_client_error.expect | 19 +- .../01676_clickhouse_client_autocomplete.sh | 2 +- .../01702_system_query_log.reference | 20 +-- ...160_client_autocomplete_parse_query.expect | 2 +- 19 files changed, 301 insertions(+), 180 deletions(-) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 4948402bb7fb..f37b391eb664 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -2061,7 +2061,7 @@ MultiQueryProcessingStage ClientBase::analyzeMultiQueryText( return MultiQueryProcessingStage::QUERIES_END; // Remove leading empty newlines and other whitespace, because they - // are annoying to filter in query log. This is mostly relevant for + // are annoying to filter in the query log. This is mostly relevant for // the tests. while (this_query_begin < all_queries_end && isWhitespaceASCII(*this_query_begin)) ++this_query_begin; @@ -2091,7 +2091,7 @@ MultiQueryProcessingStage ClientBase::analyzeMultiQueryText( { parsed_query = parseQuery(this_query_end, all_queries_end, true); } - catch (Exception & e) + catch (const Exception & e) { current_exception.reset(e.clone()); return MultiQueryProcessingStage::PARSING_EXCEPTION; @@ -2116,9 +2116,9 @@ MultiQueryProcessingStage ClientBase::analyzeMultiQueryText( // INSERT queries may have the inserted data in the query text // that follow the query itself, e.g. "insert into t format CSV 1;2". // They need special handling. First of all, here we find where the - // inserted data ends. In multy-query mode, it is delimited by a + // inserted data ends. In multi-query mode, it is delimited by a // newline. - // The VALUES format needs even more handling -- we also allow the + // The VALUES format needs even more handling - we also allow the // data to be delimited by semicolon. This case is handled later by // the format parser itself. 
// We can't do multiline INSERTs with inline data, because most diff --git a/src/Client/ClientBaseHelpers.cpp b/src/Client/ClientBaseHelpers.cpp index b08626962957..b1d29b34ffc6 100644 --- a/src/Client/ClientBaseHelpers.cpp +++ b/src/Client/ClientBaseHelpers.cpp @@ -1,11 +1,14 @@ #include "ClientBaseHelpers.h" - #include #include -#include +#include +#include #include +#include + + namespace DB { @@ -96,77 +99,102 @@ void highlight(const String & query, std::vector & colors { using namespace replxx; - static const std::unordered_map token_to_color - = {{TokenType::Whitespace, Replxx::Color::DEFAULT}, - {TokenType::Comment, Replxx::Color::GRAY}, - {TokenType::BareWord, Replxx::Color::DEFAULT}, - {TokenType::Number, Replxx::Color::GREEN}, - {TokenType::StringLiteral, Replxx::Color::CYAN}, - {TokenType::QuotedIdentifier, Replxx::Color::MAGENTA}, - {TokenType::OpeningRoundBracket, Replxx::Color::BROWN}, - {TokenType::ClosingRoundBracket, Replxx::Color::BROWN}, - {TokenType::OpeningSquareBracket, Replxx::Color::BROWN}, - {TokenType::ClosingSquareBracket, Replxx::Color::BROWN}, - {TokenType::DoubleColon, Replxx::Color::BROWN}, - {TokenType::OpeningCurlyBrace, replxx::color::bold(Replxx::Color::DEFAULT)}, - {TokenType::ClosingCurlyBrace, replxx::color::bold(Replxx::Color::DEFAULT)}, - - {TokenType::Comma, replxx::color::bold(Replxx::Color::DEFAULT)}, - {TokenType::Semicolon, replxx::color::bold(Replxx::Color::DEFAULT)}, - {TokenType::VerticalDelimiter, replxx::color::bold(Replxx::Color::DEFAULT)}, - {TokenType::Dot, replxx::color::bold(Replxx::Color::DEFAULT)}, - {TokenType::Asterisk, replxx::color::bold(Replxx::Color::DEFAULT)}, - {TokenType::HereDoc, Replxx::Color::CYAN}, - {TokenType::Plus, replxx::color::bold(Replxx::Color::DEFAULT)}, - {TokenType::Minus, replxx::color::bold(Replxx::Color::DEFAULT)}, - {TokenType::Slash, replxx::color::bold(Replxx::Color::DEFAULT)}, - {TokenType::Percent, replxx::color::bold(Replxx::Color::DEFAULT)}, - {TokenType::Arrow, replxx::color::bold(Replxx::Color::DEFAULT)}, - {TokenType::QuestionMark, replxx::color::bold(Replxx::Color::DEFAULT)}, - {TokenType::Colon, replxx::color::bold(Replxx::Color::DEFAULT)}, - {TokenType::Equals, replxx::color::bold(Replxx::Color::DEFAULT)}, - {TokenType::NotEquals, replxx::color::bold(Replxx::Color::DEFAULT)}, - {TokenType::Less, replxx::color::bold(Replxx::Color::DEFAULT)}, - {TokenType::Greater, replxx::color::bold(Replxx::Color::DEFAULT)}, - {TokenType::LessOrEquals, replxx::color::bold(Replxx::Color::DEFAULT)}, - {TokenType::GreaterOrEquals, replxx::color::bold(Replxx::Color::DEFAULT)}, - {TokenType::Spaceship, replxx::color::bold(Replxx::Color::DEFAULT)}, - {TokenType::Concatenation, replxx::color::bold(Replxx::Color::DEFAULT)}, - {TokenType::At, replxx::color::bold(Replxx::Color::DEFAULT)}, - {TokenType::DoubleAt, Replxx::Color::MAGENTA}, - - {TokenType::EndOfStream, Replxx::Color::DEFAULT}, - - {TokenType::Error, Replxx::Color::RED}, - {TokenType::ErrorMultilineCommentIsNotClosed, Replxx::Color::RED}, - {TokenType::ErrorSingleQuoteIsNotClosed, Replxx::Color::RED}, - {TokenType::ErrorDoubleQuoteIsNotClosed, Replxx::Color::RED}, - {TokenType::ErrorSinglePipeMark, Replxx::Color::RED}, - {TokenType::ErrorWrongNumber, Replxx::Color::RED}, - {TokenType::ErrorMaxQuerySizeExceeded, Replxx::Color::RED}}; - - const Replxx::Color unknown_token_color = Replxx::Color::RED; - - Lexer lexer(query.data(), query.data() + query.size()); - size_t pos = 0; + /// The `colors` array maps to a Unicode code point position in a string into a 
color. + /// A color is set for every position individually (not for a range). - for (Token token = lexer.nextToken(); !token.isEnd(); token = lexer.nextToken()) + /// Empty input. + if (colors.empty()) + return; + + /// The colors should be legible (and look gorgeous) in both dark and light themes. + /// When modifying this, check it in both themes. + + static const std::unordered_map type_to_color = { - if (token.type == TokenType::Semicolon || token.type == TokenType::VerticalDelimiter) - ReplxxLineReader::setLastIsDelimiter(true); - else if (token.type != TokenType::Whitespace) - ReplxxLineReader::setLastIsDelimiter(false); + {Highlight::keyword, replxx::color::bold(Replxx::Color::DEFAULT)}, + {Highlight::identifier, Replxx::Color::CYAN}, + {Highlight::function, Replxx::Color::BROWN}, + {Highlight::alias, replxx::color::rgb666(0, 4, 4)}, + {Highlight::substitution, Replxx::Color::MAGENTA}, + {Highlight::number, replxx::color::rgb666(0, 4, 0)}, + {Highlight::string, Replxx::Color::GREEN}, + }; + + /// We set reasonably small limits for size/depth, because we don't want the CLI to be slow. + /// While syntax highlighting is unneeded for long queries, which the user couldn't read anyway. - size_t utf8_len = UTF8::countCodePoints(reinterpret_cast(token.begin), token.size()); - for (size_t code_point_index = 0; code_point_index < utf8_len; ++code_point_index) + const char * begin = query.data(); + const char * end = begin + query.size(); + Tokens tokens(begin, end, 1000, true); + IParser::Pos token_iterator(tokens, static_cast(1000), static_cast(10000)); + Expected expected; + + /// We don't do highlighting for foreign dialects, such as PRQL and Kusto. + /// Only normal ClickHouse SQL queries are highlighted. + + /// Currently we highlight only the first query in the multi-query mode. + + ParserQuery parser(end); + ASTPtr ast; + bool parse_res = false; + + try + { + parse_res = parser.parse(token_iterator, ast, expected); + } + catch (...) + { + /// Skip highlighting in the case of exceptions during parsing. + /// It is ok to ignore unknown exceptions here. + return; + } + + size_t pos = 0; + const char * prev = begin; + for (const auto & range : expected.highlights) + { + auto it = type_to_color.find(range.highlight); + if (it != type_to_color.end()) { - if (token_to_color.find(token.type) != token_to_color.end()) - colors[pos + code_point_index] = token_to_color.at(token.type); - else - colors[pos + code_point_index] = unknown_token_color; + /// We have to map from byte positions to Unicode positions. + pos += UTF8::countCodePoints(reinterpret_cast(prev), range.begin - prev); + size_t utf8_len = UTF8::countCodePoints(reinterpret_cast(range.begin), range.end - range.begin); + + for (size_t code_point_index = 0; code_point_index < utf8_len; ++code_point_index) + colors[pos + code_point_index] = it->second; + + pos += utf8_len; + prev = range.end; } + } - pos += utf8_len; + Token last_token = token_iterator.max(); + /// Raw data in INSERT queries, which is not necessarily tokenized. + const char * insert_data = ast ? getInsertData(ast) : nullptr; + + /// Highlight the last error in red. 
If the parser failed or the lexer found an invalid token, + /// or if it didn't parse all the data (except, the data for INSERT query, which is legitimately unparsed) + if ((!parse_res || last_token.isError() || (!token_iterator->isEnd() && token_iterator->type != TokenType::Semicolon)) + && !(insert_data && expected.max_parsed_pos >= insert_data) + && expected.max_parsed_pos >= prev) + { + pos += UTF8::countCodePoints(reinterpret_cast(prev), expected.max_parsed_pos - prev); + + if (pos >= colors.size()) + pos = colors.size() - 1; + + colors[pos] = Replxx::Color::BRIGHTRED; + } + + /// This is a callback for the client/local app to better find query end. Note: this is a kludge, remove it. + if (last_token.type == TokenType::Semicolon || last_token.type == TokenType::VerticalDelimiter + || query.ends_with(';') || query.ends_with("\\G")) /// This is for raw data in INSERT queries, which is not necessarily tokenized. + { + ReplxxLineReader::setLastIsDelimiter(true); + } + else if (last_token.type != TokenType::Whitespace) + { + ReplxxLineReader::setLastIsDelimiter(false); } } #endif diff --git a/src/Parsers/ASTOrderByElement.cpp b/src/Parsers/ASTOrderByElement.cpp index be0416359a18..09193a8b5e16 100644 --- a/src/Parsers/ASTOrderByElement.cpp +++ b/src/Parsers/ASTOrderByElement.cpp @@ -1,4 +1,3 @@ -#include #include #include #include diff --git a/src/Parsers/CommonParsers.h b/src/Parsers/CommonParsers.h index 49964b5c7281..2277e348b0f2 100644 --- a/src/Parsers/CommonParsers.h +++ b/src/Parsers/CommonParsers.h @@ -601,6 +601,8 @@ class ParserKeyword : public IParserBase constexpr const char * getName() const override { return s.data(); } + Highlight highlight() const override { return Highlight::keyword; } + protected: bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; }; diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index 2c8ab65d1fc6..dce0bc62d5b5 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -278,7 +278,7 @@ bool ParserTableAsStringLiteralIdentifier::parseImpl(Pos & pos, ASTPtr & node, E bool ParserCompoundIdentifier::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ASTPtr id_list; - if (!ParserList(std::make_unique(allow_query_parameter), std::make_unique(TokenType::Dot), false) + if (!ParserList(std::make_unique(allow_query_parameter, highlight_type), std::make_unique(TokenType::Dot), false) .parse(pos, id_list, expected)) return false; @@ -1491,7 +1491,7 @@ const char * ParserAlias::restricted_keywords[] = bool ParserAlias::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ParserKeyword s_as(Keyword::AS); - ParserIdentifier id_p; + ParserIdentifier id_p(false, Highlight::alias); bool has_as_word = s_as.ignore(pos, expected); if (!allow_alias_without_as_keyword && !has_as_word) diff --git a/src/Parsers/ExpressionElementParsers.h b/src/Parsers/ExpressionElementParsers.h index b29f5cc42510..6dbb75450edd 100644 --- a/src/Parsers/ExpressionElementParsers.h +++ b/src/Parsers/ExpressionElementParsers.h @@ -25,12 +25,15 @@ class ParserSubquery : public IParserBase class ParserIdentifier : public IParserBase { public: - explicit ParserIdentifier(bool allow_query_parameter_ = false) : allow_query_parameter(allow_query_parameter_) {} + explicit ParserIdentifier(bool allow_query_parameter_ = false, Highlight highlight_type_ = Highlight::identifier) + : allow_query_parameter(allow_query_parameter_), highlight_type(highlight_type_) {} + Highlight 
highlight() const override { return highlight_type; } protected: const char * getName() const override { return "identifier"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; bool allow_query_parameter; + Highlight highlight_type; }; @@ -53,8 +56,8 @@ class ParserTableAsStringLiteralIdentifier : public IParserBase class ParserCompoundIdentifier : public IParserBase { public: - explicit ParserCompoundIdentifier(bool table_name_with_optional_uuid_ = false, bool allow_query_parameter_ = false) - : table_name_with_optional_uuid(table_name_with_optional_uuid_), allow_query_parameter(allow_query_parameter_) + explicit ParserCompoundIdentifier(bool table_name_with_optional_uuid_ = false, bool allow_query_parameter_ = false, Highlight highlight_type_ = Highlight::identifier) + : table_name_with_optional_uuid(table_name_with_optional_uuid_), allow_query_parameter(allow_query_parameter_), highlight_type(highlight_type_) { } @@ -63,6 +66,7 @@ class ParserCompoundIdentifier : public IParserBase bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; bool table_name_with_optional_uuid; bool allow_query_parameter; + Highlight highlight_type; }; /** *, t.*, db.table.*, COLUMNS('') APPLY(...) or EXCEPT(...) or REPLACE(...) @@ -253,6 +257,7 @@ class ParserNumber : public IParserBase protected: const char * getName() const override { return "number"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; + Highlight highlight() const override { return Highlight::number; } }; /** Unsigned integer, used in right hand side of tuple access operator (x.1). @@ -273,6 +278,7 @@ class ParserStringLiteral : public IParserBase protected: const char * getName() const override { return "string literal"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; + Highlight highlight() const override { return Highlight::string; } }; @@ -385,6 +391,7 @@ class ParserSubstitution : public IParserBase protected: const char * getName() const override { return "substitution"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; + Highlight highlight() const override { return Highlight::substitution; } }; diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index 05691529f430..276b4e820742 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -441,6 +441,21 @@ bool ParserKeyValuePairsList::parseImpl(Pos & pos, ASTPtr & node, Expected & exp return parser.parse(pos, node, expected); } +namespace +{ + /// This wrapper is needed to highlight function names differently. 
+ class ParserFunctionName : public IParserBase + { + protected: + const char * getName() const override { return "function name"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override + { + ParserCompoundIdentifier parser(false, true, Highlight::function); + return parser.parse(pos, node, expected); + } + }; +} + enum class Action { @@ -809,6 +824,7 @@ struct ParserExpressionImpl static const Operator finish_between_operator; + ParserFunctionName function_name_parser; ParserCompoundIdentifier identifier_parser{false, true}; ParserNumber number_parser; ParserAsterisk asterisk_parser; @@ -2359,7 +2375,7 @@ bool ParserFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ASTPtr identifier; - if (ParserCompoundIdentifier(false,true).parse(pos, identifier, expected) + if (ParserFunctionName().parse(pos, identifier, expected) && ParserToken(TokenType::OpeningRoundBracket).ignore(pos, expected)) { auto start = getFunctionLayer(identifier, is_table_function, allow_function_parameters); @@ -2497,7 +2513,7 @@ Action ParserExpressionImpl::tryParseOperand(Layers & layers, IParser::Pos & pos { if (typeid_cast(layers.back().get()) || typeid_cast(layers.back().get())) { - if (identifier_parser.parse(pos, tmp, expected) + if (function_name_parser.parse(pos, tmp, expected) && ParserToken(TokenType::OpeningRoundBracket).ignore(pos, expected)) { layers.push_back(getFunctionLayer(tmp, layers.front()->is_table_function)); @@ -2629,49 +2645,52 @@ Action ParserExpressionImpl::tryParseOperand(Layers & layers, IParser::Pos & pos { layers.back()->pushOperand(std::move(tmp)); } - else if (identifier_parser.parse(pos, tmp, expected)) + else { - if (pos->type == TokenType::OpeningRoundBracket) + old_pos = pos; + if (function_name_parser.parse(pos, tmp, expected) && pos->type == TokenType::OpeningRoundBracket) { ++pos; layers.push_back(getFunctionLayer(tmp, layers.front()->is_table_function)); return Action::OPERAND; } - else + pos = old_pos; + + if (identifier_parser.parse(pos, tmp, expected)) { layers.back()->pushOperand(std::move(tmp)); } - } - else if (substitution_parser.parse(pos, tmp, expected)) - { - layers.back()->pushOperand(std::move(tmp)); - } - else if (pos->type == TokenType::OpeningRoundBracket) - { - - if (subquery_parser.parse(pos, tmp, expected)) + else if (substitution_parser.parse(pos, tmp, expected)) { layers.back()->pushOperand(std::move(tmp)); - return Action::OPERATOR; } + else if (pos->type == TokenType::OpeningRoundBracket) + { - ++pos; - layers.push_back(std::make_unique()); - return Action::OPERAND; - } - else if (pos->type == TokenType::OpeningSquareBracket) - { - ++pos; - layers.push_back(std::make_unique()); - return Action::OPERAND; - } - else if (mysql_global_variable_parser.parse(pos, tmp, expected)) - { - layers.back()->pushOperand(std::move(tmp)); - } - else - { - return Action::NONE; + if (subquery_parser.parse(pos, tmp, expected)) + { + layers.back()->pushOperand(std::move(tmp)); + return Action::OPERATOR; + } + + ++pos; + layers.push_back(std::make_unique()); + return Action::OPERAND; + } + else if (pos->type == TokenType::OpeningSquareBracket) + { + ++pos; + layers.push_back(std::make_unique()); + return Action::OPERAND; + } + else if (mysql_global_variable_parser.parse(pos, tmp, expected)) + { + layers.back()->pushOperand(std::move(tmp)); + } + else + { + return Action::NONE; + } } return Action::OPERATOR; diff --git a/src/Parsers/IParser.cpp b/src/Parsers/IParser.cpp index 41981a4bb8aa..eb4ddfa01d24 100644 --- a/src/Parsers/IParser.cpp +++ 
b/src/Parsers/IParser.cpp @@ -9,6 +9,7 @@ namespace ErrorCodes extern const int TOO_SLOW_PARSING; } + IParser::Pos & IParser::Pos::operator=(const IParser::Pos & rhs) { depth = rhs.depth; @@ -32,4 +33,26 @@ IParser::Pos & IParser::Pos::operator=(const IParser::Pos & rhs) return *this; } + +template +static bool intersects(T a_begin, T a_end, T b_begin, T b_end) +{ + return (a_begin <= b_begin && b_begin < a_end) + || (b_begin <= a_begin && a_begin < b_end); +} + + +void Expected::highlight(HighlightedRange range) +{ + auto it = highlights.lower_bound(range); + while (it != highlights.end() && range.begin < it->end) + { + if (intersects(range.begin, range.end, it->begin, it->end)) + it = highlights.erase(it); + else + ++it; + } + highlights.insert(range); +} + } diff --git a/src/Parsers/IParser.h b/src/Parsers/IParser.h index 291f8ee7d44a..f8146c0a4f6d 100644 --- a/src/Parsers/IParser.h +++ b/src/Parsers/IParser.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include @@ -21,14 +22,42 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } +enum class Highlight +{ + none = 0, + keyword, + identifier, + function, + alias, + substitution, + number, + string, +}; + +struct HighlightedRange +{ + const char * begin; + const char * end; + Highlight highlight; + + auto operator<=>(const HighlightedRange & other) const + { + return begin <=> other.begin; + } +}; + /** Collects variants, how parser could proceed further at rightmost position. + * Also collects a mapping of parsed ranges for highlighting, + * which is accumulated through the parsing. */ struct Expected { absl::InlinedVector variants; const char * max_parsed_pos = nullptr; + std::set highlights; + /// 'description' should be statically allocated string. ALWAYS_INLINE void add(const char * current_pos, const char * description) { @@ -48,6 +77,8 @@ struct Expected { add(it->begin, description); } + + void highlight(HighlightedRange range); }; @@ -158,6 +189,14 @@ class IParser return parse(pos, node, expected); } + /** If the parsed fragment should be highlighted in the query editor, + * which type of highlighting to use? 
+ */ + virtual Highlight highlight() const + { + return Highlight::none; + } + virtual ~IParser() = default; }; diff --git a/src/Parsers/IParserBase.cpp b/src/Parsers/IParserBase.cpp index 0241250926dc..9d39056a8f16 100644 --- a/src/Parsers/IParserBase.cpp +++ b/src/Parsers/IParserBase.cpp @@ -10,8 +10,25 @@ bool IParserBase::parse(Pos & pos, ASTPtr & node, Expected & expected) return wrapParseImpl(pos, IncreaseDepthTag{}, [&] { + const char * begin = pos->begin; bool res = parseImpl(pos, node, expected); - if (!res) + if (res) + { + Highlight type = highlight(); + if (pos->begin > begin && type != Highlight::none) + { + Pos prev_token = pos; + --prev_token; + + HighlightedRange range; + range.begin = begin; + range.end = prev_token->end; + range.highlight = type; + + expected.highlight(range); + } + } + else node = nullptr; return res; }); diff --git a/src/Parsers/ParserInsertQuery.cpp b/src/Parsers/ParserInsertQuery.cpp index 9373e6a1c936..0bbb181b39c6 100644 --- a/src/Parsers/ParserInsertQuery.cpp +++ b/src/Parsers/ParserInsertQuery.cpp @@ -40,7 +40,6 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ParserKeyword s_with(Keyword::WITH); ParserToken s_lparen(TokenType::OpeningRoundBracket); ParserToken s_rparen(TokenType::ClosingRoundBracket); - ParserToken s_semicolon(TokenType::Semicolon); ParserIdentifier name_p(true); ParserList columns_p(std::make_unique(), std::make_unique(TokenType::Comma), false); ParserFunction table_function_p{false}; @@ -147,8 +146,9 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { /// If VALUES is defined in query, everything except setting will be parsed as data, /// and if values followed by semicolon, the data should be null. - if (!s_semicolon.checkWithoutMoving(pos, expected)) + if (pos->type != TokenType::Semicolon) data = pos->begin; + format_str = "Values"; } else if (s_format.ignore(pos, expected)) diff --git a/src/Parsers/parseDatabaseAndTableName.cpp b/src/Parsers/parseDatabaseAndTableName.cpp index 81660bc46008..eaf020e445bf 100644 --- a/src/Parsers/parseDatabaseAndTableName.cpp +++ b/src/Parsers/parseDatabaseAndTableName.cpp @@ -60,21 +60,6 @@ bool parseDatabaseAndTableAsAST(IParser::Pos & pos, Expected & expected, ASTPtr } -bool parseDatabase(IParser::Pos & pos, Expected & expected, String & database_str) -{ - ParserToken s_dot(TokenType::Dot); - ParserIdentifier identifier_parser; - - ASTPtr database; - database_str = ""; - - if (!identifier_parser.parse(pos, database, expected)) - return false; - - tryGetIdentifierNameInto(database, database_str); - return true; -} - bool parseDatabaseAsAST(IParser::Pos & pos, Expected & expected, ASTPtr & database) { ParserIdentifier identifier_parser(/* allow_query_parameter */true); diff --git a/src/Parsers/parseQuery.cpp b/src/Parsers/parseQuery.cpp index 51878efa7067..2a6abc234065 100644 --- a/src/Parsers/parseQuery.cpp +++ b/src/Parsers/parseQuery.cpp @@ -226,6 +226,32 @@ std::string getUnmatchedParenthesesErrorMessage( } +static ASTInsertQuery * getInsertAST(const ASTPtr & ast) +{ + /// Either it is INSERT or EXPLAIN INSERT. 
+ if (auto * explain = ast->as()) + { + if (auto explained_query = explain->getExplainedQuery()) + { + return explained_query->as(); + } + } + else + { + return ast->as(); + } + + return nullptr; +} + +const char * getInsertData(const ASTPtr & ast) +{ + if (const ASTInsertQuery * insert = getInsertAST(ast)) + return insert->data; + return nullptr; +} + + ASTPtr tryParseQuery( IParser & parser, const char * & _out_query_end, /* also query begin as input parameter */ @@ -270,29 +296,11 @@ ASTPtr tryParseQuery( if (res && max_parser_depth) res->checkDepth(max_parser_depth); - ASTInsertQuery * insert = nullptr; - if (parse_res) - { - if (auto * explain = res->as()) - { - if (auto explained_query = explain->getExplainedQuery()) - { - insert = explained_query->as(); - } - } - else - { - insert = res->as(); - } - } - - // If parsed query ends at data for insertion. Data for insertion could be - // in any format and not necessary be lexical correct, so we can't perform - // most of the checks. - if (insert && insert->data) - { + /// If parsed query ends at data for insertion. Data for insertion could be + /// in any format and not necessary be lexical correct, so we can't perform + /// most of the checks. + if (res && getInsertData(res)) return res; - } // More granular checks for queries other than INSERT w/inline data. /// Lexical error @@ -434,11 +442,9 @@ std::pair splitMultipartQuery( ast = parseQueryAndMovePosition(parser, pos, end, "", true, max_query_size, max_parser_depth, max_parser_backtracks); - auto * insert = ast->as(); - - if (insert && insert->data) + if (ASTInsertQuery * insert = getInsertAST(ast)) { - /// Data for INSERT is broken on new line + /// Data for INSERT is broken on the new line pos = insert->data; while (*pos && *pos != '\n') ++pos; diff --git a/src/Parsers/parseQuery.h b/src/Parsers/parseQuery.h index 93c1a4652671..564415d0b85c 100644 --- a/src/Parsers/parseQuery.h +++ b/src/Parsers/parseQuery.h @@ -71,4 +71,9 @@ std::pair splitMultipartQuery( size_t max_parser_backtracks, bool allow_settings_after_format_in_insert); +/** If the query contains raw data part, such as INSERT ... FORMAT ..., return a pointer to it. + * The SQL parser stops at the raw data part, which is parsed by a separate parser. 
+ */ +const char * getInsertData(const ASTPtr & ast); + } diff --git a/tests/queries/0_stateless/01370_client_autocomplete_word_break_characters.expect b/tests/queries/0_stateless/01370_client_autocomplete_word_break_characters.expect index 44f3ba9681a9..ffd3e742cec8 100755 --- a/tests/queries/0_stateless/01370_client_autocomplete_word_break_characters.expect +++ b/tests/queries/0_stateless/01370_client_autocomplete_word_break_characters.expect @@ -20,7 +20,7 @@ expect_after { -i $any_spawn_id timeout { exit 1 } } -spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --history_file=$history_file" +spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --history_file=$history_file --highlight=0" expect ":) " # Make a query diff --git a/tests/queries/0_stateless/01565_query_loop_after_client_error.expect b/tests/queries/0_stateless/01565_query_loop_after_client_error.expect index ac69c18ce392..6253840c63cf 100755 --- a/tests/queries/0_stateless/01565_query_loop_after_client_error.expect +++ b/tests/queries/0_stateless/01565_query_loop_after_client_error.expect @@ -24,30 +24,21 @@ expect_after { -i $any_spawn_id timeout { exit 1 } } -spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion -mn --history_file=$history_file" +spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion -mn --history_file=$history_file --highlight 0" expect "\n:) " -send -- "DROP TABLE IF EXISTS t01565;\n" -# NOTE: this is important for -mn mode, you should send "\r" only after reading echoed command -expect "\r\n" -send -- "\r" +send -- "DROP TABLE IF EXISTS t01565;\r" expect "\nOk." expect "\n:)" -send -- "CREATE TABLE t01565 (c0 String, c1 Int32) ENGINE = Memory() ;\n" -expect "\r\n" -send -- "\r" +send -- "CREATE TABLE t01565 (c0 String, c1 Int32) ENGINE = Memory() ;\r" expect "\nOk." expect "\n:) " -send -- "INSERT INTO t01565(c0, c1) VALUES (\"1\",1) ;\n" -expect "\r\n" -send -- "\r" +send -- "INSERT INTO t01565(c0, c1) VALUES (\"1\",1) ;\r" expect "\n:) " -send -- "INSERT INTO t01565(c0, c1) VALUES ('1', 1) ;\n" -expect "\r\n" -send -- "\r" +send -- "INSERT INTO t01565(c0, c1) VALUES ('1', 1) ;\r" expect "\nOk." 
expect "\n:) " diff --git a/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.sh b/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.sh index ebd6490077e4..f04ffdae229f 100755 --- a/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.sh +++ b/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.sh @@ -43,7 +43,7 @@ expect_after { -i \$any_spawn_id timeout { exit 1 } } -spawn bash -c "$*" +spawn bash -c "$* --highlight 0" expect ":) " # Make a query diff --git a/tests/queries/0_stateless/01702_system_query_log.reference b/tests/queries/0_stateless/01702_system_query_log.reference index c653021aa5ae..5498b5377ba5 100644 --- a/tests/queries/0_stateless/01702_system_query_log.reference +++ b/tests/queries/0_stateless/01702_system_query_log.reference @@ -43,16 +43,16 @@ Alter ALTER TABLE sqllt.table UPDATE i = i + 1 WHERE 1; Alter ALTER TABLE sqllt.table DELETE WHERE i > 65535; Select -- not done, seems to hard, so I\'ve skipped queries of ALTER-X, where X is:\n-- PARTITION\n-- ORDER BY\n-- SAMPLE BY\n-- INDEX\n-- CONSTRAINT\n-- TTL\n-- USER\n-- QUOTA\n-- ROLE\n-- ROW POLICY\n-- SETTINGS PROFILE\n\nSELECT \'SYSTEM queries\'; System SYSTEM FLUSH LOGS; -System SYSTEM STOP MERGES sqllt.table -System SYSTEM START MERGES sqllt.table -System SYSTEM STOP TTL MERGES sqllt.table -System SYSTEM START TTL MERGES sqllt.table -System SYSTEM STOP MOVES sqllt.table -System SYSTEM START MOVES sqllt.table -System SYSTEM STOP FETCHES sqllt.table -System SYSTEM START FETCHES sqllt.table -System SYSTEM STOP REPLICATED SENDS sqllt.table -System SYSTEM START REPLICATED SENDS sqllt.table +System SYSTEM STOP MERGES sqllt.table; +System SYSTEM START MERGES sqllt.table; +System SYSTEM STOP TTL MERGES sqllt.table; +System SYSTEM START TTL MERGES sqllt.table; +System SYSTEM STOP MOVES sqllt.table; +System SYSTEM START MOVES sqllt.table; +System SYSTEM STOP FETCHES sqllt.table; +System SYSTEM START FETCHES sqllt.table; +System SYSTEM STOP REPLICATED SENDS sqllt.table; +System SYSTEM START REPLICATED SENDS sqllt.table; Select -- SYSTEM RELOAD DICTIONARY sqllt.dictionary; -- temporary out of order: Code: 210, Connection refused (localhost:9001) (version 21.3.1.1)\n-- DROP REPLICA\n-- haha, no\n-- SYSTEM KILL;\n-- SYSTEM SHUTDOWN;\n\n-- Since we don\'t really care about the actual output, suppress it with `FORMAT Null`.\nSELECT \'SHOW queries\'; Show SHOW CREATE TABLE sqllt.table FORMAT Null; Show SHOW CREATE DICTIONARY sqllt.dictionary FORMAT Null; diff --git a/tests/queries/0_stateless/02160_client_autocomplete_parse_query.expect b/tests/queries/0_stateless/02160_client_autocomplete_parse_query.expect index 2d404b005c71..30d725e6a2a2 100755 --- a/tests/queries/0_stateless/02160_client_autocomplete_parse_query.expect +++ b/tests/queries/0_stateless/02160_client_autocomplete_parse_query.expect @@ -21,7 +21,7 @@ expect_after { -i $any_spawn_id timeout { exit 1 } } -spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --history_file=$history_file" +spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --history_file=$history_file --highlight=0" expect ":) " # Make a query From b9a08caa46e10e5e812615b754f6f3d0d3b7bb47 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Wed, 10 Apr 2024 22:02:44 +0200 Subject: [PATCH 41/90] No fast test (no protobuf) --- tests/queries/0_stateless/03094_recursive_type_proto.sh | 1 + 1 file changed, 1 insertion(+) diff --git 
a/tests/queries/0_stateless/03094_recursive_type_proto.sh b/tests/queries/0_stateless/03094_recursive_type_proto.sh index 6fa374f98d50..98a1b54ff9e0 100755 --- a/tests/queries/0_stateless/03094_recursive_type_proto.sh +++ b/tests/queries/0_stateless/03094_recursive_type_proto.sh @@ -1,4 +1,5 @@ #!/usr/bin/env bash +# Tags: no-fasttest CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh From e1646165c8e4c1b5c52d924a2b8e7b5c5d2e1b09 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Wed, 10 Apr 2024 23:08:25 +0200 Subject: [PATCH 42/90] fix backups --- src/Backups/BackupFileInfo.cpp | 53 +++++++------------------------ src/Backups/BackupsWorker.cpp | 58 ++++++++-------------------------- src/Common/ThreadPool.cpp | 3 -- 3 files changed, 25 insertions(+), 89 deletions(-) diff --git a/src/Backups/BackupFileInfo.cpp b/src/Backups/BackupFileInfo.cpp index f14b955149e3..84b6d67f5033 100644 --- a/src/Backups/BackupFileInfo.cpp +++ b/src/Backups/BackupFileInfo.cpp @@ -210,48 +210,25 @@ BackupFileInfos buildFileInfosForBackupEntries(const BackupEntries & backup_entr BackupFileInfos infos; infos.resize(backup_entries.size()); - size_t num_active_jobs = 0; - std::mutex mutex; - std::condition_variable event; - std::exception_ptr exception; + std::atomic_bool failed = false; - auto thread_group = CurrentThread::getGroup(); LoggerPtr log = getLogger("FileInfosFromBackupEntries"); + ThreadPoolCallbackRunnerLocal runner(thread_pool, "BackupWorker"); for (size_t i = 0; i != backup_entries.size(); ++i) { - { - std::lock_guard lock{mutex}; - if (exception) - break; - ++num_active_jobs; - } + if (failed) + break; - auto job = [&mutex, &num_active_jobs, &event, &exception, &infos, &backup_entries, &read_settings, &base_backup, &thread_group, &process_list_element, i, log]() + runner([&infos, &backup_entries, &read_settings, &base_backup, &process_list_element, i, log, &failed]() { - SCOPE_EXIT_SAFE({ - std::lock_guard lock{mutex}; - if (!--num_active_jobs) - event.notify_all(); - CurrentThread::detachFromGroupIfNotDetached(); - }); - + if (failed) + return; try { const auto & name = backup_entries[i].first; const auto & entry = backup_entries[i].second; - if (thread_group) - CurrentThread::attachToGroup(thread_group); - - setThreadName("BackupWorker"); - - { - std::lock_guard lock{mutex}; - if (exception) - return; - } - if (process_list_element) process_list_element->checkTimeLimit(); @@ -259,21 +236,13 @@ BackupFileInfos buildFileInfosForBackupEntries(const BackupEntries & backup_entr } catch (...) 
{ - std::lock_guard lock{mutex}; - if (!exception) - exception = std::current_exception(); + failed = true; + throw; } - }; - - thread_pool.scheduleOrThrowOnError(job); + }); } - { - std::unique_lock lock{mutex}; - event.wait(lock, [&] { return !num_active_jobs; }); - if (exception) - std::rethrow_exception(exception); - } + runner.waitForAllToFinishAndRethrowFirstError(); return infos; } diff --git a/src/Backups/BackupsWorker.cpp b/src/Backups/BackupsWorker.cpp index 9a3e1052e0bf..c81f08288536 100644 --- a/src/Backups/BackupsWorker.cpp +++ b/src/Backups/BackupsWorker.cpp @@ -705,51 +705,27 @@ void BackupsWorker::writeBackupEntries( backup_entries.size()); } - size_t num_active_jobs = 0; - std::mutex mutex; - std::condition_variable event; - std::exception_ptr exception; + + std::atomic_bool failed = false; bool always_single_threaded = !backup->supportsWritingInMultipleThreads(); auto & thread_pool = getThreadPool(ThreadPoolId::BACKUP_COPY_FILES); - auto thread_group = CurrentThread::getGroup(); + ThreadPoolCallbackRunnerLocal runner(thread_pool, "BackupWorker"); for (size_t i = 0; i != backup_entries.size(); ++i) { + if (failed) + break; + auto & entry = backup_entries[i].second; const auto & file_info = file_infos[i]; + auto job = [&]() { - std::unique_lock lock{mutex}; - if (exception) - break; - ++num_active_jobs; - } - - auto job = [&](bool async) - { - SCOPE_EXIT_SAFE( - std::lock_guard lock{mutex}; - if (!--num_active_jobs) - event.notify_all(); - if (async) - CurrentThread::detachFromGroupIfNotDetached(); - ); - + if (failed) + return; try { - if (async && thread_group) - CurrentThread::attachToGroup(thread_group); - - if (async) - setThreadName("BackupWorker"); - - { - std::lock_guard lock{mutex}; - if (exception) - return; - } - if (process_list_element) process_list_element->checkTimeLimit(); @@ -772,27 +748,21 @@ void BackupsWorker::writeBackupEntries( } catch (...) 
{ - std::lock_guard lock{mutex}; - if (!exception) - exception = std::current_exception(); + failed = true; + throw; } }; if (always_single_threaded) { - job(false); + job(); continue; } - thread_pool.scheduleOrThrowOnError([job] { job(true); }); + runner(std::move(job)); } - { - std::unique_lock lock{mutex}; - event.wait(lock, [&] { return !num_active_jobs; }); - if (exception) - std::rethrow_exception(exception); - } + runner.waitForAllToFinishAndRethrowFirstError(); } diff --git a/src/Common/ThreadPool.cpp b/src/Common/ThreadPool.cpp index 9bea5ab4d5e6..b9029d9287df 100644 --- a/src/Common/ThreadPool.cpp +++ b/src/Common/ThreadPool.cpp @@ -585,9 +585,6 @@ thread_local bool CannotAllocateThreadFaultInjector::block_fault_injections = fa scope_guard CannotAllocateThreadFaultInjector::blockFaultInjections() { auto & ins = instance(); - if (!ins.enabled.load(std::memory_order_relaxed)) - return {}; - ins.block_fault_injections = true; return [&ins](){ ins.block_fault_injections = false; }; } From 8e26c4460b9fc8f4f5913ff2e1480330a02eec14 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Wed, 10 Apr 2024 23:39:47 +0200 Subject: [PATCH 43/90] Make transform always return the first match --- docs/en/sql-reference/functions/other-functions.md | 2 +- src/Common/HashTable/HashMap.h | 13 +++++++++++++ src/Functions/transform.cpp | 7 +++---- .../03094_transform_return_first.reference | 4 ++++ .../0_stateless/03094_transform_return_first.sql | 7 +++++++ 5 files changed, 28 insertions(+), 5 deletions(-) create mode 100644 tests/queries/0_stateless/03094_transform_return_first.reference create mode 100644 tests/queries/0_stateless/03094_transform_return_first.sql diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md index 187f248e92df..26351301a3bc 100644 --- a/docs/en/sql-reference/functions/other-functions.md +++ b/docs/en/sql-reference/functions/other-functions.md @@ -675,7 +675,7 @@ There are two variations of this function: Signature: -For `x` equal to one of the elements in `array_from`, the function returns the corresponding element in `array_to`, i.e. the one at the same array index. Otherwise, it returns `default`. If multiple matching elements exist `array_from`, an arbitrary corresponding element from `array_to` is returned. +For `x` equal to one of the elements in `array_from`, the function returns the corresponding element in `array_to`, i.e. the one at the same array index. Otherwise, it returns `default`. If multiple matching elements exist `array_from`, it returns the element corresponding to the first of them. 
`transform(T, Array(T), Array(U), U) -> U` diff --git a/src/Common/HashTable/HashMap.h b/src/Common/HashTable/HashMap.h index dc601bf13198..f104fea72cbb 100644 --- a/src/Common/HashTable/HashMap.h +++ b/src/Common/HashTable/HashMap.h @@ -296,6 +296,19 @@ class HashMapTable : public HashTable return it->getMapped(); } + /// Only inserts the value if key isn't already present + void ALWAYS_INLINE insertIfNotPresent(const Key & x, const Cell::Mapped & value) + { + LookupResult it; + bool inserted; + this->emplace(x, it, inserted); + if (inserted) + { + new (&it->getMapped()) typename Cell::Mapped(); + it->getMapped() = value; + } + } + const typename Cell::Mapped & ALWAYS_INLINE at(const Key & x) const { if (auto it = this->find(x); it != this->end()) diff --git a/src/Functions/transform.cpp b/src/Functions/transform.cpp index 3c9654740f4e..0dbc99467101 100644 --- a/src/Functions/transform.cpp +++ b/src/Functions/transform.cpp @@ -755,7 +755,6 @@ namespace WhichDataType which(from_type); - /// Note: Doesn't check the duplicates in the `from` array. /// Field may be of Float type, but for the purpose of bitwise equality we can treat them as UInt64 if (isNativeNumber(which) || which.isDecimal32() || which.isDecimal64() || which.isEnum()) { @@ -777,7 +776,7 @@ namespace #pragma clang diagnostic pop memcpy(dst, ref.data, ref.size); - table[key] = i; + table.insertIfNotPresent(key, i); } } } @@ -790,7 +789,7 @@ namespace if (applyVisitor(FieldVisitorAccurateEquals(), (*cache.from_column)[i], (*from_column_uncasted)[i])) { StringRef ref = cache.from_column->getDataAt(i); - table[ref] = i; + table.insertIfNotPresent(ref, i); } } } @@ -804,7 +803,7 @@ namespace { SipHash hash; cache.from_column->updateHashWithValue(i, hash); - table[hash.get128()] = i; + table.insertIfNotPresent(hash.get128(), i); } } } diff --git a/tests/queries/0_stateless/03094_transform_return_first.reference b/tests/queries/0_stateless/03094_transform_return_first.reference new file mode 100644 index 000000000000..4f62b9488829 --- /dev/null +++ b/tests/queries/0_stateless/03094_transform_return_first.reference @@ -0,0 +1,4 @@ +1 +1 +(2,2) +2 diff --git a/tests/queries/0_stateless/03094_transform_return_first.sql b/tests/queries/0_stateless/03094_transform_return_first.sql new file mode 100644 index 000000000000..fa18440f7217 --- /dev/null +++ b/tests/queries/0_stateless/03094_transform_return_first.sql @@ -0,0 +1,7 @@ +SELECT transform(1, [1, 1, 1], [1, 4, 5]); +SELECT transform('1', ['1', '1', '1'], ['1', '4', '5']); +SELECT transform((0, 0), [(0, 0), (0, 0), (0, 0)], [(2, 2), (5, 5), (10, 10)]); + +-- https://github.com/ClickHouse/ClickHouse/issues/62183 +-- Case is turned into caseWithExpression, which then it's turned into transform +select case 1 when 1 then 2 when 1 then 4 end; From dcd0831f4c4fce416368eca70f1f2201cf974903 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 11 Apr 2024 00:12:52 +0200 Subject: [PATCH 44/90] fix --- src/Client/Connection.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Client/Connection.cpp b/src/Client/Connection.cpp index 91b86ded5002..4e2456134793 100644 --- a/src/Client/Connection.cpp +++ b/src/Client/Connection.cpp @@ -1224,15 +1224,15 @@ void Connection::setDescription() auto resolved_address = getResolvedAddress(); description = host + ":" + toString(port); + full_description = description; + if (resolved_address) { auto ip_address = resolved_address->host().toString(); if (host != ip_address) - description += ", " + ip_address; + 
full_description += ", " + ip_address; } - full_description = description; - if (const auto * socket_ = getSocket()) { full_description += ", local address: "; From 7344daec8f295baf1d11b8b51d82821e53fece19 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy <99031427+yakov-olkhovskiy@users.noreply.github.com> Date: Wed, 10 Apr 2024 20:25:08 -0400 Subject: [PATCH 45/90] add Composable Protocols --- .../settings/composable-protocols.md | 155 ++++++++++++++++++ 1 file changed, 155 insertions(+) create mode 100644 docs/en/operations/settings/composable-protocols.md diff --git a/docs/en/operations/settings/composable-protocols.md b/docs/en/operations/settings/composable-protocols.md new file mode 100644 index 000000000000..8a5ea584f4e7 --- /dev/null +++ b/docs/en/operations/settings/composable-protocols.md @@ -0,0 +1,155 @@ +--- +slug: /en/operations/settings/composable-protocols +sidebar_position: 64 +sidebar_label: Composable Protocols +--- + +# Composable Protocols + +Composable protocols allows more flexible configuration of TCP access to the ClickHouse server. This configuration can co-exist with or replace conventional configuration. + +## Composable protocols section is denoted as `protocols` in configuration xml +**Example:** +``` xml + + + +``` + +## Basic modules define protocol layers +**Example:** +``` xml + + + + + http + + + +``` +where: +- `plain_http` - name which can be referred by another layer +- `type` - denotes protocol handler which will be instantiated to process data, set of protocol handlers is predefined: + * `tcp` - native clickhouse protocol handler + * `http` - http clickhouse protocol handler + * `tls` - TLS encryption layer + * `proxy1` - PROXYv1 layer + * `mysql` - MySQL compatibility protocol handler + * `postgres` - PostgreSQL compatibility protocol handler + * `prometheus` - Prometheus protocol handler + * `interserver` - clickhouse interserver handler + +:::note +`gRPC` protocol handler is not inmplemented for `Composable protocols` +::: + +## Endpoint (i.e. listening port) is denoted by `` and (optional) `` tags +**Example:** +``` xml + + + + + http + + 127.0.0.1 + 8123 + + + + +``` +If `` is omitted, then `` from root config is used. 
+ +## Layers sequence is defined by `` tag, referencing another module +**Example:** definition for HTTPS protocol +``` xml + + + + + http + + + + + tls + plain_http + 127.0.0.1 + 8443 + + + +``` + +## Endpoint can be attached to any layer +**Example:** definition for HTTP (port 8123) and HTTPS (port 8443) endpoints +``` xml + + + + http + 127.0.0.1 + 8123 + + + + tls + plain_http + 127.0.0.1 + 8443 + + + +``` + +## Additional endpoints can be defined by referencing any module and omitting `` tag +**Example:** `another_http` endpoint is defined for `plain_http` module +``` xml + + + + http + 127.0.0.1 + 8123 + + + + tls + plain_http + 127.0.0.1 + 8443 + + + + plain_http + 127.0.0.1 + 8223 + + + +``` + +## Some modules can contain specific for its layer parameters +**Example:** for TLS layer private key (`privateKeyFile`) and certificate files (`certificateFile`) can be specified +``` xml + + + + http + 127.0.0.1 + 8123 + + + + tls + plain_http + 127.0.0.1 + 8443 + another_server.key + another_server.crt + + + +``` From 057747ccd2a6aef76121c3b87a52b22f65e32687 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy <99031427+yakov-olkhovskiy@users.noreply.github.com> Date: Wed, 10 Apr 2024 20:33:42 -0400 Subject: [PATCH 46/90] fix --- docs/en/operations/settings/composable-protocols.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/operations/settings/composable-protocols.md b/docs/en/operations/settings/composable-protocols.md index 8a5ea584f4e7..b68a5906abf1 100644 --- a/docs/en/operations/settings/composable-protocols.md +++ b/docs/en/operations/settings/composable-protocols.md @@ -41,7 +41,7 @@ where: * `interserver` - clickhouse interserver handler :::note -`gRPC` protocol handler is not inmplemented for `Composable protocols` +`gRPC` protocol handler is not implemented for `Composable protocols` ::: ## Endpoint (i.e. 
listening port) is denoted by `` and (optional) `` tags From e793b0e148c7db3cd0053b9ba27dc769e50a9878 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy <99031427+yakov-olkhovskiy@users.noreply.github.com> Date: Wed, 10 Apr 2024 20:37:26 -0400 Subject: [PATCH 47/90] Update aspell-dict.txt --- utils/check-style/aspell-ignore/en/aspell-dict.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 30c2de2b5076..9f7776f5201d 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -183,6 +183,8 @@ CompiledExpressionCacheCount ComplexKeyCache ComplexKeyDirect ComplexKeyHashed +Composable +composable Config ConnectionDetails Const @@ -697,6 +699,7 @@ PCRE PRCP PREWHERE PROCESSLIST +PROXYv PSUN PagerDuty ParallelFormattingOutputFormatThreads From 13774d897b8ab290ce64203d09f32bee60247cce Mon Sep 17 00:00:00 2001 From: Joshua Hildred Date: Wed, 10 Apr 2024 19:09:03 -0700 Subject: [PATCH 48/90] Add additional tests for queries with toLowCardinality and toNullable --- src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp | 2 +- tests/queries/0_stateless/03032_redundant_equals.reference | 4 ++++ tests/queries/0_stateless/03032_redundant_equals.sql | 3 ++- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp b/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp index ee0ddf24233e..05efe983b42e 100644 --- a/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp +++ b/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp @@ -627,7 +627,7 @@ class LogicalExpressionOptimizerVisitor : public InDepthQueryTreeVisitorWithCont if (!child_function || !isBooleanFunction(child_function->getFunctionName())) return; - if (function_node.getResultType()->isNullable() && !child_function->getResultType()->isNullable()) + if (removeLowCardinality(constant->getResultType())->isNullable()) need_rerun_resolve = true; if (maybe_invert) diff --git a/tests/queries/0_stateless/03032_redundant_equals.reference b/tests/queries/0_stateless/03032_redundant_equals.reference index 09f4d8e3646a..b154addf55fb 100644 --- a/tests/queries/0_stateless/03032_redundant_equals.reference +++ b/tests/queries/0_stateless/03032_redundant_equals.reference @@ -17,6 +17,10 @@ 100 100 101 +100 +101 +100 +101 1 1 1 diff --git a/tests/queries/0_stateless/03032_redundant_equals.sql b/tests/queries/0_stateless/03032_redundant_equals.sql index ae0b9651e12c..bd2306c7575c 100644 --- a/tests/queries/0_stateless/03032_redundant_equals.sql +++ b/tests/queries/0_stateless/03032_redundant_equals.sql @@ -26,7 +26,8 @@ SELECT * FROM test_table WHERE (NOT ((k in (101) = 0) OR (k in (100) = 1))) = 1; SELECT * FROM test_table WHERE ((k not in (101) = 0) OR (k in (100) = 1)) = 1; SELECT * FROM test_table WHERE ((k not in (99) = 1) AND (k in (100) = 1)) = 1; SELECT * FROM test_table WHERE ((k not in (101) = toNullable(0)) OR (k in (100) = toNullable(1))) = toNullable(1); - +SELECT * FROM test_table WHERE (((k NOT IN toLowCardinality(toNullable(101))) = toLowCardinality(toNullable(0))) OR ((k IN (toLowCardinality(100))) = toNullable(1))); +SELECT * FROM test_table WHERE (((k IN toLowCardinality(toNullable(101))) = toLowCardinality(toNullable(0))) AND ((k NOT IN (toLowCardinality(100))) = toNullable(1))) = toNullable(toLowCardinality(0)); SELECT count() FROM From 0e117ed197011aff8b746010ceac733450a892b6 Mon Sep 17 00:00:00 2001 
From: Konstantin Bogdanov Date: Thu, 11 Apr 2024 04:17:06 +0200 Subject: [PATCH 49/90] Update debug log filename --- .../0_stateless/01676_clickhouse_client_autocomplete.python | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.python b/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.python index 7bb9209f55ce..dcb1d2581828 100644 --- a/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.python +++ b/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.python @@ -7,7 +7,7 @@ import multiprocessing COMPLETION_TIMEOUT_SECONDS = 30 DEBUG_LOG = os.path.join( os.environ["CLICKHOUSE_TMP"], - os.path.basename(os.path.abspath(__file__)) + ".debuglog", + os.path.basename(os.path.abspath(__file__)).strip(".python") + ".debuglog", ) From 7916cf8355f0494d578ffe3b3288bd54485f5be6 Mon Sep 17 00:00:00 2001 From: Konstantin Bogdanov Date: Thu, 11 Apr 2024 04:21:12 +0200 Subject: [PATCH 50/90] Add debug log flushing --- .../0_stateless/01676_clickhouse_client_autocomplete.python | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.python b/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.python index dcb1d2581828..13160d4e561c 100644 --- a/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.python +++ b/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.python @@ -34,19 +34,23 @@ def test_completion(program, argv, comp_word): output_b = os.read(master, 4096) output = output_b.decode() debug_log_fd.write(repr(output_b) + "\n") + debug_log_fd.flush() while not ":)" in output: output_b = os.read(master, 4096) output += output_b.decode() debug_log_fd.write(repr(output_b) + "\n") + debug_log_fd.flush() os.write(master, b"SET " + bytes(comp_begin.encode())) output_b = os.read(master, 4096) output = output_b.decode() debug_log_fd.write(repr(output_b) + "\n") + debug_log_fd.flush() while not comp_begin in output: output_b = os.read(master, 4096) output += output_b.decode() debug_log_fd.write(repr(output_b) + "\n") + debug_log_fd.flush() time.sleep(0.01) os.write(master, b"\t") @@ -54,6 +58,7 @@ def test_completion(program, argv, comp_word): output_b = os.read(master, 4096) output = output_b.decode() debug_log_fd.write(repr(output_b) + "\n") + debug_log_fd.flush() # fail fast if there is a bell character in the output, # meaning no concise completion is found if "\x07" in output: @@ -64,6 +69,7 @@ def test_completion(program, argv, comp_word): output_b = os.read(master, 4096) output += output_b.decode() debug_log_fd.write(repr(output_b) + "\n") + debug_log_fd.flush() print(f"{comp_word}: OK") finally: From edb22a89410cb576209733503cf11f3eb988250a Mon Sep 17 00:00:00 2001 From: Jayme Bird Date: Thu, 11 Apr 2024 10:22:02 +0100 Subject: [PATCH 51/90] add event_time to backup_log system table --- docs/en/operations/system-tables/backup_log.md | 2 ++ src/Interpreters/BackupLog.cpp | 2 ++ 2 files changed, 4 insertions(+) diff --git a/docs/en/operations/system-tables/backup_log.md b/docs/en/operations/system-tables/backup_log.md index c73fd26683ea..d9c2a61cb818 100644 --- a/docs/en/operations/system-tables/backup_log.md +++ b/docs/en/operations/system-tables/backup_log.md @@ -9,6 +9,7 @@ Columns: - `hostname` ([LowCardinality(String)](../../sql-reference/data-types/string.md)) — Hostname of the server executing the query. - `event_date` ([Date](../../sql-reference/data-types/date.md)) — Date of the entry. 
+- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — The date and time of the entry. - `event_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — Time of the entry with microseconds precision. - `id` ([String](../../sql-reference/data-types/string.md)) — Identifier of the backup or restore operation. - `name` ([String](../../sql-reference/data-types/string.md)) — Name of the backup storage (the contents of the `FROM` or `TO` clause). @@ -67,6 +68,7 @@ Row 2: ────── hostname: clickhouse.eu-central1.internal event_date: 2023-08-19 +event_time: 2023-08-19 11:08:56 event_time_microseconds: 2023-08-19 11:08:56.916192 id: e5b74ecb-f6f1-426a-80be-872f90043885 name: Disk('backups_disk', '1.zip') diff --git a/src/Interpreters/BackupLog.cpp b/src/Interpreters/BackupLog.cpp index af6c7cf62346..a22c6e322bf8 100644 --- a/src/Interpreters/BackupLog.cpp +++ b/src/Interpreters/BackupLog.cpp @@ -24,6 +24,7 @@ ColumnsDescription BackupLogElement::getColumnsDescription() { {"hostname", std::make_shared(std::make_shared()), "Hostname of the server executing the query."}, {"event_date", std::make_shared(), "Date of the entry."}, + {"event_time", std::make_shared(), "Time of the entry."}, {"event_time_microseconds", std::make_shared(6), "Time of the entry with microseconds precision."}, {"id", std::make_shared(), "Identifier of the backup or restore operation."}, {"name", std::make_shared(), "Name of the backup storage (the contents of the FROM or TO clause)."}, @@ -48,6 +49,7 @@ void BackupLogElement::appendToBlock(MutableColumns & columns) const size_t i = 0; columns[i++]->insert(getFQDNOrHostName()); columns[i++]->insert(DateLUT::instance().toDayNum(std::chrono::system_clock::to_time_t(event_time)).toUnderType()); + columns[i++]->insert(std::chrono::system_clock::to_time_t(event_time)); columns[i++]->insert(event_time_usec); columns[i++]->insert(info.id); columns[i++]->insert(info.name); From c1c7cf56bde1dd07ced5ad41a0dc4e7d2d5fef94 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 11 Apr 2024 13:43:51 +0200 Subject: [PATCH 52/90] Revert "[feature]: allow to attach parts from a different disk" --- .../statements/alter/partition.md | 2 - src/Storages/MergeTree/MergeTreeData.cpp | 50 ++--- src/Storages/MergeTree/MergeTreeData.h | 2 +- src/Storages/MergeTree/MutateTask.cpp | 2 +- src/Storages/StorageMergeTree.cpp | 4 +- src/Storages/StorageReplicatedMergeTree.cpp | 17 +- .../__init__.py | 0 .../configs/remote_servers.xml | 17 -- .../test_attach_partition_using_copy/test.py | 187 ------------------ tests/integration/test_multiple_disks/test.py | 36 ++-- 10 files changed, 54 insertions(+), 263 deletions(-) delete mode 100644 tests/integration/test_attach_partition_using_copy/__init__.py delete mode 100644 tests/integration/test_attach_partition_using_copy/configs/remote_servers.xml delete mode 100644 tests/integration/test_attach_partition_using_copy/test.py diff --git a/docs/en/sql-reference/statements/alter/partition.md b/docs/en/sql-reference/statements/alter/partition.md index 941dc000a028..ce5cecf6fd64 100644 --- a/docs/en/sql-reference/statements/alter/partition.md +++ b/docs/en/sql-reference/statements/alter/partition.md @@ -133,8 +133,6 @@ For the query to run successfully, the following conditions must be met: - Both tables must have the same indices and projections. - Both tables must have the same storage policy. -If both tables have the same storage policy, use hardlink to attach partition. 
Otherwise, use copying the data to attach partition. - ## REPLACE PARTITION ``` sql diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 5d4c3ab078e5..927001dd0f6b 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -7074,7 +7074,7 @@ MergeTreeData & MergeTreeData::checkStructureAndGetMergeTreeData( return checkStructureAndGetMergeTreeData(*source_table, src_snapshot, my_snapshot); } -std::pair MergeTreeData::cloneAndLoadDataPart( +std::pair MergeTreeData::cloneAndLoadDataPartOnSameDisk( const MergeTreeData::DataPartPtr & src_part, const String & tmp_part_prefix, const MergeTreePartInfo & dst_part_info, @@ -7084,23 +7084,28 @@ std::pair MergeTreeData::cloneAn const WriteSettings & write_settings) { chassert(!isStaticStorage()); - bool on_same_disk = false; - for (const DiskPtr & disk : this->getStoragePolicy()->getDisks()) + + /// Check that the storage policy contains the disk where the src_part is located. + bool does_storage_policy_allow_same_disk = false; + for (const DiskPtr & disk : getStoragePolicy()->getDisks()) { if (disk->getName() == src_part->getDataPartStorage().getDiskName()) { - on_same_disk = true; + does_storage_policy_allow_same_disk = true; break; } } - + if (!does_storage_policy_allow_same_disk) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Could not clone and load part {} because disk does not belong to storage policy", + quoteString(src_part->getDataPartStorage().getFullPath())); String dst_part_name = src_part->getNewName(dst_part_info); String tmp_dst_part_name = tmp_part_prefix + dst_part_name; auto temporary_directory_lock = getTemporaryPartDirectoryHolder(tmp_dst_part_name); /// Why it is needed if we only hardlink files? - /// Answer: In issue #59377, add copy when attach from different disk. 
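For readers unfamiliar with the statement touched by this revert: `ATTACH PARTITION ... FROM` copies a partition's data parts between two existing tables that satisfy the conditions listed in the documentation hunk above. A minimal sketch follows; the table and partition names are illustrative and not taken from the patch, only the `tuple()` form at the end mirrors the integration test removed by this revert.

``` sql
-- Assumes both tables already exist with identical structure, partition key,
-- ORDER BY key, primary key, indices/projections and storage policy.
ALTER TABLE visits_dst ATTACH PARTITION 202404 FROM visits_src;

-- Unpartitioned tables use the tuple() partition expression,
-- as in the integration test deleted further down in this patch:
ALTER TABLE destination ATTACH PARTITION tuple() FROM source;
```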
auto reservation = src_part->getDataPartStorage().reserve(src_part->getBytesOnDisk()); auto src_part_storage = src_part->getDataPartStoragePtr(); @@ -7108,30 +7113,16 @@ std::pair MergeTreeData::cloneAn MergeTreeData::MutableDataPartPtr src_flushed_tmp_part; String with_copy; - if (params.copy_instead_of_hardlink || !on_same_disk) + if (params.copy_instead_of_hardlink) with_copy = " (copying data)"; - - std::shared_ptr dst_part_storage{}; - if (on_same_disk && !params.copy_instead_of_hardlink) - { - dst_part_storage = src_part_storage->freeze( - relative_data_path, - tmp_dst_part_name, - read_settings, - write_settings, - /* save_metadata_callback= */ {}, - params); - } - else - { - auto reservation_on_dst = getStoragePolicy()->reserve(src_part->getBytesOnDisk()); - if (!reservation_on_dst) - throw Exception(ErrorCodes::NOT_ENOUGH_SPACE, "Not enough space on disk."); - dst_part_storage = src_part_storage->clonePart( - this->getRelativeDataPath(), tmp_dst_part_name, reservation_on_dst->getDisk(), read_settings, write_settings, {}, {}); - } - + auto dst_part_storage = src_part_storage->freeze( + relative_data_path, + tmp_dst_part_name, + read_settings, + write_settings, + /* save_metadata_callback= */ {}, + params); if (params.metadata_version_to_write.has_value()) { @@ -7153,7 +7144,7 @@ std::pair MergeTreeData::cloneAn .withPartFormatFromDisk() .build(); - if (on_same_disk && !params.copy_instead_of_hardlink && params.hardlinked_files) + if (!params.copy_instead_of_hardlink && params.hardlinked_files) { params.hardlinked_files->source_part_name = src_part->name; params.hardlinked_files->source_table_shared_id = src_part->storage.getTableSharedID(); @@ -7197,7 +7188,6 @@ std::pair MergeTreeData::cloneAn return std::make_pair(dst_data_part, std::move(temporary_directory_lock)); } - String MergeTreeData::getFullPathOnDisk(const DiskPtr & disk) const { return disk->getPath() + relative_data_path; diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index d21f87c337ef..b1fbadc57f07 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -839,7 +839,7 @@ class MergeTreeData : public IStorage, public WithMutableContext MergeTreeData & checkStructureAndGetMergeTreeData(const StoragePtr & source_table, const StorageMetadataPtr & src_snapshot, const StorageMetadataPtr & my_snapshot) const; MergeTreeData & checkStructureAndGetMergeTreeData(IStorage & source_table, const StorageMetadataPtr & src_snapshot, const StorageMetadataPtr & my_snapshot) const; - std::pair cloneAndLoadDataPart( + std::pair cloneAndLoadDataPartOnSameDisk( const MergeTreeData::DataPartPtr & src_part, const String & tmp_part_prefix, const MergeTreePartInfo & dst_part_info, diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index a971c4fda1c6..5e388d6a8ac7 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -2146,7 +2146,7 @@ bool MutateTask::prepare() scope_guard lock; { - std::tie(part, lock) = ctx->data->cloneAndLoadDataPart( + std::tie(part, lock) = ctx->data->cloneAndLoadDataPartOnSameDisk( ctx->source_part, prefix, ctx->future_part->part_info, ctx->metadata_snapshot, clone_params, ctx->context->getReadSettings(), ctx->context->getWriteSettings()); part->getDataPartStorage().beginTransaction(); ctx->temporary_directory_lock = std::move(lock); diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 86af02be8990..0235a74400c8 
100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -2096,7 +2096,7 @@ void StorageMergeTree::replacePartitionFrom(const StoragePtr & source_table, con MergeTreePartInfo dst_part_info(partition_id, temp_index, temp_index, src_part->info.level); IDataPartStorage::ClonePartParams clone_params{.txn = local_context->getCurrentTransaction()}; - auto [dst_part, part_lock] = cloneAndLoadDataPart( + auto [dst_part, part_lock] = cloneAndLoadDataPartOnSameDisk( src_part, TMP_PREFIX, dst_part_info, @@ -2207,7 +2207,7 @@ void StorageMergeTree::movePartitionToTable(const StoragePtr & dest_table, const .copy_instead_of_hardlink = getSettings()->always_use_copy_instead_of_hardlinks, }; - auto [dst_part, part_lock] = dest_table_storage->cloneAndLoadDataPart( + auto [dst_part, part_lock] = dest_table_storage->cloneAndLoadDataPartOnSameDisk( src_part, TMP_PREFIX, dst_part_info, diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 15d1b7f40103..fcb946c089cb 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -2788,7 +2788,7 @@ bool StorageReplicatedMergeTree::executeReplaceRange(LogEntry & entry) auto obtain_part = [&] (PartDescriptionPtr & part_desc) { - /// Fetches with zero-copy-replication are cheap, but cloneAndLoadDataPart(OnSameDisk) will do full copy. + /// Fetches with zero-copy-replication are cheap, but cloneAndLoadDataPartOnSameDisk will do full copy. /// It's okay to check the setting for current table and disk for the source table, because src and dst part are on the same disk. bool prefer_fetch_from_other_replica = !part_desc->replica.empty() && storage_settings_ptr->allow_remote_fs_zero_copy_replication && part_desc->src_table_part && part_desc->src_table_part->isStoredOnRemoteDiskWithZeroCopySupport(); @@ -2807,7 +2807,7 @@ bool StorageReplicatedMergeTree::executeReplaceRange(LogEntry & entry) .copy_instead_of_hardlink = storage_settings_ptr->always_use_copy_instead_of_hardlinks || ((our_zero_copy_enabled || source_zero_copy_enabled) && part_desc->src_table_part->isStoredOnRemoteDiskWithZeroCopySupport()), .metadata_version_to_write = metadata_snapshot->getMetadataVersion() }; - auto [res_part, temporary_part_lock] = cloneAndLoadDataPart( + auto [res_part, temporary_part_lock] = cloneAndLoadDataPartOnSameDisk( part_desc->src_table_part, TMP_PREFIX + "clone_", part_desc->new_part_info, @@ -4888,7 +4888,7 @@ bool StorageReplicatedMergeTree::fetchPart( .keep_metadata_version = true, }; - auto [cloned_part, lock] = cloneAndLoadDataPart( + auto [cloned_part, lock] = cloneAndLoadDataPartOnSameDisk( part_to_clone, "tmp_clone_", part_info, @@ -8078,14 +8078,12 @@ void StorageReplicatedMergeTree::replacePartitionFrom( bool zero_copy_enabled = storage_settings_ptr->allow_remote_fs_zero_copy_replication || dynamic_cast(source_table.get())->getSettings()->allow_remote_fs_zero_copy_replication; - IDataPartStorage::ClonePartParams clone_params { .copy_instead_of_hardlink = storage_settings_ptr->always_use_copy_instead_of_hardlinks || (zero_copy_enabled && src_part->isStoredOnRemoteDiskWithZeroCopySupport()), .metadata_version_to_write = metadata_snapshot->getMetadataVersion() }; - - auto [dst_part, part_lock] = cloneAndLoadDataPart( + auto [dst_part, part_lock] = cloneAndLoadDataPartOnSameDisk( src_part, TMP_PREFIX, dst_part_info, @@ -8093,10 +8091,9 @@ void StorageReplicatedMergeTree::replacePartitionFrom( clone_params, query_context->getReadSettings(), 
query_context->getWriteSettings()); - - dst_parts.emplace_back(std::move(dst_part)); - dst_parts_locks.emplace_back(std::move(part_lock)); src_parts.emplace_back(src_part); + dst_parts.emplace_back(dst_part); + dst_parts_locks.emplace_back(std::move(part_lock)); ephemeral_locks.emplace_back(std::move(*lock)); block_id_paths.emplace_back(block_id_path); part_checksums.emplace_back(hash_hex); @@ -8349,7 +8346,7 @@ void StorageReplicatedMergeTree::movePartitionToTable(const StoragePtr & dest_ta .copy_instead_of_hardlink = storage_settings_ptr->always_use_copy_instead_of_hardlinks || (zero_copy_enabled && src_part->isStoredOnRemoteDiskWithZeroCopySupport()), .metadata_version_to_write = dest_metadata_snapshot->getMetadataVersion() }; - auto [dst_part, dst_part_lock] = dest_table_storage->cloneAndLoadDataPart( + auto [dst_part, dst_part_lock] = dest_table_storage->cloneAndLoadDataPartOnSameDisk( src_part, TMP_PREFIX, dst_part_info, diff --git a/tests/integration/test_attach_partition_using_copy/__init__.py b/tests/integration/test_attach_partition_using_copy/__init__.py deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/tests/integration/test_attach_partition_using_copy/configs/remote_servers.xml b/tests/integration/test_attach_partition_using_copy/configs/remote_servers.xml deleted file mode 100644 index b40730e9f7d5..000000000000 --- a/tests/integration/test_attach_partition_using_copy/configs/remote_servers.xml +++ /dev/null @@ -1,17 +0,0 @@ - - - - - true - - replica1 - 9000 - - - replica2 - 9000 - - - - - diff --git a/tests/integration/test_attach_partition_using_copy/test.py b/tests/integration/test_attach_partition_using_copy/test.py deleted file mode 100644 index df5378742aef..000000000000 --- a/tests/integration/test_attach_partition_using_copy/test.py +++ /dev/null @@ -1,187 +0,0 @@ -import pytest -from helpers.cluster import ClickHouseCluster -from helpers.test_tools import assert_eq_with_retry - -cluster = ClickHouseCluster(__file__) - -replica1 = cluster.add_instance( - "replica1", with_zookeeper=True, main_configs=["configs/remote_servers.xml"] -) -replica2 = cluster.add_instance( - "replica2", with_zookeeper=True, main_configs=["configs/remote_servers.xml"] -) - - -@pytest.fixture(scope="module") -def start_cluster(): - try: - cluster.start() - yield cluster - except Exception as ex: - print(ex) - finally: - cluster.shutdown() - - -def cleanup(nodes): - for node in nodes: - node.query("DROP TABLE IF EXISTS source SYNC") - node.query("DROP TABLE IF EXISTS destination SYNC") - - -def create_source_table(node, table_name, replicated): - replica = node.name - engine = ( - f"ReplicatedMergeTree('/clickhouse/tables/1/{table_name}', '{replica}')" - if replicated - else "MergeTree()" - ) - node.query_with_retry( - """ - ATTACH TABLE {table_name} UUID 'cf712b4f-2ca8-435c-ac23-c4393efe52f7' - ( - price UInt32, - date Date, - postcode1 LowCardinality(String), - postcode2 LowCardinality(String), - type Enum8('other' = 0, 'terraced' = 1, 'semi-detached' = 2, 'detached' = 3, 'flat' = 4), - is_new UInt8, - duration Enum8('unknown' = 0, 'freehold' = 1, 'leasehold' = 2), - addr1 String, - addr2 String, - street LowCardinality(String), - locality LowCardinality(String), - town LowCardinality(String), - district LowCardinality(String), - county LowCardinality(String) - ) - ENGINE = {engine} - ORDER BY (postcode1, postcode2, addr1, addr2) - SETTINGS disk = disk(type = web, endpoint = 'https://raw.githubusercontent.com/ClickHouse/web-tables-demo/main/web/') - """.format( - 
table_name=table_name, engine=engine - ) - ) - - -def create_destination_table(node, table_name, replicated): - replica = node.name - engine = ( - f"ReplicatedMergeTree('/clickhouse/tables/1/{table_name}', '{replica}')" - if replicated - else "MergeTree()" - ) - node.query_with_retry( - """ - CREATE TABLE {table_name} - ( - price UInt32, - date Date, - postcode1 LowCardinality(String), - postcode2 LowCardinality(String), - type Enum8('other' = 0, 'terraced' = 1, 'semi-detached' = 2, 'detached' = 3, 'flat' = 4), - is_new UInt8, - duration Enum8('unknown' = 0, 'freehold' = 1, 'leasehold' = 2), - addr1 String, - addr2 String, - street LowCardinality(String), - locality LowCardinality(String), - town LowCardinality(String), - district LowCardinality(String), - county LowCardinality(String) - ) - ENGINE = {engine} - ORDER BY (postcode1, postcode2, addr1, addr2) - """.format( - table_name=table_name, engine=engine - ) - ) - - -def test_both_mergtree(start_cluster): - create_source_table(replica1, "source", False) - create_destination_table(replica1, "destination", False) - - replica1.query(f"ALTER TABLE destination ATTACH PARTITION tuple() FROM source") - - assert_eq_with_retry( - replica1, - f"SELECT toYear(date) AS year,round(avg(price)) AS price,bar(price, 0, 1000000, 80) FROM destination GROUP BY year ORDER BY year ASC", - replica1.query( - f"SELECT toYear(date) AS year,round(avg(price)) AS price,bar(price, 0, 1000000, 80) FROM source GROUP BY year ORDER BY year ASC" - ), - ) - - assert_eq_with_retry( - replica1, f"SELECT town from destination LIMIT 1", "SCARBOROUGH" - ) - - cleanup([replica1]) - - -def test_all_replicated(start_cluster): - create_source_table(replica1, "source", True) - create_destination_table(replica1, "destination", True) - create_destination_table(replica2, "destination", True) - - replica1.query("SYSTEM SYNC REPLICA destination") - replica1.query(f"ALTER TABLE destination ATTACH PARTITION tuple() FROM source") - - assert_eq_with_retry( - replica1, - f"SELECT toYear(date) AS year,round(avg(price)) AS price,bar(price, 0, 1000000, 80) FROM destination GROUP BY year ORDER BY year ASC", - replica1.query( - f"SELECT toYear(date) AS year,round(avg(price)) AS price,bar(price, 0, 1000000, 80) FROM source GROUP BY year ORDER BY year ASC" - ), - ) - assert_eq_with_retry( - replica1, - f"SELECT toYear(date) AS year,round(avg(price)) AS price,bar(price, 0, 1000000, 80) FROM source GROUP BY year ORDER BY year ASC", - replica2.query( - f"SELECT toYear(date) AS year,round(avg(price)) AS price,bar(price, 0, 1000000, 80) FROM destination GROUP BY year ORDER BY year ASC" - ), - ) - - assert_eq_with_retry( - replica1, f"SELECT town from destination LIMIT 1", "SCARBOROUGH" - ) - - assert_eq_with_retry( - replica2, f"SELECT town from destination LIMIT 1", "SCARBOROUGH" - ) - - cleanup([replica1, replica2]) - - -def test_only_destination_replicated(start_cluster): - create_source_table(replica1, "source", False) - create_destination_table(replica1, "destination", True) - create_destination_table(replica2, "destination", True) - - replica1.query("SYSTEM SYNC REPLICA destination") - replica1.query(f"ALTER TABLE destination ATTACH PARTITION tuple() FROM source") - - assert_eq_with_retry( - replica1, - f"SELECT toYear(date) AS year,round(avg(price)) AS price,bar(price, 0, 1000000, 80) FROM destination GROUP BY year ORDER BY year ASC", - replica1.query( - f"SELECT toYear(date) AS year,round(avg(price)) AS price,bar(price, 0, 1000000, 80) FROM source GROUP BY year ORDER BY year ASC" - ), - ) - 
assert_eq_with_retry( - replica1, - f"SELECT toYear(date) AS year,round(avg(price)) AS price,bar(price, 0, 1000000, 80) FROM source GROUP BY year ORDER BY year ASC", - replica2.query( - f"SELECT toYear(date) AS year,round(avg(price)) AS price,bar(price, 0, 1000000, 80) FROM destination GROUP BY year ORDER BY year ASC" - ), - ) - - assert_eq_with_retry( - replica1, f"SELECT town from destination LIMIT 1", "SCARBOROUGH" - ) - - assert_eq_with_retry( - replica2, f"SELECT town from destination LIMIT 1", "SCARBOROUGH" - ) - - cleanup([replica1, replica2]) diff --git a/tests/integration/test_multiple_disks/test.py b/tests/integration/test_multiple_disks/test.py index 9584ace7f456..fdd81284b2a2 100644 --- a/tests/integration/test_multiple_disks/test.py +++ b/tests/integration/test_multiple_disks/test.py @@ -5,7 +5,6 @@ import threading import time from multiprocessing.dummy import Pool -from helpers.test_tools import assert_eq_with_retry import pytest from helpers.client import QueryRuntimeException @@ -1746,9 +1745,9 @@ def alter(): node1.query(f"DROP TABLE IF EXISTS {name} SYNC") -def test_move_across_policies_work_for_attach_not_work_for_move(start_cluster): +def test_move_across_policies_does_not_work(start_cluster): try: - name = "test_move_across_policies_work_for_attach_not_work_for_move" + name = "test_move_across_policies_does_not_work" node1.query( """ @@ -1784,18 +1783,25 @@ def test_move_across_policies_work_for_attach_not_work_for_move(start_cluster): except QueryRuntimeException: """All parts of partition 'all' are already on disk 'jbod2'.""" - node1.query( - """ALTER TABLE {name}2 ATTACH PARTITION tuple() FROM {name}""".format( - name=name + with pytest.raises( + QueryRuntimeException, + match=".*because disk does not belong to storage policy.*", + ): + node1.query( + """ALTER TABLE {name}2 ATTACH PARTITION tuple() FROM {name}""".format( + name=name + ) ) - ) - assert_eq_with_retry( - node1, - """SELECT * FROM {name}2""".format(name=name), + + with pytest.raises( + QueryRuntimeException, + match=".*because disk does not belong to storage policy.*", + ): node1.query( - """SELECT * FROM {name}""".format(name=name), - ), - ) + """ALTER TABLE {name}2 REPLACE PARTITION tuple() FROM {name}""".format( + name=name + ) + ) with pytest.raises( QueryRuntimeException, @@ -1807,6 +1813,10 @@ def test_move_across_policies_work_for_attach_not_work_for_move(start_cluster): ) ) + assert node1.query( + """SELECT * FROM {name}""".format(name=name) + ).splitlines() == ["1"] + finally: node1.query(f"DROP TABLE IF EXISTS {name} SYNC") node1.query(f"DROP TABLE IF EXISTS {name}2 SYNC") From e21799f4a546e7bf798fb81a7d37c9cf324c89a5 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 11 Apr 2024 13:48:35 +0200 Subject: [PATCH 53/90] fix hung check --- src/Interpreters/InterpreterSystemQuery.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp index 9b4534601c35..f6db12e977c5 100644 --- a/src/Interpreters/InterpreterSystemQuery.cpp +++ b/src/Interpreters/InterpreterSystemQuery.cpp @@ -731,10 +731,12 @@ BlockIO InterpreterSystemQuery::execute() case Type::STOP_THREAD_FUZZER: getContext()->checkAccess(AccessType::SYSTEM_THREAD_FUZZER); ThreadFuzzer::stop(); + CannotAllocateThreadFaultInjector::setFaultProbability(0); break; case Type::START_THREAD_FUZZER: getContext()->checkAccess(AccessType::SYSTEM_THREAD_FUZZER); ThreadFuzzer::start(); + 
CannotAllocateThreadFaultInjector::setFaultProbability(getContext()->getServerSettings().cannot_allocate_thread_fault_injection_probability); break; case Type::UNFREEZE: { From 0ef2153d55c2477a33c40301c74e0682eba63d1a Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 11 Apr 2024 14:02:32 +0200 Subject: [PATCH 54/90] Fix --- src/Interpreters/Cache/SLRUFileCachePriority.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/Interpreters/Cache/SLRUFileCachePriority.cpp b/src/Interpreters/Cache/SLRUFileCachePriority.cpp index 68bf182dd2e0..31143eb4a24d 100644 --- a/src/Interpreters/Cache/SLRUFileCachePriority.cpp +++ b/src/Interpreters/Cache/SLRUFileCachePriority.cpp @@ -209,8 +209,13 @@ bool SLRUFileCachePriority::collectCandidatesForEvictionInProtected( { return false; } - else - chassert(downgrade_candidates->size() > 0); + + /// We can have no downgrade candidates because cache size could + /// reduce concurrently because of lock-free cache entries invalidation. + if (downgrade_candidates->size() == 0) + { + return true; + } if (!probationary_queue.collectCandidatesForEviction( downgrade_stat.total_stat.releasable_size, downgrade_stat.total_stat.releasable_count, From 4f6b6e30e10153f448f599e592b496eb499f198a Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 11 Apr 2024 12:30:34 +0000 Subject: [PATCH 55/90] Fix flaky 03093_bug37909_query_does_not_finish --- .../queries/0_stateless/03093_bug37909_query_does_not_finish.sql | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/03093_bug37909_query_does_not_finish.sql b/tests/queries/0_stateless/03093_bug37909_query_does_not_finish.sql index 463922c4e29a..62fa3f437af4 100644 --- a/tests/queries/0_stateless/03093_bug37909_query_does_not_finish.sql +++ b/tests/queries/0_stateless/03093_bug37909_query_does_not_finish.sql @@ -75,3 +75,4 @@ FROM /* WHERE (v_date >= '2022-05-08') AND (v_date <= '2022-06-07') placing condition has same effect */ GROUP BY vDate ORDER BY vDate ASC +SETTINGS allow_experimental_analyzer = 1; -- the query times out if allow_experimental_analyzer = 0 From 8b38fabcf9e7ae844ff8844d2c61a49cc765708b Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Thu, 11 Apr 2024 12:35:52 +0000 Subject: [PATCH 56/90] better --- .../Transforms/SquashingChunksTransform.cpp | 25 +++++++++++-------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/src/Processors/Transforms/SquashingChunksTransform.cpp b/src/Processors/Transforms/SquashingChunksTransform.cpp index 67cf22c7d4de..0d69b6e0a8d5 100644 --- a/src/Processors/Transforms/SquashingChunksTransform.cpp +++ b/src/Processors/Transforms/SquashingChunksTransform.cpp @@ -74,17 +74,6 @@ void SimpleSquashingChunksTransform::transform(Chunk & chunk) auto block = squashing.add({}); chunk.setColumns(block.getColumns(), block.rows()); - - /// ISimpleTransform keeps output chunk (result of transform() execution) for some time and push it in the output port within subsequent prepare() call. - /// Because of our custom prepare() implementation we have to take care of both places where data could be buffered: `output_data` and `squashing`. 
- if (output_data.chunk.hasRows()) - { - auto res = std::move(output_data.chunk); - output_data.chunk.clear(); - if (chunk.hasRows()) - res.append(chunk); - chunk = std::move(res); - } } } @@ -92,7 +81,21 @@ IProcessor::Status SimpleSquashingChunksTransform::prepare() { if (!finished && input.isFinished()) { + if (output.isFinished()) + return Status::Finished; + + if (!output.canPush()) + return Status::PortFull; + + if (has_output) + { + output.pushData(std::move(output_data)); + has_output = false; + return Status::PortFull; + } + finished = true; + /// On the next call to transform() we will return all data buffered in `squashing` (if any) return Status::Ready; } return ISimpleTransform::prepare(); From 3ff802376f0f6e8259d4087d91f0a6183e87d6f0 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 11 Apr 2024 12:36:53 +0000 Subject: [PATCH 57/90] Add test for issue 24607 --- .../queries/0_stateless/03094_named_tuple_bug24607.reference | 1 + tests/queries/0_stateless/03094_named_tuple_bug24607.sql | 4 ++++ 2 files changed, 5 insertions(+) create mode 100644 tests/queries/0_stateless/03094_named_tuple_bug24607.reference create mode 100644 tests/queries/0_stateless/03094_named_tuple_bug24607.sql diff --git a/tests/queries/0_stateless/03094_named_tuple_bug24607.reference b/tests/queries/0_stateless/03094_named_tuple_bug24607.reference new file mode 100644 index 000000000000..fb6ca6c5c3ab --- /dev/null +++ b/tests/queries/0_stateless/03094_named_tuple_bug24607.reference @@ -0,0 +1 @@ +(1,'test') 1 diff --git a/tests/queries/0_stateless/03094_named_tuple_bug24607.sql b/tests/queries/0_stateless/03094_named_tuple_bug24607.sql new file mode 100644 index 000000000000..e3c97f3fe414 --- /dev/null +++ b/tests/queries/0_stateless/03094_named_tuple_bug24607.sql @@ -0,0 +1,4 @@ +SELECT + JSONExtract('{"a":1, "b":"test"}', 'Tuple(a UInt8, b String)') AS x, + x.a +SETTINGS allow_experimental_analyzer = 1; From c684770acee472375736c6429d74bc900f794c5a Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Thu, 11 Apr 2024 15:16:21 +0200 Subject: [PATCH 58/90] Use sudo to compress logs with the highest permissions --- tests/ci/integration_tests_runner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/integration_tests_runner.py b/tests/ci/integration_tests_runner.py index 90e2b08386fc..1289190a29bb 100755 --- a/tests/ci/integration_tests_runner.py +++ b/tests/ci/integration_tests_runner.py @@ -398,7 +398,7 @@ def _install_clickhouse(self, debs_path): @staticmethod def _compress_logs(directory, relpaths, result_path): retcode = subprocess.call( # STYLE_CHECK_ALLOW_SUBPROCESS_CHECK_CALL - f"tar --use-compress-program='zstd --threads=0' -cf {result_path} -C " + f"sudo tar --use-compress-program='zstd --threads=0' -cf {result_path} -C " f"{directory} {' '.join(relpaths)}", shell=True, ) From 0b76f95e57a271035571fca9342acb3636ae4cc9 Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Thu, 11 Apr 2024 15:19:32 +0200 Subject: [PATCH 59/90] Remove unnecessary style-check comments --- tests/ci/integration_tests_runner.py | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/tests/ci/integration_tests_runner.py b/tests/ci/integration_tests_runner.py index 1289190a29bb..118a790590a7 100755 --- a/tests/ci/integration_tests_runner.py +++ b/tests/ci/integration_tests_runner.py @@ -13,13 +13,13 @@ import subprocess import sys import time -from typing import Any, Dict import zlib # for crc32 from collections import defaultdict from itertools import chain +from typing import Any, Dict -from integration_test_images import IMAGES from env_helper import CI +from integration_test_images import IMAGES MAX_RETRY = 1 NUM_WORKERS = 5 @@ -397,9 +397,9 @@ def _install_clickhouse(self, debs_path): @staticmethod def _compress_logs(directory, relpaths, result_path): - retcode = subprocess.call( # STYLE_CHECK_ALLOW_SUBPROCESS_CHECK_CALL - f"sudo tar --use-compress-program='zstd --threads=0' -cf {result_path} -C " - f"{directory} {' '.join(relpaths)}", + retcode = subprocess.call( + f"sudo tar --use-compress-program='zstd --threads=0' " + f"-cf {result_path} -C {directory} {' '.join(relpaths)}", shell=True, ) # tar return 1 when the files are changed on compressing, we ignore it @@ -432,9 +432,7 @@ def _get_all_tests(self, repo_path): "Getting all tests to the file %s with cmd: \n%s", out_file_full, cmd ) with open(out_file_full, "wb") as ofd: - subprocess.check_call( # STYLE_CHECK_ALLOW_SUBPROCESS_CHECK_CALL - cmd, shell=True, stdout=ofd, stderr=ofd - ) + subprocess.check_call(cmd, shell=True, stdout=ofd, stderr=ofd) all_tests = set() with open(out_file_full, "r", encoding="utf-8") as all_tests_fd: @@ -1007,9 +1005,7 @@ def run(): if CI: # Avoid overlaps with previous runs logging.info("Clearing dmesg before run") - subprocess.check_call( # STYLE_CHECK_ALLOW_SUBPROCESS_CHECK_CALL - "sudo -E dmesg --clear", shell=True - ) + subprocess.check_call("sudo -E dmesg --clear", shell=True) state, description, test_results, _ = runner.run_impl(repo_path, build_path) logging.info("Tests finished") @@ -1017,9 +1013,7 @@ def run(): if CI: # Dump dmesg (to capture possible OOMs) logging.info("Dumping dmesg") - subprocess.check_call( # STYLE_CHECK_ALLOW_SUBPROCESS_CHECK_CALL - "sudo -E dmesg -T", shell=True - ) + subprocess.check_call("sudo -E dmesg -T", shell=True) status = (state, description) out_results_file = os.path.join(str(runner.path()), "test_results.tsv") From 59d56668e9a5dd4b6a8ca0b73a689cae9d18f2d9 Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Thu, 11 Apr 2024 11:58:44 +0000 Subject: [PATCH 60/90] CI: respect Sync status in the MQ --- .github/workflows/master.yml | 4 +- .github/workflows/pull_request.yml | 13 ++++- tests/ci/ci.py | 12 +++-- tests/ci/finish_check.py | 31 ++++++------ tests/ci/sync_pr.py | 81 +++++++++++++++++++++++------- 5 files changed, 100 insertions(+), 41 deletions(-) diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index 64372a90613e..9a719a205d4c 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -23,10 +23,10 @@ jobs: clear-repository: true # to ensure correct digests fetch-depth: 0 # to get version filter: tree:0 - - name: Check sync PR + - name: Merge sync PR run: | cd "$GITHUB_WORKSPACE/tests/ci" - python3 sync_pr.py || : + python3 sync_pr.py --merge || : - name: Python unit tests run: | cd "$GITHUB_WORKSPACE/tests/ci" diff --git 
a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 74ce8452de8b..0eac9a9a722d 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -157,16 +157,25 @@ jobs: ################################# Stage Final ################################# # FinishCheck: - if: ${{ !failure() && !cancelled() && github.event_name != 'merge_group' }} - needs: [Tests_1, Tests_2] + if: ${{ !failure() && !cancelled() }} + needs: [Tests_1, Tests_2, Builds_1_Report, Builds_2_Report] runs-on: [self-hosted, style-checker] steps: - name: Check out repository code uses: ClickHouse/checkout@v1 + - name: Check sync status + if: ${{ github.event_name == 'merge_group' }} + run: | + cd "$GITHUB_WORKSPACE/tests/ci" + python3 sync_pr.py --status - name: Finish label run: | cd "$GITHUB_WORKSPACE/tests/ci" python3 finish_check.py + - name: Auto merge if approved + if: ${{ github.event_name != 'merge_group' }} + run: | + cd "$GITHUB_WORKSPACE/tests/ci" python3 merge_pr.py --check-approved diff --git a/tests/ci/ci.py b/tests/ci/ci.py index 8434355ce465..f11d62e91362 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -2121,11 +2121,13 @@ def main() -> int: pr_info, dump_to_file=True, ) - update_mergeable_check( - commit, - pr_info, - job_report.check_name or _get_ext_check_name(args.job_name), - ) + if not pr_info.is_merge_queue(): + # in the merge queue mergeable status must be set only in FinishCheck (last job in wf) + update_mergeable_check( + commit, + pr_info, + job_report.check_name or _get_ext_check_name(args.job_name), + ) print(f"Job report url: [{check_url}]") prepared_events = prepare_tests_results_for_clickhouse( diff --git a/tests/ci/finish_check.py b/tests/ci/finish_check.py index eebc846f4b1b..617f4c9d88cf 100644 --- a/tests/ci/finish_check.py +++ b/tests/ci/finish_check.py @@ -28,21 +28,22 @@ def main(): statuses = get_commit_filtered_statuses(commit) trigger_mergeable_check(commit, statuses) - statuses = [s for s in statuses if s.context == CI_STATUS_NAME] - if not statuses: - return - # Take the latest status - status = statuses[-1] - if status.state == PENDING: - post_commit_status( - commit, - SUCCESS, - status.target_url, - "All checks finished", - CI_STATUS_NAME, - pr_info, - dump_to_file=True, - ) + if not pr_info.is_merge_queue(): + statuses = [s for s in statuses if s.context == CI_STATUS_NAME] + if not statuses: + return + # Take the latest status + status = statuses[-1] + if status.state == PENDING: + post_commit_status( + commit, + SUCCESS, + status.target_url, + "All checks finished", + CI_STATUS_NAME, + pr_info, + dump_to_file=True, + ) if __name__ == "__main__": diff --git a/tests/ci/sync_pr.py b/tests/ci/sync_pr.py index f33f6122f309..e8d982fac166 100644 --- a/tests/ci/sync_pr.py +++ b/tests/ci/sync_pr.py @@ -2,14 +2,68 @@ """Script for automatic sync PRs handling in private repos""" +import argparse import sys from get_robot_token import get_best_robot_token from pr_info import PRInfo from github_helper import GitHub +from commit_status_helper import get_commit, post_commit_status +from report import FAILURE, SUCCESS + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + description="Script for handling sync PRs", + ) + parser.add_argument( + "--merge", + action="store_true", + help="merge sync pr", + ) + parser.add_argument( + "--status", + action="store_true", + help="check and set sync pr status", + ) + args = parser.parse_args() + return args + 
+ +def merge_sync_pr(gh, sync_pr): + if not sync_pr: + print("Sync PR not found - exiting") + return + + if sync_pr.state == "closed": + print(f"Sync PR [{sync_pr.number}] already closed - exiting") + sys.exit(0) + + if sync_pr.state != "open": + print( + f"WARNING: Unknown Sync PR [{sync_pr.number}] state [{sync_pr.state}] - exiting" + ) + sys.exit(0) + + print(f"Trying to merge Sync PR [{sync_pr.number}]") + if sync_pr.draft: + gh.toggle_pr_draft(sync_pr) + sync_pr.merge() + + +def set_sync_status(gh, pr_info, sync_pr): + if not sync_pr or sync_pr.mergeable: + post_commit_status(get_commit(gh, pr_info.sha), FAILURE, "", "Sync PR failure", "A Sync") + else: + post_commit_status(get_commit(gh, pr_info.sha), SUCCESS, "", "", "A Sync") def main(): + args = parse_args() + + assert args.merge ^ args.status + gh = GitHub(get_best_robot_token()) pr_info = PRInfo() @@ -19,27 +73,20 @@ def main(): query=f"head:sync-upstream/pr/{pr_info.merged_pr} org:ClickHouse type:pr", repo="ClickHouse/clickhouse-private", ) + + sync_pr = None + if len(prs) > 1: print(f"WARNING: More than one PR found [{prs}] - exiting") - sys.exit(0) - if len(prs) == 0: + elif len(prs) == 0: print("WARNING: No Sync PR found") - sys.exit(0) - - pr = prs[0] - - if pr.state == "closed": - print(f"Sync PR [{pr.number}] already closed - exiting") - sys.exit(0) - - if pr.state != "open": - print(f"WARNING: Unknown Sync PR [{pr.number}] state [{pr.state}] - exiting") - sys.exit(0) + else: + sync_pr = prs[0] - print(f"Trying to merge Sync PR [{pr.number}]") - if pr.draft: - gh.toggle_pr_draft(pr) - pr.merge() + if args.merge: + merge_sync_pr(gh, sync_pr) + elif args.status: + set_sync_status(gh, pr_info, sync_pr) if __name__ == "__main__": From 2bbd36a5b72f4ff4d7ebcdd66d0a7f8c5e955acf Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Thu, 11 Apr 2024 15:36:07 +0200 Subject: [PATCH 61/90] Run fast tests and style checks in parallel --- .github/workflows/pull_request.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 74ce8452de8b..0db7be65feab 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -83,7 +83,7 @@ jobs: ${{secrets.ROBOT_CLICKHOUSE_SSH_KEY}} RCSK FastTest: - needs: [RunConfig, StyleCheck] + needs: [RunConfig, BuildDockers] if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).jobs_data.jobs_to_do, 'Fast test') }} uses: ./.github/workflows/reusable_test.yml with: From 99078ef9d5e5fe3d205add749de6b72e2845eb4a Mon Sep 17 00:00:00 2001 From: Arnaud Rocher Date: Thu, 11 Apr 2024 15:56:47 +0200 Subject: [PATCH 62/90] Fix typo in `like` function documentation --- docs/en/sql-reference/functions/string-search-functions.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/sql-reference/functions/string-search-functions.md b/docs/en/sql-reference/functions/string-search-functions.md index f7e56e73520f..df266b224fbb 100644 --- a/docs/en/sql-reference/functions/string-search-functions.md +++ b/docs/en/sql-reference/functions/string-search-functions.md @@ -970,7 +970,7 @@ If the haystack or the LIKE expression are not valid UTF-8, the behavior is unde No automatic Unicode normalization is performed, you can use the [normalizeUTF8*()](https://clickhouse.com/docs/en/sql-reference/functions/string-functions/) functions for that. 
-To match against literal `%`, `_` and `/` (which are LIKE metacharacters), prepend them with a backslash: `\%`, `\_` and `\\`. +To match against literal `%`, `_` and `\` (which are LIKE metacharacters), prepend them with a backslash: `\%`, `\_` and `\\`. The backslash loses its special meaning (i.e. is interpreted literally) if it prepends a character different than `%`, `_` or `\`. Note that ClickHouse requires backslashes in strings [to be quoted as well](../syntax.md#string), so you would actually need to write `\\%`, `\\_` and `\\\\`. @@ -1768,4 +1768,4 @@ SELECT hasTokenCaseInsensitiveOrNull('Hello World','hello,world'); ```response null -``` \ No newline at end of file +``` From dd49b09902d29de85299d570d14b934f801d4ec3 Mon Sep 17 00:00:00 2001 From: Joshua Hildred Date: Thu, 11 Apr 2024 06:58:35 -0700 Subject: [PATCH 63/90] Address review comments --- .../Passes/LogicalExpressionOptimizerPass.cpp | 62 +++++++------------ .../03032_redundant_equals.reference | 3 + .../0_stateless/03032_redundant_equals.sql | 4 ++ 3 files changed, 28 insertions(+), 41 deletions(-) diff --git a/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp b/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp index 05efe983b42e..63f8c4786cec 100644 --- a/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp +++ b/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp @@ -274,18 +274,7 @@ class LogicalExpressionOptimizerVisitor : public InDepthQueryTreeVisitorWithCont } } - void leaveImpl(QueryTreeNodePtr & node) - { - if (!need_rerun_resolve) - return; - - if (auto * function_node = node->as()) - rerunFunctionResolve(function_node, getContext()); - } - private: - bool need_rerun_resolve = false; - void tryOptimizeAndEqualsNotEqualsChain(QueryTreeNodePtr & node) { auto & function_node = node->as(); @@ -588,12 +577,6 @@ class LogicalExpressionOptimizerVisitor : public InDepthQueryTreeVisitorWithCont auto & function_node = node->as(); assert(function_node.getFunctionName() == "equals"); - bool lhs_const; - bool maybe_invert; - - const ConstantNode * constant; - const FunctionNode * child_function; - const auto function_arguments = function_node.getArguments().getNodes(); if (function_arguments.size() != 2) return; @@ -601,47 +584,44 @@ class LogicalExpressionOptimizerVisitor : public InDepthQueryTreeVisitorWithCont const auto & lhs = function_arguments[0]; const auto & rhs = function_arguments[1]; - if ((constant = lhs->as())) - lhs_const = true; - else if ((constant = rhs->as())) - lhs_const = false; + UInt64 constant_value; + bool is_lhs_const; + if (const auto * lhs_constant = lhs->as()) + { + if (!lhs_constant->getValue().tryGet(constant_value) || constant_value > 1 + || isNullableOrLowCardinalityNullable(lhs_constant->getResultType())) + return; + is_lhs_const = true; + } + else if (const auto * rhs_constant = rhs->as()) + { + if (!rhs_constant->getValue().tryGet(constant_value) || constant_value > 1 + || isNullableOrLowCardinalityNullable(rhs_constant->getResultType())) + return; + is_lhs_const = false; + } else return; - UInt64 val; - if (!constant->getValue().tryGet(val)) - return; - - if (val == 1) - maybe_invert = false; - else if (val == 0) - maybe_invert = true; - else - return; + bool need_invert = (constant_value == 0); - if (lhs_const) - child_function = rhs->as(); - else - child_function = lhs->as(); + const FunctionNode * child_function = is_lhs_const ? 
rhs->as() : lhs->as(); if (!child_function || !isBooleanFunction(child_function->getFunctionName())) return; - if (removeLowCardinality(constant->getResultType())->isNullable()) - need_rerun_resolve = true; - - if (maybe_invert) + if (need_invert) { auto not_resolver = FunctionFactory::instance().get("not", getContext()); const auto not_node = std::make_shared("not"); auto & arguments = not_node->getArguments().getNodes(); arguments.reserve(1); - arguments.push_back(lhs_const ? rhs : lhs); + arguments.push_back(is_lhs_const ? rhs : lhs); not_node->resolveAsFunction(not_resolver->build(not_node->getArgumentColumns())); node = not_node; } else - node = lhs_const ? rhs : lhs; + node = is_lhs_const ? rhs : lhs; } }; diff --git a/tests/queries/0_stateless/03032_redundant_equals.reference b/tests/queries/0_stateless/03032_redundant_equals.reference index b154addf55fb..5b211a69007c 100644 --- a/tests/queries/0_stateless/03032_redundant_equals.reference +++ b/tests/queries/0_stateless/03032_redundant_equals.reference @@ -15,6 +15,9 @@ 100 101 100 +101 +101 +101 100 101 100 diff --git a/tests/queries/0_stateless/03032_redundant_equals.sql b/tests/queries/0_stateless/03032_redundant_equals.sql index bd2306c7575c..de85ec5cf002 100644 --- a/tests/queries/0_stateless/03032_redundant_equals.sql +++ b/tests/queries/0_stateless/03032_redundant_equals.sql @@ -25,6 +25,10 @@ SELECT * FROM test_table WHERE (NOT ((k not in (100) = 0) OR (k in (100) = 1))) SELECT * FROM test_table WHERE (NOT ((k in (101) = 0) OR (k in (100) = 1))) = 1; SELECT * FROM test_table WHERE ((k not in (101) = 0) OR (k in (100) = 1)) = 1; SELECT * FROM test_table WHERE ((k not in (99) = 1) AND (k in (100) = 1)) = 1; +-- we skip optimizing queries with toNullable(0 or 1) but lets make sure they still work +SELECT * FROM test_table WHERE (k = 101) = toLowCardinality(toNullable(1)); +SELECT * FROM test_table WHERE (k = 101) = toNullable(1); +SELECT * FROM test_table WHERE (k = 101) = toLowCardinality(1); SELECT * FROM test_table WHERE ((k not in (101) = toNullable(0)) OR (k in (100) = toNullable(1))) = toNullable(1); SELECT * FROM test_table WHERE (((k NOT IN toLowCardinality(toNullable(101))) = toLowCardinality(toNullable(0))) OR ((k IN (toLowCardinality(100))) = toNullable(1))); SELECT * FROM test_table WHERE (((k IN toLowCardinality(toNullable(101))) = toLowCardinality(toNullable(0))) AND ((k NOT IN (toLowCardinality(100))) = toNullable(1))) = toNullable(toLowCardinality(0)); From 671d2a8927a79b3038dc504524fbbe5cc3c39e9a Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 11 Apr 2024 14:33:25 +0000 Subject: [PATCH 64/90] Automatic style fix --- tests/ci/sync_pr.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/ci/sync_pr.py b/tests/ci/sync_pr.py index e8d982fac166..c58cc20d7af4 100644 --- a/tests/ci/sync_pr.py +++ b/tests/ci/sync_pr.py @@ -54,7 +54,9 @@ def merge_sync_pr(gh, sync_pr): def set_sync_status(gh, pr_info, sync_pr): if not sync_pr or sync_pr.mergeable: - post_commit_status(get_commit(gh, pr_info.sha), FAILURE, "", "Sync PR failure", "A Sync") + post_commit_status( + get_commit(gh, pr_info.sha), FAILURE, "", "Sync PR failure", "A Sync" + ) else: post_commit_status(get_commit(gh, pr_info.sha), SUCCESS, "", "", "A Sync") From 3e16309e991b6ac833a18eae1e7dd120e7c9f63b Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 11 Apr 2024 15:25:52 +0000 Subject: [PATCH 65/90] Allow constant folding throught __getScalar --- src/Analyzer/Passes/QueryAnalysisPass.cpp | 22 +++++++++++++++++-- 
..._limit_with_constant_expressions.reference | 3 +++ .../00834_limit_with_constant_expressions.sql | 2 ++ 3 files changed, 25 insertions(+), 2 deletions(-) diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index aedf860f5be4..44e0d2a33037 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -5624,17 +5624,35 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi function_name, scope.scope_node->formatASTForErrorMessage()); + bool argument_is_constant = false; const auto * constant_node = function_argument->as(); if (constant_node) { argument_column.column = constant_node->getResultType()->createColumnConst(1, constant_node->getValue()); argument_column.type = constant_node->getResultType(); + argument_is_constant = true; } - else + else if(const auto * get_scalar_function_node = function_argument->as(); + get_scalar_function_node && get_scalar_function_node->getFunctionName() == "__getScalar") { - all_arguments_constants = false; + /// Allow constant folding through getScalar + const auto * get_scalar_const_arg = get_scalar_function_node->getArguments().getNodes().at(0)->as(); + if (get_scalar_const_arg && scope.context->hasQueryContext()) + { + auto query_context = scope.context->getQueryContext(); + auto scalar_string = toString(get_scalar_const_arg->getValue()); + if (query_context->hasScalar(scalar_string)) + { + auto scalar = query_context->getScalar(scalar_string); + argument_column.column = ColumnConst::create(scalar.getByPosition(0).column, 1); + argument_column.type = get_scalar_function_node->getResultType(); + argument_is_constant = true; + } + } } + all_arguments_constants &= argument_is_constant; + argument_types.push_back(argument_column.type); argument_columns.emplace_back(std::move(argument_column)); } diff --git a/tests/queries/0_stateless/00834_limit_with_constant_expressions.reference b/tests/queries/0_stateless/00834_limit_with_constant_expressions.reference index 593bf010efd1..5d7483702424 100644 --- a/tests/queries/0_stateless/00834_limit_with_constant_expressions.reference +++ b/tests/queries/0_stateless/00834_limit_with_constant_expressions.reference @@ -19,3 +19,6 @@ 2 3 4 +0 +1 +2 diff --git a/tests/queries/0_stateless/00834_limit_with_constant_expressions.sql b/tests/queries/0_stateless/00834_limit_with_constant_expressions.sql index 544866341309..47b403a37f98 100644 --- a/tests/queries/0_stateless/00834_limit_with_constant_expressions.sql +++ b/tests/queries/0_stateless/00834_limit_with_constant_expressions.sql @@ -24,3 +24,5 @@ SELECT * FROM numbers(10) LIMIT LENGTH('NNN') + COS(0), toDate('0000-00-02'); -- SELECT * FROM numbers(10) LIMIT a + 5 - a; -- { serverError 47 } SELECT * FROM numbers(10) LIMIT a + b; -- { serverError 47 } SELECT * FROM numbers(10) LIMIT 'Hello'; -- { serverError 440 } + +SELECT number from numbers(10) order by number limit (select sum(number), count() from numbers(3)).1; From e8f616e80bce36463db1de8504e35e286953b0b7 Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Thu, 11 Apr 2024 15:55:25 +0000 Subject: [PATCH 66/90] CI: fix for sync check status in mq --- tests/ci/sync_pr.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/sync_pr.py b/tests/ci/sync_pr.py index c58cc20d7af4..acff7ba541bb 100644 --- a/tests/ci/sync_pr.py +++ b/tests/ci/sync_pr.py @@ -53,7 +53,7 @@ def merge_sync_pr(gh, sync_pr): def set_sync_status(gh, pr_info, sync_pr): - if not sync_pr or sync_pr.mergeable: + if 
not sync_pr or not sync_pr.mergeable: post_commit_status( get_commit(gh, pr_info.sha), FAILURE, "", "Sync PR failure", "A Sync" ) From 523ee302cbef13f2ed3a290457f79be6fe1527a6 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 11 Apr 2024 16:37:54 +0000 Subject: [PATCH 67/90] more profile events --- src/Common/ProfileEvents.cpp | 2 ++ src/Disks/IO/ReadBufferFromAzureBlobStorage.cpp | 4 ++++ .../AzureBlobStorage/AzureObjectStorage.cpp | 13 +++++++++++-- 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index 23eed53509ee..c00d10175869 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -439,12 +439,14 @@ The server successfully detected this situation and will download merged part fr M(AzureCopyObject, "Number of Azure blob storage API CopyObject calls") \ M(AzureDeleteObjects, "Number of Azure blob storage API DeleteObject(s) calls.") \ M(AzureListObjects, "Number of Azure blob storage API ListObjects calls.") \ + M(AzureGetProperties, "Number of Azure blob storage API GetProperties calls.") \ \ M(DiskAzureGetObject, "Number of Disk Azure API GetObject calls.") \ M(DiskAzureUploadPart, "Number of Disk Azure blob storage API UploadPart calls") \ M(DiskAzureCopyObject, "Number of Disk Azure blob storage API CopyObject calls") \ M(DiskAzureListObjects, "Number of Disk Azure blob storage API ListObjects calls.") \ M(DiskAzureDeleteObjects, "Number of Azure blob storage API DeleteObject(s) calls.") \ + M(DiskAzureGetProperties, "Number of Disk Azure blob storage API GetProperties calls.") \ \ M(ReadBufferFromAzureMicroseconds, "Time spent on reading from Azure.") \ M(ReadBufferFromAzureInitMicroseconds, "Time spent initializing connection to Azure.") \ diff --git a/src/Disks/IO/ReadBufferFromAzureBlobStorage.cpp b/src/Disks/IO/ReadBufferFromAzureBlobStorage.cpp index c79c95e974ec..48b4ed23af05 100644 --- a/src/Disks/IO/ReadBufferFromAzureBlobStorage.cpp +++ b/src/Disks/IO/ReadBufferFromAzureBlobStorage.cpp @@ -278,6 +278,10 @@ size_t ReadBufferFromAzureBlobStorage::readBigAt(char * to, size_t n, size_t ran try { + ProfileEvents::increment(ProfileEvents::AzureGetObject); + if (read_settings.for_object_storage) + ProfileEvents::increment(ProfileEvents::DiskAzureGetObject); + Azure::Storage::Blobs::DownloadBlobOptions download_options; download_options.Range = {static_cast(range_begin), n}; auto download_response = blob_client->Download(download_options); diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp index fb3a35301c0b..ff4b481eefd8 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp @@ -28,19 +28,21 @@ namespace ProfileEvents extern const Event DiskAzureListObjects; extern const Event AzureDeleteObjects; extern const Event DiskAzureDeleteObjects; + extern const Event AzureGetProperties; + extern const Event DiskAzureGetProperties; + extern const Event AzureCopyObject; + extern const Event DiskAzureCopyObject; } namespace DB { - namespace ErrorCodes { extern const int AZURE_BLOB_STORAGE_ERROR; extern const int UNSUPPORTED_METHOD; } - namespace { @@ -346,9 +348,13 @@ void AzureObjectStorage::removeObjectsIfExist(const StoredObjects & objects) ObjectMetadata AzureObjectStorage::getObjectMetadata(const std::string & path) const { + ProfileEvents::increment(ProfileEvents::AzureGetProperties); + 
ProfileEvents::increment(ProfileEvents::DiskAzureGetProperties); + auto client_ptr = client.get(); auto blob_client = client_ptr->GetBlobClient(path); auto properties = blob_client.GetProperties().Value; + ObjectMetadata result; result.size_bytes = properties.BlobSize; if (!properties.Metadata.empty()) @@ -379,6 +385,9 @@ void AzureObjectStorage::copyObject( /// NOLINT copy_options.Metadata[key] = value; } + ProfileEvents::increment(ProfileEvents::AzureCopyObject); + ProfileEvents::increment(ProfileEvents::DiskAzureCopyObject); + dest_blob_client.CopyFromUri(source_blob_client.GetUrl(), copy_options); } From cbfc2b96f92d52680554efaed563226d44c82d67 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 11 Apr 2024 19:28:10 +0200 Subject: [PATCH 68/90] fix polygon something --- src/Common/threadPoolCallbackRunner.h | 6 +++--- src/Dictionaries/PolygonDictionaryUtils.h | 13 +++++++------ 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/src/Common/threadPoolCallbackRunner.h b/src/Common/threadPoolCallbackRunner.h index ef22f9038d80..5beec6608019 100644 --- a/src/Common/threadPoolCallbackRunner.h +++ b/src/Common/threadPoolCallbackRunner.h @@ -73,10 +73,10 @@ std::future scheduleFromThreadPoolUnsafe(T && task, ThreadPool & pool, c /// NOTE It's still not completely safe. /// When creating a runner on stack, you MUST make sure that it's created (and destroyed) before local objects captured by task lambda. -template > +template > class ThreadPoolCallbackRunnerLocal { - ThreadPool & pool; + PoolT & pool; std::string thread_name; enum TaskState @@ -106,7 +106,7 @@ class ThreadPoolCallbackRunnerLocal } public: - ThreadPoolCallbackRunnerLocal(ThreadPool & pool_, const std::string & thread_name_) + ThreadPoolCallbackRunnerLocal(PoolT & pool_, const std::string & thread_name_) : pool(pool_) , thread_name(thread_name_) { diff --git a/src/Dictionaries/PolygonDictionaryUtils.h b/src/Dictionaries/PolygonDictionaryUtils.h index 0fd1fead456b..9fba467a3630 100644 --- a/src/Dictionaries/PolygonDictionaryUtils.h +++ b/src/Dictionaries/PolygonDictionaryUtils.h @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -250,10 +251,11 @@ class GridRoot : public ICell auto y_shift = (current_max_y - current_min_y) / DividedCell::kSplit; std::vector>> children; children.resize(DividedCell::kSplit * DividedCell::kSplit); - std::vector threads{}; + + ThreadPoolCallbackRunnerLocal runner(GlobalThreadPool::instance(), "PolygonDict"); for (size_t i = 0; i < DividedCell::kSplit; current_min_x += x_shift, ++i) { - auto handle_row = [this, &children, &y_shift, &x_shift, &possible_ids, &depth, i](Coord x, Coord y) + auto handle_row = [this, &children, &y_shift, &x_shift, &possible_ids, &depth, i, x = current_min_x, y = current_min_y]() mutable { for (size_t j = 0; j < DividedCell::kSplit; y += y_shift, ++j) { @@ -261,12 +263,11 @@ class GridRoot : public ICell } }; if (depth <= kMultiProcessingDepth) - threads.emplace_back(handle_row, current_min_x, current_min_y); + runner(std::move(handle_row)); else - handle_row(current_min_x, current_min_y); + handle_row(); } - for (auto & thread : threads) - thread.join(); + runner.waitForAllToFinishAndRethrowFirstError(); return std::make_unique>(std::move(children)); } From 61d4d9a46edadaf88faaa0bb5535f54fc8af51d5 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 11 Apr 2024 19:39:44 +0200 Subject: [PATCH 69/90] Update QueryAnalysisPass.cpp --- src/Analyzer/Passes/QueryAnalysisPass.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) 
diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index 44e0d2a33037..b8fa9277d5c3 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -5632,7 +5632,7 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi argument_column.type = constant_node->getResultType(); argument_is_constant = true; } - else if(const auto * get_scalar_function_node = function_argument->as(); + else if (const auto * get_scalar_function_node = function_argument->as(); get_scalar_function_node && get_scalar_function_node->getFunctionName() == "__getScalar") { /// Allow constant folding through getScalar From b90eb1962f78019322dcf9f59f7a29a916d24b2b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Thu, 11 Apr 2024 19:56:30 +0200 Subject: [PATCH 70/90] Remove mentions of clean_deleted_rows from the documentation --- .../mergetree-family/replacingmergetree.md | 4 ++-- docs/en/operations/settings/merge-tree-settings.md | 10 ---------- .../mergetree-family/replacingmergetree.md | 3 +-- 3 files changed, 3 insertions(+), 14 deletions(-) diff --git a/docs/en/engines/table-engines/mergetree-family/replacingmergetree.md b/docs/en/engines/table-engines/mergetree-family/replacingmergetree.md index 6de818c130f0..a6258bcd5817 100644 --- a/docs/en/engines/table-engines/mergetree-family/replacingmergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/replacingmergetree.md @@ -25,7 +25,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] [ORDER BY expr] [PRIMARY KEY expr] [SAMPLE BY expr] -[SETTINGS name=value, clean_deleted_rows=value, ...] +[SETTINGS name=value, ...] ``` For a description of request parameters, see [statement description](../../../sql-reference/statements/create/table.md). @@ -97,7 +97,7 @@ SELECT * FROM mySecondReplacingMT FINAL; :::note `is_deleted` can only be enabled when `ver` is used. -The row is deleted when `OPTIMIZE ... FINAL CLEANUP` or `OPTIMIZE ... FINAL` is used, or if the engine setting `clean_deleted_rows` has been set to `Always`. +The row is deleted when `OPTIMIZE ... FINAL CLEANUP` or `OPTIMIZE ... FINAL` is used. No matter the operation on the data, the version must be increased. If two inserted rows have the same version number, the last inserted row is the one kept. diff --git a/docs/en/operations/settings/merge-tree-settings.md b/docs/en/operations/settings/merge-tree-settings.md index 9327d52227f9..76250b804765 100644 --- a/docs/en/operations/settings/merge-tree-settings.md +++ b/docs/en/operations/settings/merge-tree-settings.md @@ -852,16 +852,6 @@ If the file name for column is too long (more than `max_file_name_length` bytes) The maximal length of the file name to keep it as is without hashing. Takes effect only if setting `replace_long_file_name_to_hash` is enabled. The value of this setting does not include the length of file extension. So, it is recommended to set it below the maximum filename length (usually 255 bytes) with some gap to avoid filesystem errors. Default value: 127. -## clean_deleted_rows - -Enable/disable automatic deletion of rows flagged as `is_deleted` when perform `OPTIMIZE ... FINAL` on a table using the ReplacingMergeTree engine. When disabled, the `CLEANUP` keyword has to be added to the `OPTIMIZE ... FINAL` to have the same behaviour. - -Possible values: - -- `Always` or `Never`. 
- -Default value: `Never` - ## allow_experimental_block_number_column Persists virtual column `_block_number` on merges. diff --git a/docs/ru/engines/table-engines/mergetree-family/replacingmergetree.md b/docs/ru/engines/table-engines/mergetree-family/replacingmergetree.md index e8089b2c42b7..a6493f20b6ee 100644 --- a/docs/ru/engines/table-engines/mergetree-family/replacingmergetree.md +++ b/docs/ru/engines/table-engines/mergetree-family/replacingmergetree.md @@ -99,10 +99,9 @@ SELECT * FROM mySecondReplacingMT FINAL; - при использовании инструкции `OPTIMIZE ... FINAL CLEANUP` - при использовании инструкции `OPTIMIZE ... FINAL` - - параметр движка `clean_deleted_rows` установлен в значение `Always` (по умолчанию - `Never`) - есть новые версии строки -Не рекомендуется выполнять `FINAL CLEANUP` или использовать параметр движка `clean_deleted_rows` со значением `Always`, это может привести к неожиданным результатам, например удаленные строки могут вновь появиться. +Не рекомендуется выполнять `FINAL CLEANUP`, это может привести к неожиданным результатам, например удаленные строки могут вновь появиться. Вне зависимости от производимых изменений над данными, версия должна увеличиваться. Если у двух строк одна и та же версия, то остается только последняя вставленная строка. ::: From 20e8b64cee607211078800a2306b82424cac6082 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 11 Apr 2024 19:47:42 +0200 Subject: [PATCH 71/90] fix --- src/Client/Connection.cpp | 2 +- src/Client/Connection.h | 2 +- src/Client/IServerConnection.h | 2 +- src/Client/LocalConnection.h | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Client/Connection.cpp b/src/Client/Connection.cpp index 4e2456134793..483201509c4d 100644 --- a/src/Client/Connection.cpp +++ b/src/Client/Connection.cpp @@ -475,7 +475,7 @@ const String & Connection::getDefaultDatabase() const return default_database; } -const String & Connection::getDescription(bool with_extra) const +const String & Connection::getDescription(bool with_extra) const /// NOLINT { if (with_extra) return full_description; diff --git a/src/Client/Connection.h b/src/Client/Connection.h index 71513ea919d6..9632eb9d948c 100644 --- a/src/Client/Connection.h +++ b/src/Client/Connection.h @@ -88,7 +88,7 @@ class Connection : public IServerConnection const String & getServerDisplayName(const ConnectionTimeouts & timeouts) override; /// For log and exception messages. 
- const String & getDescription(bool with_extra = false) const override; + const String & getDescription(bool with_extra = false) const override; /// NOLINT const String & getHost() const; UInt16 getPort() const; const String & getDefaultDatabase() const; diff --git a/src/Client/IServerConnection.h b/src/Client/IServerConnection.h index 724afa95d7a6..e7376491f8ce 100644 --- a/src/Client/IServerConnection.h +++ b/src/Client/IServerConnection.h @@ -88,7 +88,7 @@ class IServerConnection : boost::noncopyable virtual const String & getServerTimezone(const ConnectionTimeouts & timeouts) = 0; virtual const String & getServerDisplayName(const ConnectionTimeouts & timeouts) = 0; - virtual const String & getDescription(bool with_extra = false) const = 0; + virtual const String & getDescription(bool with_extra = false) const = 0; /// NOLINT virtual std::vector> getPasswordComplexityRules() const = 0; diff --git a/src/Client/LocalConnection.h b/src/Client/LocalConnection.h index 6218fbe341f4..ac5a05c3e92d 100644 --- a/src/Client/LocalConnection.h +++ b/src/Client/LocalConnection.h @@ -90,7 +90,7 @@ class LocalConnection : public IServerConnection, WithContext const String & getServerTimezone(const ConnectionTimeouts & timeouts) override; const String & getServerDisplayName(const ConnectionTimeouts & timeouts) override; - const String & getDescription([[maybe_unused]] bool with_extra = false) const override { return description; } + const String & getDescription([[maybe_unused]] bool with_extra = false) const override { return description; } /// NOLINT std::vector> getPasswordComplexityRules() const override { return {}; } From 83d6f2ef99efb9857bbff5163f4f76b4c586b7e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Thu, 11 Apr 2024 21:51:05 +0200 Subject: [PATCH 72/90] Correct the documentation about duplicates with argmin and argmax --- docs/en/sql-reference/aggregate-functions/reference/argmax.md | 2 +- docs/en/sql-reference/aggregate-functions/reference/argmin.md | 2 +- docs/ru/sql-reference/aggregate-functions/reference/argmax.md | 2 +- docs/ru/sql-reference/aggregate-functions/reference/argmin.md | 2 +- docs/zh/sql-reference/aggregate-functions/reference/argmax.md | 2 +- docs/zh/sql-reference/aggregate-functions/reference/argmin.md | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/en/sql-reference/aggregate-functions/reference/argmax.md b/docs/en/sql-reference/aggregate-functions/reference/argmax.md index 8f10318838bb..2274dd4a5dcf 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/argmax.md +++ b/docs/en/sql-reference/aggregate-functions/reference/argmax.md @@ -5,7 +5,7 @@ sidebar_position: 106 # argMax -Calculates the `arg` value for a maximum `val` value. If there are several different values of `arg` for maximum values of `val`, returns the first of these values encountered. +Calculates the `arg` value for a maximum `val` value. If there are multiple rows with equal `val` being the maximum, which of the associated `arg` is returned is not deterministic. Both parts the `arg` and the `max` behave as [aggregate functions](/docs/en/sql-reference/aggregate-functions/index.md), they both [skip `Null`](/docs/en/sql-reference/aggregate-functions/index.md#null-processing) during processing and return not `Null` values if not `Null` values are available. 
**Syntax** diff --git a/docs/en/sql-reference/aggregate-functions/reference/argmin.md b/docs/en/sql-reference/aggregate-functions/reference/argmin.md index 616fa3d33b83..297744fb1dbb 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/argmin.md +++ b/docs/en/sql-reference/aggregate-functions/reference/argmin.md @@ -5,7 +5,7 @@ sidebar_position: 105 # argMin -Calculates the `arg` value for a minimum `val` value. If there are several different values of `arg` for minimum values of `val`, returns the first of these values encountered. +Calculates the `arg` value for a minimum `val` value. If there are multiple rows with equal `val` being the minimum, which of the associated `arg` is returned is not deterministic. Both parts the `arg` and the `min` behave as [aggregate functions](/docs/en/sql-reference/aggregate-functions/index.md), they both [skip `Null`](/docs/en/sql-reference/aggregate-functions/index.md#null-processing) during processing and return not `Null` values if not `Null` values are available. **Syntax** diff --git a/docs/ru/sql-reference/aggregate-functions/reference/argmax.md b/docs/ru/sql-reference/aggregate-functions/reference/argmax.md index f459683c7c92..0489606b3c15 100644 --- a/docs/ru/sql-reference/aggregate-functions/reference/argmax.md +++ b/docs/ru/sql-reference/aggregate-functions/reference/argmax.md @@ -5,7 +5,7 @@ sidebar_position: 106 # argMax {#agg-function-argmax} -Вычисляет значение `arg` при максимальном значении `val`. Если есть несколько разных значений `arg` для максимальных значений `val`, возвращает первое попавшееся из таких значений. +Вычисляет значение `arg` при максимальном значении `val`. **Синтаксис** diff --git a/docs/ru/sql-reference/aggregate-functions/reference/argmin.md b/docs/ru/sql-reference/aggregate-functions/reference/argmin.md index 4d8bc47c42d9..d74661cf43c8 100644 --- a/docs/ru/sql-reference/aggregate-functions/reference/argmin.md +++ b/docs/ru/sql-reference/aggregate-functions/reference/argmin.md @@ -5,7 +5,7 @@ sidebar_position: 105 # argMin {#agg-function-argmin} -Вычисляет значение `arg` при минимальном значении `val`. Если есть несколько разных значений `arg` для минимальных значений `val`, возвращает первое попавшееся из таких значений. +Вычисляет значение `arg` при минимальном значении `val`.
**Синтаксис** diff --git a/docs/zh/sql-reference/aggregate-functions/reference/argmax.md b/docs/zh/sql-reference/aggregate-functions/reference/argmax.md index 6ae7155ca245..d1d7930867f6 100644 --- a/docs/zh/sql-reference/aggregate-functions/reference/argmax.md +++ b/docs/zh/sql-reference/aggregate-functions/reference/argmax.md @@ -5,7 +5,7 @@ sidebar_position: 106 # argMax {#agg-function-argmax} -计算 `val` 最大值对应的 `arg` 值。 如果 `val` 最大值存在几个不同的 `arg` 值,输出遇到的第一个值。 +计算 `val` 最大值对应的 `arg` 值。 **语法** diff --git a/docs/zh/sql-reference/aggregate-functions/reference/argmin.md b/docs/zh/sql-reference/aggregate-functions/reference/argmin.md index cb21a13021bb..fb66075f2b02 100644 --- a/docs/zh/sql-reference/aggregate-functions/reference/argmin.md +++ b/docs/zh/sql-reference/aggregate-functions/reference/argmin.md @@ -7,7 +7,7 @@ sidebar_position: 105 语法: `argMin(arg, val)` 或 `argMin(tuple(arg, val))` -计算 `val` 最小值对应的 `arg` 值。 如果 `val` 最小值存在几个不同的 `arg` 值,输出遇到的第一个(`arg`)值。 +计算 `val` 最小值对应的 `arg` 值。 **示例:** From 8f4f3aaf14d0838cdcb566d2c88f003981987264 Mon Sep 17 00:00:00 2001 From: Joshua Hildred Date: Thu, 11 Apr 2024 12:56:50 -0700 Subject: [PATCH 73/90] Small code cleanup --- src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp b/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp index 63f8c4786cec..eff52f8e6fd0 100644 --- a/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp +++ b/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp @@ -603,14 +603,12 @@ class LogicalExpressionOptimizerVisitor : public InDepthQueryTreeVisitorWithCont else return; - bool need_invert = (constant_value == 0); - const FunctionNode * child_function = is_lhs_const ? 
rhs->as() : lhs->as(); - if (!child_function || !isBooleanFunction(child_function->getFunctionName())) return; - if (need_invert) + // if we have something like `function = 0`, we need to add a `NOT` when dropping the `= 0` + if (constant_value == 0) { auto not_resolver = FunctionFactory::instance().get("not", getContext()); const auto not_node = std::make_shared("not"); From 3ce02239c6392822250894f9cca1bf7c434a9242 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Thu, 11 Apr 2024 23:12:11 +0200 Subject: [PATCH 74/90] Revert "More optimal loading of marks" --- src/Storages/MergeTree/MarkRange.cpp | 5 -- src/Storages/MergeTree/MarkRange.h | 1 - .../MergeTree/MergeTreeIndexReader.cpp | 3 - .../MergeTree/MergeTreeMarksLoader.cpp | 8 --- src/Storages/MergeTree/MergeTreeMarksLoader.h | 1 - .../MergeTree/MergeTreeReaderCompact.cpp | 1 - .../MergeTree/MergeTreeReaderStream.cpp | 72 +++++-------------- .../MergeTree/MergeTreeReaderStream.h | 38 +++------- .../MergeTree/MergeTreeReaderWide.cpp | 32 +++------ src/Storages/MergeTree/MergeTreeReaderWide.h | 1 - .../MergeTree/MergeTreeSequentialSource.cpp | 9 ++- .../test_merge_tree_load_marks/__init__.py | 0 .../configs/config.xml | 12 ---- .../test_merge_tree_load_marks/test.py | 62 ---------------- .../02532_send_logs_level_test.reference | 1 - .../0_stateless/02532_send_logs_level_test.sh | 2 +- 16 files changed, 41 insertions(+), 207 deletions(-) delete mode 100644 tests/integration/test_merge_tree_load_marks/__init__.py delete mode 100644 tests/integration/test_merge_tree_load_marks/configs/config.xml delete mode 100644 tests/integration/test_merge_tree_load_marks/test.py diff --git a/src/Storages/MergeTree/MarkRange.cpp b/src/Storages/MergeTree/MarkRange.cpp index c6e98b4e5a15..bd8546f04cc7 100644 --- a/src/Storages/MergeTree/MarkRange.cpp +++ b/src/Storages/MergeTree/MarkRange.cpp @@ -81,11 +81,6 @@ size_t MarkRanges::getNumberOfMarks() const return result; } -bool MarkRanges::isOneRangeForWholePart(size_t num_marks_in_part) const -{ - return size() == 1 && front().begin == 0 && front().end == num_marks_in_part; -} - void MarkRanges::serialize(WriteBuffer & out) const { writeBinaryLittleEndian(this->size(), out); diff --git a/src/Storages/MergeTree/MarkRange.h b/src/Storages/MergeTree/MarkRange.h index f36d5d898254..1d9d0a1e27e8 100644 --- a/src/Storages/MergeTree/MarkRange.h +++ b/src/Storages/MergeTree/MarkRange.h @@ -36,7 +36,6 @@ struct MarkRanges : public std::deque using std::deque::deque; /// NOLINT(modernize-type-traits) size_t getNumberOfMarks() const; - bool isOneRangeForWholePart(size_t num_marks_in_part) const; void serialize(WriteBuffer & out) const; String describe() const; diff --git a/src/Storages/MergeTree/MergeTreeIndexReader.cpp b/src/Storages/MergeTree/MergeTreeIndexReader.cpp index e7ae1fc5c13d..6012994b46d7 100644 --- a/src/Storages/MergeTree/MergeTreeIndexReader.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexReader.cpp @@ -31,8 +31,6 @@ std::unique_ptr makeIndexReader( load_marks_threadpool, /*num_columns_in_mark=*/ 1); - marks_loader->startAsyncLoad(); - return std::make_unique( part->getDataPartStoragePtr(), index->getFileName(), extension, marks_count, @@ -67,7 +65,6 @@ MergeTreeIndexReader::MergeTreeIndexReader( mark_cache, uncompressed_cache, std::move(settings)); - version = index_format.version; stream->adjustRightMark(getLastMark(all_mark_ranges_)); diff --git a/src/Storages/MergeTree/MergeTreeMarksLoader.cpp b/src/Storages/MergeTree/MergeTreeMarksLoader.cpp index 1e9a320fa953..01ceb845951d 100644 --- 
a/src/Storages/MergeTree/MergeTreeMarksLoader.cpp +++ b/src/Storages/MergeTree/MergeTreeMarksLoader.cpp @@ -64,10 +64,6 @@ MergeTreeMarksLoader::MergeTreeMarksLoader( , read_settings(read_settings_) , num_columns_in_mark(num_columns_in_mark_) , load_marks_threadpool(load_marks_threadpool_) -{ -} - -void MergeTreeMarksLoader::startAsyncLoad() { if (load_marks_threadpool) future = loadMarksAsync(); @@ -106,8 +102,6 @@ MergeTreeMarksGetterPtr MergeTreeMarksLoader::loadMarks() MarkCache::MappedPtr MergeTreeMarksLoader::loadMarksImpl() { - LOG_TEST(getLogger("MergeTreeMarksLoader"), "Loading marks from path {}", mrk_path); - /// Memory for marks must not be accounted as memory usage for query, because they are stored in shared cache. MemoryTrackerBlockerInThread temporarily_disable_memory_tracker; @@ -224,9 +218,7 @@ MarkCache::MappedPtr MergeTreeMarksLoader::loadMarksSync() } } else - { loaded_marks = loadMarksImpl(); - } if (!loaded_marks) { diff --git a/src/Storages/MergeTree/MergeTreeMarksLoader.h b/src/Storages/MergeTree/MergeTreeMarksLoader.h index 2aa4474e1c5d..73dd462f2fa6 100644 --- a/src/Storages/MergeTree/MergeTreeMarksLoader.h +++ b/src/Storages/MergeTree/MergeTreeMarksLoader.h @@ -50,7 +50,6 @@ class MergeTreeMarksLoader ~MergeTreeMarksLoader(); - void startAsyncLoad(); MergeTreeMarksGetterPtr loadMarks(); size_t getNumColumns() const { return num_columns_in_mark; } diff --git a/src/Storages/MergeTree/MergeTreeReaderCompact.cpp b/src/Storages/MergeTree/MergeTreeReaderCompact.cpp index 8810491b62ec..a22bff6b8d22 100644 --- a/src/Storages/MergeTree/MergeTreeReaderCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderCompact.cpp @@ -48,7 +48,6 @@ MergeTreeReaderCompact::MergeTreeReaderCompact( , profile_callback(profile_callback_) , clock_type(clock_type_) { - marks_loader->startAsyncLoad(); } void MergeTreeReaderCompact::fillColumnPositions() diff --git a/src/Storages/MergeTree/MergeTreeReaderStream.cpp b/src/Storages/MergeTree/MergeTreeReaderStream.cpp index 15ef02440cb3..40a16176c69f 100644 --- a/src/Storages/MergeTree/MergeTreeReaderStream.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderStream.cpp @@ -13,7 +13,6 @@ namespace ErrorCodes { extern const int ARGUMENT_OUT_OF_BOUND; extern const int CANNOT_READ_ALL_DATA; - extern const int LOGICAL_ERROR; } MergeTreeReaderStream::MergeTreeReaderStream( @@ -42,17 +41,14 @@ MergeTreeReaderStream::MergeTreeReaderStream( { } -void MergeTreeReaderStream::loadMarks() -{ - if (!marks_getter) - marks_getter = marks_loader->loadMarks(); -} - void MergeTreeReaderStream::init() { if (initialized) return; + initialized = true; + marks_getter = marks_loader->loadMarks(); + /// Compute the size of the buffer. 
auto [max_mark_range_bytes, sum_mark_range_bytes] = estimateMarkRangeBytes(all_mark_ranges); @@ -114,15 +110,11 @@ void MergeTreeReaderStream::init() data_buffer = non_cached_buffer.get(); compressed_data_buffer = non_cached_buffer.get(); } - - initialized = true; } void MergeTreeReaderStream::seekToMarkAndColumn(size_t row_index, size_t column_position) { init(); - loadMarks(); - const auto & mark = marks_getter->getMark(row_index, column_position); try @@ -201,7 +193,7 @@ CompressedReadBufferBase * MergeTreeReaderStream::getCompressedDataBuffer() return compressed_data_buffer; } -size_t MergeTreeReaderStreamSingleColumn::getRightOffset(size_t right_mark) +size_t MergeTreeReaderStreamSingleColumn::getRightOffset(size_t right_mark) const { /// NOTE: if we are reading the whole file, then right_mark == marks_count /// and we will use max_read_buffer_size for buffer size, thus avoiding the need to load marks. @@ -210,8 +202,7 @@ size_t MergeTreeReaderStreamSingleColumn::getRightOffset(size_t right_mark) if (marks_count == 0) return 0; - chassert(right_mark <= marks_count); - loadMarks(); + assert(right_mark <= marks_count); if (right_mark == 0) return marks_getter->getMark(right_mark, 0).offset_in_compressed_file; @@ -290,9 +281,9 @@ size_t MergeTreeReaderStreamSingleColumn::getRightOffset(size_t right_mark) return file_size; } -std::pair MergeTreeReaderStreamSingleColumn::estimateMarkRangeBytes(const MarkRanges & mark_ranges) +std::pair MergeTreeReaderStreamSingleColumn::estimateMarkRangeBytes(const MarkRanges & mark_ranges) const { - loadMarks(); + assert(marks_getter != nullptr); size_t max_range_bytes = 0; size_t sum_range_bytes = 0; @@ -311,34 +302,7 @@ std::pair MergeTreeReaderStreamSingleColumn::estimateMarkRangeBy return {max_range_bytes, sum_range_bytes}; } -size_t MergeTreeReaderStreamSingleColumnWholePart::getRightOffset(size_t right_mark) -{ - if (right_mark != marks_count) - { - throw Exception(ErrorCodes::LOGICAL_ERROR, - "Expected one right mark: {}, got: {}", - marks_count, right_mark); - } - return file_size; -} - -std::pair MergeTreeReaderStreamSingleColumnWholePart::estimateMarkRangeBytes(const MarkRanges & mark_ranges) -{ - if (!mark_ranges.isOneRangeForWholePart(marks_count)) - { - throw Exception(ErrorCodes::LOGICAL_ERROR, - "Expected one mark range that covers the whole part, got: {}", - mark_ranges.describe()); - } - return {file_size, file_size}; -} - -void MergeTreeReaderStreamSingleColumnWholePart::seekToMark(size_t) -{ - throw Exception(ErrorCodes::LOGICAL_ERROR, "MergeTreeReaderStreamSingleColumnWholePart cannot seek to marks"); -} - -size_t MergeTreeReaderStreamMultipleColumns::getRightOffsetOneColumn(size_t right_mark_non_included, size_t column_position) +size_t MergeTreeReaderStreamMultipleColumns::getRightOffsetOneColumn(size_t right_mark_non_included, size_t column_position) const { /// NOTE: if we are reading the whole file, then right_mark == marks_count /// and we will use max_read_buffer_size for buffer size, thus avoiding the need to load marks. 
@@ -347,8 +311,7 @@ size_t MergeTreeReaderStreamMultipleColumns::getRightOffsetOneColumn(size_t righ if (marks_count == 0) return 0; - chassert(right_mark_non_included <= marks_count); - loadMarks(); + assert(right_mark_non_included <= marks_count); if (right_mark_non_included == 0) return marks_getter->getMark(right_mark_non_included, column_position).offset_in_compressed_file; @@ -384,9 +347,9 @@ size_t MergeTreeReaderStreamMultipleColumns::getRightOffsetOneColumn(size_t righ } std::pair -MergeTreeReaderStreamMultipleColumns::estimateMarkRangeBytesOneColumn(const MarkRanges & mark_ranges, size_t column_position) +MergeTreeReaderStreamMultipleColumns::estimateMarkRangeBytesOneColumn(const MarkRanges & mark_ranges, size_t column_position) const { - loadMarks(); + assert(marks_getter != nullptr); /// As a maximal range we return the maximal size of a whole stripe. size_t max_range_bytes = 0; @@ -423,9 +386,8 @@ MergeTreeReaderStreamMultipleColumns::estimateMarkRangeBytesOneColumn(const Mark return {max_range_bytes, sum_range_bytes}; } -MarkInCompressedFile MergeTreeReaderStreamMultipleColumns::getStartOfNextStripeMark(size_t row_index, size_t column_position) +MarkInCompressedFile MergeTreeReaderStreamMultipleColumns::getStartOfNextStripeMark(size_t row_index, size_t column_position) const { - loadMarks(); const auto & current_mark = marks_getter->getMark(row_index, column_position); if (marks_getter->getNumColumns() == 1) @@ -472,27 +434,27 @@ MarkInCompressedFile MergeTreeReaderStreamMultipleColumns::getStartOfNextStripeM return marks_getter->getMark(mark_index + 1, column_position + 1); } -size_t MergeTreeReaderStreamOneOfMultipleColumns::getRightOffset(size_t right_mark_non_included) +size_t MergeTreeReaderStreamOneOfMultipleColumns::getRightOffset(size_t right_mark_non_included) const { return getRightOffsetOneColumn(right_mark_non_included, column_position); } -std::pair MergeTreeReaderStreamOneOfMultipleColumns::estimateMarkRangeBytes(const MarkRanges & mark_ranges) +std::pair MergeTreeReaderStreamOneOfMultipleColumns::estimateMarkRangeBytes(const MarkRanges & mark_ranges) const { return estimateMarkRangeBytesOneColumn(mark_ranges, column_position); } -size_t MergeTreeReaderStreamAllOfMultipleColumns::getRightOffset(size_t right_mark_non_included) +size_t MergeTreeReaderStreamAllOfMultipleColumns::getRightOffset(size_t right_mark_non_included) const { return getRightOffsetOneColumn(right_mark_non_included, marks_loader->getNumColumns() - 1); } -std::pair MergeTreeReaderStreamAllOfMultipleColumns::estimateMarkRangeBytes(const MarkRanges & mark_ranges) +std::pair MergeTreeReaderStreamAllOfMultipleColumns::estimateMarkRangeBytes(const MarkRanges & mark_ranges) const { size_t max_range_bytes = 0; size_t sum_range_bytes = 0; - for (size_t i = 0; i < marks_loader->getNumColumns(); ++i) + for (size_t i = 0; i < marks_getter->getNumColumns(); ++i) { auto [current_max, current_sum] = estimateMarkRangeBytesOneColumn(mark_ranges, i); diff --git a/src/Storages/MergeTree/MergeTreeReaderStream.h b/src/Storages/MergeTree/MergeTreeReaderStream.h index 05341cd8acc4..f3ca6953ceb9 100644 --- a/src/Storages/MergeTree/MergeTreeReaderStream.h +++ b/src/Storages/MergeTree/MergeTreeReaderStream.h @@ -40,7 +40,6 @@ class MergeTreeReaderStream /// Seeks to exact mark in file. void seekToMarkAndColumn(size_t row_index, size_t column_position); - /// Seeks to the start of the file. 
void seekToStart(); /** @@ -54,11 +53,11 @@ class MergeTreeReaderStream private: /// Returns offset in file up to which it's needed to read file to read all rows up to @right_mark mark. - virtual size_t getRightOffset(size_t right_mark) = 0; + virtual size_t getRightOffset(size_t right_mark) const = 0; /// Returns estimated max amount of bytes to read among mark ranges (which is used as size for read buffer) /// and total amount of bytes to read in all mark ranges. - virtual std::pair estimateMarkRangeBytes(const MarkRanges & mark_ranges) = 0; + virtual std::pair estimateMarkRangeBytes(const MarkRanges & mark_ranges) const = 0; const ReadBufferFromFileBase::ProfileCallback profile_callback; const clockid_t clock_type; @@ -81,7 +80,6 @@ class MergeTreeReaderStream protected: void init(); - void loadMarks(); const MergeTreeReaderSettings settings; const size_t marks_count; @@ -102,25 +100,11 @@ class MergeTreeReaderStreamSingleColumn : public MergeTreeReaderStream { } - size_t getRightOffset(size_t right_mark_non_included) override; - std::pair estimateMarkRangeBytes(const MarkRanges & mark_ranges) override; + size_t getRightOffset(size_t right_mark_non_included) const override; + std::pair estimateMarkRangeBytes(const MarkRanges & mark_ranges) const override; void seekToMark(size_t row_index) override { seekToMarkAndColumn(row_index, 0); } }; -class MergeTreeReaderStreamSingleColumnWholePart : public MergeTreeReaderStream -{ -public: - template - explicit MergeTreeReaderStreamSingleColumnWholePart(Args &&... args) - : MergeTreeReaderStream{std::forward(args)...} - { - } - - size_t getRightOffset(size_t right_mark_non_included) override; - std::pair estimateMarkRangeBytes(const MarkRanges & mark_ranges) override; - void seekToMark(size_t row_index) override; -}; - /// Base class for reading from file that contains multiple columns. /// It is used to read from compact parts. /// See more details about data layout in MergeTreeDataPartCompact.h. 
@@ -134,9 +118,9 @@ class MergeTreeReaderStreamMultipleColumns : public MergeTreeReaderStream } protected: - size_t getRightOffsetOneColumn(size_t right_mark_non_included, size_t column_position); - std::pair estimateMarkRangeBytesOneColumn(const MarkRanges & mark_ranges, size_t column_position); - MarkInCompressedFile getStartOfNextStripeMark(size_t row_index, size_t column_position); + size_t getRightOffsetOneColumn(size_t right_mark_non_included, size_t column_position) const; + std::pair estimateMarkRangeBytesOneColumn(const MarkRanges & mark_ranges, size_t column_position) const; + MarkInCompressedFile getStartOfNextStripeMark(size_t row_index, size_t column_position) const; }; /// Class for reading a single column from file that contains multiple columns @@ -151,8 +135,8 @@ class MergeTreeReaderStreamOneOfMultipleColumns : public MergeTreeReaderStreamMu { } - size_t getRightOffset(size_t right_mark_non_included) override; - std::pair estimateMarkRangeBytes(const MarkRanges & mark_ranges) override; + size_t getRightOffset(size_t right_mark_non_included) const override; + std::pair estimateMarkRangeBytes(const MarkRanges & mark_ranges) const override; void seekToMark(size_t row_index) override { seekToMarkAndColumn(row_index, column_position); } private: @@ -170,8 +154,8 @@ class MergeTreeReaderStreamAllOfMultipleColumns : public MergeTreeReaderStreamMu { } - size_t getRightOffset(size_t right_mark_non_included) override; - std::pair estimateMarkRangeBytes(const MarkRanges & mark_ranges) override; + size_t getRightOffset(size_t right_mark_non_included) const override; + std::pair estimateMarkRangeBytes(const MarkRanges & mark_ranges) const override; void seekToMark(size_t row_index) override { seekToMarkAndColumn(row_index, 0); } }; diff --git a/src/Storages/MergeTree/MergeTreeReaderWide.cpp b/src/Storages/MergeTree/MergeTreeReaderWide.cpp index d398668d5c83..394a22835f1f 100644 --- a/src/Storages/MergeTree/MergeTreeReaderWide.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderWide.cpp @@ -43,7 +43,6 @@ MergeTreeReaderWide::MergeTreeReaderWide( mark_ranges_, settings_, avg_value_size_hints_) - , read_whole_part(all_mark_ranges.isOneRangeForWholePart(data_part_info_for_read->getMarksCount())) { try { @@ -228,13 +227,12 @@ void MergeTreeReaderWide::addStreams( auto context = data_part_info_for_read->getContext(); auto * load_marks_threadpool = settings.read_settings.load_marks_asynchronously ? 
&context->getLoadMarksThreadpool() : nullptr; - size_t num_marks_in_part = data_part_info_for_read->getMarksCount(); auto marks_loader = std::make_shared( data_part_info_for_read, mark_cache, data_part_info_for_read->getIndexGranularityInfo().getMarksFilePath(*stream_name), - num_marks_in_part, + data_part_info_for_read->getMarksCount(), data_part_info_for_read->getIndexGranularityInfo(), settings.save_marks_in_cache, settings.read_settings, @@ -245,24 +243,11 @@ void MergeTreeReaderWide::addStreams( auto stream_settings = settings; stream_settings.is_low_cardinality_dictionary = substream_path.size() > 1 && substream_path[substream_path.size() - 2].type == ISerialization::Substream::Type::DictionaryKeys; - auto create_stream = [&]() - { - return std::make_unique( - data_part_info_for_read->getDataPartStorage(), *stream_name, DATA_FILE_EXTENSION, - num_marks_in_part, all_mark_ranges, stream_settings, - uncompressed_cache, data_part_info_for_read->getFileSizeOrZero(*stream_name + DATA_FILE_EXTENSION), - std::move(marks_loader), profile_callback, clock_type); - }; - - if (read_whole_part) - { - streams.emplace(*stream_name, create_stream.operator()()); - } - else - { - marks_loader->startAsyncLoad(); - streams.emplace(*stream_name, create_stream.operator()()); - } + streams.emplace(*stream_name, std::make_unique( + data_part_info_for_read->getDataPartStorage(), *stream_name, DATA_FILE_EXTENSION, + data_part_info_for_read->getMarksCount(), all_mark_ranges, stream_settings, + uncompressed_cache, data_part_info_for_read->getFileSizeOrZero(*stream_name + DATA_FILE_EXTENSION), + std::move(marks_loader), profile_callback, clock_type)); }; serialization->enumerateStreams(callback); @@ -340,8 +325,7 @@ void MergeTreeReaderWide::prefetchForColumn( if (stream_name && !prefetched_streams.contains(*stream_name)) { - bool seek_to_mark = !continue_reading && !read_whole_part; - + bool seek_to_mark = !continue_reading; if (ReadBuffer * buf = getStream(false, substream_path, data_part_info_for_read->getChecksums(), streams, name_and_type, from_mark, seek_to_mark, current_task_last_mark, cache)) { buf->prefetch(priority); @@ -365,7 +349,7 @@ void MergeTreeReaderWide::readData( deserialize_settings.getter = [&](const ISerialization::SubstreamPath & substream_path) { - bool seek_to_mark = !was_prefetched && !continue_reading && !read_whole_part; + bool seek_to_mark = !was_prefetched && !continue_reading; return getStream( /* seek_to_start = */false, substream_path, diff --git a/src/Storages/MergeTree/MergeTreeReaderWide.h b/src/Storages/MergeTree/MergeTreeReaderWide.h index 7ffe565d2623..a9a5526dd65b 100644 --- a/src/Storages/MergeTree/MergeTreeReaderWide.h +++ b/src/Storages/MergeTree/MergeTreeReaderWide.h @@ -73,7 +73,6 @@ class MergeTreeReaderWide : public IMergeTreeReader std::unordered_map caches; std::unordered_set prefetched_streams; ssize_t prefetched_from_mark = -1; - bool read_whole_part = false; }; } diff --git a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp index 47661a3ff93f..c022cfe38617 100644 --- a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp +++ b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp @@ -184,12 +184,12 @@ MergeTreeSequentialSource::MergeTreeSequentialSource( storage_snapshot, *mark_ranges, /*virtual_fields=*/ {}, - /*uncompressed_cache=*/ {}, + /*uncompressed_cache=*/{}, mark_cache.get(), alter_conversions, reader_settings, - /*avg_value_size_hints=*/ {}, - /*profile_callback=*/ {}); + {}, + {}); } static 
void fillBlockNumberColumns( @@ -230,7 +230,6 @@ try const auto & header = getPort().getHeader(); /// Part level is useful for next step for merging non-merge tree table bool add_part_level = storage.merging_params.mode != MergeTreeData::MergingParams::Ordinary; - size_t num_marks_in_part = data_part->getMarksCount(); if (!isCancelled() && current_row < data_part->rows_count) { @@ -239,7 +238,7 @@ try const auto & sample = reader->getColumns(); Columns columns(sample.size()); - size_t rows_read = reader->readRows(current_mark, num_marks_in_part, continue_reading, rows_to_read, columns); + size_t rows_read = reader->readRows(current_mark, data_part->getMarksCount(), continue_reading, rows_to_read, columns); if (rows_read) { diff --git a/tests/integration/test_merge_tree_load_marks/__init__.py b/tests/integration/test_merge_tree_load_marks/__init__.py deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/tests/integration/test_merge_tree_load_marks/configs/config.xml b/tests/integration/test_merge_tree_load_marks/configs/config.xml deleted file mode 100644 index 1c9ee8d698f1..000000000000 --- a/tests/integration/test_merge_tree_load_marks/configs/config.xml +++ /dev/null @@ -1,12 +0,0 @@ - - - system -
text_log
- 7500 - 1048576 - 8192 - 524288 - false - test - - diff --git a/tests/integration/test_merge_tree_load_marks/test.py b/tests/integration/test_merge_tree_load_marks/test.py deleted file mode 100644 index b066b2a6ec0d..000000000000 --- a/tests/integration/test_merge_tree_load_marks/test.py +++ /dev/null @@ -1,62 +0,0 @@ -import pytest -from helpers.cluster import ClickHouseCluster - -cluster = ClickHouseCluster(__file__) - -node = cluster.add_instance( - "node", - main_configs=["configs/config.xml"], -) - - -@pytest.fixture(scope="module") -def started_cluster(): - try: - cluster.start() - yield cluster - finally: - cluster.shutdown() - - -# This test is bad and it should be a functional test but S3 metrics -# are accounted incorrectly for merges in part_log and query_log. -# Also we have text_log with level 'trace' in functional tests -# but this test requeires text_log with level 'test'. - - -@pytest.mark.parametrize("min_bytes_for_wide_part", [0, 1000000000]) -def test_merge_load_marks(started_cluster, min_bytes_for_wide_part): - node.query( - f""" - DROP TABLE IF EXISTS t_load_marks; - - CREATE TABLE t_load_marks (a UInt64, b UInt64) - ENGINE = MergeTree ORDER BY a - SETTINGS min_bytes_for_wide_part = {min_bytes_for_wide_part}; - - INSERT INTO t_load_marks SELECT number, number FROM numbers(1000); - INSERT INTO t_load_marks SELECT number, number FROM numbers(1000); - - OPTIMIZE TABLE t_load_marks FINAL; - SYSTEM FLUSH LOGS; - """ - ) - - uuid = node.query( - "SELECT uuid FROM system.tables WHERE table = 't_prewarm_merge'" - ).strip() - - result = node.query( - f""" - SELECT count() - FROM system.text_log - WHERE (query_id LIKE '%{uuid}::all_1_2_1%') AND (message LIKE '%Loading marks%') - """ - ).strip() - - result = int(result) - - is_wide = min_bytes_for_wide_part == 0 - not_loaded = result == 0 - - assert is_wide == not_loaded diff --git a/tests/queries/0_stateless/02532_send_logs_level_test.reference b/tests/queries/0_stateless/02532_send_logs_level_test.reference index 7e51b888d9c7..dbd49cfc0a41 100644 --- a/tests/queries/0_stateless/02532_send_logs_level_test.reference +++ b/tests/queries/0_stateless/02532_send_logs_level_test.reference @@ -1,3 +1,2 @@ - MergeTreeMarksLoader: Loading marks from path data.cmrk3 MergeTreeRangeReader: First reader returned: num_rows: 1, columns: 1, total_rows_per_granule: 1, no filter, column[0]: Int32(size = 1), requested columns: key MergeTreeRangeReader: read() returned num_rows: 1, columns: 1, total_rows_per_granule: 1, no filter, column[0]: Int32(size = 1), sample block key diff --git a/tests/queries/0_stateless/02532_send_logs_level_test.sh b/tests/queries/0_stateless/02532_send_logs_level_test.sh index 4afc6d4496bd..f65d8705569b 100755 --- a/tests/queries/0_stateless/02532_send_logs_level_test.sh +++ b/tests/queries/0_stateless/02532_send_logs_level_test.sh @@ -9,7 +9,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) $CLICKHOUSE_CLIENT -nm -q " drop table if exists data; - create table data (key Int) engine=MergeTree order by tuple() settings min_bytes_for_wide_part = '1G', compress_marks = 1; + create table data (key Int) engine=MergeTree order by tuple(); insert into data values (1); " From 361d73f7a4baa2df437c524964483a886465f140 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Fri, 12 Apr 2024 00:07:53 +0200 Subject: [PATCH 75/90] Try to fix Bugfix validation job --- docker/test/stateless/run.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/docker/test/stateless/run.sh 
b/docker/test/stateless/run.sh index 271f30d187b5..898d82655004 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -16,8 +16,9 @@ ln -snf "/usr/share/zoneinfo/$TZ" /etc/localtime && echo "$TZ" > /etc/timezone dpkg -i package_folder/clickhouse-common-static_*.deb dpkg -i package_folder/clickhouse-common-static-dbg_*.deb -dpkg -i package_folder/clickhouse-odbc-bridge_*.deb -dpkg -i package_folder/clickhouse-library-bridge_*.deb +# Accept failure in the next 2 commands until 23.4 is released (for compatibility and Bugfix validation run) +dpkg -i package_folder/clickhouse-odbc-bridge_*.deb || true +dpkg -i package_folder/clickhouse-library-bridge_*.deb || true dpkg -i package_folder/clickhouse-server_*.deb dpkg -i package_folder/clickhouse-client_*.deb From 9664fb92499f3f4dfc8591baa98b70c007191825 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 12 Apr 2024 00:44:26 +0200 Subject: [PATCH 76/90] Highlight only when necessary --- src/Client/ClientBaseHelpers.cpp | 1 + src/Parsers/IParser.cpp | 3 +++ src/Parsers/IParser.h | 1 + 3 files changed, 5 insertions(+) diff --git a/src/Client/ClientBaseHelpers.cpp b/src/Client/ClientBaseHelpers.cpp index b1d29b34ffc6..8310aa67c226 100644 --- a/src/Client/ClientBaseHelpers.cpp +++ b/src/Client/ClientBaseHelpers.cpp @@ -128,6 +128,7 @@ void highlight(const String & query, std::vector & colors Tokens tokens(begin, end, 1000, true); IParser::Pos token_iterator(tokens, static_cast(1000), static_cast(10000)); Expected expected; + expected.enable_highlighting = true; /// We don't do highlighting for foreign dialects, such as PRQL and Kusto. /// Only normal ClickHouse SQL queries are highlighted. diff --git a/src/Parsers/IParser.cpp b/src/Parsers/IParser.cpp index eb4ddfa01d24..ddd210b01ece 100644 --- a/src/Parsers/IParser.cpp +++ b/src/Parsers/IParser.cpp @@ -44,6 +44,9 @@ static bool intersects(T a_begin, T a_end, T b_begin, T b_end) void Expected::highlight(HighlightedRange range) { + if (!enable_highlighting) + return; + auto it = highlights.lower_bound(range); while (it != highlights.end() && range.begin < it->end) { diff --git a/src/Parsers/IParser.h b/src/Parsers/IParser.h index f8146c0a4f6d..0ae862fee756 100644 --- a/src/Parsers/IParser.h +++ b/src/Parsers/IParser.h @@ -56,6 +56,7 @@ struct Expected absl::InlinedVector variants; const char * max_parsed_pos = nullptr; + bool enable_highlighting = false; std::set highlights; /// 'description' should be statically allocated string. 
From 20db642e7194cd985f9b7340c9cebf01fd0f0cff Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 12 Apr 2024 00:46:55 +0200 Subject: [PATCH 77/90] Fix a test --- .../0_stateless/01676_clickhouse_client_autocomplete.python | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.python b/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.python index 13160d4e561c..c62d2e9d98a2 100644 --- a/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.python +++ b/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.python @@ -128,6 +128,7 @@ if __name__ == "__main__": clickhouse_client = os.environ["CLICKHOUSE_CLIENT"] args = shlex.split(clickhouse_client) args.append("--wait_for_suggestions_to_load") + args.append("--highlight=0") [ run_with_timeout( test_completion, [args[0], args, comp_word], COMPLETION_TIMEOUT_SECONDS @@ -139,6 +140,7 @@ if __name__ == "__main__": clickhouse_local = os.environ["CLICKHOUSE_LOCAL"] args = shlex.split(clickhouse_local) args.append("--wait_for_suggestions_to_load") + args.append("--highlight=0") [ run_with_timeout( test_completion, [args[0], args, comp_word], COMPLETION_TIMEOUT_SECONDS From d903e189c43380e6e1956b9787d9dd2f3189c9a1 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 12 Apr 2024 01:01:04 +0200 Subject: [PATCH 78/90] Update docker/test/stateless/run.sh --- docker/test/stateless/run.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index 898d82655004..9497b7ecc197 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -16,7 +16,7 @@ ln -snf "/usr/share/zoneinfo/$TZ" /etc/localtime && echo "$TZ" > /etc/timezone dpkg -i package_folder/clickhouse-common-static_*.deb dpkg -i package_folder/clickhouse-common-static-dbg_*.deb -# Accept failure in the next 2 commands until 23.4 is released (for compatibility and Bugfix validation run) +# Accept failure in the next two commands until 24.4 is released (for compatibility and Bugfix validation run) dpkg -i package_folder/clickhouse-odbc-bridge_*.deb || true dpkg -i package_folder/clickhouse-library-bridge_*.deb || true dpkg -i package_folder/clickhouse-server_*.deb From 13283abce6e1c25e16bcc684ad3ffee9cd419251 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 12 Apr 2024 03:47:29 +0200 Subject: [PATCH 79/90] Apply review comments --- src/Coordination/KeeperAsynchronousMetrics.cpp | 2 +- src/Coordination/KeeperAsynchronousMetrics.h | 2 +- src/Interpreters/ServerAsynchronousMetrics.cpp | 4 ++-- src/Interpreters/ServerAsynchronousMetrics.h | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/Coordination/KeeperAsynchronousMetrics.cpp b/src/Coordination/KeeperAsynchronousMetrics.cpp index a5b4bc4af97c..86166ffe31bd 100644 --- a/src/Coordination/KeeperAsynchronousMetrics.cpp +++ b/src/Coordination/KeeperAsynchronousMetrics.cpp @@ -114,7 +114,7 @@ void updateKeeperInformation(KeeperDispatcher & keeper_dispatcher, AsynchronousM } KeeperAsynchronousMetrics::KeeperAsynchronousMetrics( - ContextPtr context_, int update_period_seconds, const ProtocolServerMetricsFunc & protocol_server_metrics_func_) + ContextPtr context_, unsigned update_period_seconds, const ProtocolServerMetricsFunc & protocol_server_metrics_func_) : AsynchronousMetrics(update_period_seconds, protocol_server_metrics_func_), context(std::move(context_)) { } diff --git a/src/Coordination/KeeperAsynchronousMetrics.h 
b/src/Coordination/KeeperAsynchronousMetrics.h index 33e8d6818d76..ec0e60cbb6ec 100644 --- a/src/Coordination/KeeperAsynchronousMetrics.h +++ b/src/Coordination/KeeperAsynchronousMetrics.h @@ -13,7 +13,7 @@ class KeeperAsynchronousMetrics : public AsynchronousMetrics { public: KeeperAsynchronousMetrics( - ContextPtr context_, int update_period_seconds, const ProtocolServerMetricsFunc & protocol_server_metrics_func_); + ContextPtr context_, unsigned update_period_seconds, const ProtocolServerMetricsFunc & protocol_server_metrics_func_); ~KeeperAsynchronousMetrics() override; private: diff --git a/src/Interpreters/ServerAsynchronousMetrics.cpp b/src/Interpreters/ServerAsynchronousMetrics.cpp index 7703a3521303..4f8b03a5eaa9 100644 --- a/src/Interpreters/ServerAsynchronousMetrics.cpp +++ b/src/Interpreters/ServerAsynchronousMetrics.cpp @@ -53,8 +53,8 @@ void calculateMaxAndSum(Max & max, Sum & sum, T x) ServerAsynchronousMetrics::ServerAsynchronousMetrics( ContextPtr global_context_, - int update_period_seconds, - int heavy_metrics_update_period_seconds, + unsigned update_period_seconds, + unsigned heavy_metrics_update_period_seconds, const ProtocolServerMetricsFunc & protocol_server_metrics_func_) : WithContext(global_context_) , AsynchronousMetrics(update_period_seconds, protocol_server_metrics_func_) diff --git a/src/Interpreters/ServerAsynchronousMetrics.h b/src/Interpreters/ServerAsynchronousMetrics.h index b0cf8efbfd7d..e3c83dc748e4 100644 --- a/src/Interpreters/ServerAsynchronousMetrics.h +++ b/src/Interpreters/ServerAsynchronousMetrics.h @@ -12,8 +12,8 @@ class ServerAsynchronousMetrics : WithContext, public AsynchronousMetrics public: ServerAsynchronousMetrics( ContextPtr global_context_, - int update_period_seconds, - int heavy_metrics_update_period_seconds, + unsigned update_period_seconds, + unsigned heavy_metrics_update_period_seconds, const ProtocolServerMetricsFunc & protocol_server_metrics_func_); ~ServerAsynchronousMetrics() override; From 7cd3c86d6ed25f56f614fc95a20c430fb2e1f2bf Mon Sep 17 00:00:00 2001 From: Blargian Date: Fri, 12 Apr 2024 12:36:12 +0200 Subject: [PATCH 80/90] Add missing leftXYZ and rightXYZ functions --- .../functions/string-functions.md | 188 ++++++++++++++++++ 1 file changed, 188 insertions(+) diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md index d4df3e0479a8..68b139a892c0 100644 --- a/docs/en/sql-reference/functions/string-functions.md +++ b/docs/en/sql-reference/functions/string-functions.md @@ -102,6 +102,100 @@ Alias: - `CHAR_LENGTH` - `CHARACTER_LENGTH` +## left + +Returns the substring of a string `s` which starts at the specified byte index `offset` from the left. + +**Syntax** + +``` sql +left(s, offset) +``` + +**Parameters** + +- `s`: The string to calculate a substring from. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). +- `offset`: The number of bytes of the offset. [UInt*](../data-types/int-uint). + +**Returned value** + +- For positive `offset`: A substring of `s` with `offset` many bytes, starting from the left of the string. +- For negative `offset`: A substring of `s` with `length(s) - |offset|` bytes, starting from the left of the string. +- An empty string if `length` is 0. 
+ +**Example** + +Query: + +```sql +SELECT left('Hello', 3); +``` + +Result: + +```response +Hel +``` + +Query: + +```sql +SELECT left('Hello', -3); +``` + +Result: + +```response +He +``` + +## leftUTF8 + +Returns the substring of a UTF-8 encoded string `s` which starts at the specified byte index `offset` from the left. + +**Syntax** + +``` sql +leftUTF8(s, offset) +``` + +**Parameters** + +- `s`: The UTF-8 encoded string to calculate a substring from. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). +- `offset`: The number of bytes of the offset. [UInt*](../data-types/int-uint). + +**Returned value** + +- For positive `offset`: A substring of `s` with `offset` many bytes, starting from the left of the string. +- For negative `offset`: A substring of `s` with `length(s) - |offset|` bytes, starting from the left of the string. +- An empty string if `length` is 0. + +**Example** + +Query: + +```sql +SELECT leftUTF8('Привет', 4); +``` + +Result: + +```response +Прив +``` + +Query: + +```sql +SELECT leftUTF8('Привет', -4); +``` + +Result: + +```response +Пр +``` + ## leftPad Pads a string from the left with spaces or with a specified string (multiple times, if needed) until the resulting string reaches the specified `length`. @@ -176,6 +270,100 @@ Result: └─────────────────────────────┴────────────────────────┘ ``` +## right + +Returns the substring of a string `s` which starts at the specified byte index `offset` from the right. + +**Syntax** + +``` sql +right(s, offset) +``` + +**Parameters** + +- `s`: The string to calculate a substring from. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). +- `offset`: The number of bytes of the offset. [UInt*](../data-types/int-uint). + +**Returned value** + +- For positive `offset`: A substring of `s` with `offset` many bytes, starting from the right of the string. +- For negative `offset`: A substring of `s` with `length(s) - |offset|` bytes, starting from the right of the string. +- An empty string if `length` is 0. + +**Example** + +Query: + +```sql +SELECT right('Hello', 3); +``` + +Result: + +```response +llo +``` + +Query: + +```sql +SELECT right('Hello', -3); +``` + +Result: + +```response +lo +``` + +## rightUTF8 + +Returns the substring of a UTF-8 encoded string `s` which starts at the specified byte index `offset` from the right. + +**Syntax** + +``` sql +rightUTF8(s, offset) +``` + +**Parameters** + +- `s`: The UTF-8 encoded string to calculate a substring from. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). +- `offset`: The number of bytes of the offset. [UInt*](../data-types/int-uint). + +**Returned value** + +- For positive `offset`: A substring of `s` with `offset` many bytes, starting from the right of the string. +- For negative `offset`: A substring of `s` with `length(s) - |offset|` bytes, starting from the right of the string. +- An empty string if `length` is 0. + +**Example** + +Query: + +```sql +SELECT rightUTF8('Привет', 4); +``` + +Result: + +```response +ивет +``` + +Query: + +```sql +SELECT rightUTF8('Привет', -4); +``` + +Result: + +```response +ет +``` + ## rightPad Pads a string from the right with spaces or with a specified string (multiple times, if needed) until the resulting string reaches the specified `length`. 
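A small side-by-side illustration of the byte-based versus UTF-8-aware variants documented in the patch above. This sketch is not part of the patch itself; it assumes the behaviour shown in the examples of the added docs, namely that `left`/`right` count bytes while `leftUTF8`/`rightUTF8` count characters:

```sql
-- 'Привет' is 6 Cyrillic characters, i.e. 12 bytes in UTF-8.
SELECT
    left('Привет', 4)     AS byte_based,  -- expected: 'Пр'   (4 bytes = 2 characters)
    leftUTF8('Привет', 4) AS utf8_aware;  -- expected: 'Прив' (4 characters)
```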
From b2a83d54f328f7c81405bfd67275af378fa5a680 Mon Sep 17 00:00:00 2001 From: Blargian Date: Fri, 12 Apr 2024 12:41:45 +0200 Subject: [PATCH 81/90] Make descriptions less ambiguous --- docs/en/sql-reference/functions/string-functions.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md index 68b139a892c0..92f0889563ba 100644 --- a/docs/en/sql-reference/functions/string-functions.md +++ b/docs/en/sql-reference/functions/string-functions.md @@ -104,7 +104,7 @@ Alias: ## left -Returns the substring of a string `s` which starts at the specified byte index `offset` from the left. +Returns a substring of string `s` with a specified `offset` starting from the left. **Syntax** @@ -151,7 +151,7 @@ He ## leftUTF8 -Returns the substring of a UTF-8 encoded string `s` which starts at the specified byte index `offset` from the left. +Returns a substring of a UTF-8 encoded string `s` with a specified `offset` starting from the left. **Syntax** @@ -272,7 +272,7 @@ Result: ## right -Returns the substring of a string `s` which starts at the specified byte index `offset` from the right. +Returns a substring of string `s` with a specified `offset` starting from the right. **Syntax** @@ -319,7 +319,7 @@ lo ## rightUTF8 -Returns the substring of a UTF-8 encoded string `s` which starts at the specified byte index `offset` from the right. +Returns a substring of UTF-8 encoded string `s` with a specified `offset` starting from the right. **Syntax** From 0d2e0e3131b9d97f03a13d9e7bb2f7208262d410 Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Fri, 12 Apr 2024 11:11:49 +0000 Subject: [PATCH 82/90] CI: MQ sync status check fix --- tests/ci/sync_pr.py | 31 ++++++++++++++++++++++++++++--- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/tests/ci/sync_pr.py b/tests/ci/sync_pr.py index acff7ba541bb..7240a07fb6e3 100644 --- a/tests/ci/sync_pr.py +++ b/tests/ci/sync_pr.py @@ -4,6 +4,7 @@ import argparse import sys +import time from get_robot_token import get_best_robot_token from pr_info import PRInfo @@ -53,12 +54,36 @@ def merge_sync_pr(gh, sync_pr): def set_sync_status(gh, pr_info, sync_pr): - if not sync_pr or not sync_pr.mergeable: + if not sync_pr: post_commit_status( - get_commit(gh, pr_info.sha), FAILURE, "", "Sync PR failure", "A Sync" + get_commit(gh, pr_info.sha), FAILURE, "", "Sync PR not found", "A Sync" ) - else: + return + + retries = 0 + while sync_pr.mergeable_state == "unknown" and retries < 3: + retries += 1 + print(f"Unknown status. 
Trying to fetch again [{retries}/3]")
+        time.sleep(5)
+        sync_pr = gh.get_pulls_from_search(
+            query=f"head:sync-upstream/pr/{sync_pr.number} org:ClickHouse type:pr",
+            repo="ClickHouse/clickhouse-private",
+        )
+
+    if sync_pr.mergeable_state == "clean":
+        print(f"Sync PR [{sync_pr.number}] is clean")
         post_commit_status(get_commit(gh, pr_info.sha), SUCCESS, "", "", "A Sync")
+    else:
+        print(
+            f"Sync PR [{sync_pr}] is not mergeable, state [{sync_pr.mergeable_state}]"
+        )
+        post_commit_status(
+            get_commit(gh, pr_info.sha),
+            FAILURE,
+            "",
+            f"state: {sync_pr.mergeable_state}",
+            "A Sync",
+        )


 def main():

From 6fff5723b78b972687a5c64c10f7658617317a9c Mon Sep 17 00:00:00 2001
From: Shaun Struwig <41984034+Blargian@users.noreply.github.com>
Date: Fri, 12 Apr 2024 13:31:34 +0200
Subject: [PATCH 83/90] Add leftUTF and rightUTF to aspell-dict

---
 utils/check-style/aspell-ignore/en/aspell-dict.txt | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt
index 697f93f78c9b..5d1d2b650fc8 100644
--- a/utils/check-style/aspell-ignore/en/aspell-dict.txt
+++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt
@@ -1833,6 +1833,7 @@ laravel
 largestTriangleThreeBuckets
 latencies
 ldap
+leftUTF
 leftPad
 leftPadUTF
 lemmatization
@@ -2306,6 +2307,7 @@ retriable
 reverseUTF
 rightPad
 rightPadUTF
+rightUTF
 risc
 riscv
 ro

From 3f10530c2871c1b8a8d2ee9001c76560f93e92ac Mon Sep 17 00:00:00 2001
From: Max Kainov
Date: Fri, 12 Apr 2024 11:23:43 +0000
Subject: [PATCH 84/90] remove A sync check from MQ for a while

---
 tests/ci/sync_pr.py | 43 ++++++++++++++++++++++---------------------
 1 file changed, 22 insertions(+), 21 deletions(-)

diff --git a/tests/ci/sync_pr.py b/tests/ci/sync_pr.py
index 7240a07fb6e3..0e1ab2994b9a 100644
--- a/tests/ci/sync_pr.py
+++ b/tests/ci/sync_pr.py
@@ -4,13 +4,12 @@
 import argparse
 import sys
-import time

 from get_robot_token import get_best_robot_token
 from pr_info import PRInfo
 from github_helper import GitHub
 from commit_status_helper import get_commit, post_commit_status
-from report import FAILURE, SUCCESS
+from report import SUCCESS


 def parse_args() -> argparse.Namespace:
@@ -54,21 +53,23 @@ def merge_sync_pr(gh, sync_pr):


 def set_sync_status(gh, pr_info, sync_pr):
+    # FIXME: uncomment posting red Sync status to prohibit merge in MQ if PR state fetching works good
     if not sync_pr:
-        post_commit_status(
-            get_commit(gh, pr_info.sha), FAILURE, "", "Sync PR not found", "A Sync"
-        )
+        # post_commit_status(
+        #     get_commit(gh, pr_info.sha), FAILURE, "", "Sync PR not found", "A Sync"
+        # )
         return

-    retries = 0
-    while sync_pr.mergeable_state == "unknown" and retries < 3:
-        retries += 1
-        print(f"Unknown status. Trying to fetch again [{retries}/3]")
-        time.sleep(5)
-        sync_pr = gh.get_pulls_from_search(
-            query=f"head:sync-upstream/pr/{sync_pr.number} org:ClickHouse type:pr",
-            repo="ClickHouse/clickhouse-private",
-        )
+    # FIXME: fetch sync pr in a proper way
+    # retries = 0
+    # while sync_pr.mergeable_state == "unknown" and retries < 3:
+    #     retries += 1
+    #     print(f"Unknown status. Trying to fetch again [{retries}/3]")
+    #     time.sleep(5)
+    #     sync_pr = gh.get_pulls_from_search(
+    #         query=f"head:sync-upstream/pr/{sync_pr.number} org:ClickHouse type:pr",
+    #         repo="ClickHouse/clickhouse-private",
+    #     )

     if sync_pr.mergeable_state == "clean":
         print(f"Sync PR [{sync_pr.number}] is clean")
@@ -77,13 +78,13 @@ def set_sync_status(gh, pr_info, sync_pr):
         print(
             f"Sync PR [{sync_pr}] is not mergeable, state [{sync_pr.mergeable_state}]"
         )
-        post_commit_status(
-            get_commit(gh, pr_info.sha),
-            FAILURE,
-            "",
-            f"state: {sync_pr.mergeable_state}",
-            "A Sync",
-        )
+        # post_commit_status(
+        #     get_commit(gh, pr_info.sha),
+        #     FAILURE,
+        #     "",
+        #     f"state: {sync_pr.mergeable_state}",
+        #     "A Sync",
+        # )


 def main():

From 2d6c51578a092c1b97d95bdbf3805950f50b7234 Mon Sep 17 00:00:00 2001
From: peter279k
Date: Fri, 12 Apr 2024 19:47:38 +0800
Subject: [PATCH 85/90] Add truncate and trunc functions usage

---
 .../functions/rounding-functions.md | 31 +++++++++++++++++++
 1 file changed, 31 insertions(+)

diff --git a/docs/en/sql-reference/functions/rounding-functions.md b/docs/en/sql-reference/functions/rounding-functions.md
index 3ede66cf3166..74b68a038204 100644
--- a/docs/en/sql-reference/functions/rounding-functions.md
+++ b/docs/en/sql-reference/functions/rounding-functions.md
@@ -26,6 +26,37 @@ Returns the smallest round number that is greater than or equal to `x`. In every

 Returns the round number with largest absolute value that has an absolute value less than or equal to `x`‘s. In every other way, it is the same as the ’floor’ function (see above).

+**Syntax**
+
+```sql
+trunc(input, precision)
+```
+
+Alias: `truncate`.
+
+**Parameters**
+
+- `input`: A float type [Float](/docs/en/sql-reference/data-types/float.md).
+- `precision`: A decimal type [Decimal](/docs/en/sql-reference/data-types/decimal.md).
+
+**Returned value**
+
+- A [Float64](/docs/en/sql-reference/data-types/float.md) value.
+
+**Example**
+
+Query:
+
+```sql
+SELECT trunc(123.45, 1) as res;
+```
+
+```response
+┌───res─┐
+│ 123.4 │
+└───────┘
+```
+
 ## round(x\[, N\])

 Rounds a value to a specified number of decimal places.

From 85cdecb12328860aea3a3bd14013657354027a8f Mon Sep 17 00:00:00 2001
From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com>
Date: Fri, 12 Apr 2024 13:57:42 +0200
Subject: [PATCH 86/90] Add a more illustrative example

---
 docs/en/sql-reference/functions/rounding-functions.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/en/sql-reference/functions/rounding-functions.md b/docs/en/sql-reference/functions/rounding-functions.md
index 74b68a038204..1ffe7807e4f4 100644
--- a/docs/en/sql-reference/functions/rounding-functions.md
+++ b/docs/en/sql-reference/functions/rounding-functions.md
@@ -48,7 +48,7 @@ Alias: `truncate`.
 Query:

 ```sql
-SELECT trunc(123.45, 1) as res;
+SELECT trunc(123.499, 1) as res;
 ```

 ```response

From 81f97921ce71892224ce2c181f075bf2701e0fd7 Mon Sep 17 00:00:00 2001
From: "Mikhail f. Shiryaev"
Date: Fri, 12 Apr 2024 14:17:08 +0200
Subject: [PATCH 87/90] Add requirement for ccache/sccache into dev docs

---
 docs/en/development/developer-instruction.md | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/docs/en/development/developer-instruction.md b/docs/en/development/developer-instruction.md
index 42c7e5ac2957..763485331790 100644
--- a/docs/en/development/developer-instruction.md
+++ b/docs/en/development/developer-instruction.md
@@ -83,11 +83,17 @@ ClickHouse uses CMake and Ninja for building.

 - Ninja - a smaller build system with a focus on the speed used to execute those cmake generated tasks.

-To install on Ubuntu, Debian or Mint run `sudo apt install cmake ninja-build`.
+- ccache - a compiler cache. It speeds up recompilation by caching previous compilations and detecting when the same compilation is being done again.

-On CentOS, RedHat run `sudo yum install cmake ninja-build`.
+:::tip
+As an alternative for ccache a distributed [sccache](https://github.com/mozilla/sccache) could be used. The system will chose it with higher priority.
+:::
+
+To install on Ubuntu, Debian or Mint run `sudo apt install cmake ninja-build ccache`.
+
+On CentOS, RedHat run `sudo yum install cmake ninja-build ccache`.

-If you use Arch or Gentoo, you probably know it yourself how to install CMake.
+If you use Arch or Gentoo, you probably know it yourself how to install CMake and others.

 ## C++ Compiler {#c-compiler}

From e00e0ad65199db0534381aeb9c279b7091a684c4 Mon Sep 17 00:00:00 2001
From: "Mikhail f. Shiryaev"
Date: Fri, 12 Apr 2024 14:32:59 +0200
Subject: [PATCH 88/90] Fix a false statement

---
 docs/en/development/developer-instruction.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/en/development/developer-instruction.md b/docs/en/development/developer-instruction.md
index 763485331790..6623c46fa9fa 100644
--- a/docs/en/development/developer-instruction.md
+++ b/docs/en/development/developer-instruction.md
@@ -86,7 +86,7 @@ ClickHouse uses CMake and Ninja for building.
 - ccache - a compiler cache. It speeds up recompilation by caching previous compilations and detecting when the same compilation is being done again.

 :::tip
-As an alternative for ccache a distributed [sccache](https://github.com/mozilla/sccache) could be used. The system will chose it with higher priority.
+As an alternative for ccache a distributed [sccache](https://github.com/mozilla/sccache) could be used. To prefer it, `-DCOMPILER_CACHE=sccache` CMake flag should be used.
 :::

 To install on Ubuntu, Debian or Mint run `sudo apt install cmake ninja-build ccache`.

From c10055eb788271fe0097fb5f1bd20dcd01c8442f Mon Sep 17 00:00:00 2001
From: "Mikhail f. Shiryaev"
Date: Fri, 12 Apr 2024 14:34:19 +0200
Subject: [PATCH 89/90] Add sccache to the spelling dictionary

---
 utils/check-style/aspell-ignore/en/aspell-dict.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt
index 9f7776f5201d..d191eb8b9ce1 100644
--- a/utils/check-style/aspell-ignore/en/aspell-dict.txt
+++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt
@@ -2348,6 +2348,7 @@ rw
 sasl
 satisfiable
 scala
+sccache
 schemas
 seekable
 seektable

From e2ff2f8a1fd2a660208111b6ba83849b24fefc5d Mon Sep 17 00:00:00 2001
From: Maksim Kita
Date: Sun, 14 Apr 2024 10:55:52 +0300
Subject: [PATCH 90/90] JOIN filter push down right stream filled crash fix

---
 .../Optimizations/filterPushDown.cpp          |  2 +-
 ...er_push_down_right_stream_filled.reference |  0
 ...n_filter_push_down_right_stream_filled.sql | 25 +++++++++++++++++++
 3 files changed, 26 insertions(+), 1 deletion(-)
 create mode 100644 tests/queries/0_stateless/03095_join_filter_push_down_right_stream_filled.reference
 create mode 100644 tests/queries/0_stateless/03095_join_filter_push_down_right_stream_filled.sql

diff --git a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp
index ebf780bb692e..5eab5e8f4a46 100644
--- a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp
+++ b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp
@@ -363,7 +363,7 @@ static size_t tryPushDownOverJoinStep(QueryPlan::Node * parent_node, QueryPlan::
             JoinKind::Left);
     }

-    if (join_filter_push_down_actions.right_stream_filter_to_push_down)
+    if (join_filter_push_down_actions.right_stream_filter_to_push_down && allow_push_down_to_right)
     {
         updated_steps += addNewFilterStepOrThrow(parent_node,
             nodes,
diff --git a/tests/queries/0_stateless/03095_join_filter_push_down_right_stream_filled.reference b/tests/queries/0_stateless/03095_join_filter_push_down_right_stream_filled.reference
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/tests/queries/0_stateless/03095_join_filter_push_down_right_stream_filled.sql b/tests/queries/0_stateless/03095_join_filter_push_down_right_stream_filled.sql
new file mode 100644
index 000000000000..4ce7657e1487
--- /dev/null
+++ b/tests/queries/0_stateless/03095_join_filter_push_down_right_stream_filled.sql
@@ -0,0 +1,25 @@
+DROP TABLE IF EXISTS t1__fuzz_0;
+CREATE TABLE t1__fuzz_0
+(
+    `x` UInt8,
+    `str` String
+)
+ENGINE = MergeTree ORDER BY x;
+
+INSERT INTO t1__fuzz_0 SELECT number, toString(number) FROM numbers(10);
+
+DROP TABLE IF EXISTS left_join__fuzz_2;
+CREATE TABLE left_join__fuzz_2
+(
+    `x` UInt32,
+    `s` LowCardinality(String)
+) ENGINE = Join(`ALL`, LEFT, x);
+
+INSERT INTO left_join__fuzz_2 SELECT number, toString(number) FROM numbers(10);
+
+SELECT 14 FROM t1__fuzz_0 LEFT JOIN left_join__fuzz_2 USING (x)
+WHERE pointInPolygon(materialize((-inf, 1023)), [(5, 0.9998999834060669), (1.1920928955078125e-7, 100.0000991821289), (1.000100016593933, 100.0000991821289)])
+ORDER BY toNullable('202.79.32.10') DESC NULLS LAST, toNullable(toLowCardinality(toUInt256(14))) ASC, x DESC NULLS LAST;
+
+DROP TABLE t1__fuzz_0;
+DROP TABLE left_join__fuzz_2;
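
For readers following the last fix: the crash concerned pushing a WHERE filter into the right-hand stream of a join whose right side is an already-filled `Join`-engine table. The sketch below is an editorial illustration only, not part of the patch series; the table and column names are made up, and the regression test added above exercises the same shape with a more involved query.

```sql
-- Hypothetical minimal sketch of the query shape guarded by the new
-- `allow_push_down_to_right` check: the right side of the LEFT JOIN is a
-- pre-filled Join-engine table, so a filter attributable to the right
-- stream has to stay above the join instead of being pushed into it.
CREATE TABLE t_left (x UInt32, s_left String) ENGINE = MergeTree ORDER BY x;
CREATE TABLE t_right (x UInt32, s_right String) ENGINE = Join(`ALL`, LEFT, x);

INSERT INTO t_left SELECT number, toString(number) FROM numbers(10);
INSERT INTO t_right SELECT number, toString(number) FROM numbers(10);

-- The condition on s_right references the filled right-hand table.
SELECT count()
FROM t_left
LEFT JOIN t_right USING (x)
WHERE s_right != '';
```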