From ee2ec2f1c29144053dd8a4543a885f929aec6dc5 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 2 Apr 2024 13:39:58 +0000 Subject: [PATCH 01/90] Remove double-conversion submodule --- .gitmodules | 3 --- contrib/double-conversion | 1 - 2 files changed, 4 deletions(-) delete mode 160000 contrib/double-conversion diff --git a/.gitmodules b/.gitmodules index a618104f3642..b6a5c5824b8b 100644 --- a/.gitmodules +++ b/.gitmodules @@ -22,9 +22,6 @@ [submodule "contrib/capnproto"] path = contrib/capnproto url = https://github.com/ClickHouse/capnproto -[submodule "contrib/double-conversion"] - path = contrib/double-conversion - url = https://github.com/google/double-conversion [submodule "contrib/re2"] path = contrib/re2 url = https://github.com/google/re2 diff --git a/contrib/double-conversion b/contrib/double-conversion deleted file mode 160000 index cf2f0f3d547d..000000000000 --- a/contrib/double-conversion +++ /dev/null @@ -1 +0,0 @@ -Subproject commit cf2f0f3d547dc73b4612028a155b80536902ba02 From 732c215a27c02d66bea48c27c8bab6ebfbf4b5a4 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 2 Apr 2024 13:40:40 +0000 Subject: [PATCH 02/90] Add ClickHouse double-conversion submodule --- .gitmodules | 3 +++ contrib/double-conversion | 1 + 2 files changed, 4 insertions(+) create mode 160000 contrib/double-conversion diff --git a/.gitmodules b/.gitmodules index b6a5c5824b8b..6c8de2fce645 100644 --- a/.gitmodules +++ b/.gitmodules @@ -366,3 +366,6 @@ [submodule "contrib/idna"] path = contrib/idna url = https://github.com/ada-url/idna.git +[submodule "contrib/double-conversion"] + path = contrib/double-conversion + url = https://github.com/ClickHouse/double-conversion.git diff --git a/contrib/double-conversion b/contrib/double-conversion new file mode 160000 index 000000000000..cf2f0f3d547d --- /dev/null +++ b/contrib/double-conversion @@ -0,0 +1 @@ +Subproject commit cf2f0f3d547dc73b4612028a155b80536902ba02 From 7d87adc91ac3941deb0fa94dbe6d8237c35434ad Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 2 Apr 2024 13:51:36 +0000 Subject: [PATCH 03/90] Upgrade double-conversion to 3.3.0 --- contrib/double-conversion-cmake/CMakeLists.txt | 17 +++++++++-------- src/IO/ReadHelpers.h | 1 - src/IO/readFloatText.h | 1 - 3 files changed, 9 insertions(+), 10 deletions(-) diff --git a/contrib/double-conversion-cmake/CMakeLists.txt b/contrib/double-conversion-cmake/CMakeLists.txt index dc5b1719abfe..4bea86985a1a 100644 --- a/contrib/double-conversion-cmake/CMakeLists.txt +++ b/contrib/double-conversion-cmake/CMakeLists.txt @@ -1,14 +1,15 @@ SET(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/double-conversion") add_library(_double-conversion -"${LIBRARY_DIR}/double-conversion/bignum.cc" -"${LIBRARY_DIR}/double-conversion/bignum-dtoa.cc" -"${LIBRARY_DIR}/double-conversion/cached-powers.cc" -"${LIBRARY_DIR}/double-conversion/diy-fp.cc" -"${LIBRARY_DIR}/double-conversion/double-conversion.cc" -"${LIBRARY_DIR}/double-conversion/fast-dtoa.cc" -"${LIBRARY_DIR}/double-conversion/fixed-dtoa.cc" -"${LIBRARY_DIR}/double-conversion/strtod.cc") + "${LIBRARY_DIR}/double-conversion/bignum-dtoa.cc" + "${LIBRARY_DIR}/double-conversion/bignum.cc" + "${LIBRARY_DIR}/double-conversion/cached-powers.cc" + "${LIBRARY_DIR}/double-conversion/double-to-string.cc" + "${LIBRARY_DIR}/double-conversion/fast-dtoa.cc" + "${LIBRARY_DIR}/double-conversion/fixed-dtoa.cc" + "${LIBRARY_DIR}/double-conversion/string-to-double.cc" + "${LIBRARY_DIR}/double-conversion/strtod.cc" +) target_include_directories(_double-conversion SYSTEM 
BEFORE PUBLIC "${LIBRARY_DIR}") diff --git a/src/IO/ReadHelpers.h b/src/IO/ReadHelpers.h index ca568c469b40..36831fd11711 100644 --- a/src/IO/ReadHelpers.h +++ b/src/IO/ReadHelpers.h @@ -41,7 +41,6 @@ #include #include -#include static constexpr auto DEFAULT_MAX_STRING_SIZE = 1_GiB; diff --git a/src/IO/readFloatText.h b/src/IO/readFloatText.h index 597f0a06fb96..d1652784cc2a 100644 --- a/src/IO/readFloatText.h +++ b/src/IO/readFloatText.h @@ -4,7 +4,6 @@ #include #include #include -#include #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wunneeded-internal-declaration" From 60392f7b9d1ab445090844c3448d97262581ed49 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Tue, 2 Apr 2024 16:24:35 +0200 Subject: [PATCH 04/90] Add test for #35215 --- .../0_stateless/03033_virtual_column_override.reference | 1 + tests/queries/0_stateless/03033_virtual_column_override.sql | 3 +++ 2 files changed, 4 insertions(+) create mode 100644 tests/queries/0_stateless/03033_virtual_column_override.reference create mode 100644 tests/queries/0_stateless/03033_virtual_column_override.sql diff --git a/tests/queries/0_stateless/03033_virtual_column_override.reference b/tests/queries/0_stateless/03033_virtual_column_override.reference new file mode 100644 index 000000000000..d00491fd7e5b --- /dev/null +++ b/tests/queries/0_stateless/03033_virtual_column_override.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/03033_virtual_column_override.sql b/tests/queries/0_stateless/03033_virtual_column_override.sql new file mode 100644 index 000000000000..49258bbb5337 --- /dev/null +++ b/tests/queries/0_stateless/03033_virtual_column_override.sql @@ -0,0 +1,3 @@ +DROP TABLE IF EXISTS override_test; +CREATE TABLE override_test (_part UInt32) ENGINE = MergeTree ORDER BY tuple() AS SELECT 1; +SELECT _part FROM override_test; From 427ad784e8a07c204492006e47446d0c3deff76d Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 2 Apr 2024 16:04:24 +0000 Subject: [PATCH 05/90] Actually bump the submodule --- contrib/double-conversion | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/double-conversion b/contrib/double-conversion index cf2f0f3d547d..4f7a25d8ced8 160000 --- a/contrib/double-conversion +++ b/contrib/double-conversion @@ -1 +1 @@ -Subproject commit cf2f0f3d547dc73b4612028a155b80536902ba02 +Subproject commit 4f7a25d8ced8c7cf6eee6fd09d6788eaa23c9afe From 98ac8031e09eb45ac63b51f467b99f73fc8accaa Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Wed, 3 Apr 2024 19:49:23 +0200 Subject: [PATCH 06/90] add fault injection --- docker/test/stress/run.sh | 1 + programs/server/Server.cpp | 4 +++ src/Common/ThreadPool.cpp | 30 +++++++++++++++++++ src/Common/ThreadPool.h | 15 ++++++++++ src/Core/ServerSettings.h | 1 + .../cannot_allocate_thread_injection.xml | 3 ++ tests/config/install.sh | 6 ++++ 7 files changed, 60 insertions(+) create mode 100644 tests/config/config.d/cannot_allocate_thread_injection.xml diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh index 6c6caf872e9b..81cc61c90bc4 100644 --- a/docker/test/stress/run.sh +++ b/docker/test/stress/run.sh @@ -215,6 +215,7 @@ stop_server export USE_S3_STORAGE_FOR_MERGE_TREE=1 export RANDOMIZE_OBJECT_KEY_TYPE=1 export ZOOKEEPER_FAULT_INJECTION=1 +export THREAD_POOL_FAULT_INJECTION=1 configure # But we still need default disk because some tables loaded only into it diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 450e1696c115..071847c34582 100644 --- 
a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -1569,6 +1569,8 @@ try new_server_settings.http_connections_store_limit, }); + CannotAllocateThreadFaultInjector::setFaultProbability(new_server_settings.cannot_allocate_thread_fault_injection_probability); + ProfileEvents::increment(ProfileEvents::MainConfigLoads); /// Must be the last. @@ -2058,6 +2060,8 @@ try startup_watch.stop(); ProfileEvents::increment(ProfileEvents::ServerStartupMilliseconds, startup_watch.elapsedMilliseconds()); + CannotAllocateThreadFaultInjector::setFaultProbability(server_settings.cannot_allocate_thread_fault_injection_probability); + try { global_context->startClusterDiscovery(); diff --git a/src/Common/ThreadPool.cpp b/src/Common/ThreadPool.cpp index 3c2e6228421e..1adf716be24c 100644 --- a/src/Common/ThreadPool.cpp +++ b/src/Common/ThreadPool.cpp @@ -202,6 +202,9 @@ ReturnType ThreadPoolImpl::scheduleImpl(Job job, Priority priority, std: /// Check if there are enough threads to process job. if (threads.size() < std::min(max_threads, scheduled_jobs + 1)) { + if (CannotAllocateThreadFaultInjector::injectFault()) + return on_error("fault injected"); + try { threads.emplace_front(); @@ -541,3 +544,30 @@ void GlobalThreadPool::shutdown() the_instance->finalize(); } } + +CannotAllocateThreadFaultInjector & CannotAllocateThreadFaultInjector::instance() +{ + static CannotAllocateThreadFaultInjector ins; + return ins; +} + +void CannotAllocateThreadFaultInjector::setFaultProbability(double probability) +{ + auto & ins = instance(); + std::lock_guard lock(ins.mutex); + ins.enabled = 0 < probability && probability <= 1; + if (ins.enabled) + ins.random.emplace(probability); + else + ins.random.reset(); +} + +bool CannotAllocateThreadFaultInjector::injectFault() +{ + auto & ins = instance(); + if (!ins.enabled.load(std::memory_order_relaxed)) + return false; + + std::lock_guard lock(ins.mutex); + return ins.random && (*ins.random)(ins.rndgen); +} diff --git a/src/Common/ThreadPool.h b/src/Common/ThreadPool.h index 31e4eabf63b8..191a8f6271d7 100644 --- a/src/Common/ThreadPool.h +++ b/src/Common/ThreadPool.h @@ -10,8 +10,10 @@ #include #include #include +#include #include +#include #include #include @@ -324,3 +326,16 @@ using ThreadFromGlobalPool = ThreadFromGlobalPoolImpl; /// To make sure the tracing context is correctly propagated, we explicitly disable context propagation(including initialization and de-initialization) at underlying worker level. /// using ThreadPool = ThreadPoolImpl; + +/// Enables fault injections globally for all thread pools +class CannotAllocateThreadFaultInjector +{ + std::atomic_bool enabled = false; + std::mutex mutex; + pcg64_fast rndgen; + std::optional random; + static CannotAllocateThreadFaultInjector & instance(); +public: + static void setFaultProbability(double probability); + static bool injectFault(); +}; diff --git a/src/Core/ServerSettings.h b/src/Core/ServerSettings.h index 6608a35a5a2c..8fbf4749d499 100644 --- a/src/Core/ServerSettings.h +++ b/src/Core/ServerSettings.h @@ -41,6 +41,7 @@ namespace DB M(UInt64, max_backup_bandwidth_for_server, 0, "The maximum read speed in bytes per second for all backups on server. 
Zero means unlimited.", 0) \ M(UInt64, restore_threads, 16, "The maximum number of threads to execute RESTORE requests.", 0) \ M(Bool, shutdown_wait_backups_and_restores, true, "If set to true ClickHouse will wait for running backups and restores to finish before shutdown.", 0) \ + M(Double, cannot_allocate_thread_fault_injection_probability, 0, "For testing purposes.", 0) \ M(Int32, max_connections, 1024, "Max server connections.", 0) \ M(UInt32, asynchronous_metrics_update_period_s, 1, "Period in seconds for updating asynchronous metrics.", 0) \ M(UInt32, asynchronous_heavy_metrics_update_period_s, 120, "Period in seconds for updating heavy asynchronous metrics.", 0) \ diff --git a/tests/config/config.d/cannot_allocate_thread_injection.xml b/tests/config/config.d/cannot_allocate_thread_injection.xml new file mode 100644 index 000000000000..42bc0589b115 --- /dev/null +++ b/tests/config/config.d/cannot_allocate_thread_injection.xml @@ -0,0 +1,3 @@ + + 0.01 + diff --git a/tests/config/install.sh b/tests/config/install.sh index 652d25a0a35b..5da64bf4e696 100755 --- a/tests/config/install.sh +++ b/tests/config/install.sh @@ -132,6 +132,12 @@ else ln -sf $SRC_PATH/config.d/zookeeper.xml $DEST_SERVER_PATH/config.d/ fi +if [[ -n "$THREAD_POOL_FAULT_INJECTION" ]] && [[ "$THREAD_POOL_FAULT_INJECTION" -eq 1 ]]; then + ln -sf $SRC_PATH/config.d/cannot_allocate_thread_injection.xml $DEST_SERVER_PATH/config.d/ +else + rm -f $DEST_SERVER_PATH/config.d/cannot_allocate_thread_injection.xml ||: +fi + # We randomize creating the snapshot on exit for Keeper to test out using older snapshots value=$(($RANDOM % 2)) sed --follow-symlinks -i "s|[01]|$value|" $DEST_SERVER_PATH/config.d/keeper_port.xml From c53b20a77070841289c018c66ae806cc74db832e Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Wed, 3 Apr 2024 20:57:12 +0200 Subject: [PATCH 07/90] fix --- programs/server/Server.cpp | 3 ++- src/Common/AsyncLoader.cpp | 1 + src/Common/ThreadPool.cpp | 15 +++++++++++++++ src/Common/ThreadPool.h | 5 +++++ src/Storages/MergeTree/MergeTreeData.cpp | 2 ++ ...1947_multiple_pipe_read_sample_data_ZbApel.tsv | 7 +++++++ tests/clickhouse-test | 1 + 7 files changed, 33 insertions(+), 1 deletion(-) create mode 100644 tests/01947_multiple_pipe_read_sample_data_ZbApel.tsv diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 071847c34582..f918826130fb 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -1569,7 +1569,8 @@ try new_server_settings.http_connections_store_limit, }); - CannotAllocateThreadFaultInjector::setFaultProbability(new_server_settings.cannot_allocate_thread_fault_injection_probability); + if (global_context->isServerCompletelyStarted()) + CannotAllocateThreadFaultInjector::setFaultProbability(new_server_settings.cannot_allocate_thread_fault_injection_probability); ProfileEvents::increment(ProfileEvents::MainConfigLoads); diff --git a/src/Common/AsyncLoader.cpp b/src/Common/AsyncLoader.cpp index 3bec30893b9c..9607333b9f74 100644 --- a/src/Common/AsyncLoader.cpp +++ b/src/Common/AsyncLoader.cpp @@ -873,6 +873,7 @@ void AsyncLoader::spawn(Pool & pool, std::unique_lock & lock) ALLOW_ALLOCATIONS_IN_SCOPE; if (log_events) LOG_DEBUG(log, "Spawn loader worker #{} in {}", pool.workers, pool.name); + auto blocker = CannotAllocateThreadFaultInjector::blockFaultInjections(); pool.thread_pool->scheduleOrThrowOnError([this, &pool] { worker(pool); }); }); } diff --git a/src/Common/ThreadPool.cpp b/src/Common/ThreadPool.cpp index 1adf716be24c..0877f8aa55e1 100644 --- 
a/src/Common/ThreadPool.cpp +++ b/src/Common/ThreadPool.cpp @@ -568,6 +568,21 @@ bool CannotAllocateThreadFaultInjector::injectFault() if (!ins.enabled.load(std::memory_order_relaxed)) return false; + if (ins.block_fault_injections) + return false; + std::lock_guard lock(ins.mutex); return ins.random && (*ins.random)(ins.rndgen); } + +thread_local bool CannotAllocateThreadFaultInjector::block_fault_injections = false; + +scope_guard CannotAllocateThreadFaultInjector::blockFaultInjections() +{ + auto & ins = instance(); + if (!ins.enabled.load(std::memory_order_relaxed)) + return {}; + + ins.block_fault_injections = true; + return [&ins](){ ins.block_fault_injections = false; }; +} diff --git a/src/Common/ThreadPool.h b/src/Common/ThreadPool.h index 191a8f6271d7..7591832bbff9 100644 --- a/src/Common/ThreadPool.h +++ b/src/Common/ThreadPool.h @@ -334,8 +334,13 @@ class CannotAllocateThreadFaultInjector std::mutex mutex; pcg64_fast rndgen; std::optional random; + + static thread_local bool block_fault_injections; + static CannotAllocateThreadFaultInjector & instance(); public: static void setFaultProbability(double probability); static bool injectFault(); + + static scope_guard blockFaultInjections(); }; diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 8faed72b198d..2db360f91838 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -1909,6 +1909,8 @@ try auto runner = threadPoolCallbackRunner(getOutdatedPartsLoadingThreadPool().get(), "OutdatedParts"); std::vector> parts_futures; + auto blocker = CannotAllocateThreadFaultInjector::blockFaultInjections(); + while (true) { ThreadFuzzer::maybeInjectSleep(); diff --git a/tests/01947_multiple_pipe_read_sample_data_ZbApel.tsv b/tests/01947_multiple_pipe_read_sample_data_ZbApel.tsv new file mode 100644 index 000000000000..ab35653b8ddd --- /dev/null +++ b/tests/01947_multiple_pipe_read_sample_data_ZbApel.tsv @@ -0,0 +1,7 @@ +0 BBB +1 BBB +2 BBB +3 BBB +4 AAA +5 BBB +6 AAA diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 624512058bcf..e461942114ba 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -907,6 +907,7 @@ class MergeTreeSettingsRandomizer: 1, 32 * 1024 * 1024 ), "cache_populated_by_fetch": lambda: random.randint(0, 1), + "concurrent_part_removal_threshold": threshold_generator(0.2, 0.3, 0, 100) } @staticmethod From ea16070117b2535d180ec5dc1d6edffa0b77b767 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 3 Apr 2024 19:17:46 +0000 Subject: [PATCH 08/90] Automatic style fix --- tests/clickhouse-test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index e461942114ba..9cfd087bd672 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -907,7 +907,7 @@ class MergeTreeSettingsRandomizer: 1, 32 * 1024 * 1024 ), "cache_populated_by_fetch": lambda: random.randint(0, 1), - "concurrent_part_removal_threshold": threshold_generator(0.2, 0.3, 0, 100) + "concurrent_part_removal_threshold": threshold_generator(0.2, 0.3, 0, 100), } @staticmethod From 724f9b8a3eb3a72845928e5ec2d21018cf0cc62c Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 4 Apr 2024 20:14:33 +0200 Subject: [PATCH 09/90] Update ReadBufferFromPocoSocket.cpp --- src/IO/ReadBufferFromPocoSocket.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/IO/ReadBufferFromPocoSocket.cpp b/src/IO/ReadBufferFromPocoSocket.cpp index 
d399721d0603..26cdee4140c8 100644 --- a/src/IO/ReadBufferFromPocoSocket.cpp +++ b/src/IO/ReadBufferFromPocoSocket.cpp @@ -83,21 +83,21 @@ bool ReadBufferFromPocoSocket::nextImpl() } catch (const Poco::Net::NetException & e) { - throw NetException(ErrorCodes::NETWORK_ERROR, "{}, while reading from socket ({})", e.displayText(), peer_address.toString()); + throw NetException(ErrorCodes::NETWORK_ERROR, "{}, while reading from socket (peer: {}, local: {})", e.displayText(), peer_address.toString(), socket.address().toString()); } catch (const Poco::TimeoutException &) { - throw NetException(ErrorCodes::SOCKET_TIMEOUT, "Timeout exceeded while reading from socket ({}, {} ms)", - peer_address.toString(), + throw NetException(ErrorCodes::SOCKET_TIMEOUT, "Timeout exceeded while reading from socket (peer: {}, local: {}, {} ms)", + peer_address.toString(), socket.address().toString(), socket.impl()->getReceiveTimeout().totalMilliseconds()); } catch (const Poco::IOException & e) { - throw NetException(ErrorCodes::NETWORK_ERROR, "{}, while reading from socket ({})", e.displayText(), peer_address.toString()); + throw NetException(ErrorCodes::NETWORK_ERROR, "{}, while reading from socket (peer: {}, local: {})", e.displayText(), peer_address.toString(), socket.address().toString()); } if (bytes_read < 0) - throw NetException(ErrorCodes::CANNOT_READ_FROM_SOCKET, "Cannot read from socket ({})", peer_address.toString()); + throw NetException(ErrorCodes::CANNOT_READ_FROM_SOCKET, "Cannot read from socket (peer: {}, local: {})", peer_address.toString(), socket.address().toString()); if (read_event != ProfileEvents::end()) ProfileEvents::increment(read_event, bytes_read); From 5a897bc43e6fbf83ad8143400adf6944c04616dc Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 4 Apr 2024 20:16:26 +0200 Subject: [PATCH 10/90] Update Connection.cpp --- src/Client/Connection.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/Client/Connection.cpp b/src/Client/Connection.cpp index 180942e6b838..e5ac7ad66b9d 100644 --- a/src/Client/Connection.cpp +++ b/src/Client/Connection.cpp @@ -195,6 +195,7 @@ void Connection::connect(const ConnectionTimeouts & timeouts) out = std::make_shared(*socket); out->setAsyncCallback(async_callback); connected = true; + setDescription(); sendHello(); receiveHello(timeouts.handshake_timeout); @@ -1225,6 +1226,12 @@ void Connection::setDescription() if (host != ip_address) description += ", " + ip_address; } + + if (const auto * socket_ = getSocket()) + { + description += ", local address: "; + description += socket_->address().toString(); + } } From 5db9fbed52c8c3f31b202fdd68d2d0117541d31e Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 4 Apr 2024 22:32:57 +0200 Subject: [PATCH 11/90] cancel tasks on exception --- src/Backups/BackupEntriesCollector.cpp | 15 +- src/Backups/BackupIO_AzureBlobStorage.cpp | 8 +- src/Backups/BackupIO_S3.cpp | 10 +- src/Backups/RestorerFromBackup.cpp | 2 +- src/Common/ThreadPool.cpp | 6 +- src/Common/ThreadPoolTaskTracker.cpp | 4 +- src/Common/ThreadPoolTaskTracker.h | 6 +- src/Common/threadPoolCallbackRunner.h | 135 +++++++++++++- src/Databases/DatabaseReplicated.cpp | 15 +- src/Disks/IO/ThreadPoolReader.cpp | 2 +- src/Disks/IO/ThreadPoolRemoteFSReader.cpp | 2 +- .../IO/WriteBufferFromAzureBlobStorage.cpp | 2 +- .../IO/WriteBufferFromAzureBlobStorage.h | 2 +- .../ObjectStorageIteratorAsync.h | 4 +- .../ObjectStorages/S3/S3ObjectStorage.cpp | 8 +- src/Formats/FormatFactory.cpp | 2 +- .../copyAzureBlobStorageFile.cpp | 8 +- 
.../copyAzureBlobStorageFile.h | 4 +- src/IO/ParallelReadBuffer.cpp | 4 +- src/IO/ParallelReadBuffer.h | 6 +- src/IO/S3/copyS3File.cpp | 12 +- src/IO/S3/copyS3File.h | 4 +- src/IO/WriteBufferFromS3.cpp | 2 +- src/IO/WriteBufferFromS3.h | 2 +- src/IO/tests/gtest_writebuffer_s3.cpp | 4 +- src/Interpreters/AsynchronousInsertQueue.cpp | 17 +- src/Storages/MergeTree/MergeTreeData.cpp | 171 +++++++----------- .../MergeTree/MergeTreeMarksLoader.cpp | 2 +- .../MergeTree/MergeTreePrefetchedReadPool.cpp | 2 +- src/Storages/MergeTree/MergeTreeSource.cpp | 4 +- src/Storages/StorageAzureBlob.cpp | 2 +- src/Storages/StorageAzureBlob.h | 2 +- src/Storages/StorageBuffer.cpp | 9 +- src/Storages/StorageDistributed.cpp | 39 ++-- src/Storages/StorageS3.cpp | 10 +- src/Storages/StorageS3.h | 2 +- src/Storages/StorageURL.cpp | 1 - .../System/StorageSystemDetachedParts.cpp | 24 +-- ..._multiple_pipe_read_sample_data_ZbApel.tsv | 7 - tests/clickhouse-test | 1 + 40 files changed, 303 insertions(+), 259 deletions(-) delete mode 100644 tests/01947_multiple_pipe_read_sample_data_ZbApel.tsv diff --git a/src/Backups/BackupEntriesCollector.cpp b/src/Backups/BackupEntriesCollector.cpp index c71ce195388c..016190535a96 100644 --- a/src/Backups/BackupEntriesCollector.cpp +++ b/src/Backups/BackupEntriesCollector.cpp @@ -786,20 +786,15 @@ void BackupEntriesCollector::makeBackupEntriesForTablesData() if (backup_settings.structure_only) return; - std::vector> futures; + ThreadPoolCallbackRunnerLocal runner(threadpool, "BackupCollect"); for (const auto & table_name : table_infos | boost::adaptors::map_keys) { - futures.push_back(scheduleFromThreadPool([&]() + runner([&]() { makeBackupEntriesForTableData(table_name); - }, threadpool, "BackupCollect")); - } - /// Wait for all tasks. - for (auto & future : futures) - future.wait(); - /// Make sure there is no exception. - for (auto & future : futures) - future.get(); + }); + } + runner.waitForAllToFinishAndRethrowFirstError(); } void BackupEntriesCollector::makeBackupEntriesForTableData(const QualifiedTableName & table_name) diff --git a/src/Backups/BackupIO_AzureBlobStorage.cpp b/src/Backups/BackupIO_AzureBlobStorage.cpp index b9b208e321cf..fac6e441e5af 100644 --- a/src/Backups/BackupIO_AzureBlobStorage.cpp +++ b/src/Backups/BackupIO_AzureBlobStorage.cpp @@ -121,7 +121,7 @@ void BackupReaderAzureBlobStorage::copyFileToDisk(const String & path_in_backup, /* dest_path */ blob_path[0], settings, read_settings, - threadPoolCallbackRunner(getBackupsIOThreadPool().get(), "BackupRDAzure"), + threadPoolCallbackRunnerUnsafe(getBackupsIOThreadPool().get(), "BackupRDAzure"), /* for_disk_azure_blob_storage= */ true); return file_size; @@ -178,7 +178,7 @@ void BackupWriterAzureBlobStorage::copyFileFromDisk(const String & path_in_backu fs::path(configuration.blob_path) / path_in_backup, settings, read_settings, - threadPoolCallbackRunner(getBackupsIOThreadPool().get(), "BackupWRAzure")); + threadPoolCallbackRunnerUnsafe(getBackupsIOThreadPool().get(), "BackupWRAzure")); return; /// copied! 
} } @@ -201,14 +201,14 @@ void BackupWriterAzureBlobStorage::copyFile(const String & destination, const St /* dest_path */ destination, settings, read_settings, - threadPoolCallbackRunner(getBackupsIOThreadPool().get(), "BackupWRAzure"), + threadPoolCallbackRunnerUnsafe(getBackupsIOThreadPool().get(), "BackupWRAzure"), /* for_disk_azure_blob_storage= */ true); } void BackupWriterAzureBlobStorage::copyDataToFile(const String & path_in_backup, const CreateReadBufferFunction & create_read_buffer, UInt64 start_pos, UInt64 length) { copyDataToAzureBlobStorageFile(create_read_buffer, start_pos, length, client, configuration.container, path_in_backup, settings, - threadPoolCallbackRunner(getBackupsIOThreadPool().get(), "BackupWRAzure")); + threadPoolCallbackRunnerUnsafe(getBackupsIOThreadPool().get(), "BackupWRAzure")); } BackupWriterAzureBlobStorage::~BackupWriterAzureBlobStorage() = default; diff --git a/src/Backups/BackupIO_S3.cpp b/src/Backups/BackupIO_S3.cpp index 2063af2061cc..70e50e443c1b 100644 --- a/src/Backups/BackupIO_S3.cpp +++ b/src/Backups/BackupIO_S3.cpp @@ -191,7 +191,7 @@ void BackupReaderS3::copyFileToDisk(const String & path_in_backup, size_t file_s read_settings, blob_storage_log, object_attributes, - threadPoolCallbackRunner(getBackupsIOThreadPool().get(), "BackupReaderS3"), + threadPoolCallbackRunnerUnsafe(getBackupsIOThreadPool().get(), "BackupReaderS3"), /* for_disk_s3= */ true); return file_size; @@ -259,7 +259,7 @@ void BackupWriterS3::copyFileFromDisk(const String & path_in_backup, DiskPtr src read_settings, blob_storage_log, {}, - threadPoolCallbackRunner(getBackupsIOThreadPool().get(), "BackupWriterS3")); + threadPoolCallbackRunnerUnsafe(getBackupsIOThreadPool().get(), "BackupWriterS3")); return; /// copied! } } @@ -283,14 +283,14 @@ void BackupWriterS3::copyFile(const String & destination, const String & source, read_settings, blob_storage_log, {}, - threadPoolCallbackRunner(getBackupsIOThreadPool().get(), "BackupWriterS3")); + threadPoolCallbackRunnerUnsafe(getBackupsIOThreadPool().get(), "BackupWriterS3")); } void BackupWriterS3::copyDataToFile(const String & path_in_backup, const CreateReadBufferFunction & create_read_buffer, UInt64 start_pos, UInt64 length) { copyDataToS3File(create_read_buffer, start_pos, length, client, s3_uri.bucket, fs::path(s3_uri.key) / path_in_backup, s3_settings.request_settings, blob_storage_log, {}, - threadPoolCallbackRunner(getBackupsIOThreadPool().get(), "BackupWriterS3")); + threadPoolCallbackRunnerUnsafe(getBackupsIOThreadPool().get(), "BackupWriterS3")); } BackupWriterS3::~BackupWriterS3() = default; @@ -325,7 +325,7 @@ std::unique_ptr BackupWriterS3::writeFile(const String & file_name) s3_settings.request_settings, blob_storage_log, std::nullopt, - threadPoolCallbackRunner(getBackupsIOThreadPool().get(), "BackupWriterS3"), + threadPoolCallbackRunnerUnsafe(getBackupsIOThreadPool().get(), "BackupWriterS3"), write_settings); } diff --git a/src/Backups/RestorerFromBackup.cpp b/src/Backups/RestorerFromBackup.cpp index ed1d5b8a103d..6504444feff2 100644 --- a/src/Backups/RestorerFromBackup.cpp +++ b/src/Backups/RestorerFromBackup.cpp @@ -231,7 +231,7 @@ void RestorerFromBackup::schedule(std::function && task_, const char * t checkIsQueryCancelled(); - auto future = scheduleFromThreadPool( + auto future = scheduleFromThreadPoolUnsafe( [this, task = std::move(task_)]() mutable { if (exception_caught) diff --git a/src/Common/ThreadPool.cpp b/src/Common/ThreadPool.cpp index 0877f8aa55e1..b778362f4904 100644 --- a/src/Common/ThreadPool.cpp 
+++ b/src/Common/ThreadPool.cpp @@ -183,6 +183,9 @@ ReturnType ThreadPoolImpl::scheduleImpl(Job job, Priority priority, std: { std::unique_lock lock(mutex); + if (CannotAllocateThreadFaultInjector::injectFault()) + return on_error("fault injected"); + auto pred = [this] { return !queue_size || scheduled_jobs < queue_size || shutdown; }; if (wait_microseconds) /// Check for optional. Condition is true if the optional is set and the value is zero. @@ -202,9 +205,6 @@ ReturnType ThreadPoolImpl::scheduleImpl(Job job, Priority priority, std: /// Check if there are enough threads to process job. if (threads.size() < std::min(max_threads, scheduled_jobs + 1)) { - if (CannotAllocateThreadFaultInjector::injectFault()) - return on_error("fault injected"); - try { threads.emplace_front(); diff --git a/src/Common/ThreadPoolTaskTracker.cpp b/src/Common/ThreadPoolTaskTracker.cpp index 10207eb62967..61d34801f7a5 100644 --- a/src/Common/ThreadPoolTaskTracker.cpp +++ b/src/Common/ThreadPoolTaskTracker.cpp @@ -10,7 +10,7 @@ namespace ProfileEvents namespace DB { -TaskTracker::TaskTracker(ThreadPoolCallbackRunner scheduler_, size_t max_tasks_inflight_, LogSeriesLimiterPtr limitedLog_) +TaskTracker::TaskTracker(ThreadPoolCallbackRunnerUnsafe scheduler_, size_t max_tasks_inflight_, LogSeriesLimiterPtr limitedLog_) : is_async(bool(scheduler_)) , scheduler(scheduler_ ? std::move(scheduler_) : syncRunner()) , max_tasks_inflight(max_tasks_inflight_) @@ -22,7 +22,7 @@ TaskTracker::~TaskTracker() safeWaitAll(); } -ThreadPoolCallbackRunner TaskTracker::syncRunner() +ThreadPoolCallbackRunnerUnsafe TaskTracker::syncRunner() { return [](Callback && callback, int64_t) mutable -> std::future { diff --git a/src/Common/ThreadPoolTaskTracker.h b/src/Common/ThreadPoolTaskTracker.h index 72591648d304..84bc3344fe34 100644 --- a/src/Common/ThreadPoolTaskTracker.h +++ b/src/Common/ThreadPoolTaskTracker.h @@ -23,10 +23,10 @@ class TaskTracker public: using Callback = std::function; - TaskTracker(ThreadPoolCallbackRunner scheduler_, size_t max_tasks_inflight_, LogSeriesLimiterPtr limitedLog_); + TaskTracker(ThreadPoolCallbackRunnerUnsafe scheduler_, size_t max_tasks_inflight_, LogSeriesLimiterPtr limitedLog_); ~TaskTracker(); - static ThreadPoolCallbackRunner syncRunner(); + static ThreadPoolCallbackRunnerUnsafe syncRunner(); bool isAsync() const; @@ -50,7 +50,7 @@ class TaskTracker void collectFinishedFutures(bool propagate_exceptions) TSA_REQUIRES(mutex); const bool is_async; - ThreadPoolCallbackRunner scheduler; + ThreadPoolCallbackRunnerUnsafe scheduler; const size_t max_tasks_inflight; using FutureList = std::list>; diff --git a/src/Common/threadPoolCallbackRunner.h b/src/Common/threadPoolCallbackRunner.h index 6f7892ae4bb4..cec07bbd8922 100644 --- a/src/Common/threadPoolCallbackRunner.h +++ b/src/Common/threadPoolCallbackRunner.h @@ -11,11 +11,16 @@ namespace DB /// High-order function to run callbacks (functions with 'void()' signature) somewhere asynchronously. template > -using ThreadPoolCallbackRunner = std::function(Callback &&, Priority)>; +using ThreadPoolCallbackRunnerUnsafe = std::function(Callback &&, Priority)>; + +/// NOTE When using ThreadPoolCallbackRunnerUnsafe you MUST ensure that all async tasks are finished +/// before any objects they may use are destroyed. +/// A common mistake is capturing some local objects in a lambda and passing it to the runner. +/// In case of exception, these local objects will be destroyed before scheduled tasks are finished. 
/// Creates CallbackRunner that runs every callback with 'pool->scheduleOrThrowOnError()'. template > -ThreadPoolCallbackRunner threadPoolCallbackRunner(ThreadPool & pool, const std::string & thread_name) +ThreadPoolCallbackRunnerUnsafe threadPoolCallbackRunnerUnsafe(ThreadPool & pool, const std::string & thread_name) { return [my_pool = &pool, thread_group = CurrentThread::getGroup(), thread_name](Callback && callback, Priority priority) mutable -> std::future { @@ -54,10 +59,132 @@ ThreadPoolCallbackRunner threadPoolCallbackRunner(ThreadPool & } template -std::future scheduleFromThreadPool(T && task, ThreadPool & pool, const std::string & thread_name, Priority priority = {}) +std::future scheduleFromThreadPoolUnsafe(T && task, ThreadPool & pool, const std::string & thread_name, Priority priority = {}) { - auto schedule = threadPoolCallbackRunner(pool, thread_name); + auto schedule = threadPoolCallbackRunnerUnsafe(pool, thread_name); return schedule(std::move(task), priority); /// NOLINT } +/// NOTE It's still not completely safe. +/// When creating a runner on the stack, you MUST make sure that it's created (and destroyed) before local objects captured by the task lambda. + +template > +class ThreadPoolCallbackRunnerLocal +{ + ThreadPool & pool; + std::string thread_name; + + enum TaskState + { + SCHEDULED = 0, + RUNNING = 1, + FINISHED = 2, + CANCELLED = 3, + }; + + struct Task + { + std::future future; + std::atomic state = SCHEDULED; + }; + + /// NOTE It will leak for a global object with long lifetime + std::vector> tasks; + + void cancelScheduledTasks() + { + for (auto & task : tasks) + { + TaskState expected = SCHEDULED; + task->state.compare_exchange_strong(expected, CANCELLED); + } + } + +public: + ThreadPoolCallbackRunnerLocal(ThreadPool & pool_, const std::string & thread_name_) + : pool(pool_) + , thread_name(thread_name_) + { + } + + ~ThreadPoolCallbackRunnerLocal() + { + cancelScheduledTasks(); + waitForAllToFinish(); + } + + void operator() (Callback && callback, Priority priority = {}) + { + auto & task = tasks.emplace_back(std::make_shared()); + + auto task_func = std::make_shared>( + [task, thread_group = CurrentThread::getGroup(), my_thread_name = thread_name, my_callback = std::move(callback)]() mutable -> Result + { + TaskState expected = SCHEDULED; + if (!task->state.compare_exchange_strong(expected, RUNNING)) + { + if (expected == CANCELLED) + return; + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected state {} when running a task in {}", expected, my_thread_name); + } + + SCOPE_EXIT_SAFE( + { + expected = RUNNING; + if (!task->state.compare_exchange_strong(expected, FINISHED)) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected state {} when finishing a task in {}", expected, my_thread_name); + }); + + if (thread_group) + CurrentThread::attachToGroup(thread_group); + + SCOPE_EXIT_SAFE( + { + { + /// Release all captured resources before detaching thread group + /// Releasing has to use proper memory tracker which has been set here before callback + + [[maybe_unused]] auto tmp = std::move(my_callback); + } + + if (thread_group) + CurrentThread::detachFromGroupIfNotDetached(); + }); + + setThreadName(my_thread_name.data()); + + return my_callback(); + }); + + task->future = task_func->get_future(); + + /// ThreadPool is using "bigger is higher priority" instead of "smaller is more priority". + /// Note: calling method scheduleOrThrowOnError is intentional, because we don't want to throw exceptions + /// in critical places where this callback runner is used (e.g. 
loading or deletion of parts) + pool.scheduleOrThrowOnError([my_task = std::move(task_func)]{ (*my_task)(); }, priority); + } + + void waitForAllToFinish() + { + for (const auto & task : tasks) + { + TaskState state = task->state; + /// It can be cancelled only when waiting in dtor + if (state == CANCELLED) + continue; + task->future.wait(); + } + } + + void waitForAllToFinishAndRethrowFirstError() + { + waitForAllToFinish(); + for (auto & task : tasks) + task->future.get(); + + tasks.clear(); + } + +}; + } diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp index 59b3e52e139e..80281d5d2bcb 100644 --- a/src/Databases/DatabaseReplicated.cpp +++ b/src/Databases/DatabaseReplicated.cpp @@ -1098,8 +1098,7 @@ void DatabaseReplicated::recoverLostReplica(const ZooKeeperPtr & current_zookeep auto allow_concurrent_table_creation = getContext()->getServerSettings().max_database_replicated_create_table_thread_pool_size > 1; auto tables_to_create_by_level = tables_dependencies.getTablesSplitByDependencyLevel(); - auto create_tables_runner = threadPoolCallbackRunner(getDatabaseReplicatedCreateTablesThreadPool().get(), "CreateTables"); - std::vector> create_table_futures; + ThreadPoolCallbackRunnerLocal runner(getDatabaseReplicatedCreateTablesThreadPool().get(), "CreateTables"); for (const auto & tables_to_create : tables_to_create_by_level) { @@ -1131,20 +1130,12 @@ void DatabaseReplicated::recoverLostReplica(const ZooKeeperPtr & current_zookeep }; if (allow_concurrent_table_creation) - create_table_futures.push_back(create_tables_runner(task, Priority{0})); + runner(std::move(task)); else task(); } - /// First wait for all tasks to finish. - for (auto & future : create_table_futures) - future.wait(); - - /// Now rethrow the first exception if any. 
- for (auto & future : create_table_futures) - future.get(); - - create_table_futures.clear(); + runner.waitForAllToFinishAndRethrowFirstError(); } LOG_INFO(log, "All tables are created successfully"); diff --git a/src/Disks/IO/ThreadPoolReader.cpp b/src/Disks/IO/ThreadPoolReader.cpp index bb295643726c..4713e20ccc87 100644 --- a/src/Disks/IO/ThreadPoolReader.cpp +++ b/src/Disks/IO/ThreadPoolReader.cpp @@ -203,7 +203,7 @@ std::future ThreadPoolReader::submit(Request reques ProfileEvents::increment(ProfileEvents::ThreadPoolReaderPageCacheMiss); - auto schedule = threadPoolCallbackRunner(*pool, "ThreadPoolRead"); + auto schedule = threadPoolCallbackRunnerUnsafe(*pool, "ThreadPoolRead"); return schedule([request, fd]() -> Result { diff --git a/src/Disks/IO/ThreadPoolRemoteFSReader.cpp b/src/Disks/IO/ThreadPoolRemoteFSReader.cpp index 590fc4c4656b..2df087e941f9 100644 --- a/src/Disks/IO/ThreadPoolRemoteFSReader.cpp +++ b/src/Disks/IO/ThreadPoolRemoteFSReader.cpp @@ -106,7 +106,7 @@ std::future ThreadPoolRemoteFSReader::submit(Reques } ProfileEventTimeIncrement elapsed(ProfileEvents::ThreadpoolReaderSubmit); - return scheduleFromThreadPool( + return scheduleFromThreadPoolUnsafe( [request, this]() -> Result { return execute(request, /*seek_performed=*/true); }, *pool, "VFSRead", request.priority); } diff --git a/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp b/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp index 05b93dd1fa34..d407ec59394c 100644 --- a/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp +++ b/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp @@ -44,7 +44,7 @@ WriteBufferFromAzureBlobStorage::WriteBufferFromAzureBlobStorage( size_t buf_size_, const WriteSettings & write_settings_, std::shared_ptr settings_, - ThreadPoolCallbackRunner schedule_) + ThreadPoolCallbackRunnerUnsafe schedule_) : WriteBufferFromFileBase(buf_size_, nullptr, 0) , log(getLogger("WriteBufferFromAzureBlobStorage")) , buffer_allocation_policy(createBufferAllocationPolicy(*settings_)) diff --git a/src/Disks/IO/WriteBufferFromAzureBlobStorage.h b/src/Disks/IO/WriteBufferFromAzureBlobStorage.h index 6e10c07b255b..0989eb7bfb0a 100644 --- a/src/Disks/IO/WriteBufferFromAzureBlobStorage.h +++ b/src/Disks/IO/WriteBufferFromAzureBlobStorage.h @@ -36,7 +36,7 @@ class WriteBufferFromAzureBlobStorage : public WriteBufferFromFileBase size_t buf_size_, const WriteSettings & write_settings_, std::shared_ptr settings_, - ThreadPoolCallbackRunner schedule_ = {}); + ThreadPoolCallbackRunnerUnsafe schedule_ = {}); ~WriteBufferFromAzureBlobStorage() override; diff --git a/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.h b/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.h index 5f63e5f6e8a5..7fdb02bdfe2b 100644 --- a/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.h +++ b/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.h @@ -19,7 +19,7 @@ class IObjectStorageIteratorAsync : public IObjectStorageIterator CurrentMetrics::Metric threads_scheduled_metric, const std::string & thread_name) : list_objects_pool(threads_metric, threads_active_metric, threads_scheduled_metric, 1) - , list_objects_scheduler(threadPoolCallbackRunner(list_objects_pool, thread_name)) + , list_objects_scheduler(threadPoolCallbackRunnerUnsafe(list_objects_pool, thread_name)) { } @@ -53,7 +53,7 @@ class IObjectStorageIteratorAsync : public IObjectStorageIterator mutable std::recursive_mutex mutex; ThreadPool list_objects_pool; - ThreadPoolCallbackRunner list_objects_scheduler; + ThreadPoolCallbackRunnerUnsafe list_objects_scheduler; std::future 
outcome_future; RelativePathsWithMetadata current_batch; RelativePathsWithMetadata::iterator current_batch_iterator; diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index b343b73f7bd6..77dd93395ba2 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -247,9 +247,9 @@ std::unique_ptr S3ObjectStorage::writeObject( /// NOLIN throw Exception(ErrorCodes::BAD_ARGUMENTS, "S3 doesn't support append to files"); auto settings_ptr = s3_settings.get(); - ThreadPoolCallbackRunner scheduler; + ThreadPoolCallbackRunnerUnsafe scheduler; if (write_settings.s3_allow_parallel_part_upload) - scheduler = threadPoolCallbackRunner(getThreadPoolWriter(), "VFSWrite"); + scheduler = threadPoolCallbackRunnerUnsafe(getThreadPoolWriter(), "VFSWrite"); auto blob_storage_log = BlobStorageLogWriter::create(disk_name); @@ -461,7 +461,7 @@ void S3ObjectStorage::copyObjectToAnotherObjectStorage( // NOLINT auto current_client = dest_s3->client.get(); auto settings_ptr = s3_settings.get(); auto size = S3::getObjectSize(*current_client, uri.bucket, object_from.remote_path, {}, settings_ptr->request_settings, /* for_disk_s3= */ true); - auto scheduler = threadPoolCallbackRunner(getThreadPoolWriter(), "S3ObjStor_copy"); + auto scheduler = threadPoolCallbackRunnerUnsafe(getThreadPoolWriter(), "S3ObjStor_copy"); try { copyS3File( current_client, @@ -503,7 +503,7 @@ void S3ObjectStorage::copyObject( // NOLINT auto current_client = client.get(); auto settings_ptr = s3_settings.get(); auto size = S3::getObjectSize(*current_client, uri.bucket, object_from.remote_path, {}, settings_ptr->request_settings, /* for_disk_s3= */ true); - auto scheduler = threadPoolCallbackRunner(getThreadPoolWriter(), "S3ObjStor_copy"); + auto scheduler = threadPoolCallbackRunnerUnsafe(getThreadPoolWriter(), "S3ObjStor_copy"); copyS3File(current_client, uri.bucket, object_from.remote_path, diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 8cbb1b9e5639..3dccd8c6b319 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -437,7 +437,7 @@ std::unique_ptr FormatFactory::wrapReadBufferIfNeeded( settings.max_download_buffer_size); res = wrapInParallelReadBufferIfSupported( - buf, threadPoolCallbackRunner(getIOThreadPool().get(), "ParallelRead"), + buf, threadPoolCallbackRunnerUnsafe(getIOThreadPool().get(), "ParallelRead"), max_download_threads, settings.max_download_buffer_size, file_size); } diff --git a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp index 4714c7959278..ef8c01f4b5ec 100644 --- a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp +++ b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp @@ -45,7 +45,7 @@ namespace const String & dest_container_for_logging_, const String & dest_blob_, std::shared_ptr settings_, - ThreadPoolCallbackRunner schedule_, + ThreadPoolCallbackRunnerUnsafe schedule_, bool for_disk_azure_blob_storage_, const Poco::Logger * log_) : create_read_buffer(create_read_buffer_) @@ -72,7 +72,7 @@ namespace const String & dest_container_for_logging; const String & dest_blob; std::shared_ptr settings; - ThreadPoolCallbackRunner schedule; + ThreadPoolCallbackRunnerUnsafe schedule; bool for_disk_azure_blob_storage; const Poco::Logger * log; size_t max_single_part_upload_size; @@ -269,7 +269,7 @@ void copyDataToAzureBlobStorageFile( const String & dest_container_for_logging, const String & 
dest_blob, std::shared_ptr settings, - ThreadPoolCallbackRunner schedule, + ThreadPoolCallbackRunnerUnsafe schedule, bool for_disk_azure_blob_storage) { UploadHelper helper{create_read_buffer, dest_client, offset, size, dest_container_for_logging, dest_blob, settings, schedule, for_disk_azure_blob_storage, &Poco::Logger::get("copyDataToAzureBlobStorageFile")}; @@ -288,7 +288,7 @@ void copyAzureBlobStorageFile( const String & dest_blob, std::shared_ptr settings, const ReadSettings & read_settings, - ThreadPoolCallbackRunner schedule, + ThreadPoolCallbackRunnerUnsafe schedule, bool for_disk_azure_blob_storage) { diff --git a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.h b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.h index 1433f8d18ba0..170a3d7f6aae 100644 --- a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.h +++ b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.h @@ -31,7 +31,7 @@ void copyAzureBlobStorageFile( const String & dest_blob, std::shared_ptr settings, const ReadSettings & read_settings, - ThreadPoolCallbackRunner schedule_ = {}, + ThreadPoolCallbackRunnerUnsafe schedule_ = {}, bool for_disk_azure_blob_storage = false); @@ -48,7 +48,7 @@ void copyDataToAzureBlobStorageFile( const String & dest_container_for_logging, const String & dest_blob, std::shared_ptr settings, - ThreadPoolCallbackRunner schedule_ = {}, + ThreadPoolCallbackRunnerUnsafe schedule_ = {}, bool for_disk_azure_blob_storage = false); } diff --git a/src/IO/ParallelReadBuffer.cpp b/src/IO/ParallelReadBuffer.cpp index cdeb8a186351..5718830db645 100644 --- a/src/IO/ParallelReadBuffer.cpp +++ b/src/IO/ParallelReadBuffer.cpp @@ -42,7 +42,7 @@ struct ParallelReadBuffer::ReadWorker }; ParallelReadBuffer::ParallelReadBuffer( - SeekableReadBuffer & input_, ThreadPoolCallbackRunner schedule_, size_t max_working_readers_, size_t range_step_, size_t file_size_) + SeekableReadBuffer & input_, ThreadPoolCallbackRunnerUnsafe schedule_, size_t max_working_readers_, size_t range_step_, size_t file_size_) : SeekableReadBuffer(nullptr, 0) , max_working_readers(max_working_readers_) , schedule(std::move(schedule_)) @@ -293,7 +293,7 @@ void ParallelReadBuffer::finishAndWait() } std::unique_ptr wrapInParallelReadBufferIfSupported( - ReadBuffer & buf, ThreadPoolCallbackRunner schedule, size_t max_working_readers, + ReadBuffer & buf, ThreadPoolCallbackRunnerUnsafe schedule, size_t max_working_readers, size_t range_step, size_t file_size) { auto * seekable = dynamic_cast(&buf); diff --git a/src/IO/ParallelReadBuffer.h b/src/IO/ParallelReadBuffer.h index daac11903995..cfeec2b3677d 100644 --- a/src/IO/ParallelReadBuffer.h +++ b/src/IO/ParallelReadBuffer.h @@ -28,7 +28,7 @@ class ParallelReadBuffer : public SeekableReadBuffer, public WithFileSize bool nextImpl() override; public: - ParallelReadBuffer(SeekableReadBuffer & input, ThreadPoolCallbackRunner schedule_, size_t max_working_readers, size_t range_step_, size_t file_size); + ParallelReadBuffer(SeekableReadBuffer & input, ThreadPoolCallbackRunnerUnsafe schedule_, size_t max_working_readers, size_t range_step_, size_t file_size); ~ParallelReadBuffer() override { finishAndWait(); } @@ -63,7 +63,7 @@ class ParallelReadBuffer : public SeekableReadBuffer, public WithFileSize size_t max_working_readers; std::atomic_size_t active_working_readers{0}; - ThreadPoolCallbackRunner schedule; + ThreadPoolCallbackRunnerUnsafe schedule; SeekableReadBuffer & input; size_t file_size; @@ -94,7 +94,7 @@ class ParallelReadBuffer : public SeekableReadBuffer, public WithFileSize /// If `buf` is a 
SeekableReadBuffer with supportsReadAt() == true, creates a ParallelReadBuffer /// from it. Otherwise returns nullptr; std::unique_ptr wrapInParallelReadBufferIfSupported( - ReadBuffer & buf, ThreadPoolCallbackRunner schedule, size_t max_working_readers, + ReadBuffer & buf, ThreadPoolCallbackRunnerUnsafe schedule, size_t max_working_readers, size_t range_step, size_t file_size); } diff --git a/src/IO/S3/copyS3File.cpp b/src/IO/S3/copyS3File.cpp index b780c1fc08f6..3b1f25ed9949 100644 --- a/src/IO/S3/copyS3File.cpp +++ b/src/IO/S3/copyS3File.cpp @@ -58,7 +58,7 @@ namespace const String & dest_key_, const S3Settings::RequestSettings & request_settings_, const std::optional> & object_metadata_, - ThreadPoolCallbackRunner schedule_, + ThreadPoolCallbackRunnerUnsafe schedule_, bool for_disk_s3_, BlobStorageLogWriterPtr blob_storage_log_, const LoggerPtr log_) @@ -84,7 +84,7 @@ namespace const S3Settings::RequestSettings & request_settings; const S3Settings::RequestSettings::PartUploadSettings & upload_settings; const std::optional> & object_metadata; - ThreadPoolCallbackRunner schedule; + ThreadPoolCallbackRunnerUnsafe schedule; bool for_disk_s3; BlobStorageLogWriterPtr blob_storage_log; const LoggerPtr log; @@ -467,7 +467,7 @@ namespace const String & dest_key_, const S3Settings::RequestSettings & request_settings_, const std::optional> & object_metadata_, - ThreadPoolCallbackRunner schedule_, + ThreadPoolCallbackRunnerUnsafe schedule_, bool for_disk_s3_, BlobStorageLogWriterPtr blob_storage_log_) : UploadHelper(client_ptr_, dest_bucket_, dest_key_, request_settings_, object_metadata_, schedule_, for_disk_s3_, blob_storage_log_, getLogger("copyDataToS3File")) @@ -650,7 +650,7 @@ namespace const S3Settings::RequestSettings & request_settings_, const ReadSettings & read_settings_, const std::optional> & object_metadata_, - ThreadPoolCallbackRunner schedule_, + ThreadPoolCallbackRunnerUnsafe schedule_, bool for_disk_s3_, BlobStorageLogWriterPtr blob_storage_log_) : UploadHelper(client_ptr_, dest_bucket_, dest_key_, request_settings_, object_metadata_, schedule_, for_disk_s3_, blob_storage_log_, getLogger("copyS3File")) @@ -856,7 +856,7 @@ void copyDataToS3File( const S3Settings::RequestSettings & settings, BlobStorageLogWriterPtr blob_storage_log, const std::optional> & object_metadata, - ThreadPoolCallbackRunner schedule, + ThreadPoolCallbackRunnerUnsafe schedule, bool for_disk_s3) { CopyDataToFileHelper helper{create_read_buffer, offset, size, dest_s3_client, dest_bucket, dest_key, settings, object_metadata, schedule, for_disk_s3, blob_storage_log}; @@ -876,7 +876,7 @@ void copyS3File( const ReadSettings & read_settings, BlobStorageLogWriterPtr blob_storage_log, const std::optional> & object_metadata, - ThreadPoolCallbackRunner schedule, + ThreadPoolCallbackRunnerUnsafe schedule, bool for_disk_s3) { if (settings.allow_native_copy) diff --git a/src/IO/S3/copyS3File.h b/src/IO/S3/copyS3File.h index 5eb6f702473a..d5da4d260b15 100644 --- a/src/IO/S3/copyS3File.h +++ b/src/IO/S3/copyS3File.h @@ -42,7 +42,7 @@ void copyS3File( const ReadSettings & read_settings, BlobStorageLogWriterPtr blob_storage_log, const std::optional> & object_metadata = std::nullopt, - ThreadPoolCallbackRunner schedule_ = {}, + ThreadPoolCallbackRunnerUnsafe schedule_ = {}, bool for_disk_s3 = false); /// Copies data from any seekable source to S3. 
@@ -60,7 +60,7 @@ void copyDataToS3File( const S3Settings::RequestSettings & settings, BlobStorageLogWriterPtr blob_storage_log, const std::optional> & object_metadata = std::nullopt, - ThreadPoolCallbackRunner schedule_ = {}, + ThreadPoolCallbackRunnerUnsafe schedule_ = {}, bool for_disk_s3 = false); } diff --git a/src/IO/WriteBufferFromS3.cpp b/src/IO/WriteBufferFromS3.cpp index 5e898dec9b84..e41867ce225b 100644 --- a/src/IO/WriteBufferFromS3.cpp +++ b/src/IO/WriteBufferFromS3.cpp @@ -94,7 +94,7 @@ WriteBufferFromS3::WriteBufferFromS3( const S3Settings::RequestSettings & request_settings_, BlobStorageLogWriterPtr blob_log_, std::optional> object_metadata_, - ThreadPoolCallbackRunner schedule_, + ThreadPoolCallbackRunnerUnsafe schedule_, const WriteSettings & write_settings_) : WriteBufferFromFileBase(buf_size_, nullptr, 0) , bucket(bucket_) diff --git a/src/IO/WriteBufferFromS3.h b/src/IO/WriteBufferFromS3.h index e7a06f251158..1df559b252ce 100644 --- a/src/IO/WriteBufferFromS3.h +++ b/src/IO/WriteBufferFromS3.h @@ -41,7 +41,7 @@ class WriteBufferFromS3 final : public WriteBufferFromFileBase const S3Settings::RequestSettings & request_settings_, BlobStorageLogWriterPtr blob_log_, std::optional> object_metadata_ = std::nullopt, - ThreadPoolCallbackRunner schedule_ = {}, + ThreadPoolCallbackRunnerUnsafe schedule_ = {}, const WriteSettings & write_settings_ = {}); ~WriteBufferFromS3() override; diff --git a/src/IO/tests/gtest_writebuffer_s3.cpp b/src/IO/tests/gtest_writebuffer_s3.cpp index d9cb486c09e4..447b72ed7c6e 100644 --- a/src/IO/tests/gtest_writebuffer_s3.cpp +++ b/src/IO/tests/gtest_writebuffer_s3.cpp @@ -452,7 +452,7 @@ struct UploadPartFailIngection: InjectionModel struct BaseSyncPolicy { virtual ~BaseSyncPolicy() = default; - virtual DB::ThreadPoolCallbackRunner getScheduler() { return {}; } + virtual DB::ThreadPoolCallbackRunnerUnsafe getScheduler() { return {}; } virtual void execute(size_t) {} virtual void setAutoExecute(bool) {} @@ -465,7 +465,7 @@ struct SimpleAsyncTasks : BaseSyncPolicy bool auto_execute = false; std::deque> queue; - DB::ThreadPoolCallbackRunner getScheduler() override + DB::ThreadPoolCallbackRunnerUnsafe getScheduler() override { return [this] (std::function && operation, size_t /*priority*/) { diff --git a/src/Interpreters/AsynchronousInsertQueue.cpp b/src/Interpreters/AsynchronousInsertQueue.cpp index c05d1b8f979b..fbbfaa5f7522 100644 --- a/src/Interpreters/AsynchronousInsertQueue.cpp +++ b/src/Interpreters/AsynchronousInsertQueue.cpp @@ -281,10 +281,19 @@ void AsynchronousInsertQueue::scheduleDataProcessingJob( /// Wrap 'unique_ptr' with 'shared_ptr' to make this /// lambda copyable and allow to save it to the thread pool. - pool.scheduleOrThrowOnError( - [this, key, global_context, shard_num, my_data = std::make_shared(std::move(data))]() mutable - { processData(key, std::move(*my_data), std::move(global_context), flush_time_history_per_queue_shard[shard_num]); }, - priority); + auto data_shared = std::make_shared(std::move(data)); + try + { + pool.scheduleOrThrowOnError( + [this, key, global_context, shard_num, my_data = data_shared]() mutable + { processData(key, std::move(*my_data), std::move(global_context), flush_time_history_per_queue_shard[shard_num]); }, + priority); + } + catch (...) 
+ { + for (auto & entry : (**data_shared).entries) + entry->finish(std::current_exception()); + } } void AsynchronousInsertQueue::preprocessInsertQuery(const ASTPtr & query, const ContextPtr & query_context) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 2db360f91838..6d6bbddfb6a9 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -1502,20 +1502,6 @@ MergeTreeData::LoadPartResult MergeTreeData::loadDataPartWithRetries( UNREACHABLE(); } -/// Wait for all tasks to finish and rethrow the first exception if any. -/// The tasks access local variables of the caller function, so we can't just rethrow the first exception until all other tasks are finished. -void waitForAllToFinishAndRethrowFirstError(std::vector> & futures) -{ - /// First wait for all tasks to finish. - for (auto & future : futures) - future.wait(); - - /// Now rethrow the first exception if any. - for (auto & future : futures) - future.get(); - - futures.clear(); -} std::vector MergeTreeData::loadDataPartsFromDisk(PartLoadingTreeNodes & parts_to_load) { @@ -1526,83 +1512,67 @@ std::vector MergeTreeData::loadDataPartsFromDisk( /// Shuffle all the parts randomly to possible speed up loading them from JBOD. std::shuffle(parts_to_load.begin(), parts_to_load.end(), thread_local_rng); - auto runner = threadPoolCallbackRunner(getActivePartsLoadingThreadPool().get(), "ActiveParts"); - std::vector> parts_futures; - std::mutex part_select_mutex; std::mutex part_loading_mutex; std::vector loaded_parts; - try + ThreadPoolCallbackRunnerLocal runner(getActivePartsLoadingThreadPool().get(), "ActiveParts"); + while (true) { - while (true) + bool are_parts_to_load_empty = false; { - bool are_parts_to_load_empty = false; - { - std::lock_guard lock(part_select_mutex); - are_parts_to_load_empty = parts_to_load.empty(); - } + std::lock_guard lock(part_select_mutex); + are_parts_to_load_empty = parts_to_load.empty(); + } - if (are_parts_to_load_empty) - { - /// Wait for all scheduled tasks. - waitForAllToFinishAndRethrowFirstError(parts_futures); + if (are_parts_to_load_empty) + { + /// Wait for all scheduled tasks. + runner.waitForAllToFinishAndRethrowFirstError(); - /// At this point it is possible, that some other parts appeared in the queue for processing (parts_to_load), - /// because we added them from inside the pool. - /// So we need to recheck it. - } + /// At this point it is possible, that some other parts appeared in the queue for processing (parts_to_load), + /// because we added them from inside the pool. + /// So we need to recheck it. + } - PartLoadingTree::NodePtr current_part; - { - std::lock_guard lock(part_select_mutex); - if (parts_to_load.empty()) - break; + PartLoadingTree::NodePtr current_part; + { + std::lock_guard lock(part_select_mutex); + if (parts_to_load.empty()) + break; - current_part = parts_to_load.back(); - parts_to_load.pop_back(); - } + current_part = parts_to_load.back(); + parts_to_load.pop_back(); + } - parts_futures.push_back(runner( - [&, part = std::move(current_part)]() + runner( + [&, part = std::move(current_part)]() + { + /// Pass a separate mutex to guard the set of parts, because this lambda + /// is called concurrently but with already locked @data_parts_mutex. 
+ auto res = loadDataPartWithRetries( + part->info, part->name, part->disk, + DataPartState::Active, part_loading_mutex, loading_parts_initial_backoff_ms, + loading_parts_max_backoff_ms, loading_parts_max_tries); + + part->is_loaded = true; + bool is_active_part = res.part->getState() == DataPartState::Active; + + /// If part is broken or duplicate or should be removed according to transaction + /// and it has any covered parts then try to load them to replace this part. + if (!is_active_part && !part->children.empty()) { - /// Pass a separate mutex to guard the set of parts, because this lambda - /// is called concurrently but with already locked @data_parts_mutex. - auto res = loadDataPartWithRetries( - part->info, part->name, part->disk, - DataPartState::Active, part_loading_mutex, loading_parts_initial_backoff_ms, - loading_parts_max_backoff_ms, loading_parts_max_tries); - - part->is_loaded = true; - bool is_active_part = res.part->getState() == DataPartState::Active; - - /// If part is broken or duplicate or should be removed according to transaction - /// and it has any covered parts then try to load them to replace this part. - if (!is_active_part && !part->children.empty()) - { - std::lock_guard lock{part_select_mutex}; - for (const auto & [_, node] : part->children) - parts_to_load.push_back(node); - } - - { - std::lock_guard lock(part_loading_mutex); - loaded_parts.push_back(std::move(res)); - } - }, Priority{0})); - } - } - catch (...) - { - /// Wait for all scheduled tasks - /// A future becomes invalid after .get() call - /// + .wait() method is used not to throw any exception here. - for (auto & future: parts_futures) - if (future.valid()) - future.wait(); + std::lock_guard lock{part_select_mutex}; + for (const auto & [_, node] : part->children) + parts_to_load.push_back(node); + } - throw; + { + std::lock_guard lock(part_loading_mutex); + loaded_parts.push_back(std::move(res)); + } + }, Priority{0}); } return loaded_parts; @@ -1691,11 +1661,9 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks, std::optional(getActivePartsLoadingThreadPool().get(), "ActiveParts"); std::vector parts_to_load_by_disk(disks.size()); - std::vector> disks_futures; - disks_futures.reserve(disks.size()); + ThreadPoolCallbackRunnerLocal runner(getActivePartsLoadingThreadPool().get(), "ActiveParts"); for (size_t i = 0; i < disks.size(); ++i) { @@ -1705,7 +1673,7 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks, std::optionaliterateDirectory(relative_data_path); it->isValid(); it->next()) { @@ -1717,11 +1685,11 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks, std::optionalname(), format_version)) disk_parts.emplace_back(*part_info, it->name(), disk_ptr); } - }, Priority{0})); + }, Priority{0}); } /// For iteration to be completed - waitForAllToFinishAndRethrowFirstError(disks_futures); + runner.waitForAllToFinishAndRethrowFirstError(); PartLoadingTree::PartLoadingInfos parts_to_load; for (auto & disk_parts : parts_to_load_by_disk) @@ -1906,11 +1874,10 @@ try std::atomic_size_t num_loaded_parts = 0; - auto runner = threadPoolCallbackRunner(getOutdatedPartsLoadingThreadPool().get(), "OutdatedParts"); - std::vector> parts_futures; - auto blocker = CannotAllocateThreadFaultInjector::blockFaultInjections(); + ThreadPoolCallbackRunnerLocal runner(getOutdatedPartsLoadingThreadPool().get(), "OutdatedParts"); + while (true) { ThreadFuzzer::maybeInjectSleep(); @@ -1923,7 +1890,7 @@ try { /// Wait for every scheduled task /// In case of any exception it will be re-thrown and 
server will be terminated. - waitForAllToFinishAndRethrowFirstError(parts_futures); + runner.waitForAllToFinishAndRethrowFirstError(); LOG_DEBUG(log, "Stopped loading outdated data parts because task was canceled. " @@ -1938,7 +1905,7 @@ try outdated_unloaded_data_parts.pop_back(); } - parts_futures.push_back(runner([&, my_part = part]() + runner([&, my_part = part]() { auto res = loadDataPartWithRetries( my_part->info, my_part->name, my_part->disk, @@ -1955,12 +1922,10 @@ try res.part->remove(); else preparePartForRemoval(res.part); - }, Priority{})); + }, Priority{}); } - /// Wait for every scheduled task - for (auto & future : parts_futures) - future.get(); + runner.waitForAllToFinishAndRethrowFirstError(); LOG_DEBUG(log, "Loaded {} outdated data parts {}", num_loaded_parts, is_async ? "asynchronously" : "synchronously"); @@ -2449,7 +2414,6 @@ void MergeTreeData::clearPartsFromFilesystemImpl(const DataPartsVector & parts_t /// Parallel parts removal. std::mutex part_names_mutex; - auto runner = threadPoolCallbackRunner(getPartsCleaningThreadPool().get(), "PartsCleaning"); /// This flag disallow straightforward concurrent parts removal. It's required only in case /// when we have parts on zero-copy disk + at least some of them were mutated. @@ -2469,12 +2433,11 @@ void MergeTreeData::clearPartsFromFilesystemImpl(const DataPartsVector & parts_t LOG_DEBUG( log, "Removing {} parts from filesystem (concurrently): Parts: [{}]", parts_to_remove.size(), fmt::join(parts_to_remove, ", ")); - std::vector> parts_to_remove_futures; - parts_to_remove_futures.reserve(parts_to_remove.size()); + ThreadPoolCallbackRunnerLocal runner(getPartsCleaningThreadPool().get(), "PartsCleaning"); for (const DataPartPtr & part : parts_to_remove) { - parts_to_remove_futures.push_back(runner([&part, &part_names_mutex, part_names_succeed, thread_group = CurrentThread::getGroup()] + runner([&part, &part_names_mutex, part_names_succeed, thread_group = CurrentThread::getGroup()] { asMutableDeletingPart(part)->remove(); if (part_names_succeed) @@ -2482,10 +2445,10 @@ void MergeTreeData::clearPartsFromFilesystemImpl(const DataPartsVector & parts_t std::lock_guard lock(part_names_mutex); part_names_succeed->insert(part->name); } - }, Priority{0})); + }, Priority{0}); } - waitForAllToFinishAndRethrowFirstError(parts_to_remove_futures); + runner.waitForAllToFinishAndRethrowFirstError(); return; } @@ -2557,13 +2520,13 @@ void MergeTreeData::clearPartsFromFilesystemImpl(const DataPartsVector & parts_t return independent_ranges; }; - std::vector> part_removal_futures; + ThreadPoolCallbackRunnerLocal runner(getPartsCleaningThreadPool().get(), "PartsCleaning"); - auto schedule_parts_removal = [this, &runner, &part_names_mutex, part_names_succeed, &part_removal_futures]( + auto schedule_parts_removal = [this, &runner, &part_names_mutex, part_names_succeed]( const MergeTreePartInfo & range, DataPartsVector && parts_in_range) { /// Below, range should be captured by copy to avoid use-after-scope on exception from pool - part_removal_futures.push_back(runner( + runner( [this, range, &part_names_mutex, part_names_succeed, batch = std::move(parts_in_range)] { LOG_TRACE(log, "Removing {} parts in blocks range {}", batch.size(), range.getPartNameForLogs()); @@ -2577,7 +2540,7 @@ void MergeTreeData::clearPartsFromFilesystemImpl(const DataPartsVector & parts_t part_names_succeed->insert(part->name); } } - }, Priority{0})); + }, Priority{0}); }; RemovalRanges independent_ranges = split_into_independent_ranges(parts_to_remove, /* split_times 
*/ 0); @@ -2641,7 +2604,7 @@ void MergeTreeData::clearPartsFromFilesystemImpl(const DataPartsVector & parts_t independent_ranges = split_into_independent_ranges(excluded_parts, /* split_times */ 0); - waitForAllToFinishAndRethrowFirstError(part_removal_futures); + runner.waitForAllToFinishAndRethrowFirstError(); for (size_t i = 0; i < independent_ranges.infos.size(); ++i) { @@ -2650,7 +2613,7 @@ void MergeTreeData::clearPartsFromFilesystemImpl(const DataPartsVector & parts_t schedule_parts_removal(range, std::move(parts_in_range)); } - waitForAllToFinishAndRethrowFirstError(part_removal_futures); + runner.waitForAllToFinishAndRethrowFirstError(); if (parts_to_remove.size() != sum_of_ranges + excluded_parts.size()) throw Exception(ErrorCodes::LOGICAL_ERROR, diff --git a/src/Storages/MergeTree/MergeTreeMarksLoader.cpp b/src/Storages/MergeTree/MergeTreeMarksLoader.cpp index 6798f97e4942..28d706096644 100644 --- a/src/Storages/MergeTree/MergeTreeMarksLoader.cpp +++ b/src/Storages/MergeTree/MergeTreeMarksLoader.cpp @@ -239,7 +239,7 @@ MarkCache::MappedPtr MergeTreeMarksLoader::loadMarksSync() std::future MergeTreeMarksLoader::loadMarksAsync() { - return scheduleFromThreadPool( + return scheduleFromThreadPoolUnsafe( [this]() -> MarkCache::MappedPtr { ProfileEvents::increment(ProfileEvents::BackgroundLoadingMarksTasks); diff --git a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp index c19b4ddd8a23..6d2875b8d9f0 100644 --- a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp +++ b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp @@ -154,7 +154,7 @@ std::future MergeTreePrefetchedReadPool::createPrefetchedFuture(IMergeTree reader->prefetchBeginOfRange(priority); }; - return scheduleFromThreadPool(std::move(task), prefetch_threadpool, "ReadPrepare", priority); + return scheduleFromThreadPoolUnsafe(std::move(task), prefetch_threadpool, "ReadPrepare", priority); } void MergeTreePrefetchedReadPool::createPrefetchedReadersForTask(ThreadTask & task) diff --git a/src/Storages/MergeTree/MergeTreeSource.cpp b/src/Storages/MergeTree/MergeTreeSource.cpp index e1d1d0951e40..02b4768f5f26 100644 --- a/src/Storages/MergeTree/MergeTreeSource.cpp +++ b/src/Storages/MergeTree/MergeTreeSource.cpp @@ -105,7 +105,7 @@ struct MergeTreeSource::AsyncReadingState AsyncReadingState() { control = std::make_shared(); - callback_runner = threadPoolCallbackRunner(getIOThreadPool().get(), "MergeTreeRead"); + callback_runner = threadPoolCallbackRunnerUnsafe(getIOThreadPool().get(), "MergeTreeRead"); } ~AsyncReadingState() @@ -128,7 +128,7 @@ struct MergeTreeSource::AsyncReadingState } private: - ThreadPoolCallbackRunner callback_runner; + ThreadPoolCallbackRunnerUnsafe callback_runner; std::shared_ptr control; }; #endif diff --git a/src/Storages/StorageAzureBlob.cpp b/src/Storages/StorageAzureBlob.cpp index 306a5eac8e59..8f18426c8513 100644 --- a/src/Storages/StorageAzureBlob.cpp +++ b/src/Storages/StorageAzureBlob.cpp @@ -1190,7 +1190,7 @@ StorageAzureBlobSource::StorageAzureBlobSource( , file_iterator(file_iterator_) , need_only_count(need_only_count_) , create_reader_pool(CurrentMetrics::ObjectStorageAzureThreads, CurrentMetrics::ObjectStorageAzureThreadsActive, CurrentMetrics::ObjectStorageAzureThreadsScheduled, 1) - , create_reader_scheduler(threadPoolCallbackRunner(create_reader_pool, "AzureReader")) + , create_reader_scheduler(threadPoolCallbackRunnerUnsafe(create_reader_pool, "AzureReader")) { reader = createReader(); if (reader) diff --git 
a/src/Storages/StorageAzureBlob.h b/src/Storages/StorageAzureBlob.h index 3f1ba33f6366..5b0d8802657d 100644 --- a/src/Storages/StorageAzureBlob.h +++ b/src/Storages/StorageAzureBlob.h @@ -330,7 +330,7 @@ class StorageAzureBlobSource : public ISource, WithContext LoggerPtr log = getLogger("StorageAzureBlobSource"); ThreadPool create_reader_pool; - ThreadPoolCallbackRunner create_reader_scheduler; + ThreadPoolCallbackRunnerUnsafe create_reader_scheduler; std::future reader_future; /// Recreate ReadBuffer and Pipeline for each file. diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp index dbcd47c57451..5a2815a30f37 100644 --- a/src/Storages/StorageBuffer.cpp +++ b/src/Storages/StorageBuffer.cpp @@ -830,23 +830,22 @@ bool StorageBuffer::checkThresholdsImpl(bool direct, size_t rows, size_t bytes, void StorageBuffer::flushAllBuffers(bool check_thresholds) { + ThreadPoolCallbackRunnerLocal runner(*flush_pool, "BufferFlush"); for (auto & buf : buffers) { if (flush_pool) { - scheduleFromThreadPool([&] () + runner([&]() { flushBuffer(buf, check_thresholds, false); - }, *flush_pool, "BufferFlush"); + }); } else { flushBuffer(buf, check_thresholds, false); } } - - if (flush_pool) - flush_pool->wait(); + runner.waitForAllToFinishAndRethrowFirstError(); } diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 12c2ad331ad0..69d3cf3ad3b4 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -283,17 +283,6 @@ size_t getClusterQueriedNodes(const Settings & settings, const ClusterPtr & clus return (num_remote_shards + num_local_shards) * settings.max_parallel_replicas; } -template -void waitFutures(F & futures) -{ - for (auto & future : futures) - future.wait(); - /// Make sure there is no exception. - for (auto & future : futures) - future.get(); - futures.clear(); -} - } /// For destruction of std::unique_ptr of type that is incomplete in class definition. @@ -1296,31 +1285,27 @@ void StorageDistributed::initializeFromDisk() /// Make initialization for large number of disks parallel. 
ThreadPool pool(CurrentMetrics::StorageDistributedThreads, CurrentMetrics::StorageDistributedThreadsActive, CurrentMetrics::StorageDistributedThreadsScheduled, disks.size()); - std::vector> futures; + ThreadPoolCallbackRunnerLocal runner(pool, "DistInit"); for (const DiskPtr & disk : disks) { - auto future = scheduleFromThreadPool([this, disk_to_init = disk] + runner([this, disk_to_init = disk] { initializeDirectoryQueuesForDisk(disk_to_init); - }, pool, "DistInit"); - futures.push_back(std::move(future)); + }); } - waitFutures(futures); - pool.wait(); + runner.waitForAllToFinishAndRethrowFirstError(); const auto & paths = getDataPaths(); std::vector last_increment(paths.size()); for (size_t i = 0; i < paths.size(); ++i) { - auto future = scheduleFromThreadPool([&paths, &last_increment, i] + runner([&paths, &last_increment, i] { last_increment[i] = getMaximumFileNumber(paths[i]); - }, pool, "DistInit"); - futures.push_back(std::move(future)); + }); } - waitFutures(futures); - pool.wait(); + runner.waitForAllToFinishAndRethrowFirstError(); for (const auto inc : last_increment) { @@ -1760,19 +1745,17 @@ void StorageDistributed::flushClusterNodesAllDataImpl(ContextPtr local_context, Stopwatch watch; ThreadPool pool(CurrentMetrics::StorageDistributedThreads, CurrentMetrics::StorageDistributedThreadsActive, CurrentMetrics::StorageDistributedThreadsScheduled, directory_queues.size()); - std::vector> futures; + ThreadPoolCallbackRunnerLocal runner(pool, "DistFlush"); for (const auto & node : directory_queues) { - auto future = scheduleFromThreadPool([node_to_flush = node, &settings_changes] + runner([node_to_flush = node, &settings_changes] { node_to_flush->flushAllData(settings_changes); - }, pool, "DistFlush"); - futures.push_back(std::move(future)); + }); } - waitFutures(futures); - pool.wait(); + runner.waitForAllToFinishAndRethrowFirstError(); LOG_INFO(log, "Pending INSERT blocks flushed, took {} ms.", watch.elapsedMilliseconds()); } diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index 2d3aef312bf9..9e49ce6f2dee 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -204,7 +204,7 @@ class StorageS3Source::DisclosedGlobIterator::Impl : WithContext , read_keys(read_keys_) , request_settings(request_settings_) , list_objects_pool(CurrentMetrics::StorageS3Threads, CurrentMetrics::StorageS3ThreadsActive, CurrentMetrics::StorageS3ThreadsScheduled, 1) - , list_objects_scheduler(threadPoolCallbackRunner(list_objects_pool, "ListObjects")) + , list_objects_scheduler(threadPoolCallbackRunnerUnsafe(list_objects_pool, "ListObjects")) , file_progress_callback(file_progress_callback_) { if (globbed_uri.bucket.find_first_of("*?{") != globbed_uri.bucket.npos) @@ -413,7 +413,7 @@ class StorageS3Source::DisclosedGlobIterator::Impl : WithContext S3Settings::RequestSettings request_settings; ThreadPool list_objects_pool; - ThreadPoolCallbackRunner list_objects_scheduler; + ThreadPoolCallbackRunnerUnsafe list_objects_scheduler; std::future outcome_future; std::function file_progress_callback; }; @@ -527,7 +527,7 @@ StorageS3Source::ReadTaskIterator::ReadTaskIterator( : callback(callback_) { ThreadPool pool(CurrentMetrics::StorageS3Threads, CurrentMetrics::StorageS3ThreadsActive, CurrentMetrics::StorageS3ThreadsScheduled, max_threads_count); - auto pool_scheduler = threadPoolCallbackRunner(pool, "S3ReadTaskItr"); + auto pool_scheduler = threadPoolCallbackRunnerUnsafe(pool, "S3ReadTaskItr"); std::vector> keys; keys.reserve(max_threads_count); @@ -598,7 +598,7 @@ 
StorageS3Source::StorageS3Source( , max_parsing_threads(max_parsing_threads_) , need_only_count(need_only_count_) , create_reader_pool(CurrentMetrics::StorageS3Threads, CurrentMetrics::StorageS3ThreadsActive, CurrentMetrics::StorageS3ThreadsScheduled, 1) - , create_reader_scheduler(threadPoolCallbackRunner(create_reader_pool, "CreateS3Reader")) + , create_reader_scheduler(threadPoolCallbackRunnerUnsafe(create_reader_pool, "CreateS3Reader")) { } @@ -875,7 +875,7 @@ class StorageS3Sink : public SinkToStorage configuration_.request_settings, std::move(blob_log), std::nullopt, - threadPoolCallbackRunner(getIOThreadPool().get(), "S3ParallelWrite"), + threadPoolCallbackRunnerUnsafe(getIOThreadPool().get(), "S3ParallelWrite"), context->getWriteSettings()), compression_method, static_cast(settings.output_format_compression_level), diff --git a/src/Storages/StorageS3.h b/src/Storages/StorageS3.h index 19cbfaa6f08d..c8ab28fb20ed 100644 --- a/src/Storages/StorageS3.h +++ b/src/Storages/StorageS3.h @@ -241,7 +241,7 @@ class StorageS3Source : public SourceWithKeyCondition, WithContext LoggerPtr log = getLogger("StorageS3Source"); ThreadPool create_reader_pool; - ThreadPoolCallbackRunner create_reader_scheduler; + ThreadPoolCallbackRunnerUnsafe create_reader_scheduler; std::future reader_future; std::atomic initialized{false}; diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index cc46cc8f8dcf..511ccbdef781 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -5,7 +5,6 @@ #include #include -#include #include #include #include diff --git a/src/Storages/System/StorageSystemDetachedParts.cpp b/src/Storages/System/StorageSystemDetachedParts.cpp index ebcd8d63a52a..31d566ef8b6a 100644 --- a/src/Storages/System/StorageSystemDetachedParts.cpp +++ b/src/Storages/System/StorageSystemDetachedParts.cpp @@ -162,19 +162,9 @@ class DetachedPartsSource : public ISource worker_state.tasks.push_back({part.disk, relative_path, &parts_sizes.at(p_id - begin)}); } - std::vector> futures; - SCOPE_EXIT_SAFE({ - /// Cancel all workers - worker_state.next_task.store(worker_state.tasks.size()); - /// Exceptions are not propagated - for (auto & future : futures) - if (future.valid()) - future.wait(); - futures.clear(); - }); - auto max_thread_to_run = std::max(size_t(1), std::min(support_threads, worker_state.tasks.size() / 10)); - futures.reserve(max_thread_to_run); + + ThreadPoolCallbackRunnerLocal runner(getIOThreadPool().get(), "DP_BytesOnDisk"); for (size_t i = 0; i < max_thread_to_run; ++i) { @@ -191,16 +181,10 @@ class DetachedPartsSource : public ISource } }; - futures.push_back( - scheduleFromThreadPool( - std::move(worker), - getIOThreadPool().get(), - "DP_BytesOnDisk")); + runner(std::move(worker)); } - /// Exceptions are propagated - for (auto & future : futures) - future.get(); + runner.waitForAllToFinishAndRethrowFirstError(); } void generateRows(MutableColumns & new_columns, size_t max_rows) diff --git a/tests/01947_multiple_pipe_read_sample_data_ZbApel.tsv b/tests/01947_multiple_pipe_read_sample_data_ZbApel.tsv deleted file mode 100644 index ab35653b8ddd..000000000000 --- a/tests/01947_multiple_pipe_read_sample_data_ZbApel.tsv +++ /dev/null @@ -1,7 +0,0 @@ -0 BBB -1 BBB -2 BBB -3 BBB -4 AAA -5 BBB -6 AAA diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 9cfd087bd672..d1132a26ea8c 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -908,6 +908,7 @@ class MergeTreeSettingsRandomizer: ), "cache_populated_by_fetch": lambda: random.randint(0, 
1), "concurrent_part_removal_threshold": threshold_generator(0.2, 0.3, 0, 100), + "old_parts_lifetime": threshold_generator(0.2, 0.3, 0, 8 * 60), } @staticmethod From 578c4cfb9deeb023075695804f01ee100106115b Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Fri, 5 Apr 2024 00:25:18 +0200 Subject: [PATCH 12/90] fix --- src/Common/threadPoolCallbackRunner.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/Common/threadPoolCallbackRunner.h b/src/Common/threadPoolCallbackRunner.h index cec07bbd8922..ef22f9038d80 100644 --- a/src/Common/threadPoolCallbackRunner.h +++ b/src/Common/threadPoolCallbackRunner.h @@ -9,6 +9,11 @@ namespace DB { +namespace ErrorCodes +{ +extern const int LOGICAL_ERROR; +} + /// High-order function to run callbacks (functions with 'void()' signature) somewhere asynchronously. template > using ThreadPoolCallbackRunnerUnsafe = std::function(Callback &&, Priority)>; @@ -172,7 +177,8 @@ class ThreadPoolCallbackRunnerLocal /// It can be cancelled only when waiting in dtor if (state == CANCELLED) continue; - task->future.wait(); + if (task->future.valid()) + task->future.wait(); } } From 53a3ad609aa60402b26547b295a12768aec9416c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 7 Apr 2024 21:32:40 +0200 Subject: [PATCH 13/90] Fix trash in documentation --- docs/en/operations/system-tables/asynchronous_metric_log.md | 2 +- src/Common/AsynchronousMetrics.cpp | 2 +- src/Common/AsynchronousMetrics.h | 4 ++-- src/Interpreters/SystemLog.cpp | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/en/operations/system-tables/asynchronous_metric_log.md b/docs/en/operations/system-tables/asynchronous_metric_log.md index e63ab65ba074..e0d3254fe736 100644 --- a/docs/en/operations/system-tables/asynchronous_metric_log.md +++ b/docs/en/operations/system-tables/asynchronous_metric_log.md @@ -3,7 +3,7 @@ slug: /en/operations/system-tables/asynchronous_metric_log --- # asynchronous_metric_log -Contains the historical values for `system.asynchronous_metrics`, which are saved once per minute. Enabled by default. +Contains the historical values for `system.asynchronous_metrics`, which are saved once per time interval (one second by default). Enabled by default. Columns: diff --git a/src/Common/AsynchronousMetrics.cpp b/src/Common/AsynchronousMetrics.cpp index ab54b180fbfc..6b26f65deab1 100644 --- a/src/Common/AsynchronousMetrics.cpp +++ b/src/Common/AsynchronousMetrics.cpp @@ -56,7 +56,7 @@ static std::unique_ptr openFileIfExists(const std::stri AsynchronousMetrics::AsynchronousMetrics( - int update_period_seconds, + unsigned update_period_seconds, const ProtocolServerMetricsFunc & protocol_server_metrics_func_) : update_period(update_period_seconds) , log(getLogger("AsynchronousMetrics")) diff --git a/src/Common/AsynchronousMetrics.h b/src/Common/AsynchronousMetrics.h index 4b3d28e80c54..b62529a08e71 100644 --- a/src/Common/AsynchronousMetrics.h +++ b/src/Common/AsynchronousMetrics.h @@ -44,7 +44,7 @@ struct ProtocolServerMetrics size_t current_threads; }; -/** Periodically (by default, each minute, starting at 30 seconds offset) +/** Periodically (by default, each second) * calculates and updates some metrics, * that are not updated automatically (so, need to be asynchronously calculated). 
* @@ -64,7 +64,7 @@ class AsynchronousMetrics using ProtocolServerMetricsFunc = std::function()>; AsynchronousMetrics( - int update_period_seconds, + unsigned update_period_seconds, const ProtocolServerMetricsFunc & protocol_server_metrics_func_); virtual ~AsynchronousMetrics(); diff --git a/src/Interpreters/SystemLog.cpp b/src/Interpreters/SystemLog.cpp index db73fe038c04..3af8761ff8eb 100644 --- a/src/Interpreters/SystemLog.cpp +++ b/src/Interpreters/SystemLog.cpp @@ -291,7 +291,7 @@ SystemLogs::SystemLogs(ContextPtr global_context, const Poco::Util::AbstractConf global_context, "system", "filesystem_read_prefetches_log", config, "filesystem_read_prefetches_log", "Contains a history of all prefetches done during reading from MergeTables backed by a remote filesystem."); asynchronous_metric_log = createSystemLog( global_context, "system", "asynchronous_metric_log", config, - "asynchronous_metric_log", "Contains the historical values for system.asynchronous_metrics, which are saved once per minute."); + "asynchronous_metric_log", "Contains the historical values for system.asynchronous_metrics, once per time interval (one second by default)."); opentelemetry_span_log = createSystemLog( global_context, "system", "opentelemetry_span_log", config, "opentelemetry_span_log", "Contains information about trace spans for executed queries."); From b6aff78bf55d98b9b511dd80c7abd8396c9c5a4a Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Mon, 8 Apr 2024 16:13:46 +0200 Subject: [PATCH 14/90] fix --- src/Storages/StorageBuffer.cpp | 11 +++++++---- tests/clickhouse-test | 2 +- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp index 5a2815a30f37..97a459a5e72e 100644 --- a/src/Storages/StorageBuffer.cpp +++ b/src/Storages/StorageBuffer.cpp @@ -830,12 +830,14 @@ bool StorageBuffer::checkThresholdsImpl(bool direct, size_t rows, size_t bytes, void StorageBuffer::flushAllBuffers(bool check_thresholds) { - ThreadPoolCallbackRunnerLocal runner(*flush_pool, "BufferFlush"); + std::optional> runner; + if (flush_pool) + runner.emplace(*flush_pool, "BufferFlush"); for (auto & buf : buffers) { - if (flush_pool) + if (runner) { - runner([&]() + (*runner)([&]() { flushBuffer(buf, check_thresholds, false); }); @@ -845,7 +847,8 @@ void StorageBuffer::flushAllBuffers(bool check_thresholds) flushBuffer(buf, check_thresholds, false); } } - runner.waitForAllToFinishAndRethrowFirstError(); + if (runner) + runner->waitForAllToFinishAndRethrowFirstError(); } diff --git a/tests/clickhouse-test b/tests/clickhouse-test index d1132a26ea8c..eee6eb0c18dd 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -908,7 +908,7 @@ class MergeTreeSettingsRandomizer: ), "cache_populated_by_fetch": lambda: random.randint(0, 1), "concurrent_part_removal_threshold": threshold_generator(0.2, 0.3, 0, 100), - "old_parts_lifetime": threshold_generator(0.2, 0.3, 0, 8 * 60), + "old_parts_lifetime": threshold_generator(0.2, 0.3, 30, 8 * 60), } @staticmethod From 33c0ac5cc6e1befca39f54a2fccfaf8f6eb8ed0c Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Tue, 9 Apr 2024 15:50:15 +0200 Subject: [PATCH 15/90] Fix backup restore path for AzureBlobStorage --- src/Backups/BackupIO_AzureBlobStorage.cpp | 89 +++---------------- .../test.py | 71 +++++++++++++-- 2 files changed, 76 insertions(+), 84 deletions(-) diff --git a/src/Backups/BackupIO_AzureBlobStorage.cpp b/src/Backups/BackupIO_AzureBlobStorage.cpp index b9b208e321cf..2eb5233bd1b3 100644 --- 
a/src/Backups/BackupIO_AzureBlobStorage.cpp +++ b/src/Backups/BackupIO_AzureBlobStorage.cpp @@ -50,44 +50,20 @@ BackupReaderAzureBlobStorage::~BackupReaderAzureBlobStorage() = default; bool BackupReaderAzureBlobStorage::fileExists(const String & file_name) { - String key; - if (startsWith(file_name, ".")) - { - key= configuration.blob_path + file_name; - } - else - { - key = file_name; - } + String key = fs::path(configuration.blob_path) / file_name; return object_storage->exists(StoredObject(key)); } UInt64 BackupReaderAzureBlobStorage::getFileSize(const String & file_name) { - String key; - if (startsWith(file_name, ".")) - { - key= configuration.blob_path + file_name; - } - else - { - key = file_name; - } + String key = fs::path(configuration.blob_path) / file_name; ObjectMetadata object_metadata = object_storage->getObjectMetadata(key); return object_metadata.size_bytes; } std::unique_ptr BackupReaderAzureBlobStorage::readFile(const String & file_name) { - String key; - if (startsWith(file_name, ".")) - { - key= configuration.blob_path + file_name; - } - else - { - key = file_name; - } + String key = fs::path(configuration.blob_path) / file_name; return std::make_unique( client, key, read_settings, settings->max_single_read_retries, settings->max_single_download_retries); @@ -194,7 +170,7 @@ void BackupWriterAzureBlobStorage::copyFile(const String & destination, const St client, client, configuration.container, - fs::path(source), + fs::path(configuration.blob_path)/ source, 0, size, /* dest_container */ configuration.container, @@ -207,7 +183,7 @@ void BackupWriterAzureBlobStorage::copyFile(const String & destination, const St void BackupWriterAzureBlobStorage::copyDataToFile(const String & path_in_backup, const CreateReadBufferFunction & create_read_buffer, UInt64 start_pos, UInt64 length) { - copyDataToAzureBlobStorageFile(create_read_buffer, start_pos, length, client, configuration.container, path_in_backup, settings, + copyDataToAzureBlobStorageFile(create_read_buffer, start_pos, length, client, configuration.container, fs::path(configuration.blob_path) / path_in_backup, settings, threadPoolCallbackRunner(getBackupsIOThreadPool().get(), "BackupWRAzure")); } @@ -215,29 +191,13 @@ BackupWriterAzureBlobStorage::~BackupWriterAzureBlobStorage() = default; bool BackupWriterAzureBlobStorage::fileExists(const String & file_name) { - String key; - if (startsWith(file_name, ".")) - { - key= configuration.blob_path + file_name; - } - else - { - key = file_name; - } + String key = fs::path(configuration.blob_path) / file_name; return object_storage->exists(StoredObject(key)); } UInt64 BackupWriterAzureBlobStorage::getFileSize(const String & file_name) { - String key; - if (startsWith(file_name, ".")) - { - key= configuration.blob_path + file_name; - } - else - { - key = file_name; - } + String key = fs::path(configuration.blob_path) / file_name; RelativePathsWithMetadata children; object_storage->listObjects(key,children,/*max_keys*/0); if (children.empty()) @@ -247,16 +207,7 @@ UInt64 BackupWriterAzureBlobStorage::getFileSize(const String & file_name) std::unique_ptr BackupWriterAzureBlobStorage::readFile(const String & file_name, size_t /*expected_file_size*/) { - String key; - if (startsWith(file_name, ".")) - { - key= configuration.blob_path + file_name; - } - else - { - key = file_name; - } - + String key = fs::path(configuration.blob_path) / file_name; return std::make_unique( client, key, read_settings, settings->max_single_read_retries, settings->max_single_download_retries); @@ 
-264,15 +215,7 @@ std::unique_ptr BackupWriterAzureBlobStorage::readFile(const String std::unique_ptr BackupWriterAzureBlobStorage::writeFile(const String & file_name) { - String key; - if (startsWith(file_name, ".")) - { - key= configuration.blob_path + file_name; - } - else - { - key = file_name; - } + String key = fs::path(configuration.blob_path) / file_name; return std::make_unique( client, key, @@ -283,15 +226,7 @@ std::unique_ptr BackupWriterAzureBlobStorage::writeFile(const Strin void BackupWriterAzureBlobStorage::removeFile(const String & file_name) { - String key; - if (startsWith(file_name, ".")) - { - key= configuration.blob_path + file_name; - } - else - { - key = file_name; - } + String key = fs::path(configuration.blob_path) / file_name; StoredObject object(key); object_storage->removeObjectIfExists(object); } @@ -300,7 +235,7 @@ void BackupWriterAzureBlobStorage::removeFiles(const Strings & file_names) { StoredObjects objects; for (const auto & file_name : file_names) - objects.emplace_back(file_name); + objects.emplace_back(fs::path(configuration.blob_path) / file_name); object_storage->removeObjectsIfExist(objects); @@ -310,7 +245,7 @@ void BackupWriterAzureBlobStorage::removeFilesBatch(const Strings & file_names) { StoredObjects objects; for (const auto & file_name : file_names) - objects.emplace_back(file_name); + objects.emplace_back(fs::path(configuration.blob_path) / file_name); object_storage->removeObjectsIfExist(objects); } diff --git a/tests/integration/test_backup_restore_azure_blob_storage/test.py b/tests/integration/test_backup_restore_azure_blob_storage/test.py index a7c7b4395604..09a7f12bea16 100644 --- a/tests/integration/test_backup_restore_azure_blob_storage/test.py +++ b/tests/integration/test_backup_restore_azure_blob_storage/test.py @@ -41,6 +41,38 @@ def generate_cluster_def(port): Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw== + + + + azure_blob_storage + http://azurite1:{port}/devstoreaccount1 + cont + false + devstoreaccount1 + Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw== + 100000 + 100000 + 10 + 10 + + + local + / + + + + + +
+                        <disk>blob_storage_disk</disk>
+                    </main>
+                    <external>
+                        <disk>hdd</disk>
+                    </external>
+                </volumes>
+            </blob_storage_policy>
+        </policies>
+    </storage_configuration>
""" ) @@ -169,12 +201,12 @@ def test_backup_restore(cluster): print(get_azure_file_content("test_simple_write_c.csv", port)) assert get_azure_file_content("test_simple_write_c.csv", port) == '1,"a"\n' - backup_destination = f"AzureBlobStorage('{cluster.env_variables['AZURITE_CONNECTION_STRING']}', 'cont', 'test_simple_write_c_backup.csv')" + backup_destination = f"AzureBlobStorage('{cluster.env_variables['AZURITE_CONNECTION_STRING']}', 'cont', 'test_simple_write_c_backup')" azure_query( node, f"BACKUP TABLE test_simple_write_connection_string TO {backup_destination}", ) - print(get_azure_file_content("test_simple_write_c_backup.csv.backup", port)) + print(get_azure_file_content("test_simple_write_c_backup/.backup", port)) azure_query( node, f"RESTORE TABLE test_simple_write_connection_string AS test_simple_write_connection_string_restored FROM {backup_destination};", @@ -195,7 +227,7 @@ def test_backup_restore_diff_container(cluster): azure_query( node, f"INSERT INTO test_simple_write_connection_string_cont1 VALUES (1, 'a')" ) - backup_destination = f"AzureBlobStorage('{cluster.env_variables['AZURITE_CONNECTION_STRING']}', 'cont1', 'test_simple_write_c_backup_cont1.csv')" + backup_destination = f"AzureBlobStorage('{cluster.env_variables['AZURITE_CONNECTION_STRING']}', 'cont1', 'test_simple_write_c_backup_cont1')" azure_query( node, f"BACKUP TABLE test_simple_write_connection_string_cont1 TO {backup_destination}", @@ -224,13 +256,13 @@ def test_backup_restore_with_named_collection_azure_conf1(cluster): assert get_azure_file_content("test_simple_write.csv", port) == '1,"a"\n' backup_destination = ( - f"AzureBlobStorage(azure_conf1, 'test_simple_write_nc_backup.csv')" + f"AzureBlobStorage(azure_conf1, 'test_simple_write_nc_backup')" ) azure_query( node, f"BACKUP TABLE test_write_connection_string TO {backup_destination}", ) - print(get_azure_file_content("test_simple_write_nc_backup.csv.backup", port)) + print(get_azure_file_content("test_simple_write_nc_backup/.backup", port)) azure_query( node, f"RESTORE TABLE test_write_connection_string AS test_write_connection_string_restored FROM {backup_destination};", @@ -253,13 +285,13 @@ def test_backup_restore_with_named_collection_azure_conf2(cluster): assert get_azure_file_content("test_simple_write_2.csv", port) == '1,"a"\n' backup_destination = ( - f"AzureBlobStorage(azure_conf2, 'test_simple_write_nc_backup_2.csv')" + f"AzureBlobStorage(azure_conf2, 'test_simple_write_nc_backup_2')" ) azure_query( node, f"BACKUP TABLE test_write_connection_string_2 TO {backup_destination}", ) - print(get_azure_file_content("test_simple_write_nc_backup_2.csv.backup", port)) + print(get_azure_file_content("test_simple_write_nc_backup_2/.backup", port)) azure_query( node, f"RESTORE TABLE test_write_connection_string_2 AS test_write_connection_string_restored_2 FROM {backup_destination};", @@ -268,3 +300,28 @@ def test_backup_restore_with_named_collection_azure_conf2(cluster): azure_query(node, f"SELECT * from test_write_connection_string_restored_2") == "1\ta\n" ) + +def test_backup_restore_on_merge_tree(cluster): + node = cluster.instances["node"] + port = cluster.env_variables["AZURITE_PORT"] + azure_query( + node, + f"CREATE TABLE test_simple_merge_tree(key UInt64, data String) Engine = MergeTree() ORDER BY tuple() SETTINGS storage_policy='blob_storage_policy'", + ) + azure_query( + node, f"INSERT INTO test_simple_merge_tree VALUES (1, 'a')" + ) + + backup_destination = f"AzureBlobStorage('{cluster.env_variables['AZURITE_CONNECTION_STRING']}', 'cont', 
'test_simple_merge_tree_backup')" + azure_query( + node, + f"BACKUP TABLE test_simple_merge_tree TO {backup_destination}", + ) + azure_query( + node, + f"RESTORE TABLE test_simple_merge_tree AS test_simple_merge_tree_restored FROM {backup_destination};", + ) + assert ( + azure_query(node, f"SELECT * from test_simple_merge_tree_restored") + == "1\ta\n" + ) From 950d1dfb86fc24250f4a172091861ded2a95155d Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 9 Apr 2024 14:02:35 +0000 Subject: [PATCH 16/90] Automatic style fix --- .../test_backup_restore_azure_blob_storage/test.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/tests/integration/test_backup_restore_azure_blob_storage/test.py b/tests/integration/test_backup_restore_azure_blob_storage/test.py index 09a7f12bea16..b3e8b65b5dcf 100644 --- a/tests/integration/test_backup_restore_azure_blob_storage/test.py +++ b/tests/integration/test_backup_restore_azure_blob_storage/test.py @@ -255,9 +255,7 @@ def test_backup_restore_with_named_collection_azure_conf1(cluster): print(get_azure_file_content("test_simple_write.csv", port)) assert get_azure_file_content("test_simple_write.csv", port) == '1,"a"\n' - backup_destination = ( - f"AzureBlobStorage(azure_conf1, 'test_simple_write_nc_backup')" - ) + backup_destination = f"AzureBlobStorage(azure_conf1, 'test_simple_write_nc_backup')" azure_query( node, f"BACKUP TABLE test_write_connection_string TO {backup_destination}", @@ -301,6 +299,7 @@ def test_backup_restore_with_named_collection_azure_conf2(cluster): == "1\ta\n" ) + def test_backup_restore_on_merge_tree(cluster): node = cluster.instances["node"] port = cluster.env_variables["AZURITE_PORT"] @@ -308,9 +307,7 @@ def test_backup_restore_on_merge_tree(cluster): node, f"CREATE TABLE test_simple_merge_tree(key UInt64, data String) Engine = MergeTree() ORDER BY tuple() SETTINGS storage_policy='blob_storage_policy'", ) - azure_query( - node, f"INSERT INTO test_simple_merge_tree VALUES (1, 'a')" - ) + azure_query(node, f"INSERT INTO test_simple_merge_tree VALUES (1, 'a')") backup_destination = f"AzureBlobStorage('{cluster.env_variables['AZURITE_CONNECTION_STRING']}', 'cont', 'test_simple_merge_tree_backup')" azure_query( @@ -322,6 +319,5 @@ def test_backup_restore_on_merge_tree(cluster): f"RESTORE TABLE test_simple_merge_tree AS test_simple_merge_tree_restored FROM {backup_destination};", ) assert ( - azure_query(node, f"SELECT * from test_simple_merge_tree_restored") - == "1\ta\n" + azure_query(node, f"SELECT * from test_simple_merge_tree_restored") == "1\ta\n" ) From c5eda195750246669e833f8a9640b7afdcb7397e Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Tue, 9 Apr 2024 14:33:06 +0000 Subject: [PATCH 17/90] impl --- .../Transforms/SquashingChunksTransform.cpp | 14 ++++++++++++++ .../00182_simple_squashing_transform_bug.reference | 2 ++ .../00182_simple_squashing_transform_bug.sql | 6 ++++++ 3 files changed, 22 insertions(+) create mode 100644 tests/queries/1_stateful/00182_simple_squashing_transform_bug.reference create mode 100644 tests/queries/1_stateful/00182_simple_squashing_transform_bug.sql diff --git a/src/Processors/Transforms/SquashingChunksTransform.cpp b/src/Processors/Transforms/SquashingChunksTransform.cpp index 7de9538e435c..62c86a274535 100644 --- a/src/Processors/Transforms/SquashingChunksTransform.cpp +++ b/src/Processors/Transforms/SquashingChunksTransform.cpp @@ -64,8 +64,22 @@ void SimpleSquashingChunksTransform::transform(Chunk & chunk) } else { + if (chunk.hasRows()) + throw 
Exception(ErrorCodes::LOGICAL_ERROR, "Chunk expected to be empty, otherwise it will be lost"); + auto block = squashing.add({}); chunk.setColumns(block.getColumns(), block.rows()); + + /// ISimpleTransform keeps output chunk (result of transform() execution) for some time and push it in the output port within subsequent prepare() call. + /// Because of our custom prepare() implementation we have to take care of both places where data could be buffered: `output_data` and `squashing`. + if (output_data.chunk.hasRows()) + { + auto res = std::move(output_data.chunk); + output_data.chunk.clear(); + if (chunk.hasRows()) + res.append(chunk); + chunk = std::move(res); + } } } diff --git a/tests/queries/1_stateful/00182_simple_squashing_transform_bug.reference b/tests/queries/1_stateful/00182_simple_squashing_transform_bug.reference new file mode 100644 index 000000000000..9c49da1ab8ad --- /dev/null +++ b/tests/queries/1_stateful/00182_simple_squashing_transform_bug.reference @@ -0,0 +1,2 @@ +17747796 +17747796 diff --git a/tests/queries/1_stateful/00182_simple_squashing_transform_bug.sql b/tests/queries/1_stateful/00182_simple_squashing_transform_bug.sql new file mode 100644 index 000000000000..e73de4b33fb9 --- /dev/null +++ b/tests/queries/1_stateful/00182_simple_squashing_transform_bug.sql @@ -0,0 +1,6 @@ +-- Tags: global + +set allow_prefetched_read_pool_for_remote_filesystem=0, merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability=0, max_threads=2, max_block_size=65387; + +SELECT sum(UserID GLOBAL IN (SELECT UserID FROM remote('127.0.0.{1,2}', test.hits))) FROM remote('127.0.0.{1,2}', test.hits); +SELECT sum(UserID GLOBAL IN (SELECT UserID FROM test.hits)) FROM remote('127.0.0.{1,2}', test.hits); From eb9ed4161c7ea732611d579e2c99176e117244e4 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Tue, 9 Apr 2024 15:41:15 +0000 Subject: [PATCH 18/90] fix style --- src/Processors/Transforms/SquashingChunksTransform.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/Processors/Transforms/SquashingChunksTransform.cpp b/src/Processors/Transforms/SquashingChunksTransform.cpp index 62c86a274535..67cf22c7d4de 100644 --- a/src/Processors/Transforms/SquashingChunksTransform.cpp +++ b/src/Processors/Transforms/SquashingChunksTransform.cpp @@ -3,6 +3,11 @@ namespace DB { +namespace ErrorCodes +{ +extern const int LOGICAL_ERROR; +} + SquashingChunksTransform::SquashingChunksTransform( const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes) : ExceptionKeepingTransform(header, header, false) From 9d4f1d890eea467706b0272e987a5896f2c795d1 Mon Sep 17 00:00:00 2001 From: Joshua Hildred Date: Tue, 2 Apr 2024 05:24:16 -0700 Subject: [PATCH 19/90] Add an optimization that removes redundant equality checks on boolean functions. 
This fixes a bug in which the primary index is not used for queries like SELECT * FROM WHERE in () = 1 --- .../Passes/LogicalExpressionOptimizerPass.cpp | 76 +++++++++++++++++ .../Passes/LogicalExpressionOptimizerPass.h | 12 +++ .../03032_redundant_equals.reference | 23 +++++ .../0_stateless/03032_redundant_equals.sql | 83 +++++++++++++++++++ 4 files changed, 194 insertions(+) create mode 100644 tests/queries/0_stateless/03032_redundant_equals.reference create mode 100644 tests/queries/0_stateless/03032_redundant_equals.sql diff --git a/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp b/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp index 5f08bb9035e6..546959c4d9c0 100644 --- a/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp +++ b/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp @@ -19,6 +19,19 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } +using namespace std::literals; +static constexpr std::array boolean_functions{ + "equals"sv, "notEquals"sv, "less"sv, "greaterOrEquals"sv, "greater"sv, "lessOrEquals"sv, "in"sv, "notIn"sv, + "globalIn"sv, "globalNotIn"sv, "nullIn"sv, "notNullIn"sv, "globalNullIn"sv, "globalNullNotIn"sv, "isNull"sv, "isNotNull"sv, + "like"sv, "notLike"sv, "ilike"sv, "notILike"sv, "empty"sv, "notEmpty"sv, "not"sv, "and"sv, + "or"sv}; + +static bool isBooleanFunction(const String & func_name) +{ + return std::any_of( + boolean_functions.begin(), boolean_functions.end(), [&](const auto boolean_func) { return func_name == boolean_func; }); +} + /// Visitor that optimizes logical expressions _only_ in JOIN ON section class JoinOnLogicalExpressionOptimizerVisitor : public InDepthQueryTreeVisitorWithContext { @@ -253,6 +266,12 @@ class LogicalExpressionOptimizerVisitor : public InDepthQueryTreeVisitorWithCont tryOptimizeAndEqualsNotEqualsChain(node); return; } + + if (function_node->getFunctionName() == "equals") + { + tryOptimizeOutRedundantEquals(node); + return; + } } private: @@ -552,6 +571,63 @@ class LogicalExpressionOptimizerVisitor : public InDepthQueryTreeVisitorWithCont function_node.getArguments().getNodes() = std::move(or_operands); function_node.resolveAsFunction(or_function_resolver); } + + void tryOptimizeOutRedundantEquals(QueryTreeNodePtr & node) + { + auto & function_node = node->as(); + assert(function_node.getFunctionName() == "equals"); + + bool lhs_const; + bool maybe_invert; + + const ConstantNode * constant; + const FunctionNode * child_function; + + const auto function_arguments = function_node.getArguments().getNodes(); + if (function_arguments.size() != 2) + return; + + const auto & lhs = function_arguments[0]; + const auto & rhs = function_arguments[1]; + + if ((constant = lhs->as())) + lhs_const = true; + else if ((constant = rhs->as())) + lhs_const = false; + else + return; + + UInt64 val; + if (!constant->getValue().tryGet(val)) + return; + + if (val == 1) + maybe_invert = false; + else if (val == 0) + maybe_invert = true; + else + return; + + if (lhs_const) + child_function = rhs->as(); + else + child_function = lhs->as(); + + if (!child_function || !isBooleanFunction(child_function->getFunctionName())) + return; + if (maybe_invert) + { + auto not_resolver = FunctionFactory::instance().get("not", getContext()); + const auto not_node = std::make_shared("not"); + auto & arguments = not_node->getArguments().getNodes(); + arguments.reserve(1); + arguments.push_back(lhs_const ? 
rhs : lhs); + not_node->resolveAsFunction(not_resolver->build(not_node->getArgumentColumns())); + node = not_node; + } + else + node = lhs_const ? rhs : lhs; + } }; void LogicalExpressionOptimizerPass::run(QueryTreeNodePtr & query_tree_node, ContextPtr context) diff --git a/src/Analyzer/Passes/LogicalExpressionOptimizerPass.h b/src/Analyzer/Passes/LogicalExpressionOptimizerPass.h index 7f8853232696..e3d9cf8a370a 100644 --- a/src/Analyzer/Passes/LogicalExpressionOptimizerPass.h +++ b/src/Analyzer/Passes/LogicalExpressionOptimizerPass.h @@ -96,6 +96,18 @@ namespace DB * * SELECT * FROM t1 JOIN t2 ON a <=> b * ------------------------------- + * + * 7. Remove redundant equality checks on boolean functions. + * - these requndant checks cause the primary index to not be used when if the query involves any primary key columns + * ------------------------------- + * SELECT * FROM t1 WHERE a IN (n) = 1 + * SELECT * FROM t1 WHERE a IN (n) = 0 + * + * will be transformed into + * + * SELECT * FROM t1 WHERE a IN (n) + * SELECT * FROM t1 WHERE NOT a IN (n) + * ------------------------------- */ class LogicalExpressionOptimizerPass final : public IQueryTreePass diff --git a/tests/queries/0_stateless/03032_redundant_equals.reference b/tests/queries/0_stateless/03032_redundant_equals.reference new file mode 100644 index 000000000000..d477c98b6048 --- /dev/null +++ b/tests/queries/0_stateless/03032_redundant_equals.reference @@ -0,0 +1,23 @@ +100 +100 +100 +100 +100 +100 +0 +0 +0 +1 +100 +101 +100 +101 +100 +101 +100 +1 +1 +1 +1 +1 +1 diff --git a/tests/queries/0_stateless/03032_redundant_equals.sql b/tests/queries/0_stateless/03032_redundant_equals.sql new file mode 100644 index 000000000000..afb9c8878661 --- /dev/null +++ b/tests/queries/0_stateless/03032_redundant_equals.sql @@ -0,0 +1,83 @@ +DROP TABLE IF EXISTS test_table; + +CREATE TABLE test_table +( + k UInt64, +) +ENGINE = MergeTree +ORDER BY k; + +INSERT INTO test_table SELECT number FROM numbers(10000000); + +SELECT * FROM test_table WHERE k in (100) = 1; +SELECT * FROM test_table WHERE k = (100) = 1; +SELECT * FROM test_table WHERE k not in (100) = 0; +SELECT * FROM test_table WHERE k != (100) = 0; +SELECT * FROM test_table WHERE 1 = (k = 100); +SELECT * FROM test_table WHERE 0 = (k not in (100)); +SELECT * FROM test_table WHERE k < 1 = 1; +SELECT * FROM test_table WHERE k >= 1 = 0; +SELECT * FROM test_table WHERE k > 1 = 0; +SELECT * FROM test_table WHERE ((k not in (101) = 0) OR (k in (100) = 1)) = 1; +SELECT * FROM test_table WHERE (NOT ((k not in (100) = 0) OR (k in (100) = 1))) = 0; +SELECT * FROM test_table WHERE (NOT ((k in (101) = 0) OR (k in (100) = 1))) = 1; +SELECT * FROM test_table WHERE ((k not in (101) = 0) OR (k in (100) = 1)) = 1; +SELECT * FROM test_table WHERE ((k not in (99) = 1) AND (k in (100) = 1)) = 1; + +SELECT count() +FROM +( + EXPLAIN PLAN indexes=1 + SELECT * FROM test_table WHERE k in (100) = 1 +) +WHERE + explain LIKE '%Granules: 1/%'; + +SELECT count() +FROM +( + EXPLAIN PLAN indexes=1 + SELECT * FROM test_table WHERE k >= 1 = 0 +) +WHERE + explain LIKE '%Granules: 1/%'; + +SELECT count() +FROM +( + EXPLAIN PLAN indexes=1 + SELECT * FROM test_table WHERE k not in (100) = 0 +) +WHERE + explain LIKE '%Granules: 1/%'; + +SELECT count() +FROM +( + EXPLAIN PLAN indexes=1 + SELECT * FROM test_table WHERE k > 1 = 0 +) +WHERE + explain LIKE '%Granules: 1/%'; + +SELECT count() +FROM +( + EXPLAIN PLAN indexes=1 + SELECT * FROM test_table WHERE (NOT ((k not in (100) = 0) OR (k in (100) = 1))) = 0 +) +WHERE + explain LIKE 
'%Granules: 1/%'; + + +SELECT count() +FROM +( + EXPLAIN PLAN indexes=1 + SELECT * FROM test_table WHERE (NOT ((k in (101) = 0) OR (k in (100) = 1))) = 1 +) +WHERE + explain LIKE '%Granules: 1/%'; + + +DROP TABLE test_table; From 626b7b12538fb1bb938620710718d9c6273fb44a Mon Sep 17 00:00:00 2001 From: Joshua Hildred Date: Tue, 9 Apr 2024 14:12:13 -0700 Subject: [PATCH 20/90] Fix style --- .../Passes/LogicalExpressionOptimizerPass.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/Analyzer/Passes/LogicalExpressionOptimizerPass.h b/src/Analyzer/Passes/LogicalExpressionOptimizerPass.h index e3d9cf8a370a..5f109993f3f1 100644 --- a/src/Analyzer/Passes/LogicalExpressionOptimizerPass.h +++ b/src/Analyzer/Passes/LogicalExpressionOptimizerPass.h @@ -98,15 +98,15 @@ namespace DB * ------------------------------- * * 7. Remove redundant equality checks on boolean functions. - * - these requndant checks cause the primary index to not be used when if the query involves any primary key columns + * - these requndant checks cause the primary index to not be used when if the query involves any primary key columns * ------------------------------- - * SELECT * FROM t1 WHERE a IN (n) = 1 + * SELECT * FROM t1 WHERE a IN (n) = 1 * SELECT * FROM t1 WHERE a IN (n) = 0 - * - * will be transformed into - * - * SELECT * FROM t1 WHERE a IN (n) - * SELECT * FROM t1 WHERE NOT a IN (n) + * + * will be transformed into + * + * SELECT * FROM t1 WHERE a IN (n) + * SELECT * FROM t1 WHERE NOT a IN (n) * ------------------------------- */ From dedc25fd341abacd6c9d8719aabb8feb1e824518 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Tue, 9 Apr 2024 23:43:40 +0200 Subject: [PATCH 21/90] fix --- src/Client/Connection.cpp | 16 ++++++++++------ src/Client/Connection.h | 3 ++- src/Client/IServerConnection.h | 2 +- src/Client/LocalConnection.h | 2 +- 4 files changed, 14 insertions(+), 9 deletions(-) diff --git a/src/Client/Connection.cpp b/src/Client/Connection.cpp index e5ac7ad66b9d..f791a77a261d 100644 --- a/src/Client/Connection.cpp +++ b/src/Client/Connection.cpp @@ -214,7 +214,7 @@ void Connection::connect(const ConnectionTimeouts & timeouts) DNSResolver::instance().removeHostFromCache(host); /// Add server address to exception. Exception will preserve stack trace. - e.addMessage("({})", getDescription()); + e.addMessage("({})", getDescription(/*with_extra*/ true)); throw; } catch (Poco::Net::NetException & e) @@ -225,7 +225,7 @@ void Connection::connect(const ConnectionTimeouts & timeouts) DNSResolver::instance().removeHostFromCache(host); /// Add server address to exception. Also Exception will remember new stack trace. It's a pity that more precise exception type is lost. 
- throw NetException(ErrorCodes::NETWORK_ERROR, "{} ({})", e.displayText(), getDescription()); + throw NetException(ErrorCodes::NETWORK_ERROR, "{} ({})", e.displayText(), getDescription(/*with_extra*/ true)); } catch (Poco::TimeoutException & e) { @@ -241,7 +241,7 @@ void Connection::connect(const ConnectionTimeouts & timeouts) ErrorCodes::SOCKET_TIMEOUT, "{} ({}, connection timeout {} ms)", e.displayText(), - getDescription(), + getDescription(/*with_extra*/ true), connection_timeout.totalMilliseconds()); } } @@ -473,8 +473,10 @@ const String & Connection::getDefaultDatabase() const return default_database; } -const String & Connection::getDescription() const +const String & Connection::getDescription(bool with_extra) const { + if (with_extra) + return full_description; return description; } @@ -1227,10 +1229,12 @@ void Connection::setDescription() description += ", " + ip_address; } + full_description = description; + if (const auto * socket_ = getSocket()) { - description += ", local address: "; - description += socket_->address().toString(); + full_description += ", local address: "; + full_description += socket_->address().toString(); } } diff --git a/src/Client/Connection.h b/src/Client/Connection.h index 5d0411027a1c..20c66caa7448 100644 --- a/src/Client/Connection.h +++ b/src/Client/Connection.h @@ -89,7 +89,7 @@ class Connection : public IServerConnection const String & getServerDisplayName(const ConnectionTimeouts & timeouts) override; /// For log and exception messages. - const String & getDescription() const override; + const String & getDescription(bool with_extra = false) const override; const String & getHost() const; UInt16 getPort() const; const String & getDefaultDatabase() const; @@ -187,6 +187,7 @@ class Connection : public IServerConnection /// For messages in log and in exceptions. String description; + String full_description; void setDescription(); /// Returns resolved address if it was resolved. 
diff --git a/src/Client/IServerConnection.h b/src/Client/IServerConnection.h index a0c029c79fb0..724afa95d7a6 100644 --- a/src/Client/IServerConnection.h +++ b/src/Client/IServerConnection.h @@ -88,7 +88,7 @@ class IServerConnection : boost::noncopyable virtual const String & getServerTimezone(const ConnectionTimeouts & timeouts) = 0; virtual const String & getServerDisplayName(const ConnectionTimeouts & timeouts) = 0; - virtual const String & getDescription() const = 0; + virtual const String & getDescription(bool with_extra = false) const = 0; virtual std::vector> getPasswordComplexityRules() const = 0; diff --git a/src/Client/LocalConnection.h b/src/Client/LocalConnection.h index 9c2d0a81d8d5..6218fbe341f4 100644 --- a/src/Client/LocalConnection.h +++ b/src/Client/LocalConnection.h @@ -90,7 +90,7 @@ class LocalConnection : public IServerConnection, WithContext const String & getServerTimezone(const ConnectionTimeouts & timeouts) override; const String & getServerDisplayName(const ConnectionTimeouts & timeouts) override; - const String & getDescription() const override { return description; } + const String & getDescription([[maybe_unused]] bool with_extra = false) const override { return description; } std::vector> getPasswordComplexityRules() const override { return {}; } From 52635d2b8fb35bf8c2b69822ca47c9b672d7d8e4 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Tue, 9 Apr 2024 17:03:15 +0000 Subject: [PATCH 22/90] add profile events for azure disk --- src/Common/ProfileEvents.cpp | 24 ++++-- .../IO/ReadBufferFromAzureBlobStorage.cpp | 26 +++++- .../AzureBlobStorage/AzureObjectStorage.cpp | 80 ++++++++++++------- .../AzureBlobStorage/AzureObjectStorage.h | 3 + .../ObjectStorages/S3/S3ObjectStorage.cpp | 3 + 5 files changed, 97 insertions(+), 39 deletions(-) diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index 33ccb4e9f025..23eed53509ee 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -403,13 +403,6 @@ The server successfully detected this situation and will download merged part fr M(S3PutObject, "Number of S3 API PutObject calls.") \ M(S3GetObject, "Number of S3 API GetObject calls.") \ \ - M(AzureUploadPart, "Number of Azure blob storage API UploadPart calls") \ - M(DiskAzureUploadPart, "Number of Disk Azure blob storage API UploadPart calls") \ - M(AzureCopyObject, "Number of Azure blob storage API CopyObject calls") \ - M(DiskAzureCopyObject, "Number of Disk Azure blob storage API CopyObject calls") \ - M(AzureDeleteObjects, "Number of Azure blob storage API DeleteObject(s) calls.") \ - M(AzureListObjects, "Number of Azure blob storage API ListObjects calls.") \ - \ M(DiskS3DeleteObjects, "Number of DiskS3 API DeleteObject(s) calls.") \ M(DiskS3CopyObject, "Number of DiskS3 API CopyObject calls.") \ M(DiskS3ListObjects, "Number of DiskS3 API ListObjects calls.") \ @@ -441,6 +434,23 @@ The server successfully detected this situation and will download merged part fr M(WriteBufferFromS3WaitInflightLimitMicroseconds, "Time spent on waiting while some of the current requests are done when its number reached the limit defined by s3_max_inflight_parts_for_one_file.") \ M(QueryMemoryLimitExceeded, "Number of times when memory limit exceeded for query.") \ \ + M(AzureGetObject, "Number of Azure API GetObject calls.") \ + M(AzureUploadPart, "Number of Azure blob storage API UploadPart calls") \ + M(AzureCopyObject, "Number of Azure blob storage API CopyObject calls") \ + M(AzureDeleteObjects, "Number of Azure blob storage API 
DeleteObject(s) calls.") \ + M(AzureListObjects, "Number of Azure blob storage API ListObjects calls.") \ + \ + M(DiskAzureGetObject, "Number of Disk Azure API GetObject calls.") \ + M(DiskAzureUploadPart, "Number of Disk Azure blob storage API UploadPart calls") \ + M(DiskAzureCopyObject, "Number of Disk Azure blob storage API CopyObject calls") \ + M(DiskAzureListObjects, "Number of Disk Azure blob storage API ListObjects calls.") \ + M(DiskAzureDeleteObjects, "Number of Azure blob storage API DeleteObject(s) calls.") \ + \ + M(ReadBufferFromAzureMicroseconds, "Time spent on reading from Azure.") \ + M(ReadBufferFromAzureInitMicroseconds, "Time spent initializing connection to Azure.") \ + M(ReadBufferFromAzureBytes, "Bytes read from Azure.") \ + M(ReadBufferFromAzureRequestsErrors, "Number of exceptions while reading from Azure") \ + \ M(CachedReadBufferReadFromCacheHits, "Number of times the read from filesystem cache hit the cache.") \ M(CachedReadBufferReadFromCacheMisses, "Number of times the read from filesystem cache miss the cache.") \ M(CachedReadBufferReadFromSourceMicroseconds, "Time reading from filesystem cache source (from remote filesystem, etc)") \ diff --git a/src/Disks/IO/ReadBufferFromAzureBlobStorage.cpp b/src/Disks/IO/ReadBufferFromAzureBlobStorage.cpp index 5947b742339e..48b40f8f8c68 100644 --- a/src/Disks/IO/ReadBufferFromAzureBlobStorage.cpp +++ b/src/Disks/IO/ReadBufferFromAzureBlobStorage.cpp @@ -3,6 +3,7 @@ #if USE_AZURE_BLOB_STORAGE #include +#include #include #include #include @@ -14,6 +15,12 @@ namespace ProfileEvents { extern const Event RemoteReadThrottlerBytes; extern const Event RemoteReadThrottlerSleepMicroseconds; + extern const Event ReadBufferFromAzureMicroseconds; + extern const Event ReadBufferFromAzureBytes; + extern const Event ReadBufferFromAzureRequestsErrors; + extern const Event AzureGetObject; + extern const Event DiskAzureGetObject; + extern const Event ReadBufferFromAzureInitMicroseconds; } namespace DB @@ -67,7 +74,6 @@ void ReadBufferFromAzureBlobStorage::setReadUntilEnd() initialized = false; } } - } void ReadBufferFromAzureBlobStorage::setReadUntilPosition(size_t position) @@ -103,7 +109,9 @@ bool ReadBufferFromAzureBlobStorage::nextImpl() auto handle_exception = [&, this](const auto & e, size_t i) { + ProfileEvents::increment(ProfileEvents::ReadBufferFromAzureRequestsErrors); LOG_DEBUG(log, "Exception caught during Azure Read for file {} at attempt {}/{}: {}", path, i + 1, max_single_read_retries, e.Message); + if (i + 1 == max_single_read_retries) throw; @@ -115,6 +123,7 @@ bool ReadBufferFromAzureBlobStorage::nextImpl() for (size_t i = 0; i < max_single_read_retries; ++i) { + ProfileEventTimeIncrement watch(ProfileEvents::ReadBufferFromAzureMicroseconds); try { bytes_read = data_stream->ReadToCount(reinterpret_cast(data_ptr), to_read_bytes); @@ -131,6 +140,7 @@ bool ReadBufferFromAzureBlobStorage::nextImpl() if (bytes_read == 0) return false; + ProfileEvents::increment(ProfileEvents::ReadBufferFromAzureBytes, bytes_read); BufferBase::set(data_ptr, bytes_read, 0); offset += bytes_read; @@ -215,7 +225,9 @@ void ReadBufferFromAzureBlobStorage::initialize() auto handle_exception = [&, this](const auto & e, size_t i) { + ProfileEvents::increment(ProfileEvents::ReadBufferFromAzureRequestsErrors); LOG_DEBUG(log, "Exception caught during Azure Download for file {} at offset {} at attempt {}/{}: {}", path, offset, i + 1, max_single_download_retries, e.Message); + if (i + 1 == max_single_download_retries) throw; @@ -225,8 +237,14 @@ void 
ReadBufferFromAzureBlobStorage::initialize() for (size_t i = 0; i < max_single_download_retries; ++i) { + ProfileEventTimeIncrement watch(ProfileEvents::ReadBufferFromAzureInitMicroseconds); + try { + ProfileEvents::increment(ProfileEvents::AzureGetObject); + if (read_settings.for_object_storage) + ProfileEvents::increment(ProfileEvents::DiskAzureGetObject); + auto download_response = blob_client->Download(download_options); data_stream = std::move(download_response.Value.BodyStream); break; @@ -266,6 +284,8 @@ size_t ReadBufferFromAzureBlobStorage::readBigAt(char * to, size_t n, size_t ran for (size_t i = 0; i < max_single_download_retries && n > 0; ++i) { size_t bytes_copied = 0; + ProfileEventTimeIncrement watch(ProfileEvents::ReadBufferFromAzureMicroseconds); + try { Azure::Storage::Blobs::DownloadBlobOptions download_options; @@ -282,7 +302,9 @@ size_t ReadBufferFromAzureBlobStorage::readBigAt(char * to, size_t n, size_t ran } catch (const Azure::Core::RequestFailedException & e) { + ProfileEvents::increment(ProfileEvents::ReadBufferFromAzureRequestsErrors); LOG_DEBUG(log, "Exception caught during Azure Download for file {} at offset {} at attempt {}/{}: {}", path, offset, i + 1, max_single_download_retries, e.Message); + if (i + 1 == max_single_download_retries) throw; @@ -290,6 +312,8 @@ size_t ReadBufferFromAzureBlobStorage::readBigAt(char * to, size_t n, size_t ran sleep_time_with_backoff_milliseconds *= 2; } + ProfileEvents::increment(ProfileEvents::ReadBufferFromAzureBytes, bytes_copied); + range_begin += bytes_copied; to += bytes_copied; n -= bytes_copied; diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp index e0614613c3fb..fb3a35301c0b 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp @@ -22,6 +22,14 @@ namespace CurrentMetrics extern const Metric ObjectStorageAzureThreadsScheduled; } +namespace ProfileEvents +{ + extern const Event AzureListObjects; + extern const Event DiskAzureListObjects; + extern const Event AzureDeleteObjects; + extern const Event DiskAzureDeleteObjects; +} + namespace DB { @@ -58,6 +66,9 @@ class AzureIteratorAsync final : public IObjectStorageIteratorAsync private: bool getBatchAndCheckNext(RelativePathsWithMetadata & batch) override { + ProfileEvents::increment(ProfileEvents::AzureListObjects); + ProfileEvents::increment(ProfileEvents::DiskAzureListObjects); + batch.clear(); auto outcome = client->ListBlobs(options); auto blob_list_response = client->ListBlobs(options); @@ -116,6 +127,9 @@ bool AzureObjectStorage::exists(const StoredObject & object) const options.Prefix = object.remote_path; options.PageSizeHint = 1; + ProfileEvents::increment(ProfileEvents::AzureListObjects); + ProfileEvents::increment(ProfileEvents::DiskAzureListObjects); + auto blobs_list_response = client_ptr->ListBlobs(options); auto blobs_list = blobs_list_response.Blobs; @@ -147,10 +161,14 @@ void AzureObjectStorage::listObjects(const std::string & path, RelativePathsWith options.PageSizeHint = max_keys; else options.PageSizeHint = settings.get()->list_object_keys_size; + Azure::Storage::Blobs::ListBlobsPagedResponse blob_list_response; while (true) { + ProfileEvents::increment(ProfileEvents::AzureListObjects); + ProfileEvents::increment(ProfileEvents::DiskAzureListObjects); + blob_list_response = client_ptr->ListBlobs(options); auto blobs_list = blob_list_response.Blobs; @@ -270,59 
+288,59 @@ std::unique_ptr AzureObjectStorage::writeObject( /// NO settings.get()); } -/// Remove file. Throws exception if file doesn't exists or it's a directory. -void AzureObjectStorage::removeObject(const StoredObject & object) +void AzureObjectStorage::removeObjectImpl(const StoredObject & object, const SharedAzureClientPtr & client_ptr, bool if_exists) { + ProfileEvents::increment(ProfileEvents::AzureDeleteObjects); + ProfileEvents::increment(ProfileEvents::DiskAzureDeleteObjects); + const auto & path = object.remote_path; LOG_TEST(log, "Removing single object: {}", path); - auto client_ptr = client.get(); - auto delete_info = client_ptr->DeleteBlob(path); - if (!delete_info.Value.Deleted) - throw Exception( - ErrorCodes::AZURE_BLOB_STORAGE_ERROR, "Failed to delete file (path: {}) in AzureBlob Storage, reason: {}", - path, delete_info.RawResponse ? delete_info.RawResponse->GetReasonPhrase() : "Unknown"); -} -void AzureObjectStorage::removeObjects(const StoredObjects & objects) -{ - auto client_ptr = client.get(); - for (const auto & object : objects) + try { - LOG_TEST(log, "Removing object: {} (total: {})", object.remote_path, objects.size()); - auto delete_info = client_ptr->DeleteBlob(object.remote_path); - if (!delete_info.Value.Deleted) + auto delete_info = client_ptr->DeleteBlob(path); + if (!if_exists && !delete_info.Value.Deleted) throw Exception( ErrorCodes::AZURE_BLOB_STORAGE_ERROR, "Failed to delete file (path: {}) in AzureBlob Storage, reason: {}", - object.remote_path, delete_info.RawResponse ? delete_info.RawResponse->GetReasonPhrase() : "Unknown"); - } -} - -void AzureObjectStorage::removeObjectIfExists(const StoredObject & object) -{ - auto client_ptr = client.get(); - try - { - LOG_TEST(log, "Removing single object: {}", object.remote_path); - auto delete_info = client_ptr->DeleteBlob(object.remote_path); + path, delete_info.RawResponse ? delete_info.RawResponse->GetReasonPhrase() : "Unknown"); } catch (const Azure::Storage::StorageException & e) { + if (!if_exists) + throw; + /// If object doesn't exist... if (e.StatusCode == Azure::Core::Http::HttpStatusCode::NotFound) return; + tryLogCurrentException(__PRETTY_FUNCTION__); throw; } } -void AzureObjectStorage::removeObjectsIfExist(const StoredObjects & objects) +/// Remove file. Throws exception if file doesn't exists or it's a directory. 
+void AzureObjectStorage::removeObject(const StoredObject & object) +{ + removeObjectImpl(object, client.get(), false); +} + +void AzureObjectStorage::removeObjects(const StoredObjects & objects) { auto client_ptr = client.get(); for (const auto & object : objects) - { - removeObjectIfExists(object); - } + removeObjectImpl(object, client_ptr, false); +} +void AzureObjectStorage::removeObjectIfExists(const StoredObject & object) +{ + removeObjectImpl(object, client.get(), true); +} + +void AzureObjectStorage::removeObjectsIfExist(const StoredObjects & objects) +{ + auto client_ptr = client.get(); + for (const auto & object : objects) + removeObjectImpl(object, client_ptr, true); } diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h index b05fc7afc96c..f52ab803012b 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h @@ -164,6 +164,9 @@ class AzureObjectStorage : public IObjectStorage } private: + using SharedAzureClientPtr = std::shared_ptr; + void removeObjectImpl(const StoredObject & object, const SharedAzureClientPtr & client_ptr, bool if_exists); + const String name; /// client used to access the files in the Blob Storage cloud MultiVersion client; diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index b343b73f7bd6..c4737f1a5ae4 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -120,6 +120,7 @@ class S3IteratorAsync final : public IObjectStorageIteratorAsync bool getBatchAndCheckNext(RelativePathsWithMetadata & batch) override { ProfileEvents::increment(ProfileEvents::S3ListObjects); + ProfileEvents::increment(ProfileEvents::DiskS3ListObjects); bool result = false; auto outcome = client->ListObjectsV2(request); @@ -292,6 +293,7 @@ void S3ObjectStorage::listObjects(const std::string & path, RelativePathsWithMet { ProfileEvents::increment(ProfileEvents::S3ListObjects); ProfileEvents::increment(ProfileEvents::DiskS3ListObjects); + outcome = client.get()->ListObjectsV2(request); throwIfError(outcome); @@ -325,6 +327,7 @@ void S3ObjectStorage::removeObjectImpl(const StoredObject & object, bool if_exis { ProfileEvents::increment(ProfileEvents::S3DeleteObjects); ProfileEvents::increment(ProfileEvents::DiskS3DeleteObjects); + S3::DeleteObjectRequest request; request.SetBucket(uri.bucket); request.SetKey(object.remote_path); From aba3bbaeb63d31bdfef02bfd0d734dc6f35a9409 Mon Sep 17 00:00:00 2001 From: Konstantin Bogdanov Date: Thu, 4 Apr 2024 21:41:32 +0200 Subject: [PATCH 23/90] Replace Tcl version with Python --- ...1676_clickhouse_client_autocomplete.python | 121 +++++++++++++++ .../01676_clickhouse_client_autocomplete.sh | 138 +----------------- 2 files changed, 123 insertions(+), 136 deletions(-) create mode 100644 tests/queries/0_stateless/01676_clickhouse_client_autocomplete.python diff --git a/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.python b/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.python new file mode 100644 index 000000000000..02198eb77c33 --- /dev/null +++ b/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.python @@ -0,0 +1,121 @@ +import pty +import os +import shlex +import time +import multiprocessing + +COMPLETION_TIMEOUT_SECONDS = 10 + + +def run_with_timeout(func, args, timeout): + process = 
multiprocessing.Process(target=func, args=args) + process.start() + process.join(timeout) + + if process.is_alive(): + process.terminate() + print("Timeout") + + +def test_completion(program, argv, comp_word): + comp_begin = comp_word[:-3] + + shell_pid, master = pty.fork() + if shell_pid == 0: + os.execv(program, argv) + else: + try: + output = os.read(master, 4096).decode() + while not ":)" in output: + output += os.read(master, 4096).decode() + + os.write(master, b"SET " + bytes(comp_begin.encode())) + output = os.read(master, 4096).decode() + while not comp_begin in output: + output += os.read(master, 4096).decode() + + time.sleep(0.15) + os.write(master, b"\t") + + output = os.read(master, 4096).decode() + # fail fast if there is a bell character in the output, + # meaning no concise completion is found + if "\x07" in output: + print(f"{comp_word}: FAIL") + return + + while not comp_word in output: + output += os.read(master, 4096).decode() + + print(f"{comp_word}: OK") + finally: + os.close(master) + + +client_compwords_positive = [ + # system.functions + "concatAssumeInjective", + # system.table_engines + "ReplacingMergeTree", + # system.formats + "JSONEachRow", + # system.table_functions + "clusterAllReplicas", + # system.data_type_families + "SimpleAggregateFunction", + # system.settings + "max_concurrent_queries_for_all_users", + # system.clusters + "test_shard_localhost", + # system.macros + "default_path_test", + # system.storage_policies, egh not uniq + "default", + # system.aggregate_function_combinators + "uniqCombined64ForEach", + # FIXME: one may add separate case for suggestion_limit + # system.databases + "system", + # system.tables + "aggregate_function_combinators", + # system.columns + "primary_key_bytes_in_memory_allocated", + # system.dictionaries + # FIXME: none + "definitely_broken_and_should_fail", +] + +local_compwords_positive = [ + # system.functions + "concatAssumeInjective", + # system.table_engines + "ReplacingMergeTree", + # system.formats + "JSONEachRow", + # system.table_functions + "clusterAllReplicas", + # system.data_type_families + "SimpleAggregateFunction", +] + + +if __name__ == "__main__": + print("# clickhouse-client") + clickhouse_client = os.environ["CLICKHOUSE_CLIENT"] + args = shlex.split(clickhouse_client) + [ + run_with_timeout( + test_completion, [args[0], args, comp_word], COMPLETION_TIMEOUT_SECONDS + ) + for comp_word in client_compwords_positive + ] + + print("# clickhouse-local") + clickhouse_local = os.environ["CLICKHOUSE_LOCAL"] + args = shlex.split(clickhouse_local) + [ + run_with_timeout( + test_completion, [args[0], args, comp_word], COMPLETION_TIMEOUT_SECONDS + ) + for comp_word in local_compwords_positive + ] diff --git a/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.sh b/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.sh index ebd6490077e4..88f18a5bb018 100755 --- a/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.sh +++ b/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.sh @@ -1,142 +1,8 @@ #!/usr/bin/env bash -# Tags: long, no-ubsan +# Tags: long CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh -SCRIPT_PATH="$CURDIR/$CLICKHOUSE_TEST_UNIQUE_NAME.generated-expect" - -# NOTE: database = $CLICKHOUSE_DATABASE is superfluous - -function test_completion_word() -{ - local w=$1 && shift - - local w_len=${#w} - local compword_begin=${w:0:$((w_len-3))} - local compword_end=${w:$((w_len-3))} - - # NOTE: - # - here and below you should escape variables of the expect. - # - you should not use "expect <<..." since in this case timeout/eof will - # not work (I guess due to attached stdin) - - # TODO: get build sanitizer and debug/release info to dynamically change test - # like here timeout 120 seconds is too big for release build - # but ok for sanitizer builds - cat > "$SCRIPT_PATH" << EOF -# NOTE: log will be appended -exp_internal -f $CLICKHOUSE_TMP/$(basename "${BASH_SOURCE[0]}").debuglog 0 - -# NOTE: when expect have EOF on stdin it also closes stdout, so let's reopen it -# again for logging -set stdout_channel [open "/dev/stdout" w] - -log_user 0 -set timeout 120 -match_max 100000 -expect_after { - # Do not ignore eof from expect - -i \$any_spawn_id eof { exp_continue } - # A default timeout action is to do nothing, change it to fail - -i \$any_spawn_id timeout { exit 1 } -} - -spawn bash -c "$*" -expect ":) " - -# Make a query -send -- "SET $compword_begin" -expect "SET $compword_begin" - -# Wait for suggestions to load, they are loaded in background -set is_done 0 -set timeout 1 -while {\$is_done == 0} { - send -- "\\t" - expect { - "$compword_begin$compword_end" { - puts \$stdout_channel "$compword_begin$compword_end: OK" - set is_done 1 - } - default { - sleep 1 - } - } -} - -close \$stdout_channel - -send -- "\\3\\4" -expect eof -EOF - - # NOTE: run expect under timeout since there is while loop that is not - # limited with timeout. - # - # NOTE: cat is required to serialize stdout for expect (without this pipe - # it will reopen the file again, and the output will be mixed). - timeout 2m expect -f "$SCRIPT_PATH" | cat -} - -# last 3 bytes will be completed, -# so take this in mind when you will update the list. 
-client_compwords_positive=( - # system.functions - concatAssumeInjective - # system.table_engines - ReplacingMergeTree - # system.formats - JSONEachRow - # system.table_functions - clusterAllReplicas - # system.data_type_families - SimpleAggregateFunction - # system.settings - max_concurrent_queries_for_all_users - # system.clusters - test_shard_localhost - # system.macros - default_path_test - # system.storage_policies, egh not uniq - default - # system.aggregate_function_combinators - uniqCombined64ForEach - - # FIXME: one may add separate case for suggestion_limit - # system.databases - system - # system.tables - aggregate_function_combinators - # system.columns - primary_key_bytes_in_memory_allocated - # system.dictionaries - # FIXME: none -) - -local_compwords_positive=( - # system.functions - concatAssumeInjective - # system.table_engines - ReplacingMergeTree - # system.formats - JSONEachRow - # system.table_functions - clusterAllReplicas - # system.data_type_families - SimpleAggregateFunction -) - -echo "# clickhouse-client" -for w in "${client_compwords_positive[@]}"; do - test_completion_word "$w" "$CLICKHOUSE_CLIENT" -done -echo "# clickhouse-local" -for w in "${local_compwords_positive[@]}"; do - test_completion_word "$w" "$CLICKHOUSE_LOCAL" -done - -rm -f "${SCRIPT_PATH:?}" - -exit 0 +python3 "$CURDIR"/01676_clickhouse_client_autocomplete.python From c8598bdb5448af64bf61aab78c89096dfa4a42ec Mon Sep 17 00:00:00 2001 From: Konstantin Bogdanov Date: Mon, 8 Apr 2024 20:54:28 +0200 Subject: [PATCH 24/90] Add a flag to load suggestion data synchronously --- programs/client/Client.cpp | 1 + src/Client/ClientBase.cpp | 7 +++++-- src/Client/ClientBase.h | 1 + src/Client/Suggest.cpp | 9 ++++++--- src/Client/Suggest.h | 2 +- .../01676_clickhouse_client_autocomplete.python | 4 +++- 6 files changed, 17 insertions(+), 7 deletions(-) diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index 72cad1dac076..e27a4f0f529b 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -482,6 +482,7 @@ void Client::connect() server_version = toString(server_version_major) + "." + toString(server_version_minor) + "." + toString(server_version_patch); load_suggestions = is_interactive && (server_revision >= Suggest::MIN_SERVER_REVISION) && !config().getBool("disable_suggestion", false); + wait_for_suggestions_to_load = config().getBool("wait_for_suggestions_to_load", false); if (server_display_name = connection->getServerDisplayName(connection_parameters.timeouts); server_display_name.empty()) server_display_name = config().getString("host", "localhost"); diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index db910de07f31..7ad8383d4605 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -2474,9 +2474,9 @@ void ClientBase::runInteractive() { /// Load suggestion data from the server. 
if (global_context->getApplicationType() == Context::ApplicationType::CLIENT) - suggest->load(global_context, connection_parameters, config().getInt("suggestion_limit")); + suggest->load(global_context, connection_parameters, config().getInt("suggestion_limit"), wait_for_suggestions_to_load); else if (global_context->getApplicationType() == Context::ApplicationType::LOCAL) - suggest->load(global_context, connection_parameters, config().getInt("suggestion_limit")); + suggest->load(global_context, connection_parameters, config().getInt("suggestion_limit"), wait_for_suggestions_to_load); } if (home_path.empty()) @@ -2972,6 +2972,7 @@ void ClientBase::init(int argc, char ** argv) ("progress", po::value()->implicit_value(ProgressOption::TTY, "tty")->default_value(ProgressOption::DEFAULT, "default"), "Print progress of queries execution - to TTY: tty|on|1|true|yes; to STDERR non-interactive mode: err; OFF: off|0|false|no; DEFAULT - interactive to TTY, non-interactive is off") ("disable_suggestion,A", "Disable loading suggestion data. Note that suggestion data is loaded asynchronously through a second connection to ClickHouse server. Also it is reasonable to disable suggestion if you want to paste a query with TAB characters. Shorthand option -A is for those who get used to mysql client.") + ("wait_for_suggestions_to_load", "Load suggestion data synchronously.") ("time,t", "print query execution time to stderr in non-interactive mode (for benchmarks)") ("echo", "in batch mode, print query before execution") @@ -3101,6 +3102,8 @@ void ClientBase::init(int argc, char ** argv) config().setBool("echo", true); if (options.count("disable_suggestion")) config().setBool("disable_suggestion", true); + if (options.count("wait_for_suggestions_to_load")) + config().setBool("wait_for_suggestions_to_load", true); if (options.count("suggestion_limit")) config().setInt("suggestion_limit", options["suggestion_limit"].as()); if (options.count("highlight")) diff --git a/src/Client/ClientBase.h b/src/Client/ClientBase.h index 9ec87ababfc9..dc5c65530462 100644 --- a/src/Client/ClientBase.h +++ b/src/Client/ClientBase.h @@ -209,6 +209,7 @@ class ClientBase : public Poco::Util::Application, public IHints<2> std::optional suggest; bool load_suggestions = false; + bool wait_for_suggestions_to_load = false; std::vector queries; /// Queries passed via '--query' std::vector queries_files; /// If not empty, queries will be read from these files diff --git a/src/Client/Suggest.cpp b/src/Client/Suggest.cpp index 03df582de10a..f63dbc64d271 100644 --- a/src/Client/Suggest.cpp +++ b/src/Client/Suggest.cpp @@ -110,7 +110,7 @@ static String getLoadSuggestionQuery(Int32 suggestion_limit, bool basic_suggesti } template -void Suggest::load(ContextPtr context, const ConnectionParameters & connection_parameters, Int32 suggestion_limit) +void Suggest::load(ContextPtr context, const ConnectionParameters & connection_parameters, Int32 suggestion_limit, bool wait_for_load) { loading_thread = std::thread([my_context = Context::createCopy(context), connection_parameters, suggestion_limit, this] { @@ -152,6 +152,9 @@ void Suggest::load(ContextPtr context, const ConnectionParameters & connection_p /// Note that keyword suggestions are available even if we cannot load data from server.
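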
}); + + if (wait_for_load) + loading_thread.join(); } void Suggest::load(IServerConnection & connection, @@ -228,8 +231,8 @@ void Suggest::fillWordsFromBlock(const Block & block) } template -void Suggest::load(ContextPtr context, const ConnectionParameters & connection_parameters, Int32 suggestion_limit); +void Suggest::load(ContextPtr context, const ConnectionParameters & connection_parameters, Int32 suggestion_limit, bool wait_for_load); template -void Suggest::load(ContextPtr context, const ConnectionParameters & connection_parameters, Int32 suggestion_limit); +void Suggest::load(ContextPtr context, const ConnectionParameters & connection_parameters, Int32 suggestion_limit, bool wait_for_load); } diff --git a/src/Client/Suggest.h b/src/Client/Suggest.h index 5cecdc4501b0..aac8a73f7020 100644 --- a/src/Client/Suggest.h +++ b/src/Client/Suggest.h @@ -27,7 +27,7 @@ class Suggest : public LineReader::Suggest, boost::noncopyable /// Load suggestions for clickhouse-client. template - void load(ContextPtr context, const ConnectionParameters & connection_parameters, Int32 suggestion_limit); + void load(ContextPtr context, const ConnectionParameters & connection_parameters, Int32 suggestion_limit, bool wait_for_load); void load(IServerConnection & connection, const ConnectionTimeouts & timeouts, diff --git a/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.python b/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.python index 02198eb77c33..5433a8d4199e 100644 --- a/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.python +++ b/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.python @@ -34,7 +34,7 @@ def test_completion(program, argv, comp_word): while not comp_begin in output: output += os.read(master, 4096).decode() - time.sleep(0.15) + time.sleep(0.25) os.write(master, b"\t") output = os.read(master, 4096).decode() @@ -103,6 +103,7 @@ if __name__ == "__main__": print("# clickhouse-client") clickhouse_client = os.environ["CLICKHOUSE_CLIENT"] args = shlex.split(clickhouse_client) + args.append("--wait_for_suggestions_to_load") [ run_with_timeout( test_completion, [args[0], args, comp_word], COMPLETION_TIMEOUT_SECONDS @@ -112,6 +113,7 @@ if __name__ == "__main__": print("# clickhouse-local") clickhouse_local = os.environ["CLICKHOUSE_LOCAL"] + args.append("--wait_for_suggestions_to_load") args = shlex.split(clickhouse_local) [ run_with_timeout( From afb52b6369e94b1143d83fc8fc41575eb3289b10 Mon Sep 17 00:00:00 2001 From: Konstantin Bogdanov Date: Mon, 8 Apr 2024 23:58:53 +0200 Subject: [PATCH 25/90] Undo breaking statement --- .../0_stateless/01676_clickhouse_client_autocomplete.python | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.python b/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.python index 5433a8d4199e..b4380dc71473 100644 --- a/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.python +++ b/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.python @@ -82,7 +82,6 @@ client_compwords_positive = [ "primary_key_bytes_in_memory_allocated", # system.dictionaries # FIXME: none - "definitely_broken_and_should_fail", ] local_compwords_positive = [ From 414b0289310f8830b26f5f59bd74cc60c3b2d8bb Mon Sep 17 00:00:00 2001 From: Konstantin Bogdanov Date: Tue, 9 Apr 2024 21:51:27 +0200 Subject: [PATCH 26/90] Support synchronous completions in local server too --- programs/local/LocalServer.cpp | 3 +++ 
.../0_stateless/01676_clickhouse_client_autocomplete.python | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 72920fbd8559..8f5afdb90223 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -560,6 +560,7 @@ void LocalServer::processConfig() const std::string clickhouse_dialect{"clickhouse"}; load_suggestions = (is_interactive || delayed_interactive) && !config().getBool("disable_suggestion", false) && config().getString("dialect", clickhouse_dialect) == clickhouse_dialect; + wait_for_suggestions_to_load = config().getBool("wait_for_suggestions_to_load", false); auto logging = (config().has("logger.console") || config().has("logger.level") @@ -835,6 +836,8 @@ void LocalServer::processOptions(const OptionsDescription &, const CommandLineOp config().setString("logger.level", options["logger.level"].as()); if (options.count("send_logs_level")) config().setString("send_logs_level", options["send_logs_level"].as()); + if (options.count("wait_for_suggestions_to_load")) + config().setBool("wait_for_suggestions_to_load", true); } void LocalServer::readArguments(int argc, char ** argv, Arguments & common_arguments, std::vector &, std::vector &) diff --git a/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.python b/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.python index b4380dc71473..e62c35cd17d3 100644 --- a/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.python +++ b/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.python @@ -112,8 +112,8 @@ if __name__ == "__main__": print("# clickhouse-local") clickhouse_local = os.environ["CLICKHOUSE_LOCAL"] - args.append("--wait_for_suggestions_to_load") args = shlex.split(clickhouse_local) + args.append("--wait_for_suggestions_to_load") [ run_with_timeout( test_completion, [args[0], args, comp_word], COMPLETION_TIMEOUT_SECONDS From fb0c28a5b31154903d43c3ef48033cb9c29509a2 Mon Sep 17 00:00:00 2001 From: Konstantin Bogdanov Date: Wed, 10 Apr 2024 09:18:23 +0200 Subject: [PATCH 27/90] Add debug logging --- ...1676_clickhouse_client_autocomplete.python | 33 ++++++++++++++----- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.python b/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.python index e62c35cd17d3..9f0354ff961e 100644 --- a/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.python +++ b/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.python @@ -4,8 +4,8 @@ import shlex import time import multiprocessing -COMPLETION_TIMEOUT_SECONDS = 10 - +COMPLETION_TIMEOUT_SECONDS = 30 +DEBUG_LOG = os.path.join(os.environ["CLICKHOUSE_TMP"], os.path.basename(os.path.abspath(__file__)) + ".debuglog") def run_with_timeout(func, args, timeout): process = multiprocessing.Process(target=func, args=args) @@ -25,19 +25,31 @@ def test_completion(program, argv, comp_word): os.execv(program, argv) else: try: - output = os.read(master, 4096).decode() + debug_log_fd = open(DEBUG_LOG, "a") + + output_b = os.read(master, 4096) + output = output_b.decode() + debug_log_fd.write(repr(output_b) + "\n") while not ":)" in output: - output += os.read(master, 4096).decode() + output_b = os.read(master, 4096) + output += output_b.decode() + debug_log_fd.write(repr(output_b) + "\n") os.write(master, b"SET " + bytes(comp_begin.encode())) - output = os.read(master, 4096).decode() + output_b = 
os.read(master, 4096) + output = output_b.decode() + debug_log_fd.write(repr(output_b) + "\n") while not comp_begin in output: - output += os.read(master, 4096).decode() + output_b = os.read(master, 4096) + output += output_b.decode() + debug_log_fd.write(repr(output_b) + "\n") - time.sleep(0.25) + time.sleep(0.01) os.write(master, b"\t") - output = os.read(master, 4096).decode() + output_b = os.read(master, 4096) + output = output_b.decode() + debug_log_fd.write(repr(output_b) + "\n") # fail fast if there is a bell character in the output, # meaning no concise completion is found if "\x07" in output: @@ -45,11 +57,14 @@ def test_completion(program, argv, comp_word): return while not comp_word in output: - output += os.read(master, 4096).decode() + output_b = os.read(master, 4096) + output += output_b.decode() + debug_log_fd.write(repr(output_b) + "\n") print(f"{comp_word}: OK") finally: os.close(master) + debug_log_fd.close() client_compwords_positive = [ From 216a3e2eeb911b63c733bd6b5d22e41cb0f5b491 Mon Sep 17 00:00:00 2001 From: Konstantin Bogdanov Date: Wed, 10 Apr 2024 10:39:22 +0200 Subject: [PATCH 28/90] Reformat --- .../0_stateless/01676_clickhouse_client_autocomplete.python | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.python b/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.python index 9f0354ff961e..7bb9209f55ce 100644 --- a/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.python +++ b/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.python @@ -5,7 +5,11 @@ import time import multiprocessing COMPLETION_TIMEOUT_SECONDS = 30 -DEBUG_LOG = os.path.join(os.environ["CLICKHOUSE_TMP"], os.path.basename(os.path.abspath(__file__)) + ".debuglog") +DEBUG_LOG = os.path.join( + os.environ["CLICKHOUSE_TMP"], + os.path.basename(os.path.abspath(__file__)) + ".debuglog", +) + def run_with_timeout(func, args, timeout): process = multiprocessing.Process(target=func, args=args) From 11d2fbcf49208f8a0ade5fcc911dbf20d5e127ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Wed, 10 Apr 2024 18:17:29 +0200 Subject: [PATCH 29/90] Don't use virtual columns to filter if they have been overwritten --- .../optimizeUseAggregateProjection.cpp | 5 ++--- .../QueryPlan/ReadFromMergeTree.cpp | 3 ++- src/Storages/MergeTree/MergeTreeData.cpp | 21 ++++++++++++------- src/Storages/MergeTree/MergeTreeData.h | 5 +++-- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 5 +++-- .../MergeTree/MergeTreeDataSelectExecutor.h | 1 + ...virtual_column_override_group_by.reference | 1 + ...03093_virtual_column_override_group_by.sql | 2 ++ 8 files changed, 28 insertions(+), 15 deletions(-) create mode 100644 tests/queries/0_stateless/03093_virtual_column_override_group_by.reference create mode 100644 tests/queries/0_stateless/03093_virtual_column_override_group_by.sql diff --git a/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp b/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp index b40fea47b3cc..64111602458f 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp @@ -432,13 +432,12 @@ AggregateProjectionCandidates getAggregateProjectionCandidates( { const auto & keys = aggregating.getParams().keys; const auto & aggregates = aggregating.getParams().aggregates; - Block key_virtual_columns = 
reading.getMergeTreeData().getHeaderWithVirtualsForFilter(); + const auto metadata = reading.getStorageMetadata(); + Block key_virtual_columns = reading.getMergeTreeData().getHeaderWithVirtualsForFilter(metadata); AggregateProjectionCandidates candidates; const auto & parts = reading.getParts(); - - const auto metadata = reading.getStorageMetadata(); ContextPtr context = reading.getContext(); const auto & projections = metadata->projections; diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index bee42c3dddec..6bdd060513c9 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -1415,7 +1415,8 @@ static void buildIndexes( indexes->partition_pruner.emplace(metadata_snapshot, filter_actions_dag, context, false /* strict */); } - indexes->part_values = MergeTreeDataSelectExecutor::filterPartsByVirtualColumns(data, parts, filter_actions_dag, context); + indexes->part_values + = MergeTreeDataSelectExecutor::filterPartsByVirtualColumns(metadata_snapshot, data, parts, filter_actions_dag, context); MergeTreeDataSelectExecutor::buildKeyConditionFromPartOffset(indexes->part_offset_condition, filter_actions_dag, context); indexes->use_skip_indexes = settings.use_skip_indexes; diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 5d4c3ab078e5..a948d80396a7 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -1031,19 +1031,26 @@ void MergeTreeData::MergingParams::check(const StorageInMemoryMetadata & metadat const Names MergeTreeData::virtuals_useful_for_filter = {"_part", "_partition_id", "_part_uuid", "_partition_value", "_part_data_version"}; -Block MergeTreeData::getHeaderWithVirtualsForFilter() const +Block MergeTreeData::getHeaderWithVirtualsForFilter(const StorageMetadataPtr & metadata) const { + const auto columns = metadata->getColumns().getAllPhysical(); Block header; auto virtuals_desc = getVirtualsPtr(); for (const auto & name : virtuals_useful_for_filter) + { + if (columns.contains(name)) + continue; if (auto column = virtuals_desc->tryGet(name)) header.insert({column->type->createColumn(), column->type, name}); + } + return header; } -Block MergeTreeData::getBlockWithVirtualsForFilter(const MergeTreeData::DataPartsVector & parts, bool ignore_empty) const +Block MergeTreeData::getBlockWithVirtualsForFilter( + const StorageMetadataPtr & metadata, const MergeTreeData::DataPartsVector & parts, bool ignore_empty) const { - auto block = getHeaderWithVirtualsForFilter(); + auto block = getHeaderWithVirtualsForFilter(metadata); for (const auto & part_or_projection : parts) { @@ -1072,7 +1079,7 @@ std::optional MergeTreeData::totalRowsByPartitionPredicateImpl( return 0; auto metadata_snapshot = getInMemoryMetadataPtr(); - auto virtual_columns_block = getBlockWithVirtualsForFilter({parts[0]}); + auto virtual_columns_block = getBlockWithVirtualsForFilter(metadata_snapshot, {parts[0]}); auto filter_dag = VirtualColumnUtils::splitFilterDagForAllowedInputs(filter_actions_dag->getOutputs().at(0), nullptr); if (!filter_dag) @@ -1091,7 +1098,7 @@ std::optional MergeTreeData::totalRowsByPartitionPredicateImpl( std::unordered_set part_values; if (valid) { - virtual_columns_block = getBlockWithVirtualsForFilter(parts); + virtual_columns_block = getBlockWithVirtualsForFilter(metadata_snapshot, parts); VirtualColumnUtils::filterBlockWithDAG(filter_dag, virtual_columns_block, local_context); 
part_values = VirtualColumnUtils::extractSingleValueFromBlock(virtual_columns_block, "_part"); if (part_values.empty()) @@ -6694,11 +6701,11 @@ Block MergeTreeData::getMinMaxCountProjectionBlock( }; Block virtual_columns_block; - auto virtual_block = getHeaderWithVirtualsForFilter(); + auto virtual_block = getHeaderWithVirtualsForFilter(metadata_snapshot); bool has_virtual_column = std::any_of(required_columns.begin(), required_columns.end(), [&](const auto & name) { return virtual_block.has(name); }); if (has_virtual_column || filter_dag) { - virtual_columns_block = getBlockWithVirtualsForFilter(parts, /*ignore_empty=*/ true); + virtual_columns_block = getBlockWithVirtualsForFilter(metadata_snapshot, parts, /*ignore_empty=*/true); if (virtual_columns_block.rows() == 0) return {}; } diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index d21f87c337ef..85537ce4a24a 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -990,10 +990,11 @@ class MergeTreeData : public IStorage, public WithMutableContext static const Names virtuals_useful_for_filter; /// Construct a sample block of virtual columns. - Block getHeaderWithVirtualsForFilter() const; + Block getHeaderWithVirtualsForFilter(const StorageMetadataPtr & metadata) const; /// Construct a block consisting only of possible virtual columns for part pruning. - Block getBlockWithVirtualsForFilter(const MergeTreeData::DataPartsVector & parts, bool ignore_empty = false) const; + Block getBlockWithVirtualsForFilter( + const StorageMetadataPtr & metadata, const MergeTreeData::DataPartsVector & parts, bool ignore_empty = false) const; /// In merge tree we do inserts with several steps. One of them: /// X. write part to temporary directory with some temp name diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index bcc936c57396..345872efddf9 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -473,6 +473,7 @@ void MergeTreeDataSelectExecutor::buildKeyConditionFromPartOffset( } std::optional> MergeTreeDataSelectExecutor::filterPartsByVirtualColumns( + const StorageMetadataPtr & metadata_snapshot, const MergeTreeData & data, const MergeTreeData::DataPartsVector & parts, const ActionsDAGPtr & filter_dag, @@ -481,12 +482,12 @@ std::optional> MergeTreeDataSelectExecutor::filterPar if (!filter_dag) return {}; - auto sample = data.getHeaderWithVirtualsForFilter(); + auto sample = data.getHeaderWithVirtualsForFilter(metadata_snapshot); auto dag = VirtualColumnUtils::splitFilterDagForAllowedInputs(filter_dag->getOutputs().at(0), &sample); if (!dag) return {}; - auto virtual_columns_block = data.getBlockWithVirtualsForFilter(parts); + auto virtual_columns_block = data.getBlockWithVirtualsForFilter(metadata_snapshot, parts); VirtualColumnUtils::filterBlockWithDAG(dag, virtual_columns_block, context); return VirtualColumnUtils::extractSingleValueFromBlock(virtual_columns_block, "_part"); } diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h index b1afd7e66683..ecccd6d55e39 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h @@ -166,6 +166,7 @@ class MergeTreeDataSelectExecutor /// Example: SELECT count() FROM table WHERE _part = 'part_name' /// If expression found, return a set with 
allowed part names (std::nullopt otherwise). static std::optional> filterPartsByVirtualColumns( + const StorageMetadataPtr & metadata_snapshot, const MergeTreeData & data, const MergeTreeData::DataPartsVector & parts, const ActionsDAGPtr & filter_dag, diff --git a/tests/queries/0_stateless/03093_virtual_column_override_group_by.reference b/tests/queries/0_stateless/03093_virtual_column_override_group_by.reference new file mode 100644 index 000000000000..d00491fd7e5b --- /dev/null +++ b/tests/queries/0_stateless/03093_virtual_column_override_group_by.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/03093_virtual_column_override_group_by.sql b/tests/queries/0_stateless/03093_virtual_column_override_group_by.sql new file mode 100644 index 000000000000..168d38a15b5a --- /dev/null +++ b/tests/queries/0_stateless/03093_virtual_column_override_group_by.sql @@ -0,0 +1,2 @@ +CREATE TABLE override_test__fuzz_45 (`_part` Float32) ENGINE = MergeTree ORDER BY tuple() AS SELECT 1; +SELECT _part FROM override_test__fuzz_45 GROUP BY materialize(6), 1; From 12569cc5fe880f9a25728158884db1ac2af00472 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Wed, 10 Apr 2024 18:18:47 +0200 Subject: [PATCH 30/90] Don't allow the fuzzer to change allow_experimental_analyzer --- docker/test/fuzzer/query-fuzzer-tweaks-users.xml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/docker/test/fuzzer/query-fuzzer-tweaks-users.xml b/docker/test/fuzzer/query-fuzzer-tweaks-users.xml index 023f257253a4..c31d2fd7f397 100644 --- a/docker/test/fuzzer/query-fuzzer-tweaks-users.xml +++ b/docker/test/fuzzer/query-fuzzer-tweaks-users.xml @@ -26,6 +26,11 @@ 200 + + + + + From f06dca1a5093a605a39b099da42fd5d59b387973 Mon Sep 17 00:00:00 2001 From: Murat Khairulin Date: Fri, 5 Apr 2024 14:53:32 +0500 Subject: [PATCH 31/90] Fix primary key in materialized view --- src/Storages/StorageMaterializedView.cpp | 6 ++++ .../03035_materialized_primary_key.reference | 3 ++ .../03035_materialized_primary_key.sql | 28 +++++++++++++++++++ 3 files changed, 37 insertions(+) create mode 100644 tests/queries/0_stateless/03035_materialized_primary_key.reference create mode 100644 tests/queries/0_stateless/03035_materialized_primary_key.sql diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index 344b5dfce9b7..9e98b9830555 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -91,6 +91,12 @@ StorageMaterializedView::StorageMaterializedView( { StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns(columns_); + auto storage_def = query.storage; + if (storage_def && storage_def->primary_key) + storage_metadata.primary_key = KeyDescription::getKeyFromAST(storage_def->primary_key->ptr(), + storage_metadata.columns, + local_context->getGlobalContext()); + if (query.sql_security) storage_metadata.setSQLSecurity(query.sql_security->as()); diff --git a/tests/queries/0_stateless/03035_materialized_primary_key.reference b/tests/queries/0_stateless/03035_materialized_primary_key.reference new file mode 100644 index 000000000000..4ee050c1d92c --- /dev/null +++ b/tests/queries/0_stateless/03035_materialized_primary_key.reference @@ -0,0 +1,3 @@ +test id +test_mv +test_mv_pk value diff --git a/tests/queries/0_stateless/03035_materialized_primary_key.sql b/tests/queries/0_stateless/03035_materialized_primary_key.sql new file mode 100644 index 000000000000..961b61851c3a --- /dev/null +++ 
b/tests/queries/0_stateless/03035_materialized_primary_key.sql @@ -0,0 +1,28 @@ +DROP TABLE IF EXISTS test; +CREATE TABLE test +( + id UInt64, + value String +) ENGINE=MergeTree ORDER BY id; + +INSERT INTO test VALUES (1, 'Alice'), (2, 'Bob'); + +DROP VIEW IF EXISTS test_mv; +CREATE MATERIALIZED VIEW test_mv +( + id UInt64, + value String +) ENGINE=MergeTree +ORDER BY id AS SELECT id, value FROM test; + +DROP VIEW IF EXISTS test_mv_pk; +CREATE MATERIALIZED VIEW test_mv_pk +( + value String, + id UInt64 +) ENGINE=MergeTree PRIMARY KEY value +POPULATE AS SELECT value, id FROM test; + +SELECT name, primary_key +FROM system.tables +WHERE name LIKE 'test%'; \ No newline at end of file From 1938184273e972328ab494b39e3d6a6cf06ab391 Mon Sep 17 00:00:00 2001 From: Murat Khairulin Date: Fri, 5 Apr 2024 23:56:23 +0500 Subject: [PATCH 32/90] Fix for style check --- tests/queries/0_stateless/03035_materialized_primary_key.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/03035_materialized_primary_key.sql b/tests/queries/0_stateless/03035_materialized_primary_key.sql index 961b61851c3a..928aebc340b3 100644 --- a/tests/queries/0_stateless/03035_materialized_primary_key.sql +++ b/tests/queries/0_stateless/03035_materialized_primary_key.sql @@ -25,4 +25,4 @@ POPULATE AS SELECT value, id FROM test; SELECT name, primary_key FROM system.tables -WHERE name LIKE 'test%'; \ No newline at end of file +WHERE database = currentDatabase() AND name LIKE 'test%'; \ No newline at end of file From 9783ae2a82bfa68e6e41cd74ef21dd9798365bf0 Mon Sep 17 00:00:00 2001 From: Murat Khairulin Date: Sun, 7 Apr 2024 22:27:10 +0500 Subject: [PATCH 33/90] Fix style --- src/Storages/StorageMaterializedView.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index 9e98b9830555..696865dfa2f6 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -91,7 +91,7 @@ StorageMaterializedView::StorageMaterializedView( { StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns(columns_); - auto storage_def = query.storage; + auto * storage_def = query.storage; if (storage_def && storage_def->primary_key) storage_metadata.primary_key = KeyDescription::getKeyFromAST(storage_def->primary_key->ptr(), storage_metadata.columns, From dab3f55bdbe622a05eb7eae2b45f1bce159696d9 Mon Sep 17 00:00:00 2001 From: Murat Khairulin Date: Wed, 10 Apr 2024 01:20:39 +0500 Subject: [PATCH 34/90] Restart ci From 73db78fe43f5b6097e724a1f91965ba1bded92e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Wed, 10 Apr 2024 19:13:03 +0200 Subject: [PATCH 35/90] Add test for #26674 --- .../03093_analyzer_column_alias.reference | 1 + .../03093_analyzer_column_alias.sql | 21 +++++++++++++++++++ 2 files changed, 22 insertions(+) create mode 100644 tests/queries/0_stateless/03093_analyzer_column_alias.reference create mode 100644 tests/queries/0_stateless/03093_analyzer_column_alias.sql diff --git a/tests/queries/0_stateless/03093_analyzer_column_alias.reference b/tests/queries/0_stateless/03093_analyzer_column_alias.reference new file mode 100644 index 000000000000..4d9ef9832ddf --- /dev/null +++ b/tests/queries/0_stateless/03093_analyzer_column_alias.reference @@ -0,0 +1 @@ +1 0 10 9 diff --git a/tests/queries/0_stateless/03093_analyzer_column_alias.sql b/tests/queries/0_stateless/03093_analyzer_column_alias.sql new file mode 100644 index 
000000000000..9ff0f78ba245 --- /dev/null +++ b/tests/queries/0_stateless/03093_analyzer_column_alias.sql @@ -0,0 +1,21 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/26674 +SET allow_experimental_analyzer = true; + +SELECT + Carrier, + sum(toFloat64(C3)) AS C1, + sum(toFloat64(C1)) AS C2, + sum(toFloat64(C2)) AS C3 +FROM + ( + SELECT + 1 AS Carrier, + count(CAST(1, 'Nullable(Int32)')) AS C1, + max(number) AS C2, + min(number) AS C3 + FROM numbers(10) + GROUP BY Carrier + ) AS ITBL +GROUP BY Carrier +LIMIT 1000001 +SETTINGS prefer_column_name_to_alias=1; From 958d36eecbe9f1177ba4bc032cb2b3c5e5ec0c81 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Wed, 10 Apr 2024 19:31:12 +0200 Subject: [PATCH 36/90] Typo --- src/Processors/Formats/Impl/ProtobufListInputFormat.cpp | 4 ++-- src/Processors/Formats/Impl/ProtobufListInputFormat.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Processors/Formats/Impl/ProtobufListInputFormat.cpp b/src/Processors/Formats/Impl/ProtobufListInputFormat.cpp index 2382b3cf27ac..c643ae060d62 100644 --- a/src/Processors/Formats/Impl/ProtobufListInputFormat.cpp +++ b/src/Processors/Formats/Impl/ProtobufListInputFormat.cpp @@ -86,7 +86,7 @@ size_t ProtobufListInputFormat::countRows(size_t max_block_size) ProtobufListSchemaReader::ProtobufListSchemaReader(const FormatSettings & format_settings) : schema_info( format_settings.schema.format_schema, "Protobuf", true, format_settings.schema.is_server, format_settings.schema.format_schema_path) - , skip_unsopported_fields(format_settings.protobuf.skip_fields_with_unsupported_types_in_schema_inference) + , skip_unsupported_fields(format_settings.protobuf.skip_fields_with_unsupported_types_in_schema_inference) , google_protos_path(format_settings.protobuf.google_protos_path) { } @@ -95,7 +95,7 @@ NamesAndTypesList ProtobufListSchemaReader::readSchema() { const auto * message_descriptor = ProtobufSchemas::instance().getMessageTypeForFormatSchema(schema_info, ProtobufSchemas::WithEnvelope::Yes, google_protos_path); - return protobufSchemaToCHSchema(message_descriptor, skip_unsopported_fields); + return protobufSchemaToCHSchema(message_descriptor, skip_unsupported_fields); } void registerInputFormatProtobufList(FormatFactory & factory) diff --git a/src/Processors/Formats/Impl/ProtobufListInputFormat.h b/src/Processors/Formats/Impl/ProtobufListInputFormat.h index 947696bba820..8305af285063 100644 --- a/src/Processors/Formats/Impl/ProtobufListInputFormat.h +++ b/src/Processors/Formats/Impl/ProtobufListInputFormat.h @@ -56,7 +56,7 @@ class ProtobufListSchemaReader : public IExternalSchemaReader private: const FormatSchemaInfo schema_info; - bool skip_unsopported_fields; + bool skip_unsupported_fields; const String google_protos_path; }; From d6260e984cb1261d28ed7f2d77031839b4977b5a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Wed, 10 Apr 2024 19:46:52 +0200 Subject: [PATCH 37/90] Avoid crash when reading protobuf with recursive types --- src/Formats/ProtobufSerializer.cpp | 31 +++++++++++++++++-- .../03094_recursive_type_proto.reference | 1 + .../0_stateless/03094_recursive_type_proto.sh | 8 +++++ .../format_schemas/03094_recursive_type.proto | 17 ++++++++++ 4 files changed, 54 insertions(+), 3 deletions(-) create mode 100644 tests/queries/0_stateless/03094_recursive_type_proto.reference create mode 100755 tests/queries/0_stateless/03094_recursive_type_proto.sh create mode 100644 tests/queries/0_stateless/format_schemas/03094_recursive_type.proto diff 
--git a/src/Formats/ProtobufSerializer.cpp b/src/Formats/ProtobufSerializer.cpp index f2f1d985cc9c..744cea9f4dc4 100644 --- a/src/Formats/ProtobufSerializer.cpp +++ b/src/Formats/ProtobufSerializer.cpp @@ -3721,8 +3721,23 @@ namespace return std::make_shared>(std::move(values)); } - std::optional getNameAndDataTypeFromField(const google::protobuf::FieldDescriptor * field_descriptor, bool skip_unsupported_fields, bool allow_repeat = true) + std::optional getNameAndDataTypeFromField( + const google::protobuf::FieldDescriptor * field_descriptor, bool skip_unsupported_fields, bool allow_repeat); + + std::optional getNameAndDataTypeFromFieldRecursive( + const google::protobuf::FieldDescriptor * field_descriptor, + bool skip_unsupported_fields, + bool allow_repeat, + std::unordered_set & pending_resolution) { + if (pending_resolution.contains(field_descriptor)) + { + if (skip_unsupported_fields) + return std::nullopt; + throw Exception(ErrorCodes::BAD_ARGUMENTS, "ClickHouse doesn't support type recursion ({})", field_descriptor->full_name()); + } + pending_resolution.emplace(field_descriptor); + if (allow_repeat && field_descriptor->is_map()) { auto name_and_type = getNameAndDataTypeFromField(field_descriptor, skip_unsupported_fields, false); @@ -3804,7 +3819,8 @@ namespace else if (message_descriptor->field_count() == 1) { const auto * nested_field_descriptor = message_descriptor->field(0); - auto nested_name_and_type = getNameAndDataTypeFromField(nested_field_descriptor, skip_unsupported_fields); + auto nested_name_and_type + = getNameAndDataTypeFromFieldRecursive(nested_field_descriptor, skip_unsupported_fields, true, pending_resolution); if (!nested_name_and_type) return std::nullopt; return NameAndTypePair{field_descriptor->name() + "_" + nested_name_and_type->name, nested_name_and_type->type}; @@ -3815,7 +3831,8 @@ namespace Strings nested_names; for (int i = 0; i != message_descriptor->field_count(); ++i) { - auto nested_name_and_type = getNameAndDataTypeFromField(message_descriptor->field(i), skip_unsupported_fields); + auto nested_name_and_type = getNameAndDataTypeFromFieldRecursive( + message_descriptor->field(i), skip_unsupported_fields, true, pending_resolution); if (!nested_name_and_type) continue; nested_types.push_back(nested_name_and_type->type); @@ -3831,6 +3848,14 @@ namespace UNREACHABLE(); } + + std::optional getNameAndDataTypeFromField( + const google::protobuf::FieldDescriptor * field_descriptor, bool skip_unsupported_fields, bool allow_repeat = true) + { + /// Keep track of the fields that are pending resolution to avoid recursive types, which are unsupported + std::unordered_set pending_resolution{}; + return getNameAndDataTypeFromFieldRecursive(field_descriptor, skip_unsupported_fields, allow_repeat, pending_resolution); + } } std::unique_ptr ProtobufSerializer::create( diff --git a/tests/queries/0_stateless/03094_recursive_type_proto.reference b/tests/queries/0_stateless/03094_recursive_type_proto.reference new file mode 100644 index 000000000000..d00491fd7e5b --- /dev/null +++ b/tests/queries/0_stateless/03094_recursive_type_proto.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/03094_recursive_type_proto.sh b/tests/queries/0_stateless/03094_recursive_type_proto.sh new file mode 100755 index 000000000000..6fa374f98d50 --- /dev/null +++ b/tests/queries/0_stateless/03094_recursive_type_proto.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +SCHEMADIR=$CURDIR/format_schemas +$CLICKHOUSE_LOCAL -q "DESCRIBE TABLE file('nonexist', 'Protobuf') SETTINGS format_schema='$SCHEMADIR/03094_recursive_type.proto:Struct'" |& grep -c CANNOT_PARSE_PROTOBUF_SCHEMA diff --git a/tests/queries/0_stateless/format_schemas/03094_recursive_type.proto b/tests/queries/0_stateless/format_schemas/03094_recursive_type.proto new file mode 100644 index 000000000000..97b2c9480a1f --- /dev/null +++ b/tests/queries/0_stateless/format_schemas/03094_recursive_type.proto @@ -0,0 +1,17 @@ +syntax = "proto3"; + +message Struct { + map fields = 1; +} + +message Value { + // The kind of value. + oneof kind { + string string_value = 1; + ListValue list_value = 2; + } +} + +message ListValue { + repeated Value values = 1; +} From dfa7a9704ad1ed34d5893234bc1bf852d3390ca2 Mon Sep 17 00:00:00 2001 From: Joshua Hildred Date: Wed, 10 Apr 2024 11:47:10 -0700 Subject: [PATCH 38/90] Fix an isssue with constants being wrapped in nullables --- .../Passes/LogicalExpressionOptimizerPass.cpp | 15 +++++++++++++++ .../0_stateless/03032_redundant_equals.reference | 2 ++ .../0_stateless/03032_redundant_equals.sql | 2 ++ 3 files changed, 19 insertions(+) diff --git a/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp b/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp index 546959c4d9c0..ee0ddf24233e 100644 --- a/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp +++ b/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp @@ -274,7 +274,18 @@ class LogicalExpressionOptimizerVisitor : public InDepthQueryTreeVisitorWithCont } } + void leaveImpl(QueryTreeNodePtr & node) + { + if (!need_rerun_resolve) + return; + + if (auto * function_node = node->as()) + rerunFunctionResolve(function_node, getContext()); + } + private: + bool need_rerun_resolve = false; + void tryOptimizeAndEqualsNotEqualsChain(QueryTreeNodePtr & node) { auto & function_node = node->as(); @@ -615,6 +626,10 @@ class LogicalExpressionOptimizerVisitor : public InDepthQueryTreeVisitorWithCont if (!child_function || !isBooleanFunction(child_function->getFunctionName())) return; + + if (function_node.getResultType()->isNullable() && !child_function->getResultType()->isNullable()) + need_rerun_resolve = true; + if (maybe_invert) { auto not_resolver = FunctionFactory::instance().get("not", getContext()); diff --git a/tests/queries/0_stateless/03032_redundant_equals.reference b/tests/queries/0_stateless/03032_redundant_equals.reference index d477c98b6048..09f4d8e3646a 100644 --- a/tests/queries/0_stateless/03032_redundant_equals.reference +++ b/tests/queries/0_stateless/03032_redundant_equals.reference @@ -15,6 +15,8 @@ 100 101 100 +100 +101 1 1 1 diff --git a/tests/queries/0_stateless/03032_redundant_equals.sql b/tests/queries/0_stateless/03032_redundant_equals.sql index afb9c8878661..427845d9c12e 100644 --- a/tests/queries/0_stateless/03032_redundant_equals.sql +++ b/tests/queries/0_stateless/03032_redundant_equals.sql @@ -23,6 +23,8 @@ SELECT * FROM test_table WHERE (NOT ((k not in (100) = 0) OR (k in (100) = 1))) SELECT * FROM test_table WHERE (NOT ((k in (101) = 0) OR (k in (100) = 1))) = 1; SELECT * FROM test_table WHERE ((k not in (101) = 0) OR (k in (100) = 1)) = 1; SELECT * FROM test_table WHERE ((k not in (99) = 1) AND (k in (100) = 1)) = 1; +SELECT * FROM test_table WHERE ((k not in (101) = toNullable(0)) OR (k in (100) = toNullable(1))) = toNullable(1); + SELECT count() FROM From 70438f7e6e2596f40c0b4a2502e27afc49bc778a Mon Sep 17 00:00:00 2001 From: Joshua Hildred Date: 
Wed, 10 Apr 2024 12:11:12 -0700 Subject: [PATCH 39/90] Update tests to set allow_experimental_analyzer --- tests/queries/0_stateless/03032_redundant_equals.sql | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/queries/0_stateless/03032_redundant_equals.sql b/tests/queries/0_stateless/03032_redundant_equals.sql index 427845d9c12e..ae0b9651e12c 100644 --- a/tests/queries/0_stateless/03032_redundant_equals.sql +++ b/tests/queries/0_stateless/03032_redundant_equals.sql @@ -9,6 +9,8 @@ ORDER BY k; INSERT INTO test_table SELECT number FROM numbers(10000000); +SET allow_experimental_analyzer = 1; + SELECT * FROM test_table WHERE k in (100) = 1; SELECT * FROM test_table WHERE k = (100) = 1; SELECT * FROM test_table WHERE k not in (100) = 0; From 4f38bf4f6b6566a7e746d0f2c72967027692a016 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 10 Apr 2024 21:39:02 +0200 Subject: [PATCH 40/90] Revert "Revert "Rich syntax highlighting in the client"" --- src/Client/ClientBase.cpp | 8 +- src/Client/ClientBaseHelpers.cpp | 162 ++++++++++-------- src/Parsers/ASTOrderByElement.cpp | 1 - src/Parsers/CommonParsers.h | 2 + src/Parsers/ExpressionElementParsers.cpp | 4 +- src/Parsers/ExpressionElementParsers.h | 13 +- src/Parsers/ExpressionListParsers.cpp | 83 +++++---- src/Parsers/IParser.cpp | 23 +++ src/Parsers/IParser.h | 39 +++++ src/Parsers/IParserBase.cpp | 19 +- src/Parsers/ParserInsertQuery.cpp | 4 +- src/Parsers/parseDatabaseAndTableName.cpp | 15 -- src/Parsers/parseQuery.cpp | 58 ++++--- src/Parsers/parseQuery.h | 5 + ..._autocomplete_word_break_characters.expect | 2 +- ...01565_query_loop_after_client_error.expect | 19 +- .../01676_clickhouse_client_autocomplete.sh | 2 +- .../01702_system_query_log.reference | 20 +-- ...160_client_autocomplete_parse_query.expect | 2 +- 19 files changed, 301 insertions(+), 180 deletions(-) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 4948402bb7fb..f37b391eb664 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -2061,7 +2061,7 @@ MultiQueryProcessingStage ClientBase::analyzeMultiQueryText( return MultiQueryProcessingStage::QUERIES_END; // Remove leading empty newlines and other whitespace, because they - // are annoying to filter in query log. This is mostly relevant for + // are annoying to filter in the query log. This is mostly relevant for // the tests. while (this_query_begin < all_queries_end && isWhitespaceASCII(*this_query_begin)) ++this_query_begin; @@ -2091,7 +2091,7 @@ MultiQueryProcessingStage ClientBase::analyzeMultiQueryText( { parsed_query = parseQuery(this_query_end, all_queries_end, true); } - catch (Exception & e) + catch (const Exception & e) { current_exception.reset(e.clone()); return MultiQueryProcessingStage::PARSING_EXCEPTION; @@ -2116,9 +2116,9 @@ MultiQueryProcessingStage ClientBase::analyzeMultiQueryText( // INSERT queries may have the inserted data in the query text // that follow the query itself, e.g. "insert into t format CSV 1;2". // They need special handling. First of all, here we find where the - // inserted data ends. In multy-query mode, it is delimited by a + // inserted data ends. In multi-query mode, it is delimited by a // newline. - // The VALUES format needs even more handling -- we also allow the + // The VALUES format needs even more handling - we also allow the // data to be delimited by semicolon. This case is handled later by // the format parser itself. 
// We can't do multiline INSERTs with inline data, because most diff --git a/src/Client/ClientBaseHelpers.cpp b/src/Client/ClientBaseHelpers.cpp index b08626962957..b1d29b34ffc6 100644 --- a/src/Client/ClientBaseHelpers.cpp +++ b/src/Client/ClientBaseHelpers.cpp @@ -1,11 +1,14 @@ #include "ClientBaseHelpers.h" - #include #include -#include +#include +#include #include +#include + + namespace DB { @@ -96,77 +99,102 @@ void highlight(const String & query, std::vector & colors { using namespace replxx; - static const std::unordered_map token_to_color - = {{TokenType::Whitespace, Replxx::Color::DEFAULT}, - {TokenType::Comment, Replxx::Color::GRAY}, - {TokenType::BareWord, Replxx::Color::DEFAULT}, - {TokenType::Number, Replxx::Color::GREEN}, - {TokenType::StringLiteral, Replxx::Color::CYAN}, - {TokenType::QuotedIdentifier, Replxx::Color::MAGENTA}, - {TokenType::OpeningRoundBracket, Replxx::Color::BROWN}, - {TokenType::ClosingRoundBracket, Replxx::Color::BROWN}, - {TokenType::OpeningSquareBracket, Replxx::Color::BROWN}, - {TokenType::ClosingSquareBracket, Replxx::Color::BROWN}, - {TokenType::DoubleColon, Replxx::Color::BROWN}, - {TokenType::OpeningCurlyBrace, replxx::color::bold(Replxx::Color::DEFAULT)}, - {TokenType::ClosingCurlyBrace, replxx::color::bold(Replxx::Color::DEFAULT)}, - - {TokenType::Comma, replxx::color::bold(Replxx::Color::DEFAULT)}, - {TokenType::Semicolon, replxx::color::bold(Replxx::Color::DEFAULT)}, - {TokenType::VerticalDelimiter, replxx::color::bold(Replxx::Color::DEFAULT)}, - {TokenType::Dot, replxx::color::bold(Replxx::Color::DEFAULT)}, - {TokenType::Asterisk, replxx::color::bold(Replxx::Color::DEFAULT)}, - {TokenType::HereDoc, Replxx::Color::CYAN}, - {TokenType::Plus, replxx::color::bold(Replxx::Color::DEFAULT)}, - {TokenType::Minus, replxx::color::bold(Replxx::Color::DEFAULT)}, - {TokenType::Slash, replxx::color::bold(Replxx::Color::DEFAULT)}, - {TokenType::Percent, replxx::color::bold(Replxx::Color::DEFAULT)}, - {TokenType::Arrow, replxx::color::bold(Replxx::Color::DEFAULT)}, - {TokenType::QuestionMark, replxx::color::bold(Replxx::Color::DEFAULT)}, - {TokenType::Colon, replxx::color::bold(Replxx::Color::DEFAULT)}, - {TokenType::Equals, replxx::color::bold(Replxx::Color::DEFAULT)}, - {TokenType::NotEquals, replxx::color::bold(Replxx::Color::DEFAULT)}, - {TokenType::Less, replxx::color::bold(Replxx::Color::DEFAULT)}, - {TokenType::Greater, replxx::color::bold(Replxx::Color::DEFAULT)}, - {TokenType::LessOrEquals, replxx::color::bold(Replxx::Color::DEFAULT)}, - {TokenType::GreaterOrEquals, replxx::color::bold(Replxx::Color::DEFAULT)}, - {TokenType::Spaceship, replxx::color::bold(Replxx::Color::DEFAULT)}, - {TokenType::Concatenation, replxx::color::bold(Replxx::Color::DEFAULT)}, - {TokenType::At, replxx::color::bold(Replxx::Color::DEFAULT)}, - {TokenType::DoubleAt, Replxx::Color::MAGENTA}, - - {TokenType::EndOfStream, Replxx::Color::DEFAULT}, - - {TokenType::Error, Replxx::Color::RED}, - {TokenType::ErrorMultilineCommentIsNotClosed, Replxx::Color::RED}, - {TokenType::ErrorSingleQuoteIsNotClosed, Replxx::Color::RED}, - {TokenType::ErrorDoubleQuoteIsNotClosed, Replxx::Color::RED}, - {TokenType::ErrorSinglePipeMark, Replxx::Color::RED}, - {TokenType::ErrorWrongNumber, Replxx::Color::RED}, - {TokenType::ErrorMaxQuerySizeExceeded, Replxx::Color::RED}}; - - const Replxx::Color unknown_token_color = Replxx::Color::RED; - - Lexer lexer(query.data(), query.data() + query.size()); - size_t pos = 0; + /// The `colors` array maps to a Unicode code point position in a string into a 
color. + /// A color is set for every position individually (not for a range). - for (Token token = lexer.nextToken(); !token.isEnd(); token = lexer.nextToken()) + /// Empty input. + if (colors.empty()) + return; + + /// The colors should be legible (and look gorgeous) in both dark and light themes. + /// When modifying this, check it in both themes. + + static const std::unordered_map type_to_color = { - if (token.type == TokenType::Semicolon || token.type == TokenType::VerticalDelimiter) - ReplxxLineReader::setLastIsDelimiter(true); - else if (token.type != TokenType::Whitespace) - ReplxxLineReader::setLastIsDelimiter(false); + {Highlight::keyword, replxx::color::bold(Replxx::Color::DEFAULT)}, + {Highlight::identifier, Replxx::Color::CYAN}, + {Highlight::function, Replxx::Color::BROWN}, + {Highlight::alias, replxx::color::rgb666(0, 4, 4)}, + {Highlight::substitution, Replxx::Color::MAGENTA}, + {Highlight::number, replxx::color::rgb666(0, 4, 0)}, + {Highlight::string, Replxx::Color::GREEN}, + }; + + /// We set reasonably small limits for size/depth, because we don't want the CLI to be slow. + /// While syntax highlighting is unneeded for long queries, which the user couldn't read anyway. - size_t utf8_len = UTF8::countCodePoints(reinterpret_cast(token.begin), token.size()); - for (size_t code_point_index = 0; code_point_index < utf8_len; ++code_point_index) + const char * begin = query.data(); + const char * end = begin + query.size(); + Tokens tokens(begin, end, 1000, true); + IParser::Pos token_iterator(tokens, static_cast(1000), static_cast(10000)); + Expected expected; + + /// We don't do highlighting for foreign dialects, such as PRQL and Kusto. + /// Only normal ClickHouse SQL queries are highlighted. + + /// Currently we highlight only the first query in the multi-query mode. + + ParserQuery parser(end); + ASTPtr ast; + bool parse_res = false; + + try + { + parse_res = parser.parse(token_iterator, ast, expected); + } + catch (...) + { + /// Skip highlighting in the case of exceptions during parsing. + /// It is ok to ignore unknown exceptions here. + return; + } + + size_t pos = 0; + const char * prev = begin; + for (const auto & range : expected.highlights) + { + auto it = type_to_color.find(range.highlight); + if (it != type_to_color.end()) { - if (token_to_color.find(token.type) != token_to_color.end()) - colors[pos + code_point_index] = token_to_color.at(token.type); - else - colors[pos + code_point_index] = unknown_token_color; + /// We have to map from byte positions to Unicode positions. + pos += UTF8::countCodePoints(reinterpret_cast(prev), range.begin - prev); + size_t utf8_len = UTF8::countCodePoints(reinterpret_cast(range.begin), range.end - range.begin); + + for (size_t code_point_index = 0; code_point_index < utf8_len; ++code_point_index) + colors[pos + code_point_index] = it->second; + + pos += utf8_len; + prev = range.end; } + } - pos += utf8_len; + Token last_token = token_iterator.max(); + /// Raw data in INSERT queries, which is not necessarily tokenized. + const char * insert_data = ast ? getInsertData(ast) : nullptr; + + /// Highlight the last error in red. 
If the parser failed or the lexer found an invalid token, + /// or if it didn't parse all the data (except, the data for INSERT query, which is legitimately unparsed) + if ((!parse_res || last_token.isError() || (!token_iterator->isEnd() && token_iterator->type != TokenType::Semicolon)) + && !(insert_data && expected.max_parsed_pos >= insert_data) + && expected.max_parsed_pos >= prev) + { + pos += UTF8::countCodePoints(reinterpret_cast(prev), expected.max_parsed_pos - prev); + + if (pos >= colors.size()) + pos = colors.size() - 1; + + colors[pos] = Replxx::Color::BRIGHTRED; + } + + /// This is a callback for the client/local app to better find query end. Note: this is a kludge, remove it. + if (last_token.type == TokenType::Semicolon || last_token.type == TokenType::VerticalDelimiter + || query.ends_with(';') || query.ends_with("\\G")) /// This is for raw data in INSERT queries, which is not necessarily tokenized. + { + ReplxxLineReader::setLastIsDelimiter(true); + } + else if (last_token.type != TokenType::Whitespace) + { + ReplxxLineReader::setLastIsDelimiter(false); } } #endif diff --git a/src/Parsers/ASTOrderByElement.cpp b/src/Parsers/ASTOrderByElement.cpp index be0416359a18..09193a8b5e16 100644 --- a/src/Parsers/ASTOrderByElement.cpp +++ b/src/Parsers/ASTOrderByElement.cpp @@ -1,4 +1,3 @@ -#include #include #include #include diff --git a/src/Parsers/CommonParsers.h b/src/Parsers/CommonParsers.h index 49964b5c7281..2277e348b0f2 100644 --- a/src/Parsers/CommonParsers.h +++ b/src/Parsers/CommonParsers.h @@ -601,6 +601,8 @@ class ParserKeyword : public IParserBase constexpr const char * getName() const override { return s.data(); } + Highlight highlight() const override { return Highlight::keyword; } + protected: bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; }; diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index 2c8ab65d1fc6..dce0bc62d5b5 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -278,7 +278,7 @@ bool ParserTableAsStringLiteralIdentifier::parseImpl(Pos & pos, ASTPtr & node, E bool ParserCompoundIdentifier::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ASTPtr id_list; - if (!ParserList(std::make_unique(allow_query_parameter), std::make_unique(TokenType::Dot), false) + if (!ParserList(std::make_unique(allow_query_parameter, highlight_type), std::make_unique(TokenType::Dot), false) .parse(pos, id_list, expected)) return false; @@ -1491,7 +1491,7 @@ const char * ParserAlias::restricted_keywords[] = bool ParserAlias::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ParserKeyword s_as(Keyword::AS); - ParserIdentifier id_p; + ParserIdentifier id_p(false, Highlight::alias); bool has_as_word = s_as.ignore(pos, expected); if (!allow_alias_without_as_keyword && !has_as_word) diff --git a/src/Parsers/ExpressionElementParsers.h b/src/Parsers/ExpressionElementParsers.h index b29f5cc42510..6dbb75450edd 100644 --- a/src/Parsers/ExpressionElementParsers.h +++ b/src/Parsers/ExpressionElementParsers.h @@ -25,12 +25,15 @@ class ParserSubquery : public IParserBase class ParserIdentifier : public IParserBase { public: - explicit ParserIdentifier(bool allow_query_parameter_ = false) : allow_query_parameter(allow_query_parameter_) {} + explicit ParserIdentifier(bool allow_query_parameter_ = false, Highlight highlight_type_ = Highlight::identifier) + : allow_query_parameter(allow_query_parameter_), highlight_type(highlight_type_) {} + Highlight 
highlight() const override { return highlight_type; } protected: const char * getName() const override { return "identifier"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; bool allow_query_parameter; + Highlight highlight_type; }; @@ -53,8 +56,8 @@ class ParserTableAsStringLiteralIdentifier : public IParserBase class ParserCompoundIdentifier : public IParserBase { public: - explicit ParserCompoundIdentifier(bool table_name_with_optional_uuid_ = false, bool allow_query_parameter_ = false) - : table_name_with_optional_uuid(table_name_with_optional_uuid_), allow_query_parameter(allow_query_parameter_) + explicit ParserCompoundIdentifier(bool table_name_with_optional_uuid_ = false, bool allow_query_parameter_ = false, Highlight highlight_type_ = Highlight::identifier) + : table_name_with_optional_uuid(table_name_with_optional_uuid_), allow_query_parameter(allow_query_parameter_), highlight_type(highlight_type_) { } @@ -63,6 +66,7 @@ class ParserCompoundIdentifier : public IParserBase bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; bool table_name_with_optional_uuid; bool allow_query_parameter; + Highlight highlight_type; }; /** *, t.*, db.table.*, COLUMNS('') APPLY(...) or EXCEPT(...) or REPLACE(...) @@ -253,6 +257,7 @@ class ParserNumber : public IParserBase protected: const char * getName() const override { return "number"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; + Highlight highlight() const override { return Highlight::number; } }; /** Unsigned integer, used in right hand side of tuple access operator (x.1). @@ -273,6 +278,7 @@ class ParserStringLiteral : public IParserBase protected: const char * getName() const override { return "string literal"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; + Highlight highlight() const override { return Highlight::string; } }; @@ -385,6 +391,7 @@ class ParserSubstitution : public IParserBase protected: const char * getName() const override { return "substitution"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; + Highlight highlight() const override { return Highlight::substitution; } }; diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index 05691529f430..276b4e820742 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -441,6 +441,21 @@ bool ParserKeyValuePairsList::parseImpl(Pos & pos, ASTPtr & node, Expected & exp return parser.parse(pos, node, expected); } +namespace +{ + /// This wrapper is needed to highlight function names differently. 
+ class ParserFunctionName : public IParserBase + { + protected: + const char * getName() const override { return "function name"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override + { + ParserCompoundIdentifier parser(false, true, Highlight::function); + return parser.parse(pos, node, expected); + } + }; +} + enum class Action { @@ -809,6 +824,7 @@ struct ParserExpressionImpl static const Operator finish_between_operator; + ParserFunctionName function_name_parser; ParserCompoundIdentifier identifier_parser{false, true}; ParserNumber number_parser; ParserAsterisk asterisk_parser; @@ -2359,7 +2375,7 @@ bool ParserFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ASTPtr identifier; - if (ParserCompoundIdentifier(false,true).parse(pos, identifier, expected) + if (ParserFunctionName().parse(pos, identifier, expected) && ParserToken(TokenType::OpeningRoundBracket).ignore(pos, expected)) { auto start = getFunctionLayer(identifier, is_table_function, allow_function_parameters); @@ -2497,7 +2513,7 @@ Action ParserExpressionImpl::tryParseOperand(Layers & layers, IParser::Pos & pos { if (typeid_cast(layers.back().get()) || typeid_cast(layers.back().get())) { - if (identifier_parser.parse(pos, tmp, expected) + if (function_name_parser.parse(pos, tmp, expected) && ParserToken(TokenType::OpeningRoundBracket).ignore(pos, expected)) { layers.push_back(getFunctionLayer(tmp, layers.front()->is_table_function)); @@ -2629,49 +2645,52 @@ Action ParserExpressionImpl::tryParseOperand(Layers & layers, IParser::Pos & pos { layers.back()->pushOperand(std::move(tmp)); } - else if (identifier_parser.parse(pos, tmp, expected)) + else { - if (pos->type == TokenType::OpeningRoundBracket) + old_pos = pos; + if (function_name_parser.parse(pos, tmp, expected) && pos->type == TokenType::OpeningRoundBracket) { ++pos; layers.push_back(getFunctionLayer(tmp, layers.front()->is_table_function)); return Action::OPERAND; } - else + pos = old_pos; + + if (identifier_parser.parse(pos, tmp, expected)) { layers.back()->pushOperand(std::move(tmp)); } - } - else if (substitution_parser.parse(pos, tmp, expected)) - { - layers.back()->pushOperand(std::move(tmp)); - } - else if (pos->type == TokenType::OpeningRoundBracket) - { - - if (subquery_parser.parse(pos, tmp, expected)) + else if (substitution_parser.parse(pos, tmp, expected)) { layers.back()->pushOperand(std::move(tmp)); - return Action::OPERATOR; } + else if (pos->type == TokenType::OpeningRoundBracket) + { - ++pos; - layers.push_back(std::make_unique()); - return Action::OPERAND; - } - else if (pos->type == TokenType::OpeningSquareBracket) - { - ++pos; - layers.push_back(std::make_unique()); - return Action::OPERAND; - } - else if (mysql_global_variable_parser.parse(pos, tmp, expected)) - { - layers.back()->pushOperand(std::move(tmp)); - } - else - { - return Action::NONE; + if (subquery_parser.parse(pos, tmp, expected)) + { + layers.back()->pushOperand(std::move(tmp)); + return Action::OPERATOR; + } + + ++pos; + layers.push_back(std::make_unique()); + return Action::OPERAND; + } + else if (pos->type == TokenType::OpeningSquareBracket) + { + ++pos; + layers.push_back(std::make_unique()); + return Action::OPERAND; + } + else if (mysql_global_variable_parser.parse(pos, tmp, expected)) + { + layers.back()->pushOperand(std::move(tmp)); + } + else + { + return Action::NONE; + } } return Action::OPERATOR; diff --git a/src/Parsers/IParser.cpp b/src/Parsers/IParser.cpp index 41981a4bb8aa..eb4ddfa01d24 100644 --- a/src/Parsers/IParser.cpp +++ 
b/src/Parsers/IParser.cpp @@ -9,6 +9,7 @@ namespace ErrorCodes extern const int TOO_SLOW_PARSING; } + IParser::Pos & IParser::Pos::operator=(const IParser::Pos & rhs) { depth = rhs.depth; @@ -32,4 +33,26 @@ IParser::Pos & IParser::Pos::operator=(const IParser::Pos & rhs) return *this; } + +template +static bool intersects(T a_begin, T a_end, T b_begin, T b_end) +{ + return (a_begin <= b_begin && b_begin < a_end) + || (b_begin <= a_begin && a_begin < b_end); +} + + +void Expected::highlight(HighlightedRange range) +{ + auto it = highlights.lower_bound(range); + while (it != highlights.end() && range.begin < it->end) + { + if (intersects(range.begin, range.end, it->begin, it->end)) + it = highlights.erase(it); + else + ++it; + } + highlights.insert(range); +} + } diff --git a/src/Parsers/IParser.h b/src/Parsers/IParser.h index 291f8ee7d44a..f8146c0a4f6d 100644 --- a/src/Parsers/IParser.h +++ b/src/Parsers/IParser.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include @@ -21,14 +22,42 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } +enum class Highlight +{ + none = 0, + keyword, + identifier, + function, + alias, + substitution, + number, + string, +}; + +struct HighlightedRange +{ + const char * begin; + const char * end; + Highlight highlight; + + auto operator<=>(const HighlightedRange & other) const + { + return begin <=> other.begin; + } +}; + /** Collects variants, how parser could proceed further at rightmost position. + * Also collects a mapping of parsed ranges for highlighting, + * which is accumulated through the parsing. */ struct Expected { absl::InlinedVector variants; const char * max_parsed_pos = nullptr; + std::set highlights; + /// 'description' should be statically allocated string. ALWAYS_INLINE void add(const char * current_pos, const char * description) { @@ -48,6 +77,8 @@ struct Expected { add(it->begin, description); } + + void highlight(HighlightedRange range); }; @@ -158,6 +189,14 @@ class IParser return parse(pos, node, expected); } + /** If the parsed fragment should be highlighted in the query editor, + * which type of highlighting to use? 
+ */ + virtual Highlight highlight() const + { + return Highlight::none; + } + virtual ~IParser() = default; }; diff --git a/src/Parsers/IParserBase.cpp b/src/Parsers/IParserBase.cpp index 0241250926dc..9d39056a8f16 100644 --- a/src/Parsers/IParserBase.cpp +++ b/src/Parsers/IParserBase.cpp @@ -10,8 +10,25 @@ bool IParserBase::parse(Pos & pos, ASTPtr & node, Expected & expected) return wrapParseImpl(pos, IncreaseDepthTag{}, [&] { + const char * begin = pos->begin; bool res = parseImpl(pos, node, expected); - if (!res) + if (res) + { + Highlight type = highlight(); + if (pos->begin > begin && type != Highlight::none) + { + Pos prev_token = pos; + --prev_token; + + HighlightedRange range; + range.begin = begin; + range.end = prev_token->end; + range.highlight = type; + + expected.highlight(range); + } + } + else node = nullptr; return res; }); diff --git a/src/Parsers/ParserInsertQuery.cpp b/src/Parsers/ParserInsertQuery.cpp index 9373e6a1c936..0bbb181b39c6 100644 --- a/src/Parsers/ParserInsertQuery.cpp +++ b/src/Parsers/ParserInsertQuery.cpp @@ -40,7 +40,6 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ParserKeyword s_with(Keyword::WITH); ParserToken s_lparen(TokenType::OpeningRoundBracket); ParserToken s_rparen(TokenType::ClosingRoundBracket); - ParserToken s_semicolon(TokenType::Semicolon); ParserIdentifier name_p(true); ParserList columns_p(std::make_unique(), std::make_unique(TokenType::Comma), false); ParserFunction table_function_p{false}; @@ -147,8 +146,9 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { /// If VALUES is defined in query, everything except setting will be parsed as data, /// and if values followed by semicolon, the data should be null. - if (!s_semicolon.checkWithoutMoving(pos, expected)) + if (pos->type != TokenType::Semicolon) data = pos->begin; + format_str = "Values"; } else if (s_format.ignore(pos, expected)) diff --git a/src/Parsers/parseDatabaseAndTableName.cpp b/src/Parsers/parseDatabaseAndTableName.cpp index 81660bc46008..eaf020e445bf 100644 --- a/src/Parsers/parseDatabaseAndTableName.cpp +++ b/src/Parsers/parseDatabaseAndTableName.cpp @@ -60,21 +60,6 @@ bool parseDatabaseAndTableAsAST(IParser::Pos & pos, Expected & expected, ASTPtr } -bool parseDatabase(IParser::Pos & pos, Expected & expected, String & database_str) -{ - ParserToken s_dot(TokenType::Dot); - ParserIdentifier identifier_parser; - - ASTPtr database; - database_str = ""; - - if (!identifier_parser.parse(pos, database, expected)) - return false; - - tryGetIdentifierNameInto(database, database_str); - return true; -} - bool parseDatabaseAsAST(IParser::Pos & pos, Expected & expected, ASTPtr & database) { ParserIdentifier identifier_parser(/* allow_query_parameter */true); diff --git a/src/Parsers/parseQuery.cpp b/src/Parsers/parseQuery.cpp index 51878efa7067..2a6abc234065 100644 --- a/src/Parsers/parseQuery.cpp +++ b/src/Parsers/parseQuery.cpp @@ -226,6 +226,32 @@ std::string getUnmatchedParenthesesErrorMessage( } +static ASTInsertQuery * getInsertAST(const ASTPtr & ast) +{ + /// Either it is INSERT or EXPLAIN INSERT. 
+ if (auto * explain = ast->as()) + { + if (auto explained_query = explain->getExplainedQuery()) + { + return explained_query->as(); + } + } + else + { + return ast->as(); + } + + return nullptr; +} + +const char * getInsertData(const ASTPtr & ast) +{ + if (const ASTInsertQuery * insert = getInsertAST(ast)) + return insert->data; + return nullptr; +} + + ASTPtr tryParseQuery( IParser & parser, const char * & _out_query_end, /* also query begin as input parameter */ @@ -270,29 +296,11 @@ ASTPtr tryParseQuery( if (res && max_parser_depth) res->checkDepth(max_parser_depth); - ASTInsertQuery * insert = nullptr; - if (parse_res) - { - if (auto * explain = res->as()) - { - if (auto explained_query = explain->getExplainedQuery()) - { - insert = explained_query->as(); - } - } - else - { - insert = res->as(); - } - } - - // If parsed query ends at data for insertion. Data for insertion could be - // in any format and not necessary be lexical correct, so we can't perform - // most of the checks. - if (insert && insert->data) - { + /// If parsed query ends at data for insertion. Data for insertion could be + /// in any format and not necessary be lexical correct, so we can't perform + /// most of the checks. + if (res && getInsertData(res)) return res; - } // More granular checks for queries other than INSERT w/inline data. /// Lexical error @@ -434,11 +442,9 @@ std::pair splitMultipartQuery( ast = parseQueryAndMovePosition(parser, pos, end, "", true, max_query_size, max_parser_depth, max_parser_backtracks); - auto * insert = ast->as(); - - if (insert && insert->data) + if (ASTInsertQuery * insert = getInsertAST(ast)) { - /// Data for INSERT is broken on new line + /// Data for INSERT is broken on the new line pos = insert->data; while (*pos && *pos != '\n') ++pos; diff --git a/src/Parsers/parseQuery.h b/src/Parsers/parseQuery.h index 93c1a4652671..564415d0b85c 100644 --- a/src/Parsers/parseQuery.h +++ b/src/Parsers/parseQuery.h @@ -71,4 +71,9 @@ std::pair splitMultipartQuery( size_t max_parser_backtracks, bool allow_settings_after_format_in_insert); +/** If the query contains raw data part, such as INSERT ... FORMAT ..., return a pointer to it. + * The SQL parser stops at the raw data part, which is parsed by a separate parser. 
+ */ +const char * getInsertData(const ASTPtr & ast); + } diff --git a/tests/queries/0_stateless/01370_client_autocomplete_word_break_characters.expect b/tests/queries/0_stateless/01370_client_autocomplete_word_break_characters.expect index 44f3ba9681a9..ffd3e742cec8 100755 --- a/tests/queries/0_stateless/01370_client_autocomplete_word_break_characters.expect +++ b/tests/queries/0_stateless/01370_client_autocomplete_word_break_characters.expect @@ -20,7 +20,7 @@ expect_after { -i $any_spawn_id timeout { exit 1 } } -spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --history_file=$history_file" +spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --history_file=$history_file --highlight=0" expect ":) " # Make a query diff --git a/tests/queries/0_stateless/01565_query_loop_after_client_error.expect b/tests/queries/0_stateless/01565_query_loop_after_client_error.expect index ac69c18ce392..6253840c63cf 100755 --- a/tests/queries/0_stateless/01565_query_loop_after_client_error.expect +++ b/tests/queries/0_stateless/01565_query_loop_after_client_error.expect @@ -24,30 +24,21 @@ expect_after { -i $any_spawn_id timeout { exit 1 } } -spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion -mn --history_file=$history_file" +spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion -mn --history_file=$history_file --highlight 0" expect "\n:) " -send -- "DROP TABLE IF EXISTS t01565;\n" -# NOTE: this is important for -mn mode, you should send "\r" only after reading echoed command -expect "\r\n" -send -- "\r" +send -- "DROP TABLE IF EXISTS t01565;\r" expect "\nOk." expect "\n:)" -send -- "CREATE TABLE t01565 (c0 String, c1 Int32) ENGINE = Memory() ;\n" -expect "\r\n" -send -- "\r" +send -- "CREATE TABLE t01565 (c0 String, c1 Int32) ENGINE = Memory() ;\r" expect "\nOk." expect "\n:) " -send -- "INSERT INTO t01565(c0, c1) VALUES (\"1\",1) ;\n" -expect "\r\n" -send -- "\r" +send -- "INSERT INTO t01565(c0, c1) VALUES (\"1\",1) ;\r" expect "\n:) " -send -- "INSERT INTO t01565(c0, c1) VALUES ('1', 1) ;\n" -expect "\r\n" -send -- "\r" +send -- "INSERT INTO t01565(c0, c1) VALUES ('1', 1) ;\r" expect "\nOk." 
expect "\n:) " diff --git a/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.sh b/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.sh index ebd6490077e4..f04ffdae229f 100755 --- a/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.sh +++ b/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.sh @@ -43,7 +43,7 @@ expect_after { -i \$any_spawn_id timeout { exit 1 } } -spawn bash -c "$*" +spawn bash -c "$* --highlight 0" expect ":) " # Make a query diff --git a/tests/queries/0_stateless/01702_system_query_log.reference b/tests/queries/0_stateless/01702_system_query_log.reference index c653021aa5ae..5498b5377ba5 100644 --- a/tests/queries/0_stateless/01702_system_query_log.reference +++ b/tests/queries/0_stateless/01702_system_query_log.reference @@ -43,16 +43,16 @@ Alter ALTER TABLE sqllt.table UPDATE i = i + 1 WHERE 1; Alter ALTER TABLE sqllt.table DELETE WHERE i > 65535; Select -- not done, seems to hard, so I\'ve skipped queries of ALTER-X, where X is:\n-- PARTITION\n-- ORDER BY\n-- SAMPLE BY\n-- INDEX\n-- CONSTRAINT\n-- TTL\n-- USER\n-- QUOTA\n-- ROLE\n-- ROW POLICY\n-- SETTINGS PROFILE\n\nSELECT \'SYSTEM queries\'; System SYSTEM FLUSH LOGS; -System SYSTEM STOP MERGES sqllt.table -System SYSTEM START MERGES sqllt.table -System SYSTEM STOP TTL MERGES sqllt.table -System SYSTEM START TTL MERGES sqllt.table -System SYSTEM STOP MOVES sqllt.table -System SYSTEM START MOVES sqllt.table -System SYSTEM STOP FETCHES sqllt.table -System SYSTEM START FETCHES sqllt.table -System SYSTEM STOP REPLICATED SENDS sqllt.table -System SYSTEM START REPLICATED SENDS sqllt.table +System SYSTEM STOP MERGES sqllt.table; +System SYSTEM START MERGES sqllt.table; +System SYSTEM STOP TTL MERGES sqllt.table; +System SYSTEM START TTL MERGES sqllt.table; +System SYSTEM STOP MOVES sqllt.table; +System SYSTEM START MOVES sqllt.table; +System SYSTEM STOP FETCHES sqllt.table; +System SYSTEM START FETCHES sqllt.table; +System SYSTEM STOP REPLICATED SENDS sqllt.table; +System SYSTEM START REPLICATED SENDS sqllt.table; Select -- SYSTEM RELOAD DICTIONARY sqllt.dictionary; -- temporary out of order: Code: 210, Connection refused (localhost:9001) (version 21.3.1.1)\n-- DROP REPLICA\n-- haha, no\n-- SYSTEM KILL;\n-- SYSTEM SHUTDOWN;\n\n-- Since we don\'t really care about the actual output, suppress it with `FORMAT Null`.\nSELECT \'SHOW queries\'; Show SHOW CREATE TABLE sqllt.table FORMAT Null; Show SHOW CREATE DICTIONARY sqllt.dictionary FORMAT Null; diff --git a/tests/queries/0_stateless/02160_client_autocomplete_parse_query.expect b/tests/queries/0_stateless/02160_client_autocomplete_parse_query.expect index 2d404b005c71..30d725e6a2a2 100755 --- a/tests/queries/0_stateless/02160_client_autocomplete_parse_query.expect +++ b/tests/queries/0_stateless/02160_client_autocomplete_parse_query.expect @@ -21,7 +21,7 @@ expect_after { -i $any_spawn_id timeout { exit 1 } } -spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --history_file=$history_file" +spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --history_file=$history_file --highlight=0" expect ":) " # Make a query From b9a08caa46e10e5e812615b754f6f3d0d3b7bb47 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Wed, 10 Apr 2024 22:02:44 +0200 Subject: [PATCH 41/90] No fast test (no protobuf) --- tests/queries/0_stateless/03094_recursive_type_proto.sh | 1 + 1 file changed, 1 insertion(+) diff --git 
a/tests/queries/0_stateless/03094_recursive_type_proto.sh b/tests/queries/0_stateless/03094_recursive_type_proto.sh index 6fa374f98d50..98a1b54ff9e0 100755 --- a/tests/queries/0_stateless/03094_recursive_type_proto.sh +++ b/tests/queries/0_stateless/03094_recursive_type_proto.sh @@ -1,4 +1,5 @@ #!/usr/bin/env bash +# Tags: no-fasttest CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh From e1646165c8e4c1b5c52d924a2b8e7b5c5d2e1b09 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Wed, 10 Apr 2024 23:08:25 +0200 Subject: [PATCH 42/90] fix backups --- src/Backups/BackupFileInfo.cpp | 53 +++++++------------------------ src/Backups/BackupsWorker.cpp | 58 ++++++++-------------------------- src/Common/ThreadPool.cpp | 3 -- 3 files changed, 25 insertions(+), 89 deletions(-) diff --git a/src/Backups/BackupFileInfo.cpp b/src/Backups/BackupFileInfo.cpp index f14b955149e3..84b6d67f5033 100644 --- a/src/Backups/BackupFileInfo.cpp +++ b/src/Backups/BackupFileInfo.cpp @@ -210,48 +210,25 @@ BackupFileInfos buildFileInfosForBackupEntries(const BackupEntries & backup_entr BackupFileInfos infos; infos.resize(backup_entries.size()); - size_t num_active_jobs = 0; - std::mutex mutex; - std::condition_variable event; - std::exception_ptr exception; + std::atomic_bool failed = false; - auto thread_group = CurrentThread::getGroup(); LoggerPtr log = getLogger("FileInfosFromBackupEntries"); + ThreadPoolCallbackRunnerLocal runner(thread_pool, "BackupWorker"); for (size_t i = 0; i != backup_entries.size(); ++i) { - { - std::lock_guard lock{mutex}; - if (exception) - break; - ++num_active_jobs; - } + if (failed) + break; - auto job = [&mutex, &num_active_jobs, &event, &exception, &infos, &backup_entries, &read_settings, &base_backup, &thread_group, &process_list_element, i, log]() + runner([&infos, &backup_entries, &read_settings, &base_backup, &process_list_element, i, log, &failed]() { - SCOPE_EXIT_SAFE({ - std::lock_guard lock{mutex}; - if (!--num_active_jobs) - event.notify_all(); - CurrentThread::detachFromGroupIfNotDetached(); - }); - + if (failed) + return; try { const auto & name = backup_entries[i].first; const auto & entry = backup_entries[i].second; - if (thread_group) - CurrentThread::attachToGroup(thread_group); - - setThreadName("BackupWorker"); - - { - std::lock_guard lock{mutex}; - if (exception) - return; - } - if (process_list_element) process_list_element->checkTimeLimit(); @@ -259,21 +236,13 @@ BackupFileInfos buildFileInfosForBackupEntries(const BackupEntries & backup_entr } catch (...) 
{ - std::lock_guard lock{mutex}; - if (!exception) - exception = std::current_exception(); + failed = true; + throw; } - }; - - thread_pool.scheduleOrThrowOnError(job); + }); } - { - std::unique_lock lock{mutex}; - event.wait(lock, [&] { return !num_active_jobs; }); - if (exception) - std::rethrow_exception(exception); - } + runner.waitForAllToFinishAndRethrowFirstError(); return infos; } diff --git a/src/Backups/BackupsWorker.cpp b/src/Backups/BackupsWorker.cpp index 9a3e1052e0bf..c81f08288536 100644 --- a/src/Backups/BackupsWorker.cpp +++ b/src/Backups/BackupsWorker.cpp @@ -705,51 +705,27 @@ void BackupsWorker::writeBackupEntries( backup_entries.size()); } - size_t num_active_jobs = 0; - std::mutex mutex; - std::condition_variable event; - std::exception_ptr exception; + + std::atomic_bool failed = false; bool always_single_threaded = !backup->supportsWritingInMultipleThreads(); auto & thread_pool = getThreadPool(ThreadPoolId::BACKUP_COPY_FILES); - auto thread_group = CurrentThread::getGroup(); + ThreadPoolCallbackRunnerLocal runner(thread_pool, "BackupWorker"); for (size_t i = 0; i != backup_entries.size(); ++i) { + if (failed) + break; + auto & entry = backup_entries[i].second; const auto & file_info = file_infos[i]; + auto job = [&]() { - std::unique_lock lock{mutex}; - if (exception) - break; - ++num_active_jobs; - } - - auto job = [&](bool async) - { - SCOPE_EXIT_SAFE( - std::lock_guard lock{mutex}; - if (!--num_active_jobs) - event.notify_all(); - if (async) - CurrentThread::detachFromGroupIfNotDetached(); - ); - + if (failed) + return; try { - if (async && thread_group) - CurrentThread::attachToGroup(thread_group); - - if (async) - setThreadName("BackupWorker"); - - { - std::lock_guard lock{mutex}; - if (exception) - return; - } - if (process_list_element) process_list_element->checkTimeLimit(); @@ -772,27 +748,21 @@ void BackupsWorker::writeBackupEntries( } catch (...) 
{ - std::lock_guard lock{mutex}; - if (!exception) - exception = std::current_exception(); + failed = true; + throw; } }; if (always_single_threaded) { - job(false); + job(); continue; } - thread_pool.scheduleOrThrowOnError([job] { job(true); }); + runner(std::move(job)); } - { - std::unique_lock lock{mutex}; - event.wait(lock, [&] { return !num_active_jobs; }); - if (exception) - std::rethrow_exception(exception); - } + runner.waitForAllToFinishAndRethrowFirstError(); } diff --git a/src/Common/ThreadPool.cpp b/src/Common/ThreadPool.cpp index 9bea5ab4d5e6..b9029d9287df 100644 --- a/src/Common/ThreadPool.cpp +++ b/src/Common/ThreadPool.cpp @@ -585,9 +585,6 @@ thread_local bool CannotAllocateThreadFaultInjector::block_fault_injections = fa scope_guard CannotAllocateThreadFaultInjector::blockFaultInjections() { auto & ins = instance(); - if (!ins.enabled.load(std::memory_order_relaxed)) - return {}; - ins.block_fault_injections = true; return [&ins](){ ins.block_fault_injections = false; }; } From 8e26c4460b9fc8f4f5913ff2e1480330a02eec14 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Wed, 10 Apr 2024 23:39:47 +0200 Subject: [PATCH 43/90] Make transform always return the first match --- docs/en/sql-reference/functions/other-functions.md | 2 +- src/Common/HashTable/HashMap.h | 13 +++++++++++++ src/Functions/transform.cpp | 7 +++---- .../03094_transform_return_first.reference | 4 ++++ .../0_stateless/03094_transform_return_first.sql | 7 +++++++ 5 files changed, 28 insertions(+), 5 deletions(-) create mode 100644 tests/queries/0_stateless/03094_transform_return_first.reference create mode 100644 tests/queries/0_stateless/03094_transform_return_first.sql diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md index 187f248e92df..26351301a3bc 100644 --- a/docs/en/sql-reference/functions/other-functions.md +++ b/docs/en/sql-reference/functions/other-functions.md @@ -675,7 +675,7 @@ There are two variations of this function: Signature: -For `x` equal to one of the elements in `array_from`, the function returns the corresponding element in `array_to`, i.e. the one at the same array index. Otherwise, it returns `default`. If multiple matching elements exist `array_from`, an arbitrary corresponding element from `array_to` is returned. +For `x` equal to one of the elements in `array_from`, the function returns the corresponding element in `array_to`, i.e. the one at the same array index. Otherwise, it returns `default`. If multiple matching elements exist `array_from`, it returns the element corresponding to the first of them. 
`transform(T, Array(T), Array(U), U) -> U` diff --git a/src/Common/HashTable/HashMap.h b/src/Common/HashTable/HashMap.h index dc601bf13198..f104fea72cbb 100644 --- a/src/Common/HashTable/HashMap.h +++ b/src/Common/HashTable/HashMap.h @@ -296,6 +296,19 @@ class HashMapTable : public HashTable return it->getMapped(); } + /// Only inserts the value if key isn't already present + void ALWAYS_INLINE insertIfNotPresent(const Key & x, const Cell::Mapped & value) + { + LookupResult it; + bool inserted; + this->emplace(x, it, inserted); + if (inserted) + { + new (&it->getMapped()) typename Cell::Mapped(); + it->getMapped() = value; + } + } + const typename Cell::Mapped & ALWAYS_INLINE at(const Key & x) const { if (auto it = this->find(x); it != this->end()) diff --git a/src/Functions/transform.cpp b/src/Functions/transform.cpp index 3c9654740f4e..0dbc99467101 100644 --- a/src/Functions/transform.cpp +++ b/src/Functions/transform.cpp @@ -755,7 +755,6 @@ namespace WhichDataType which(from_type); - /// Note: Doesn't check the duplicates in the `from` array. /// Field may be of Float type, but for the purpose of bitwise equality we can treat them as UInt64 if (isNativeNumber(which) || which.isDecimal32() || which.isDecimal64() || which.isEnum()) { @@ -777,7 +776,7 @@ namespace #pragma clang diagnostic pop memcpy(dst, ref.data, ref.size); - table[key] = i; + table.insertIfNotPresent(key, i); } } } @@ -790,7 +789,7 @@ namespace if (applyVisitor(FieldVisitorAccurateEquals(), (*cache.from_column)[i], (*from_column_uncasted)[i])) { StringRef ref = cache.from_column->getDataAt(i); - table[ref] = i; + table.insertIfNotPresent(ref, i); } } } @@ -804,7 +803,7 @@ namespace { SipHash hash; cache.from_column->updateHashWithValue(i, hash); - table[hash.get128()] = i; + table.insertIfNotPresent(hash.get128(), i); } } } diff --git a/tests/queries/0_stateless/03094_transform_return_first.reference b/tests/queries/0_stateless/03094_transform_return_first.reference new file mode 100644 index 000000000000..4f62b9488829 --- /dev/null +++ b/tests/queries/0_stateless/03094_transform_return_first.reference @@ -0,0 +1,4 @@ +1 +1 +(2,2) +2 diff --git a/tests/queries/0_stateless/03094_transform_return_first.sql b/tests/queries/0_stateless/03094_transform_return_first.sql new file mode 100644 index 000000000000..fa18440f7217 --- /dev/null +++ b/tests/queries/0_stateless/03094_transform_return_first.sql @@ -0,0 +1,7 @@ +SELECT transform(1, [1, 1, 1], [1, 4, 5]); +SELECT transform('1', ['1', '1', '1'], ['1', '4', '5']); +SELECT transform((0, 0), [(0, 0), (0, 0), (0, 0)], [(2, 2), (5, 5), (10, 10)]); + +-- https://github.com/ClickHouse/ClickHouse/issues/62183 +-- Case is turned into caseWithExpression, which then it's turned into transform +select case 1 when 1 then 2 when 1 then 4 end; From dcd0831f4c4fce416368eca70f1f2201cf974903 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 11 Apr 2024 00:12:52 +0200 Subject: [PATCH 44/90] fix --- src/Client/Connection.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Client/Connection.cpp b/src/Client/Connection.cpp index 91b86ded5002..4e2456134793 100644 --- a/src/Client/Connection.cpp +++ b/src/Client/Connection.cpp @@ -1224,15 +1224,15 @@ void Connection::setDescription() auto resolved_address = getResolvedAddress(); description = host + ":" + toString(port); + full_description = description; + if (resolved_address) { auto ip_address = resolved_address->host().toString(); if (host != ip_address) - description += ", " + ip_address; + 
full_description += ", " + ip_address; } - full_description = description; - if (const auto * socket_ = getSocket()) { full_description += ", local address: "; From 7344daec8f295baf1d11b8b51d82821e53fece19 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy <99031427+yakov-olkhovskiy@users.noreply.github.com> Date: Wed, 10 Apr 2024 20:25:08 -0400 Subject: [PATCH 45/90] add Composable Protocols --- .../settings/composable-protocols.md | 155 ++++++++++++++++++ 1 file changed, 155 insertions(+) create mode 100644 docs/en/operations/settings/composable-protocols.md diff --git a/docs/en/operations/settings/composable-protocols.md b/docs/en/operations/settings/composable-protocols.md new file mode 100644 index 000000000000..8a5ea584f4e7 --- /dev/null +++ b/docs/en/operations/settings/composable-protocols.md @@ -0,0 +1,155 @@ +--- +slug: /en/operations/settings/composable-protocols +sidebar_position: 64 +sidebar_label: Composable Protocols +--- + +# Composable Protocols + +Composable protocols allows more flexible configuration of TCP access to the ClickHouse server. This configuration can co-exist with or replace conventional configuration. + +## Composable protocols section is denoted as `protocols` in configuration xml +**Example:** +``` xml + + + +``` + +## Basic modules define protocol layers +**Example:** +``` xml + + + + + http + + + +``` +where: +- `plain_http` - name which can be referred by another layer +- `type` - denotes protocol handler which will be instantiated to process data, set of protocol handlers is predefined: + * `tcp` - native clickhouse protocol handler + * `http` - http clickhouse protocol handler + * `tls` - TLS encryption layer + * `proxy1` - PROXYv1 layer + * `mysql` - MySQL compatibility protocol handler + * `postgres` - PostgreSQL compatibility protocol handler + * `prometheus` - Prometheus protocol handler + * `interserver` - clickhouse interserver handler + +:::note +`gRPC` protocol handler is not inmplemented for `Composable protocols` +::: + +## Endpoint (i.e. listening port) is denoted by `` and (optional) `` tags +**Example:** +``` xml + + + + + http + + 127.0.0.1 + 8123 + + + + +``` +If `` is omitted, then `` from root config is used. 
+ +## Layers sequence is defined by `` tag, referencing another module +**Example:** definition for HTTPS protocol +``` xml + + + + + http + + + + + tls + plain_http + 127.0.0.1 + 8443 + + + +``` + +## Endpoint can be attached to any layer +**Example:** definition for HTTP (port 8123) and HTTPS (port 8443) endpoints +``` xml + + + + http + 127.0.0.1 + 8123 + + + + tls + plain_http + 127.0.0.1 + 8443 + + + +``` + +## Additional endpoints can be defined by referencing any module and omitting `` tag +**Example:** `another_http` endpoint is defined for `plain_http` module +``` xml + + + + http + 127.0.0.1 + 8123 + + + + tls + plain_http + 127.0.0.1 + 8443 + + + + plain_http + 127.0.0.1 + 8223 + + + +``` + +## Some modules can contain specific for its layer parameters +**Example:** for TLS layer private key (`privateKeyFile`) and certificate files (`certificateFile`) can be specified +``` xml + + + + http + 127.0.0.1 + 8123 + + + + tls + plain_http + 127.0.0.1 + 8443 + another_server.key + another_server.crt + + + +``` From 057747ccd2a6aef76121c3b87a52b22f65e32687 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy <99031427+yakov-olkhovskiy@users.noreply.github.com> Date: Wed, 10 Apr 2024 20:33:42 -0400 Subject: [PATCH 46/90] fix --- docs/en/operations/settings/composable-protocols.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/operations/settings/composable-protocols.md b/docs/en/operations/settings/composable-protocols.md index 8a5ea584f4e7..b68a5906abf1 100644 --- a/docs/en/operations/settings/composable-protocols.md +++ b/docs/en/operations/settings/composable-protocols.md @@ -41,7 +41,7 @@ where: * `interserver` - clickhouse interserver handler :::note -`gRPC` protocol handler is not inmplemented for `Composable protocols` +`gRPC` protocol handler is not implemented for `Composable protocols` ::: ## Endpoint (i.e. 
listening port) is denoted by `` and (optional) `` tags From e793b0e148c7db3cd0053b9ba27dc769e50a9878 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy <99031427+yakov-olkhovskiy@users.noreply.github.com> Date: Wed, 10 Apr 2024 20:37:26 -0400 Subject: [PATCH 47/90] Update aspell-dict.txt --- utils/check-style/aspell-ignore/en/aspell-dict.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 30c2de2b5076..9f7776f5201d 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -183,6 +183,8 @@ CompiledExpressionCacheCount ComplexKeyCache ComplexKeyDirect ComplexKeyHashed +Composable +composable Config ConnectionDetails Const @@ -697,6 +699,7 @@ PCRE PRCP PREWHERE PROCESSLIST +PROXYv PSUN PagerDuty ParallelFormattingOutputFormatThreads From 13774d897b8ab290ce64203d09f32bee60247cce Mon Sep 17 00:00:00 2001 From: Joshua Hildred Date: Wed, 10 Apr 2024 19:09:03 -0700 Subject: [PATCH 48/90] Add additional tests for queries with toLowCardinality and toNullable --- src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp | 2 +- tests/queries/0_stateless/03032_redundant_equals.reference | 4 ++++ tests/queries/0_stateless/03032_redundant_equals.sql | 3 ++- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp b/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp index ee0ddf24233e..05efe983b42e 100644 --- a/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp +++ b/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp @@ -627,7 +627,7 @@ class LogicalExpressionOptimizerVisitor : public InDepthQueryTreeVisitorWithCont if (!child_function || !isBooleanFunction(child_function->getFunctionName())) return; - if (function_node.getResultType()->isNullable() && !child_function->getResultType()->isNullable()) + if (removeLowCardinality(constant->getResultType())->isNullable()) need_rerun_resolve = true; if (maybe_invert) diff --git a/tests/queries/0_stateless/03032_redundant_equals.reference b/tests/queries/0_stateless/03032_redundant_equals.reference index 09f4d8e3646a..b154addf55fb 100644 --- a/tests/queries/0_stateless/03032_redundant_equals.reference +++ b/tests/queries/0_stateless/03032_redundant_equals.reference @@ -17,6 +17,10 @@ 100 100 101 +100 +101 +100 +101 1 1 1 diff --git a/tests/queries/0_stateless/03032_redundant_equals.sql b/tests/queries/0_stateless/03032_redundant_equals.sql index ae0b9651e12c..bd2306c7575c 100644 --- a/tests/queries/0_stateless/03032_redundant_equals.sql +++ b/tests/queries/0_stateless/03032_redundant_equals.sql @@ -26,7 +26,8 @@ SELECT * FROM test_table WHERE (NOT ((k in (101) = 0) OR (k in (100) = 1))) = 1; SELECT * FROM test_table WHERE ((k not in (101) = 0) OR (k in (100) = 1)) = 1; SELECT * FROM test_table WHERE ((k not in (99) = 1) AND (k in (100) = 1)) = 1; SELECT * FROM test_table WHERE ((k not in (101) = toNullable(0)) OR (k in (100) = toNullable(1))) = toNullable(1); - +SELECT * FROM test_table WHERE (((k NOT IN toLowCardinality(toNullable(101))) = toLowCardinality(toNullable(0))) OR ((k IN (toLowCardinality(100))) = toNullable(1))); +SELECT * FROM test_table WHERE (((k IN toLowCardinality(toNullable(101))) = toLowCardinality(toNullable(0))) AND ((k NOT IN (toLowCardinality(100))) = toNullable(1))) = toNullable(toLowCardinality(0)); SELECT count() FROM From 0e117ed197011aff8b746010ceac733450a892b6 Mon Sep 17 00:00:00 2001 
From: Konstantin Bogdanov Date: Thu, 11 Apr 2024 04:17:06 +0200 Subject: [PATCH 49/90] Update debug log filename --- .../0_stateless/01676_clickhouse_client_autocomplete.python | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.python b/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.python index 7bb9209f55ce..dcb1d2581828 100644 --- a/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.python +++ b/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.python @@ -7,7 +7,7 @@ import multiprocessing COMPLETION_TIMEOUT_SECONDS = 30 DEBUG_LOG = os.path.join( os.environ["CLICKHOUSE_TMP"], - os.path.basename(os.path.abspath(__file__)) + ".debuglog", + os.path.basename(os.path.abspath(__file__)).strip(".python") + ".debuglog", ) From 7916cf8355f0494d578ffe3b3288bd54485f5be6 Mon Sep 17 00:00:00 2001 From: Konstantin Bogdanov Date: Thu, 11 Apr 2024 04:21:12 +0200 Subject: [PATCH 50/90] Add debug log flushing --- .../0_stateless/01676_clickhouse_client_autocomplete.python | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.python b/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.python index dcb1d2581828..13160d4e561c 100644 --- a/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.python +++ b/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.python @@ -34,19 +34,23 @@ def test_completion(program, argv, comp_word): output_b = os.read(master, 4096) output = output_b.decode() debug_log_fd.write(repr(output_b) + "\n") + debug_log_fd.flush() while not ":)" in output: output_b = os.read(master, 4096) output += output_b.decode() debug_log_fd.write(repr(output_b) + "\n") + debug_log_fd.flush() os.write(master, b"SET " + bytes(comp_begin.encode())) output_b = os.read(master, 4096) output = output_b.decode() debug_log_fd.write(repr(output_b) + "\n") + debug_log_fd.flush() while not comp_begin in output: output_b = os.read(master, 4096) output += output_b.decode() debug_log_fd.write(repr(output_b) + "\n") + debug_log_fd.flush() time.sleep(0.01) os.write(master, b"\t") @@ -54,6 +58,7 @@ def test_completion(program, argv, comp_word): output_b = os.read(master, 4096) output = output_b.decode() debug_log_fd.write(repr(output_b) + "\n") + debug_log_fd.flush() # fail fast if there is a bell character in the output, # meaning no concise completion is found if "\x07" in output: @@ -64,6 +69,7 @@ def test_completion(program, argv, comp_word): output_b = os.read(master, 4096) output += output_b.decode() debug_log_fd.write(repr(output_b) + "\n") + debug_log_fd.flush() print(f"{comp_word}: OK") finally: From edb22a89410cb576209733503cf11f3eb988250a Mon Sep 17 00:00:00 2001 From: Jayme Bird Date: Thu, 11 Apr 2024 10:22:02 +0100 Subject: [PATCH 51/90] add event_time to backup_log system table --- docs/en/operations/system-tables/backup_log.md | 2 ++ src/Interpreters/BackupLog.cpp | 2 ++ 2 files changed, 4 insertions(+) diff --git a/docs/en/operations/system-tables/backup_log.md b/docs/en/operations/system-tables/backup_log.md index c73fd26683ea..d9c2a61cb818 100644 --- a/docs/en/operations/system-tables/backup_log.md +++ b/docs/en/operations/system-tables/backup_log.md @@ -9,6 +9,7 @@ Columns: - `hostname` ([LowCardinality(String)](../../sql-reference/data-types/string.md)) — Hostname of the server executing the query. - `event_date` ([Date](../../sql-reference/data-types/date.md)) — Date of the entry. 
+- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — The date and time of the entry. - `event_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — Time of the entry with microseconds precision. - `id` ([String](../../sql-reference/data-types/string.md)) — Identifier of the backup or restore operation. - `name` ([String](../../sql-reference/data-types/string.md)) — Name of the backup storage (the contents of the `FROM` or `TO` clause). @@ -67,6 +68,7 @@ Row 2: ────── hostname: clickhouse.eu-central1.internal event_date: 2023-08-19 +event_time: 2023-08-19 11:08:56 event_time_microseconds: 2023-08-19 11:08:56.916192 id: e5b74ecb-f6f1-426a-80be-872f90043885 name: Disk('backups_disk', '1.zip') diff --git a/src/Interpreters/BackupLog.cpp b/src/Interpreters/BackupLog.cpp index af6c7cf62346..a22c6e322bf8 100644 --- a/src/Interpreters/BackupLog.cpp +++ b/src/Interpreters/BackupLog.cpp @@ -24,6 +24,7 @@ ColumnsDescription BackupLogElement::getColumnsDescription() { {"hostname", std::make_shared(std::make_shared()), "Hostname of the server executing the query."}, {"event_date", std::make_shared(), "Date of the entry."}, + {"event_time", std::make_shared(), "Time of the entry."}, {"event_time_microseconds", std::make_shared(6), "Time of the entry with microseconds precision."}, {"id", std::make_shared(), "Identifier of the backup or restore operation."}, {"name", std::make_shared(), "Name of the backup storage (the contents of the FROM or TO clause)."}, @@ -48,6 +49,7 @@ void BackupLogElement::appendToBlock(MutableColumns & columns) const size_t i = 0; columns[i++]->insert(getFQDNOrHostName()); columns[i++]->insert(DateLUT::instance().toDayNum(std::chrono::system_clock::to_time_t(event_time)).toUnderType()); + columns[i++]->insert(std::chrono::system_clock::to_time_t(event_time)); columns[i++]->insert(event_time_usec); columns[i++]->insert(info.id); columns[i++]->insert(info.name); From c1c7cf56bde1dd07ced5ad41a0dc4e7d2d5fef94 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 11 Apr 2024 13:43:51 +0200 Subject: [PATCH 52/90] Revert "[feature]: allow to attach parts from a different disk" --- .../statements/alter/partition.md | 2 - src/Storages/MergeTree/MergeTreeData.cpp | 50 ++--- src/Storages/MergeTree/MergeTreeData.h | 2 +- src/Storages/MergeTree/MutateTask.cpp | 2 +- src/Storages/StorageMergeTree.cpp | 4 +- src/Storages/StorageReplicatedMergeTree.cpp | 17 +- .../__init__.py | 0 .../configs/remote_servers.xml | 17 -- .../test_attach_partition_using_copy/test.py | 187 ------------------ tests/integration/test_multiple_disks/test.py | 36 ++-- 10 files changed, 54 insertions(+), 263 deletions(-) delete mode 100644 tests/integration/test_attach_partition_using_copy/__init__.py delete mode 100644 tests/integration/test_attach_partition_using_copy/configs/remote_servers.xml delete mode 100644 tests/integration/test_attach_partition_using_copy/test.py diff --git a/docs/en/sql-reference/statements/alter/partition.md b/docs/en/sql-reference/statements/alter/partition.md index 941dc000a028..ce5cecf6fd64 100644 --- a/docs/en/sql-reference/statements/alter/partition.md +++ b/docs/en/sql-reference/statements/alter/partition.md @@ -133,8 +133,6 @@ For the query to run successfully, the following conditions must be met: - Both tables must have the same indices and projections. - Both tables must have the same storage policy. -If both tables have the same storage policy, use hardlink to attach partition. 
Otherwise, use copying the data to attach partition. - ## REPLACE PARTITION ``` sql diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 5d4c3ab078e5..927001dd0f6b 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -7074,7 +7074,7 @@ MergeTreeData & MergeTreeData::checkStructureAndGetMergeTreeData( return checkStructureAndGetMergeTreeData(*source_table, src_snapshot, my_snapshot); } -std::pair MergeTreeData::cloneAndLoadDataPart( +std::pair MergeTreeData::cloneAndLoadDataPartOnSameDisk( const MergeTreeData::DataPartPtr & src_part, const String & tmp_part_prefix, const MergeTreePartInfo & dst_part_info, @@ -7084,23 +7084,28 @@ std::pair MergeTreeData::cloneAn const WriteSettings & write_settings) { chassert(!isStaticStorage()); - bool on_same_disk = false; - for (const DiskPtr & disk : this->getStoragePolicy()->getDisks()) + + /// Check that the storage policy contains the disk where the src_part is located. + bool does_storage_policy_allow_same_disk = false; + for (const DiskPtr & disk : getStoragePolicy()->getDisks()) { if (disk->getName() == src_part->getDataPartStorage().getDiskName()) { - on_same_disk = true; + does_storage_policy_allow_same_disk = true; break; } } - + if (!does_storage_policy_allow_same_disk) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Could not clone and load part {} because disk does not belong to storage policy", + quoteString(src_part->getDataPartStorage().getFullPath())); String dst_part_name = src_part->getNewName(dst_part_info); String tmp_dst_part_name = tmp_part_prefix + dst_part_name; auto temporary_directory_lock = getTemporaryPartDirectoryHolder(tmp_dst_part_name); /// Why it is needed if we only hardlink files? - /// Answer: In issue #59377, add copy when attach from different disk. 
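For readers unfamiliar with the statement touched by this revert: `ATTACH PARTITION ... FROM` copies a partition's data parts between two existing tables that satisfy the conditions listed in the documentation hunk above. A minimal sketch follows; the table and partition names are illustrative and not taken from the patch, only the `tuple()` form at the end mirrors the integration test removed by this revert.

``` sql
-- Assumes both tables already exist with identical structure, partition key,
-- ORDER BY key, primary key, indices/projections and storage policy.
ALTER TABLE visits_dst ATTACH PARTITION 202404 FROM visits_src;

-- Unpartitioned tables use the tuple() partition expression,
-- as in the integration test deleted further down in this patch:
ALTER TABLE destination ATTACH PARTITION tuple() FROM source;
```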
auto reservation = src_part->getDataPartStorage().reserve(src_part->getBytesOnDisk()); auto src_part_storage = src_part->getDataPartStoragePtr(); @@ -7108,30 +7113,16 @@ std::pair MergeTreeData::cloneAn MergeTreeData::MutableDataPartPtr src_flushed_tmp_part; String with_copy; - if (params.copy_instead_of_hardlink || !on_same_disk) + if (params.copy_instead_of_hardlink) with_copy = " (copying data)"; - - std::shared_ptr dst_part_storage{}; - if (on_same_disk && !params.copy_instead_of_hardlink) - { - dst_part_storage = src_part_storage->freeze( - relative_data_path, - tmp_dst_part_name, - read_settings, - write_settings, - /* save_metadata_callback= */ {}, - params); - } - else - { - auto reservation_on_dst = getStoragePolicy()->reserve(src_part->getBytesOnDisk()); - if (!reservation_on_dst) - throw Exception(ErrorCodes::NOT_ENOUGH_SPACE, "Not enough space on disk."); - dst_part_storage = src_part_storage->clonePart( - this->getRelativeDataPath(), tmp_dst_part_name, reservation_on_dst->getDisk(), read_settings, write_settings, {}, {}); - } - + auto dst_part_storage = src_part_storage->freeze( + relative_data_path, + tmp_dst_part_name, + read_settings, + write_settings, + /* save_metadata_callback= */ {}, + params); if (params.metadata_version_to_write.has_value()) { @@ -7153,7 +7144,7 @@ std::pair MergeTreeData::cloneAn .withPartFormatFromDisk() .build(); - if (on_same_disk && !params.copy_instead_of_hardlink && params.hardlinked_files) + if (!params.copy_instead_of_hardlink && params.hardlinked_files) { params.hardlinked_files->source_part_name = src_part->name; params.hardlinked_files->source_table_shared_id = src_part->storage.getTableSharedID(); @@ -7197,7 +7188,6 @@ std::pair MergeTreeData::cloneAn return std::make_pair(dst_data_part, std::move(temporary_directory_lock)); } - String MergeTreeData::getFullPathOnDisk(const DiskPtr & disk) const { return disk->getPath() + relative_data_path; diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index d21f87c337ef..b1fbadc57f07 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -839,7 +839,7 @@ class MergeTreeData : public IStorage, public WithMutableContext MergeTreeData & checkStructureAndGetMergeTreeData(const StoragePtr & source_table, const StorageMetadataPtr & src_snapshot, const StorageMetadataPtr & my_snapshot) const; MergeTreeData & checkStructureAndGetMergeTreeData(IStorage & source_table, const StorageMetadataPtr & src_snapshot, const StorageMetadataPtr & my_snapshot) const; - std::pair cloneAndLoadDataPart( + std::pair cloneAndLoadDataPartOnSameDisk( const MergeTreeData::DataPartPtr & src_part, const String & tmp_part_prefix, const MergeTreePartInfo & dst_part_info, diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index a971c4fda1c6..5e388d6a8ac7 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -2146,7 +2146,7 @@ bool MutateTask::prepare() scope_guard lock; { - std::tie(part, lock) = ctx->data->cloneAndLoadDataPart( + std::tie(part, lock) = ctx->data->cloneAndLoadDataPartOnSameDisk( ctx->source_part, prefix, ctx->future_part->part_info, ctx->metadata_snapshot, clone_params, ctx->context->getReadSettings(), ctx->context->getWriteSettings()); part->getDataPartStorage().beginTransaction(); ctx->temporary_directory_lock = std::move(lock); diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 86af02be8990..0235a74400c8 
100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -2096,7 +2096,7 @@ void StorageMergeTree::replacePartitionFrom(const StoragePtr & source_table, con MergeTreePartInfo dst_part_info(partition_id, temp_index, temp_index, src_part->info.level); IDataPartStorage::ClonePartParams clone_params{.txn = local_context->getCurrentTransaction()}; - auto [dst_part, part_lock] = cloneAndLoadDataPart( + auto [dst_part, part_lock] = cloneAndLoadDataPartOnSameDisk( src_part, TMP_PREFIX, dst_part_info, @@ -2207,7 +2207,7 @@ void StorageMergeTree::movePartitionToTable(const StoragePtr & dest_table, const .copy_instead_of_hardlink = getSettings()->always_use_copy_instead_of_hardlinks, }; - auto [dst_part, part_lock] = dest_table_storage->cloneAndLoadDataPart( + auto [dst_part, part_lock] = dest_table_storage->cloneAndLoadDataPartOnSameDisk( src_part, TMP_PREFIX, dst_part_info, diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 15d1b7f40103..fcb946c089cb 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -2788,7 +2788,7 @@ bool StorageReplicatedMergeTree::executeReplaceRange(LogEntry & entry) auto obtain_part = [&] (PartDescriptionPtr & part_desc) { - /// Fetches with zero-copy-replication are cheap, but cloneAndLoadDataPart(OnSameDisk) will do full copy. + /// Fetches with zero-copy-replication are cheap, but cloneAndLoadDataPartOnSameDisk will do full copy. /// It's okay to check the setting for current table and disk for the source table, because src and dst part are on the same disk. bool prefer_fetch_from_other_replica = !part_desc->replica.empty() && storage_settings_ptr->allow_remote_fs_zero_copy_replication && part_desc->src_table_part && part_desc->src_table_part->isStoredOnRemoteDiskWithZeroCopySupport(); @@ -2807,7 +2807,7 @@ bool StorageReplicatedMergeTree::executeReplaceRange(LogEntry & entry) .copy_instead_of_hardlink = storage_settings_ptr->always_use_copy_instead_of_hardlinks || ((our_zero_copy_enabled || source_zero_copy_enabled) && part_desc->src_table_part->isStoredOnRemoteDiskWithZeroCopySupport()), .metadata_version_to_write = metadata_snapshot->getMetadataVersion() }; - auto [res_part, temporary_part_lock] = cloneAndLoadDataPart( + auto [res_part, temporary_part_lock] = cloneAndLoadDataPartOnSameDisk( part_desc->src_table_part, TMP_PREFIX + "clone_", part_desc->new_part_info, @@ -4888,7 +4888,7 @@ bool StorageReplicatedMergeTree::fetchPart( .keep_metadata_version = true, }; - auto [cloned_part, lock] = cloneAndLoadDataPart( + auto [cloned_part, lock] = cloneAndLoadDataPartOnSameDisk( part_to_clone, "tmp_clone_", part_info, @@ -8078,14 +8078,12 @@ void StorageReplicatedMergeTree::replacePartitionFrom( bool zero_copy_enabled = storage_settings_ptr->allow_remote_fs_zero_copy_replication || dynamic_cast(source_table.get())->getSettings()->allow_remote_fs_zero_copy_replication; - IDataPartStorage::ClonePartParams clone_params { .copy_instead_of_hardlink = storage_settings_ptr->always_use_copy_instead_of_hardlinks || (zero_copy_enabled && src_part->isStoredOnRemoteDiskWithZeroCopySupport()), .metadata_version_to_write = metadata_snapshot->getMetadataVersion() }; - - auto [dst_part, part_lock] = cloneAndLoadDataPart( + auto [dst_part, part_lock] = cloneAndLoadDataPartOnSameDisk( src_part, TMP_PREFIX, dst_part_info, @@ -8093,10 +8091,9 @@ void StorageReplicatedMergeTree::replacePartitionFrom( clone_params, query_context->getReadSettings(), 
query_context->getWriteSettings()); - - dst_parts.emplace_back(std::move(dst_part)); - dst_parts_locks.emplace_back(std::move(part_lock)); src_parts.emplace_back(src_part); + dst_parts.emplace_back(dst_part); + dst_parts_locks.emplace_back(std::move(part_lock)); ephemeral_locks.emplace_back(std::move(*lock)); block_id_paths.emplace_back(block_id_path); part_checksums.emplace_back(hash_hex); @@ -8349,7 +8346,7 @@ void StorageReplicatedMergeTree::movePartitionToTable(const StoragePtr & dest_ta .copy_instead_of_hardlink = storage_settings_ptr->always_use_copy_instead_of_hardlinks || (zero_copy_enabled && src_part->isStoredOnRemoteDiskWithZeroCopySupport()), .metadata_version_to_write = dest_metadata_snapshot->getMetadataVersion() }; - auto [dst_part, dst_part_lock] = dest_table_storage->cloneAndLoadDataPart( + auto [dst_part, dst_part_lock] = dest_table_storage->cloneAndLoadDataPartOnSameDisk( src_part, TMP_PREFIX, dst_part_info, diff --git a/tests/integration/test_attach_partition_using_copy/__init__.py b/tests/integration/test_attach_partition_using_copy/__init__.py deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/tests/integration/test_attach_partition_using_copy/configs/remote_servers.xml b/tests/integration/test_attach_partition_using_copy/configs/remote_servers.xml deleted file mode 100644 index b40730e9f7d5..000000000000 --- a/tests/integration/test_attach_partition_using_copy/configs/remote_servers.xml +++ /dev/null @@ -1,17 +0,0 @@ - - - - - true - - replica1 - 9000 - - - replica2 - 9000 - - - - - diff --git a/tests/integration/test_attach_partition_using_copy/test.py b/tests/integration/test_attach_partition_using_copy/test.py deleted file mode 100644 index df5378742aef..000000000000 --- a/tests/integration/test_attach_partition_using_copy/test.py +++ /dev/null @@ -1,187 +0,0 @@ -import pytest -from helpers.cluster import ClickHouseCluster -from helpers.test_tools import assert_eq_with_retry - -cluster = ClickHouseCluster(__file__) - -replica1 = cluster.add_instance( - "replica1", with_zookeeper=True, main_configs=["configs/remote_servers.xml"] -) -replica2 = cluster.add_instance( - "replica2", with_zookeeper=True, main_configs=["configs/remote_servers.xml"] -) - - -@pytest.fixture(scope="module") -def start_cluster(): - try: - cluster.start() - yield cluster - except Exception as ex: - print(ex) - finally: - cluster.shutdown() - - -def cleanup(nodes): - for node in nodes: - node.query("DROP TABLE IF EXISTS source SYNC") - node.query("DROP TABLE IF EXISTS destination SYNC") - - -def create_source_table(node, table_name, replicated): - replica = node.name - engine = ( - f"ReplicatedMergeTree('/clickhouse/tables/1/{table_name}', '{replica}')" - if replicated - else "MergeTree()" - ) - node.query_with_retry( - """ - ATTACH TABLE {table_name} UUID 'cf712b4f-2ca8-435c-ac23-c4393efe52f7' - ( - price UInt32, - date Date, - postcode1 LowCardinality(String), - postcode2 LowCardinality(String), - type Enum8('other' = 0, 'terraced' = 1, 'semi-detached' = 2, 'detached' = 3, 'flat' = 4), - is_new UInt8, - duration Enum8('unknown' = 0, 'freehold' = 1, 'leasehold' = 2), - addr1 String, - addr2 String, - street LowCardinality(String), - locality LowCardinality(String), - town LowCardinality(String), - district LowCardinality(String), - county LowCardinality(String) - ) - ENGINE = {engine} - ORDER BY (postcode1, postcode2, addr1, addr2) - SETTINGS disk = disk(type = web, endpoint = 'https://raw.githubusercontent.com/ClickHouse/web-tables-demo/main/web/') - """.format( - 
table_name=table_name, engine=engine - ) - ) - - -def create_destination_table(node, table_name, replicated): - replica = node.name - engine = ( - f"ReplicatedMergeTree('/clickhouse/tables/1/{table_name}', '{replica}')" - if replicated - else "MergeTree()" - ) - node.query_with_retry( - """ - CREATE TABLE {table_name} - ( - price UInt32, - date Date, - postcode1 LowCardinality(String), - postcode2 LowCardinality(String), - type Enum8('other' = 0, 'terraced' = 1, 'semi-detached' = 2, 'detached' = 3, 'flat' = 4), - is_new UInt8, - duration Enum8('unknown' = 0, 'freehold' = 1, 'leasehold' = 2), - addr1 String, - addr2 String, - street LowCardinality(String), - locality LowCardinality(String), - town LowCardinality(String), - district LowCardinality(String), - county LowCardinality(String) - ) - ENGINE = {engine} - ORDER BY (postcode1, postcode2, addr1, addr2) - """.format( - table_name=table_name, engine=engine - ) - ) - - -def test_both_mergtree(start_cluster): - create_source_table(replica1, "source", False) - create_destination_table(replica1, "destination", False) - - replica1.query(f"ALTER TABLE destination ATTACH PARTITION tuple() FROM source") - - assert_eq_with_retry( - replica1, - f"SELECT toYear(date) AS year,round(avg(price)) AS price,bar(price, 0, 1000000, 80) FROM destination GROUP BY year ORDER BY year ASC", - replica1.query( - f"SELECT toYear(date) AS year,round(avg(price)) AS price,bar(price, 0, 1000000, 80) FROM source GROUP BY year ORDER BY year ASC" - ), - ) - - assert_eq_with_retry( - replica1, f"SELECT town from destination LIMIT 1", "SCARBOROUGH" - ) - - cleanup([replica1]) - - -def test_all_replicated(start_cluster): - create_source_table(replica1, "source", True) - create_destination_table(replica1, "destination", True) - create_destination_table(replica2, "destination", True) - - replica1.query("SYSTEM SYNC REPLICA destination") - replica1.query(f"ALTER TABLE destination ATTACH PARTITION tuple() FROM source") - - assert_eq_with_retry( - replica1, - f"SELECT toYear(date) AS year,round(avg(price)) AS price,bar(price, 0, 1000000, 80) FROM destination GROUP BY year ORDER BY year ASC", - replica1.query( - f"SELECT toYear(date) AS year,round(avg(price)) AS price,bar(price, 0, 1000000, 80) FROM source GROUP BY year ORDER BY year ASC" - ), - ) - assert_eq_with_retry( - replica1, - f"SELECT toYear(date) AS year,round(avg(price)) AS price,bar(price, 0, 1000000, 80) FROM source GROUP BY year ORDER BY year ASC", - replica2.query( - f"SELECT toYear(date) AS year,round(avg(price)) AS price,bar(price, 0, 1000000, 80) FROM destination GROUP BY year ORDER BY year ASC" - ), - ) - - assert_eq_with_retry( - replica1, f"SELECT town from destination LIMIT 1", "SCARBOROUGH" - ) - - assert_eq_with_retry( - replica2, f"SELECT town from destination LIMIT 1", "SCARBOROUGH" - ) - - cleanup([replica1, replica2]) - - -def test_only_destination_replicated(start_cluster): - create_source_table(replica1, "source", False) - create_destination_table(replica1, "destination", True) - create_destination_table(replica2, "destination", True) - - replica1.query("SYSTEM SYNC REPLICA destination") - replica1.query(f"ALTER TABLE destination ATTACH PARTITION tuple() FROM source") - - assert_eq_with_retry( - replica1, - f"SELECT toYear(date) AS year,round(avg(price)) AS price,bar(price, 0, 1000000, 80) FROM destination GROUP BY year ORDER BY year ASC", - replica1.query( - f"SELECT toYear(date) AS year,round(avg(price)) AS price,bar(price, 0, 1000000, 80) FROM source GROUP BY year ORDER BY year ASC" - ), - ) - 
assert_eq_with_retry( - replica1, - f"SELECT toYear(date) AS year,round(avg(price)) AS price,bar(price, 0, 1000000, 80) FROM source GROUP BY year ORDER BY year ASC", - replica2.query( - f"SELECT toYear(date) AS year,round(avg(price)) AS price,bar(price, 0, 1000000, 80) FROM destination GROUP BY year ORDER BY year ASC" - ), - ) - - assert_eq_with_retry( - replica1, f"SELECT town from destination LIMIT 1", "SCARBOROUGH" - ) - - assert_eq_with_retry( - replica2, f"SELECT town from destination LIMIT 1", "SCARBOROUGH" - ) - - cleanup([replica1, replica2]) diff --git a/tests/integration/test_multiple_disks/test.py b/tests/integration/test_multiple_disks/test.py index 9584ace7f456..fdd81284b2a2 100644 --- a/tests/integration/test_multiple_disks/test.py +++ b/tests/integration/test_multiple_disks/test.py @@ -5,7 +5,6 @@ import threading import time from multiprocessing.dummy import Pool -from helpers.test_tools import assert_eq_with_retry import pytest from helpers.client import QueryRuntimeException @@ -1746,9 +1745,9 @@ def alter(): node1.query(f"DROP TABLE IF EXISTS {name} SYNC") -def test_move_across_policies_work_for_attach_not_work_for_move(start_cluster): +def test_move_across_policies_does_not_work(start_cluster): try: - name = "test_move_across_policies_work_for_attach_not_work_for_move" + name = "test_move_across_policies_does_not_work" node1.query( """ @@ -1784,18 +1783,25 @@ def test_move_across_policies_work_for_attach_not_work_for_move(start_cluster): except QueryRuntimeException: """All parts of partition 'all' are already on disk 'jbod2'.""" - node1.query( - """ALTER TABLE {name}2 ATTACH PARTITION tuple() FROM {name}""".format( - name=name + with pytest.raises( + QueryRuntimeException, + match=".*because disk does not belong to storage policy.*", + ): + node1.query( + """ALTER TABLE {name}2 ATTACH PARTITION tuple() FROM {name}""".format( + name=name + ) ) - ) - assert_eq_with_retry( - node1, - """SELECT * FROM {name}2""".format(name=name), + + with pytest.raises( + QueryRuntimeException, + match=".*because disk does not belong to storage policy.*", + ): node1.query( - """SELECT * FROM {name}""".format(name=name), - ), - ) + """ALTER TABLE {name}2 REPLACE PARTITION tuple() FROM {name}""".format( + name=name + ) + ) with pytest.raises( QueryRuntimeException, @@ -1807,6 +1813,10 @@ def test_move_across_policies_work_for_attach_not_work_for_move(start_cluster): ) ) + assert node1.query( + """SELECT * FROM {name}""".format(name=name) + ).splitlines() == ["1"] + finally: node1.query(f"DROP TABLE IF EXISTS {name} SYNC") node1.query(f"DROP TABLE IF EXISTS {name}2 SYNC") From e21799f4a546e7bf798fb81a7d37c9cf324c89a5 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 11 Apr 2024 13:48:35 +0200 Subject: [PATCH 53/90] fix hung check --- src/Interpreters/InterpreterSystemQuery.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp index 9b4534601c35..f6db12e977c5 100644 --- a/src/Interpreters/InterpreterSystemQuery.cpp +++ b/src/Interpreters/InterpreterSystemQuery.cpp @@ -731,10 +731,12 @@ BlockIO InterpreterSystemQuery::execute() case Type::STOP_THREAD_FUZZER: getContext()->checkAccess(AccessType::SYSTEM_THREAD_FUZZER); ThreadFuzzer::stop(); + CannotAllocateThreadFaultInjector::setFaultProbability(0); break; case Type::START_THREAD_FUZZER: getContext()->checkAccess(AccessType::SYSTEM_THREAD_FUZZER); ThreadFuzzer::start(); + 
CannotAllocateThreadFaultInjector::setFaultProbability(getContext()->getServerSettings().cannot_allocate_thread_fault_injection_probability); break; case Type::UNFREEZE: { From 0ef2153d55c2477a33c40301c74e0682eba63d1a Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 11 Apr 2024 14:02:32 +0200 Subject: [PATCH 54/90] Fix --- src/Interpreters/Cache/SLRUFileCachePriority.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/Interpreters/Cache/SLRUFileCachePriority.cpp b/src/Interpreters/Cache/SLRUFileCachePriority.cpp index 68bf182dd2e0..31143eb4a24d 100644 --- a/src/Interpreters/Cache/SLRUFileCachePriority.cpp +++ b/src/Interpreters/Cache/SLRUFileCachePriority.cpp @@ -209,8 +209,13 @@ bool SLRUFileCachePriority::collectCandidatesForEvictionInProtected( { return false; } - else - chassert(downgrade_candidates->size() > 0); + + /// We can have no downgrade candidates because cache size could + /// reduce concurrently because of lock-free cache entries invalidation. + if (downgrade_candidates->size() == 0) + { + return true; + } if (!probationary_queue.collectCandidatesForEviction( downgrade_stat.total_stat.releasable_size, downgrade_stat.total_stat.releasable_count, From 4f6b6e30e10153f448f599e592b496eb499f198a Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 11 Apr 2024 12:30:34 +0000 Subject: [PATCH 55/90] Fix flaky 03093_bug37909_query_does_not_finish --- .../queries/0_stateless/03093_bug37909_query_does_not_finish.sql | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/03093_bug37909_query_does_not_finish.sql b/tests/queries/0_stateless/03093_bug37909_query_does_not_finish.sql index 463922c4e29a..62fa3f437af4 100644 --- a/tests/queries/0_stateless/03093_bug37909_query_does_not_finish.sql +++ b/tests/queries/0_stateless/03093_bug37909_query_does_not_finish.sql @@ -75,3 +75,4 @@ FROM /* WHERE (v_date >= '2022-05-08') AND (v_date <= '2022-06-07') placing condition has same effect */ GROUP BY vDate ORDER BY vDate ASC +SETTINGS allow_experimental_analyzer = 1; -- the query times out if allow_experimental_analyzer = 0 From 8b38fabcf9e7ae844ff8844d2c61a49cc765708b Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Thu, 11 Apr 2024 12:35:52 +0000 Subject: [PATCH 56/90] better --- .../Transforms/SquashingChunksTransform.cpp | 25 +++++++++++-------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/src/Processors/Transforms/SquashingChunksTransform.cpp b/src/Processors/Transforms/SquashingChunksTransform.cpp index 67cf22c7d4de..0d69b6e0a8d5 100644 --- a/src/Processors/Transforms/SquashingChunksTransform.cpp +++ b/src/Processors/Transforms/SquashingChunksTransform.cpp @@ -74,17 +74,6 @@ void SimpleSquashingChunksTransform::transform(Chunk & chunk) auto block = squashing.add({}); chunk.setColumns(block.getColumns(), block.rows()); - - /// ISimpleTransform keeps output chunk (result of transform() execution) for some time and push it in the output port within subsequent prepare() call. - /// Because of our custom prepare() implementation we have to take care of both places where data could be buffered: `output_data` and `squashing`. 
- if (output_data.chunk.hasRows()) - { - auto res = std::move(output_data.chunk); - output_data.chunk.clear(); - if (chunk.hasRows()) - res.append(chunk); - chunk = std::move(res); - } } } @@ -92,7 +81,21 @@ IProcessor::Status SimpleSquashingChunksTransform::prepare() { if (!finished && input.isFinished()) { + if (output.isFinished()) + return Status::Finished; + + if (!output.canPush()) + return Status::PortFull; + + if (has_output) + { + output.pushData(std::move(output_data)); + has_output = false; + return Status::PortFull; + } + finished = true; + /// On the next call to transform() we will return all data buffered in `squashing` (if any) return Status::Ready; } return ISimpleTransform::prepare(); From 3ff802376f0f6e8259d4087d91f0a6183e87d6f0 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 11 Apr 2024 12:36:53 +0000 Subject: [PATCH 57/90] Add test for issue 24607 --- .../queries/0_stateless/03094_named_tuple_bug24607.reference | 1 + tests/queries/0_stateless/03094_named_tuple_bug24607.sql | 4 ++++ 2 files changed, 5 insertions(+) create mode 100644 tests/queries/0_stateless/03094_named_tuple_bug24607.reference create mode 100644 tests/queries/0_stateless/03094_named_tuple_bug24607.sql diff --git a/tests/queries/0_stateless/03094_named_tuple_bug24607.reference b/tests/queries/0_stateless/03094_named_tuple_bug24607.reference new file mode 100644 index 000000000000..fb6ca6c5c3ab --- /dev/null +++ b/tests/queries/0_stateless/03094_named_tuple_bug24607.reference @@ -0,0 +1 @@ +(1,'test') 1 diff --git a/tests/queries/0_stateless/03094_named_tuple_bug24607.sql b/tests/queries/0_stateless/03094_named_tuple_bug24607.sql new file mode 100644 index 000000000000..e3c97f3fe414 --- /dev/null +++ b/tests/queries/0_stateless/03094_named_tuple_bug24607.sql @@ -0,0 +1,4 @@ +SELECT + JSONExtract('{"a":1, "b":"test"}', 'Tuple(a UInt8, b String)') AS x, + x.a +SETTINGS allow_experimental_analyzer = 1; From c684770acee472375736c6429d74bc900f794c5a Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Thu, 11 Apr 2024 15:16:21 +0200 Subject: [PATCH 58/90] Use sudo to compress logs with the highest permissions --- tests/ci/integration_tests_runner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/integration_tests_runner.py b/tests/ci/integration_tests_runner.py index 90e2b08386fc..1289190a29bb 100755 --- a/tests/ci/integration_tests_runner.py +++ b/tests/ci/integration_tests_runner.py @@ -398,7 +398,7 @@ def _install_clickhouse(self, debs_path): @staticmethod def _compress_logs(directory, relpaths, result_path): retcode = subprocess.call( # STYLE_CHECK_ALLOW_SUBPROCESS_CHECK_CALL - f"tar --use-compress-program='zstd --threads=0' -cf {result_path} -C " + f"sudo tar --use-compress-program='zstd --threads=0' -cf {result_path} -C " f"{directory} {' '.join(relpaths)}", shell=True, ) From 0b76f95e57a271035571fca9342acb3636ae4cc9 Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Thu, 11 Apr 2024 15:19:32 +0200 Subject: [PATCH 59/90] Remove unnecessary style-check comments --- tests/ci/integration_tests_runner.py | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/tests/ci/integration_tests_runner.py b/tests/ci/integration_tests_runner.py index 1289190a29bb..118a790590a7 100755 --- a/tests/ci/integration_tests_runner.py +++ b/tests/ci/integration_tests_runner.py @@ -13,13 +13,13 @@ import subprocess import sys import time -from typing import Any, Dict import zlib # for crc32 from collections import defaultdict from itertools import chain +from typing import Any, Dict -from integration_test_images import IMAGES from env_helper import CI +from integration_test_images import IMAGES MAX_RETRY = 1 NUM_WORKERS = 5 @@ -397,9 +397,9 @@ def _install_clickhouse(self, debs_path): @staticmethod def _compress_logs(directory, relpaths, result_path): - retcode = subprocess.call( # STYLE_CHECK_ALLOW_SUBPROCESS_CHECK_CALL - f"sudo tar --use-compress-program='zstd --threads=0' -cf {result_path} -C " - f"{directory} {' '.join(relpaths)}", + retcode = subprocess.call( + f"sudo tar --use-compress-program='zstd --threads=0' " + f"-cf {result_path} -C {directory} {' '.join(relpaths)}", shell=True, ) # tar return 1 when the files are changed on compressing, we ignore it @@ -432,9 +432,7 @@ def _get_all_tests(self, repo_path): "Getting all tests to the file %s with cmd: \n%s", out_file_full, cmd ) with open(out_file_full, "wb") as ofd: - subprocess.check_call( # STYLE_CHECK_ALLOW_SUBPROCESS_CHECK_CALL - cmd, shell=True, stdout=ofd, stderr=ofd - ) + subprocess.check_call(cmd, shell=True, stdout=ofd, stderr=ofd) all_tests = set() with open(out_file_full, "r", encoding="utf-8") as all_tests_fd: @@ -1007,9 +1005,7 @@ def run(): if CI: # Avoid overlaps with previous runs logging.info("Clearing dmesg before run") - subprocess.check_call( # STYLE_CHECK_ALLOW_SUBPROCESS_CHECK_CALL - "sudo -E dmesg --clear", shell=True - ) + subprocess.check_call("sudo -E dmesg --clear", shell=True) state, description, test_results, _ = runner.run_impl(repo_path, build_path) logging.info("Tests finished") @@ -1017,9 +1013,7 @@ def run(): if CI: # Dump dmesg (to capture possible OOMs) logging.info("Dumping dmesg") - subprocess.check_call( # STYLE_CHECK_ALLOW_SUBPROCESS_CHECK_CALL - "sudo -E dmesg -T", shell=True - ) + subprocess.check_call("sudo -E dmesg -T", shell=True) status = (state, description) out_results_file = os.path.join(str(runner.path()), "test_results.tsv") From 59d56668e9a5dd4b6a8ca0b73a689cae9d18f2d9 Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Thu, 11 Apr 2024 11:58:44 +0000 Subject: [PATCH 60/90] CI: respect Sync status in the MQ --- .github/workflows/master.yml | 4 +- .github/workflows/pull_request.yml | 13 ++++- tests/ci/ci.py | 12 +++-- tests/ci/finish_check.py | 31 ++++++------ tests/ci/sync_pr.py | 81 +++++++++++++++++++++++------- 5 files changed, 100 insertions(+), 41 deletions(-) diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index 64372a90613e..9a719a205d4c 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -23,10 +23,10 @@ jobs: clear-repository: true # to ensure correct digests fetch-depth: 0 # to get version filter: tree:0 - - name: Check sync PR + - name: Merge sync PR run: | cd "$GITHUB_WORKSPACE/tests/ci" - python3 sync_pr.py || : + python3 sync_pr.py --merge || : - name: Python unit tests run: | cd "$GITHUB_WORKSPACE/tests/ci" diff --git 
a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 74ce8452de8b..0eac9a9a722d 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -157,16 +157,25 @@ jobs: ################################# Stage Final ################################# # FinishCheck: - if: ${{ !failure() && !cancelled() && github.event_name != 'merge_group' }} - needs: [Tests_1, Tests_2] + if: ${{ !failure() && !cancelled() }} + needs: [Tests_1, Tests_2, Builds_1_Report, Builds_2_Report] runs-on: [self-hosted, style-checker] steps: - name: Check out repository code uses: ClickHouse/checkout@v1 + - name: Check sync status + if: ${{ github.event_name == 'merge_group' }} + run: | + cd "$GITHUB_WORKSPACE/tests/ci" + python3 sync_pr.py --status - name: Finish label run: | cd "$GITHUB_WORKSPACE/tests/ci" python3 finish_check.py + - name: Auto merge if approved + if: ${{ github.event_name != 'merge_group' }} + run: | + cd "$GITHUB_WORKSPACE/tests/ci" python3 merge_pr.py --check-approved diff --git a/tests/ci/ci.py b/tests/ci/ci.py index 8434355ce465..f11d62e91362 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -2121,11 +2121,13 @@ def main() -> int: pr_info, dump_to_file=True, ) - update_mergeable_check( - commit, - pr_info, - job_report.check_name or _get_ext_check_name(args.job_name), - ) + if not pr_info.is_merge_queue(): + # in the merge queue mergeable status must be set only in FinishCheck (last job in wf) + update_mergeable_check( + commit, + pr_info, + job_report.check_name or _get_ext_check_name(args.job_name), + ) print(f"Job report url: [{check_url}]") prepared_events = prepare_tests_results_for_clickhouse( diff --git a/tests/ci/finish_check.py b/tests/ci/finish_check.py index eebc846f4b1b..617f4c9d88cf 100644 --- a/tests/ci/finish_check.py +++ b/tests/ci/finish_check.py @@ -28,21 +28,22 @@ def main(): statuses = get_commit_filtered_statuses(commit) trigger_mergeable_check(commit, statuses) - statuses = [s for s in statuses if s.context == CI_STATUS_NAME] - if not statuses: - return - # Take the latest status - status = statuses[-1] - if status.state == PENDING: - post_commit_status( - commit, - SUCCESS, - status.target_url, - "All checks finished", - CI_STATUS_NAME, - pr_info, - dump_to_file=True, - ) + if not pr_info.is_merge_queue(): + statuses = [s for s in statuses if s.context == CI_STATUS_NAME] + if not statuses: + return + # Take the latest status + status = statuses[-1] + if status.state == PENDING: + post_commit_status( + commit, + SUCCESS, + status.target_url, + "All checks finished", + CI_STATUS_NAME, + pr_info, + dump_to_file=True, + ) if __name__ == "__main__": diff --git a/tests/ci/sync_pr.py b/tests/ci/sync_pr.py index f33f6122f309..e8d982fac166 100644 --- a/tests/ci/sync_pr.py +++ b/tests/ci/sync_pr.py @@ -2,14 +2,68 @@ """Script for automatic sync PRs handling in private repos""" +import argparse import sys from get_robot_token import get_best_robot_token from pr_info import PRInfo from github_helper import GitHub +from commit_status_helper import get_commit, post_commit_status +from report import FAILURE, SUCCESS + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + description="Script for handling sync PRs", + ) + parser.add_argument( + "--merge", + action="store_true", + help="merge sync pr", + ) + parser.add_argument( + "--status", + action="store_true", + help="check and set sync pr status", + ) + args = parser.parse_args() + return args + 
+ +def merge_sync_pr(gh, sync_pr): + if not sync_pr: + print("Sync PR not found - exiting") + return + + if sync_pr.state == "closed": + print(f"Sync PR [{sync_pr.number}] already closed - exiting") + sys.exit(0) + + if sync_pr.state != "open": + print( + f"WARNING: Unknown Sync PR [{sync_pr.number}] state [{sync_pr.state}] - exiting" + ) + sys.exit(0) + + print(f"Trying to merge Sync PR [{sync_pr.number}]") + if sync_pr.draft: + gh.toggle_pr_draft(sync_pr) + sync_pr.merge() + + +def set_sync_status(gh, pr_info, sync_pr): + if not sync_pr or sync_pr.mergeable: + post_commit_status(get_commit(gh, pr_info.sha), FAILURE, "", "Sync PR failure", "A Sync") + else: + post_commit_status(get_commit(gh, pr_info.sha), SUCCESS, "", "", "A Sync") def main(): + args = parse_args() + + assert args.merge ^ args.status + gh = GitHub(get_best_robot_token()) pr_info = PRInfo() @@ -19,27 +73,20 @@ def main(): query=f"head:sync-upstream/pr/{pr_info.merged_pr} org:ClickHouse type:pr", repo="ClickHouse/clickhouse-private", ) + + sync_pr = None + if len(prs) > 1: print(f"WARNING: More than one PR found [{prs}] - exiting") - sys.exit(0) - if len(prs) == 0: + elif len(prs) == 0: print("WARNING: No Sync PR found") - sys.exit(0) - - pr = prs[0] - - if pr.state == "closed": - print(f"Sync PR [{pr.number}] already closed - exiting") - sys.exit(0) - - if pr.state != "open": - print(f"WARNING: Unknown Sync PR [{pr.number}] state [{pr.state}] - exiting") - sys.exit(0) + else: + sync_pr = prs[0] - print(f"Trying to merge Sync PR [{pr.number}]") - if pr.draft: - gh.toggle_pr_draft(pr) - pr.merge() + if args.merge: + merge_sync_pr(gh, sync_pr) + elif args.status: + set_sync_status(gh, pr_info, sync_pr) if __name__ == "__main__": From 2bbd36a5b72f4ff4d7ebcdd66d0a7f8c5e955acf Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Thu, 11 Apr 2024 15:36:07 +0200 Subject: [PATCH 61/90] Run fast tests and style checks in parallel --- .github/workflows/pull_request.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 74ce8452de8b..0db7be65feab 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -83,7 +83,7 @@ jobs: ${{secrets.ROBOT_CLICKHOUSE_SSH_KEY}} RCSK FastTest: - needs: [RunConfig, StyleCheck] + needs: [RunConfig, BuildDockers] if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).jobs_data.jobs_to_do, 'Fast test') }} uses: ./.github/workflows/reusable_test.yml with: From 99078ef9d5e5fe3d205add749de6b72e2845eb4a Mon Sep 17 00:00:00 2001 From: Arnaud Rocher Date: Thu, 11 Apr 2024 15:56:47 +0200 Subject: [PATCH 62/90] Fix typo in `like` function documentation --- docs/en/sql-reference/functions/string-search-functions.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/sql-reference/functions/string-search-functions.md b/docs/en/sql-reference/functions/string-search-functions.md index f7e56e73520f..df266b224fbb 100644 --- a/docs/en/sql-reference/functions/string-search-functions.md +++ b/docs/en/sql-reference/functions/string-search-functions.md @@ -970,7 +970,7 @@ If the haystack or the LIKE expression are not valid UTF-8, the behavior is unde No automatic Unicode normalization is performed, you can use the [normalizeUTF8*()](https://clickhouse.com/docs/en/sql-reference/functions/string-functions/) functions for that. 
-To match against literal `%`, `_` and `/` (which are LIKE metacharacters), prepend them with a backslash: `\%`, `\_` and `\\`. +To match against literal `%`, `_` and `\` (which are LIKE metacharacters), prepend them with a backslash: `\%`, `\_` and `\\`. The backslash loses its special meaning (i.e. is interpreted literally) if it prepends a character different than `%`, `_` or `\`. Note that ClickHouse requires backslashes in strings [to be quoted as well](../syntax.md#string), so you would actually need to write `\\%`, `\\_` and `\\\\`. @@ -1768,4 +1768,4 @@ SELECT hasTokenCaseInsensitiveOrNull('Hello World','hello,world'); ```response null -``` \ No newline at end of file +``` From dd49b09902d29de85299d570d14b934f801d4ec3 Mon Sep 17 00:00:00 2001 From: Joshua Hildred Date: Thu, 11 Apr 2024 06:58:35 -0700 Subject: [PATCH 63/90] Address review comments --- .../Passes/LogicalExpressionOptimizerPass.cpp | 62 +++++++------------ .../03032_redundant_equals.reference | 3 + .../0_stateless/03032_redundant_equals.sql | 4 ++ 3 files changed, 28 insertions(+), 41 deletions(-) diff --git a/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp b/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp index 05efe983b42e..63f8c4786cec 100644 --- a/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp +++ b/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp @@ -274,18 +274,7 @@ class LogicalExpressionOptimizerVisitor : public InDepthQueryTreeVisitorWithCont } } - void leaveImpl(QueryTreeNodePtr & node) - { - if (!need_rerun_resolve) - return; - - if (auto * function_node = node->as()) - rerunFunctionResolve(function_node, getContext()); - } - private: - bool need_rerun_resolve = false; - void tryOptimizeAndEqualsNotEqualsChain(QueryTreeNodePtr & node) { auto & function_node = node->as(); @@ -588,12 +577,6 @@ class LogicalExpressionOptimizerVisitor : public InDepthQueryTreeVisitorWithCont auto & function_node = node->as(); assert(function_node.getFunctionName() == "equals"); - bool lhs_const; - bool maybe_invert; - - const ConstantNode * constant; - const FunctionNode * child_function; - const auto function_arguments = function_node.getArguments().getNodes(); if (function_arguments.size() != 2) return; @@ -601,47 +584,44 @@ class LogicalExpressionOptimizerVisitor : public InDepthQueryTreeVisitorWithCont const auto & lhs = function_arguments[0]; const auto & rhs = function_arguments[1]; - if ((constant = lhs->as())) - lhs_const = true; - else if ((constant = rhs->as())) - lhs_const = false; + UInt64 constant_value; + bool is_lhs_const; + if (const auto * lhs_constant = lhs->as()) + { + if (!lhs_constant->getValue().tryGet(constant_value) || constant_value > 1 + || isNullableOrLowCardinalityNullable(lhs_constant->getResultType())) + return; + is_lhs_const = true; + } + else if (const auto * rhs_constant = rhs->as()) + { + if (!rhs_constant->getValue().tryGet(constant_value) || constant_value > 1 + || isNullableOrLowCardinalityNullable(rhs_constant->getResultType())) + return; + is_lhs_const = false; + } else return; - UInt64 val; - if (!constant->getValue().tryGet(val)) - return; - - if (val == 1) - maybe_invert = false; - else if (val == 0) - maybe_invert = true; - else - return; + bool need_invert = (constant_value == 0); - if (lhs_const) - child_function = rhs->as(); - else - child_function = lhs->as(); + const FunctionNode * child_function = is_lhs_const ? 
rhs->as() : lhs->as(); if (!child_function || !isBooleanFunction(child_function->getFunctionName())) return; - if (removeLowCardinality(constant->getResultType())->isNullable()) - need_rerun_resolve = true; - - if (maybe_invert) + if (need_invert) { auto not_resolver = FunctionFactory::instance().get("not", getContext()); const auto not_node = std::make_shared("not"); auto & arguments = not_node->getArguments().getNodes(); arguments.reserve(1); - arguments.push_back(lhs_const ? rhs : lhs); + arguments.push_back(is_lhs_const ? rhs : lhs); not_node->resolveAsFunction(not_resolver->build(not_node->getArgumentColumns())); node = not_node; } else - node = lhs_const ? rhs : lhs; + node = is_lhs_const ? rhs : lhs; } }; diff --git a/tests/queries/0_stateless/03032_redundant_equals.reference b/tests/queries/0_stateless/03032_redundant_equals.reference index b154addf55fb..5b211a69007c 100644 --- a/tests/queries/0_stateless/03032_redundant_equals.reference +++ b/tests/queries/0_stateless/03032_redundant_equals.reference @@ -15,6 +15,9 @@ 100 101 100 +101 +101 +101 100 101 100 diff --git a/tests/queries/0_stateless/03032_redundant_equals.sql b/tests/queries/0_stateless/03032_redundant_equals.sql index bd2306c7575c..de85ec5cf002 100644 --- a/tests/queries/0_stateless/03032_redundant_equals.sql +++ b/tests/queries/0_stateless/03032_redundant_equals.sql @@ -25,6 +25,10 @@ SELECT * FROM test_table WHERE (NOT ((k not in (100) = 0) OR (k in (100) = 1))) SELECT * FROM test_table WHERE (NOT ((k in (101) = 0) OR (k in (100) = 1))) = 1; SELECT * FROM test_table WHERE ((k not in (101) = 0) OR (k in (100) = 1)) = 1; SELECT * FROM test_table WHERE ((k not in (99) = 1) AND (k in (100) = 1)) = 1; +-- we skip optimizing queries with toNullable(0 or 1) but lets make sure they still work +SELECT * FROM test_table WHERE (k = 101) = toLowCardinality(toNullable(1)); +SELECT * FROM test_table WHERE (k = 101) = toNullable(1); +SELECT * FROM test_table WHERE (k = 101) = toLowCardinality(1); SELECT * FROM test_table WHERE ((k not in (101) = toNullable(0)) OR (k in (100) = toNullable(1))) = toNullable(1); SELECT * FROM test_table WHERE (((k NOT IN toLowCardinality(toNullable(101))) = toLowCardinality(toNullable(0))) OR ((k IN (toLowCardinality(100))) = toNullable(1))); SELECT * FROM test_table WHERE (((k IN toLowCardinality(toNullable(101))) = toLowCardinality(toNullable(0))) AND ((k NOT IN (toLowCardinality(100))) = toNullable(1))) = toNullable(toLowCardinality(0)); From 671d2a8927a79b3038dc504524fbbe5cc3c39e9a Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 11 Apr 2024 14:33:25 +0000 Subject: [PATCH 64/90] Automatic style fix --- tests/ci/sync_pr.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/ci/sync_pr.py b/tests/ci/sync_pr.py index e8d982fac166..c58cc20d7af4 100644 --- a/tests/ci/sync_pr.py +++ b/tests/ci/sync_pr.py @@ -54,7 +54,9 @@ def merge_sync_pr(gh, sync_pr): def set_sync_status(gh, pr_info, sync_pr): if not sync_pr or sync_pr.mergeable: - post_commit_status(get_commit(gh, pr_info.sha), FAILURE, "", "Sync PR failure", "A Sync") + post_commit_status( + get_commit(gh, pr_info.sha), FAILURE, "", "Sync PR failure", "A Sync" + ) else: post_commit_status(get_commit(gh, pr_info.sha), SUCCESS, "", "", "A Sync") From 3e16309e991b6ac833a18eae1e7dd120e7c9f63b Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 11 Apr 2024 15:25:52 +0000 Subject: [PATCH 65/90] Allow constant folding throught __getScalar --- src/Analyzer/Passes/QueryAnalysisPass.cpp | 22 +++++++++++++++++-- 
..._limit_with_constant_expressions.reference | 3 +++ .../00834_limit_with_constant_expressions.sql | 2 ++ 3 files changed, 25 insertions(+), 2 deletions(-) diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index aedf860f5be4..44e0d2a33037 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -5624,17 +5624,35 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi function_name, scope.scope_node->formatASTForErrorMessage()); + bool argument_is_constant = false; const auto * constant_node = function_argument->as(); if (constant_node) { argument_column.column = constant_node->getResultType()->createColumnConst(1, constant_node->getValue()); argument_column.type = constant_node->getResultType(); + argument_is_constant = true; } - else + else if(const auto * get_scalar_function_node = function_argument->as(); + get_scalar_function_node && get_scalar_function_node->getFunctionName() == "__getScalar") { - all_arguments_constants = false; + /// Allow constant folding through getScalar + const auto * get_scalar_const_arg = get_scalar_function_node->getArguments().getNodes().at(0)->as(); + if (get_scalar_const_arg && scope.context->hasQueryContext()) + { + auto query_context = scope.context->getQueryContext(); + auto scalar_string = toString(get_scalar_const_arg->getValue()); + if (query_context->hasScalar(scalar_string)) + { + auto scalar = query_context->getScalar(scalar_string); + argument_column.column = ColumnConst::create(scalar.getByPosition(0).column, 1); + argument_column.type = get_scalar_function_node->getResultType(); + argument_is_constant = true; + } + } } + all_arguments_constants &= argument_is_constant; + argument_types.push_back(argument_column.type); argument_columns.emplace_back(std::move(argument_column)); } diff --git a/tests/queries/0_stateless/00834_limit_with_constant_expressions.reference b/tests/queries/0_stateless/00834_limit_with_constant_expressions.reference index 593bf010efd1..5d7483702424 100644 --- a/tests/queries/0_stateless/00834_limit_with_constant_expressions.reference +++ b/tests/queries/0_stateless/00834_limit_with_constant_expressions.reference @@ -19,3 +19,6 @@ 2 3 4 +0 +1 +2 diff --git a/tests/queries/0_stateless/00834_limit_with_constant_expressions.sql b/tests/queries/0_stateless/00834_limit_with_constant_expressions.sql index 544866341309..47b403a37f98 100644 --- a/tests/queries/0_stateless/00834_limit_with_constant_expressions.sql +++ b/tests/queries/0_stateless/00834_limit_with_constant_expressions.sql @@ -24,3 +24,5 @@ SELECT * FROM numbers(10) LIMIT LENGTH('NNN') + COS(0), toDate('0000-00-02'); -- SELECT * FROM numbers(10) LIMIT a + 5 - a; -- { serverError 47 } SELECT * FROM numbers(10) LIMIT a + b; -- { serverError 47 } SELECT * FROM numbers(10) LIMIT 'Hello'; -- { serverError 440 } + +SELECT number from numbers(10) order by number limit (select sum(number), count() from numbers(3)).1; From e8f616e80bce36463db1de8504e35e286953b0b7 Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Thu, 11 Apr 2024 15:55:25 +0000 Subject: [PATCH 66/90] CI: fix for sync check status in mq --- tests/ci/sync_pr.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/sync_pr.py b/tests/ci/sync_pr.py index c58cc20d7af4..acff7ba541bb 100644 --- a/tests/ci/sync_pr.py +++ b/tests/ci/sync_pr.py @@ -53,7 +53,7 @@ def merge_sync_pr(gh, sync_pr): def set_sync_status(gh, pr_info, sync_pr): - if not sync_pr or sync_pr.mergeable: + if 
not sync_pr or not sync_pr.mergeable: post_commit_status( get_commit(gh, pr_info.sha), FAILURE, "", "Sync PR failure", "A Sync" ) From 523ee302cbef13f2ed3a290457f79be6fe1527a6 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 11 Apr 2024 16:37:54 +0000 Subject: [PATCH 67/90] more profile events --- src/Common/ProfileEvents.cpp | 2 ++ src/Disks/IO/ReadBufferFromAzureBlobStorage.cpp | 4 ++++ .../AzureBlobStorage/AzureObjectStorage.cpp | 13 +++++++++++-- 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index 23eed53509ee..c00d10175869 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -439,12 +439,14 @@ The server successfully detected this situation and will download merged part fr M(AzureCopyObject, "Number of Azure blob storage API CopyObject calls") \ M(AzureDeleteObjects, "Number of Azure blob storage API DeleteObject(s) calls.") \ M(AzureListObjects, "Number of Azure blob storage API ListObjects calls.") \ + M(AzureGetProperties, "Number of Azure blob storage API GetProperties calls.") \ \ M(DiskAzureGetObject, "Number of Disk Azure API GetObject calls.") \ M(DiskAzureUploadPart, "Number of Disk Azure blob storage API UploadPart calls") \ M(DiskAzureCopyObject, "Number of Disk Azure blob storage API CopyObject calls") \ M(DiskAzureListObjects, "Number of Disk Azure blob storage API ListObjects calls.") \ M(DiskAzureDeleteObjects, "Number of Azure blob storage API DeleteObject(s) calls.") \ + M(DiskAzureGetProperties, "Number of Disk Azure blob storage API GetProperties calls.") \ \ M(ReadBufferFromAzureMicroseconds, "Time spent on reading from Azure.") \ M(ReadBufferFromAzureInitMicroseconds, "Time spent initializing connection to Azure.") \ diff --git a/src/Disks/IO/ReadBufferFromAzureBlobStorage.cpp b/src/Disks/IO/ReadBufferFromAzureBlobStorage.cpp index c79c95e974ec..48b4ed23af05 100644 --- a/src/Disks/IO/ReadBufferFromAzureBlobStorage.cpp +++ b/src/Disks/IO/ReadBufferFromAzureBlobStorage.cpp @@ -278,6 +278,10 @@ size_t ReadBufferFromAzureBlobStorage::readBigAt(char * to, size_t n, size_t ran try { + ProfileEvents::increment(ProfileEvents::AzureGetObject); + if (read_settings.for_object_storage) + ProfileEvents::increment(ProfileEvents::DiskAzureGetObject); + Azure::Storage::Blobs::DownloadBlobOptions download_options; download_options.Range = {static_cast(range_begin), n}; auto download_response = blob_client->Download(download_options); diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp index fb3a35301c0b..ff4b481eefd8 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp @@ -28,19 +28,21 @@ namespace ProfileEvents extern const Event DiskAzureListObjects; extern const Event AzureDeleteObjects; extern const Event DiskAzureDeleteObjects; + extern const Event AzureGetProperties; + extern const Event DiskAzureGetProperties; + extern const Event AzureCopyObject; + extern const Event DiskAzureCopyObject; } namespace DB { - namespace ErrorCodes { extern const int AZURE_BLOB_STORAGE_ERROR; extern const int UNSUPPORTED_METHOD; } - namespace { @@ -346,9 +348,13 @@ void AzureObjectStorage::removeObjectsIfExist(const StoredObjects & objects) ObjectMetadata AzureObjectStorage::getObjectMetadata(const std::string & path) const { + ProfileEvents::increment(ProfileEvents::AzureGetProperties); + 
ProfileEvents::increment(ProfileEvents::DiskAzureGetProperties); + auto client_ptr = client.get(); auto blob_client = client_ptr->GetBlobClient(path); auto properties = blob_client.GetProperties().Value; + ObjectMetadata result; result.size_bytes = properties.BlobSize; if (!properties.Metadata.empty()) @@ -379,6 +385,9 @@ void AzureObjectStorage::copyObject( /// NOLINT copy_options.Metadata[key] = value; } + ProfileEvents::increment(ProfileEvents::AzureCopyObject); + ProfileEvents::increment(ProfileEvents::DiskAzureCopyObject); + dest_blob_client.CopyFromUri(source_blob_client.GetUrl(), copy_options); } From cbfc2b96f92d52680554efaed563226d44c82d67 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 11 Apr 2024 19:28:10 +0200 Subject: [PATCH 68/90] fix polygon something --- src/Common/threadPoolCallbackRunner.h | 6 +++--- src/Dictionaries/PolygonDictionaryUtils.h | 13 +++++++------ 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/src/Common/threadPoolCallbackRunner.h b/src/Common/threadPoolCallbackRunner.h index ef22f9038d80..5beec6608019 100644 --- a/src/Common/threadPoolCallbackRunner.h +++ b/src/Common/threadPoolCallbackRunner.h @@ -73,10 +73,10 @@ std::future scheduleFromThreadPoolUnsafe(T && task, ThreadPool & pool, c /// NOTE It's still not completely safe. /// When creating a runner on stack, you MUST make sure that it's created (and destroyed) before local objects captured by task lambda. -template > +template > class ThreadPoolCallbackRunnerLocal { - ThreadPool & pool; + PoolT & pool; std::string thread_name; enum TaskState @@ -106,7 +106,7 @@ class ThreadPoolCallbackRunnerLocal } public: - ThreadPoolCallbackRunnerLocal(ThreadPool & pool_, const std::string & thread_name_) + ThreadPoolCallbackRunnerLocal(PoolT & pool_, const std::string & thread_name_) : pool(pool_) , thread_name(thread_name_) { diff --git a/src/Dictionaries/PolygonDictionaryUtils.h b/src/Dictionaries/PolygonDictionaryUtils.h index 0fd1fead456b..9fba467a3630 100644 --- a/src/Dictionaries/PolygonDictionaryUtils.h +++ b/src/Dictionaries/PolygonDictionaryUtils.h @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -250,10 +251,11 @@ class GridRoot : public ICell auto y_shift = (current_max_y - current_min_y) / DividedCell::kSplit; std::vector>> children; children.resize(DividedCell::kSplit * DividedCell::kSplit); - std::vector threads{}; + + ThreadPoolCallbackRunnerLocal runner(GlobalThreadPool::instance(), "PolygonDict"); for (size_t i = 0; i < DividedCell::kSplit; current_min_x += x_shift, ++i) { - auto handle_row = [this, &children, &y_shift, &x_shift, &possible_ids, &depth, i](Coord x, Coord y) + auto handle_row = [this, &children, &y_shift, &x_shift, &possible_ids, &depth, i, x = current_min_x, y = current_min_y]() mutable { for (size_t j = 0; j < DividedCell::kSplit; y += y_shift, ++j) { @@ -261,12 +263,11 @@ class GridRoot : public ICell } }; if (depth <= kMultiProcessingDepth) - threads.emplace_back(handle_row, current_min_x, current_min_y); + runner(std::move(handle_row)); else - handle_row(current_min_x, current_min_y); + handle_row(); } - for (auto & thread : threads) - thread.join(); + runner.waitForAllToFinishAndRethrowFirstError(); return std::make_unique>(std::move(children)); } From 61d4d9a46edadaf88faaa0bb5535f54fc8af51d5 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 11 Apr 2024 19:39:44 +0200 Subject: [PATCH 69/90] Update QueryAnalysisPass.cpp --- src/Analyzer/Passes/QueryAnalysisPass.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) 
diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index 44e0d2a33037..b8fa9277d5c3 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -5632,7 +5632,7 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi argument_column.type = constant_node->getResultType(); argument_is_constant = true; } - else if(const auto * get_scalar_function_node = function_argument->as(); + else if (const auto * get_scalar_function_node = function_argument->as(); get_scalar_function_node && get_scalar_function_node->getFunctionName() == "__getScalar") { /// Allow constant folding through getScalar From b90eb1962f78019322dcf9f59f7a29a916d24b2b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Thu, 11 Apr 2024 19:56:30 +0200 Subject: [PATCH 70/90] Remove mentions of clean_deleted_rows from the documentation --- .../mergetree-family/replacingmergetree.md | 4 ++-- docs/en/operations/settings/merge-tree-settings.md | 10 ---------- .../mergetree-family/replacingmergetree.md | 3 +-- 3 files changed, 3 insertions(+), 14 deletions(-) diff --git a/docs/en/engines/table-engines/mergetree-family/replacingmergetree.md b/docs/en/engines/table-engines/mergetree-family/replacingmergetree.md index 6de818c130f0..a6258bcd5817 100644 --- a/docs/en/engines/table-engines/mergetree-family/replacingmergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/replacingmergetree.md @@ -25,7 +25,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] [ORDER BY expr] [PRIMARY KEY expr] [SAMPLE BY expr] -[SETTINGS name=value, clean_deleted_rows=value, ...] +[SETTINGS name=value, ...] ``` For a description of request parameters, see [statement description](../../../sql-reference/statements/create/table.md). @@ -97,7 +97,7 @@ SELECT * FROM mySecondReplacingMT FINAL; :::note `is_deleted` can only be enabled when `ver` is used. -The row is deleted when `OPTIMIZE ... FINAL CLEANUP` or `OPTIMIZE ... FINAL` is used, or if the engine setting `clean_deleted_rows` has been set to `Always`. +The row is deleted when `OPTIMIZE ... FINAL CLEANUP` or `OPTIMIZE ... FINAL` is used. No matter the operation on the data, the version must be increased. If two inserted rows have the same version number, the last inserted row is the one kept. diff --git a/docs/en/operations/settings/merge-tree-settings.md b/docs/en/operations/settings/merge-tree-settings.md index 9327d52227f9..76250b804765 100644 --- a/docs/en/operations/settings/merge-tree-settings.md +++ b/docs/en/operations/settings/merge-tree-settings.md @@ -852,16 +852,6 @@ If the file name for column is too long (more than `max_file_name_length` bytes) The maximal length of the file name to keep it as is without hashing. Takes effect only if setting `replace_long_file_name_to_hash` is enabled. The value of this setting does not include the length of file extension. So, it is recommended to set it below the maximum filename length (usually 255 bytes) with some gap to avoid filesystem errors. Default value: 127. -## clean_deleted_rows - -Enable/disable automatic deletion of rows flagged as `is_deleted` when perform `OPTIMIZE ... FINAL` on a table using the ReplacingMergeTree engine. When disabled, the `CLEANUP` keyword has to be added to the `OPTIMIZE ... FINAL` to have the same behaviour. - -Possible values: - -- `Always` or `Never`. 
- -Default value: `Never` - ## allow_experimental_block_number_column Persists virtual column `_block_number` on merges. diff --git a/docs/ru/engines/table-engines/mergetree-family/replacingmergetree.md b/docs/ru/engines/table-engines/mergetree-family/replacingmergetree.md index e8089b2c42b7..a6493f20b6ee 100644 --- a/docs/ru/engines/table-engines/mergetree-family/replacingmergetree.md +++ b/docs/ru/engines/table-engines/mergetree-family/replacingmergetree.md @@ -99,10 +99,9 @@ SELECT * FROM mySecondReplacingMT FINAL; - при использовании инструкции `OPTIMIZE ... FINAL CLEANUP` - при использовании инструкции `OPTIMIZE ... FINAL` - - параметр движка `clean_deleted_rows` установлен в значение `Always` (по умолчанию - `Never`) - есть новые версии строки -Не рекомендуется выполнять `FINAL CLEANUP` или использовать параметр движка `clean_deleted_rows` со значением `Always`, это может привести к неожиданным результатам, например удаленные строки могут вновь появиться. +Не рекомендуется выполнять `FINAL CLEANUP`, это может привести к неожиданным результатам, например удаленные строки могут вновь появиться. Вне зависимости от производимых изменений над данными, версия должна увеличиваться. Если у двух строк одна и та же версия, то остается только последняя вставленная строка. ::: From 20e8b64cee607211078800a2306b82424cac6082 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 11 Apr 2024 19:47:42 +0200 Subject: [PATCH 71/90] fix --- src/Client/Connection.cpp | 2 +- src/Client/Connection.h | 2 +- src/Client/IServerConnection.h | 2 +- src/Client/LocalConnection.h | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Client/Connection.cpp b/src/Client/Connection.cpp index 4e2456134793..483201509c4d 100644 --- a/src/Client/Connection.cpp +++ b/src/Client/Connection.cpp @@ -475,7 +475,7 @@ const String & Connection::getDefaultDatabase() const return default_database; } -const String & Connection::getDescription(bool with_extra) const +const String & Connection::getDescription(bool with_extra) const /// NOLINT { if (with_extra) return full_description; diff --git a/src/Client/Connection.h b/src/Client/Connection.h index 71513ea919d6..9632eb9d948c 100644 --- a/src/Client/Connection.h +++ b/src/Client/Connection.h @@ -88,7 +88,7 @@ class Connection : public IServerConnection const String & getServerDisplayName(const ConnectionTimeouts & timeouts) override; /// For log and exception messages. 
- const String & getDescription(bool with_extra = false) const override; + const String & getDescription(bool with_extra = false) const override; /// NOLINT const String & getHost() const; UInt16 getPort() const; const String & getDefaultDatabase() const; diff --git a/src/Client/IServerConnection.h b/src/Client/IServerConnection.h index 724afa95d7a6..e7376491f8ce 100644 --- a/src/Client/IServerConnection.h +++ b/src/Client/IServerConnection.h @@ -88,7 +88,7 @@ class IServerConnection : boost::noncopyable virtual const String & getServerTimezone(const ConnectionTimeouts & timeouts) = 0; virtual const String & getServerDisplayName(const ConnectionTimeouts & timeouts) = 0; - virtual const String & getDescription(bool with_extra = false) const = 0; + virtual const String & getDescription(bool with_extra = false) const = 0; /// NOLINT virtual std::vector> getPasswordComplexityRules() const = 0; diff --git a/src/Client/LocalConnection.h b/src/Client/LocalConnection.h index 6218fbe341f4..ac5a05c3e92d 100644 --- a/src/Client/LocalConnection.h +++ b/src/Client/LocalConnection.h @@ -90,7 +90,7 @@ class LocalConnection : public IServerConnection, WithContext const String & getServerTimezone(const ConnectionTimeouts & timeouts) override; const String & getServerDisplayName(const ConnectionTimeouts & timeouts) override; - const String & getDescription([[maybe_unused]] bool with_extra = false) const override { return description; } + const String & getDescription([[maybe_unused]] bool with_extra = false) const override { return description; } /// NOLINT std::vector> getPasswordComplexityRules() const override { return {}; } From 83d6f2ef99efb9857bbff5163f4f76b4c586b7e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Thu, 11 Apr 2024 21:51:05 +0200 Subject: [PATCH 72/90] Correct the documentation about duplicates with argmin and argmax --- docs/en/sql-reference/aggregate-functions/reference/argmax.md | 2 +- docs/en/sql-reference/aggregate-functions/reference/argmin.md | 2 +- docs/ru/sql-reference/aggregate-functions/reference/argmax.md | 2 +- docs/ru/sql-reference/aggregate-functions/reference/argmin.md | 2 +- docs/zh/sql-reference/aggregate-functions/reference/argmax.md | 2 +- docs/zh/sql-reference/aggregate-functions/reference/argmin.md | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/en/sql-reference/aggregate-functions/reference/argmax.md b/docs/en/sql-reference/aggregate-functions/reference/argmax.md index 8f10318838bb..2274dd4a5dcf 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/argmax.md +++ b/docs/en/sql-reference/aggregate-functions/reference/argmax.md @@ -5,7 +5,7 @@ sidebar_position: 106 # argMax -Calculates the `arg` value for a maximum `val` value. If there are several different values of `arg` for maximum values of `val`, returns the first of these values encountered. +Calculates the `arg` value for a maximum `val` value. If there are multiple rows with equal `val` being the maximum, which of the associated `arg` is returned is not deterministic. Both parts the `arg` and the `max` behave as [aggregate functions](/docs/en/sql-reference/aggregate-functions/index.md), they both [skip `Null`](/docs/en/sql-reference/aggregate-functions/index.md#null-processing) during processing and return not `Null` values if not `Null` values are available. 
**Syntax** diff --git a/docs/en/sql-reference/aggregate-functions/reference/argmin.md b/docs/en/sql-reference/aggregate-functions/reference/argmin.md index 616fa3d33b83..297744fb1dbb 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/argmin.md +++ b/docs/en/sql-reference/aggregate-functions/reference/argmin.md @@ -5,7 +5,7 @@ sidebar_position: 105 # argMin -Calculates the `arg` value for a minimum `val` value. If there are several different values of `arg` for minimum values of `val`, returns the first of these values encountered. +Calculates the `arg` value for a minimum `val` value. If there are multiple rows with equal `val` being the minimum, which of the associated `arg` is returned is not deterministic. Both parts the `arg` and the `min` behave as [aggregate functions](/docs/en/sql-reference/aggregate-functions/index.md), they both [skip `Null`](/docs/en/sql-reference/aggregate-functions/index.md#null-processing) during processing and return not `Null` values if not `Null` values are available. **Syntax** diff --git a/docs/ru/sql-reference/aggregate-functions/reference/argmax.md b/docs/ru/sql-reference/aggregate-functions/reference/argmax.md index f459683c7c92..0489606b3c15 100644 --- a/docs/ru/sql-reference/aggregate-functions/reference/argmax.md +++ b/docs/ru/sql-reference/aggregate-functions/reference/argmax.md @@ -5,7 +5,7 @@ sidebar_position: 106 # argMax {#agg-function-argmax} -Вычисляет значение `arg` при максимальном значении `val`. Если есть несколько разных значений `arg` для максимальных значений `val`, возвращает первое попавшееся из таких значений. +Вычисляет значение `arg` при максимальном значении `val`. **Синтаксис** diff --git a/docs/ru/sql-reference/aggregate-functions/reference/argmin.md b/docs/ru/sql-reference/aggregate-functions/reference/argmin.md index 4d8bc47c42d9..d74661cf43c8 100644 --- a/docs/ru/sql-reference/aggregate-functions/reference/argmin.md +++ b/docs/ru/sql-reference/aggregate-functions/reference/argmin.md @@ -5,7 +5,7 @@ sidebar_position: 105 # argMin {#agg-function-argmin} -Вычисляет значение `arg` при минимальном значении `val`. Если есть несколько разных значений `arg` для минимальных значений `val`, возвращает первое попавшееся из таких значений. +Вычисляет значение `arg` при минимальном значении `val`.
**Синтаксис** diff --git a/docs/zh/sql-reference/aggregate-functions/reference/argmax.md b/docs/zh/sql-reference/aggregate-functions/reference/argmax.md index 6ae7155ca245..d1d7930867f6 100644 --- a/docs/zh/sql-reference/aggregate-functions/reference/argmax.md +++ b/docs/zh/sql-reference/aggregate-functions/reference/argmax.md @@ -5,7 +5,7 @@ sidebar_position: 106 # argMax {#agg-function-argmax} -计算 `val` 最大值对应的 `arg` 值。 如果 `val` 最大值存在几个不同的 `arg` 值,输出遇到的第一个值。 +计算 `val` 最大值对应的 `arg` 值。 **语法** diff --git a/docs/zh/sql-reference/aggregate-functions/reference/argmin.md b/docs/zh/sql-reference/aggregate-functions/reference/argmin.md index cb21a13021bb..fb66075f2b02 100644 --- a/docs/zh/sql-reference/aggregate-functions/reference/argmin.md +++ b/docs/zh/sql-reference/aggregate-functions/reference/argmin.md @@ -7,7 +7,7 @@ sidebar_position: 105 语法: `argMin(arg, val)` 或 `argMin(tuple(arg, val))` -计算 `val` 最小值对应的 `arg` 值。 如果 `val` 最小值存在几个不同的 `arg` 值,输出遇到的第一个(`arg`)值。 +计算 `val` 最小值对应的 `arg` 值。 **示例:** From 8f4f3aaf14d0838cdcb566d2c88f003981987264 Mon Sep 17 00:00:00 2001 From: Joshua Hildred Date: Thu, 11 Apr 2024 12:56:50 -0700 Subject: [PATCH 73/90] Small code cleanup --- src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp b/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp index 63f8c4786cec..eff52f8e6fd0 100644 --- a/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp +++ b/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp @@ -603,14 +603,12 @@ class LogicalExpressionOptimizerVisitor : public InDepthQueryTreeVisitorWithCont else return; - bool need_invert = (constant_value == 0); - const FunctionNode * child_function = is_lhs_const ? 
rhs->as() : lhs->as(); - if (!child_function || !isBooleanFunction(child_function->getFunctionName())) return; - if (need_invert) + // if we have something like `function = 0`, we need to add a `NOT` when dropping the `= 0` + if (constant_value == 0) { auto not_resolver = FunctionFactory::instance().get("not", getContext()); const auto not_node = std::make_shared("not"); From 3ce02239c6392822250894f9cca1bf7c434a9242 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Thu, 11 Apr 2024 23:12:11 +0200 Subject: [PATCH 74/90] Revert "More optimal loading of marks" --- src/Storages/MergeTree/MarkRange.cpp | 5 -- src/Storages/MergeTree/MarkRange.h | 1 - .../MergeTree/MergeTreeIndexReader.cpp | 3 - .../MergeTree/MergeTreeMarksLoader.cpp | 8 --- src/Storages/MergeTree/MergeTreeMarksLoader.h | 1 - .../MergeTree/MergeTreeReaderCompact.cpp | 1 - .../MergeTree/MergeTreeReaderStream.cpp | 72 +++++-------------- .../MergeTree/MergeTreeReaderStream.h | 38 +++------- .../MergeTree/MergeTreeReaderWide.cpp | 32 +++------ src/Storages/MergeTree/MergeTreeReaderWide.h | 1 - .../MergeTree/MergeTreeSequentialSource.cpp | 9 ++- .../test_merge_tree_load_marks/__init__.py | 0 .../configs/config.xml | 12 ---- .../test_merge_tree_load_marks/test.py | 62 ---------------- .../02532_send_logs_level_test.reference | 1 - .../0_stateless/02532_send_logs_level_test.sh | 2 +- 16 files changed, 41 insertions(+), 207 deletions(-) delete mode 100644 tests/integration/test_merge_tree_load_marks/__init__.py delete mode 100644 tests/integration/test_merge_tree_load_marks/configs/config.xml delete mode 100644 tests/integration/test_merge_tree_load_marks/test.py diff --git a/src/Storages/MergeTree/MarkRange.cpp b/src/Storages/MergeTree/MarkRange.cpp index c6e98b4e5a15..bd8546f04cc7 100644 --- a/src/Storages/MergeTree/MarkRange.cpp +++ b/src/Storages/MergeTree/MarkRange.cpp @@ -81,11 +81,6 @@ size_t MarkRanges::getNumberOfMarks() const return result; } -bool MarkRanges::isOneRangeForWholePart(size_t num_marks_in_part) const -{ - return size() == 1 && front().begin == 0 && front().end == num_marks_in_part; -} - void MarkRanges::serialize(WriteBuffer & out) const { writeBinaryLittleEndian(this->size(), out); diff --git a/src/Storages/MergeTree/MarkRange.h b/src/Storages/MergeTree/MarkRange.h index f36d5d898254..1d9d0a1e27e8 100644 --- a/src/Storages/MergeTree/MarkRange.h +++ b/src/Storages/MergeTree/MarkRange.h @@ -36,7 +36,6 @@ struct MarkRanges : public std::deque using std::deque::deque; /// NOLINT(modernize-type-traits) size_t getNumberOfMarks() const; - bool isOneRangeForWholePart(size_t num_marks_in_part) const; void serialize(WriteBuffer & out) const; String describe() const; diff --git a/src/Storages/MergeTree/MergeTreeIndexReader.cpp b/src/Storages/MergeTree/MergeTreeIndexReader.cpp index e7ae1fc5c13d..6012994b46d7 100644 --- a/src/Storages/MergeTree/MergeTreeIndexReader.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexReader.cpp @@ -31,8 +31,6 @@ std::unique_ptr makeIndexReader( load_marks_threadpool, /*num_columns_in_mark=*/ 1); - marks_loader->startAsyncLoad(); - return std::make_unique( part->getDataPartStoragePtr(), index->getFileName(), extension, marks_count, @@ -67,7 +65,6 @@ MergeTreeIndexReader::MergeTreeIndexReader( mark_cache, uncompressed_cache, std::move(settings)); - version = index_format.version; stream->adjustRightMark(getLastMark(all_mark_ranges_)); diff --git a/src/Storages/MergeTree/MergeTreeMarksLoader.cpp b/src/Storages/MergeTree/MergeTreeMarksLoader.cpp index 1e9a320fa953..01ceb845951d 100644 --- 
a/src/Storages/MergeTree/MergeTreeMarksLoader.cpp +++ b/src/Storages/MergeTree/MergeTreeMarksLoader.cpp @@ -64,10 +64,6 @@ MergeTreeMarksLoader::MergeTreeMarksLoader( , read_settings(read_settings_) , num_columns_in_mark(num_columns_in_mark_) , load_marks_threadpool(load_marks_threadpool_) -{ -} - -void MergeTreeMarksLoader::startAsyncLoad() { if (load_marks_threadpool) future = loadMarksAsync(); @@ -106,8 +102,6 @@ MergeTreeMarksGetterPtr MergeTreeMarksLoader::loadMarks() MarkCache::MappedPtr MergeTreeMarksLoader::loadMarksImpl() { - LOG_TEST(getLogger("MergeTreeMarksLoader"), "Loading marks from path {}", mrk_path); - /// Memory for marks must not be accounted as memory usage for query, because they are stored in shared cache. MemoryTrackerBlockerInThread temporarily_disable_memory_tracker; @@ -224,9 +218,7 @@ MarkCache::MappedPtr MergeTreeMarksLoader::loadMarksSync() } } else - { loaded_marks = loadMarksImpl(); - } if (!loaded_marks) { diff --git a/src/Storages/MergeTree/MergeTreeMarksLoader.h b/src/Storages/MergeTree/MergeTreeMarksLoader.h index 2aa4474e1c5d..73dd462f2fa6 100644 --- a/src/Storages/MergeTree/MergeTreeMarksLoader.h +++ b/src/Storages/MergeTree/MergeTreeMarksLoader.h @@ -50,7 +50,6 @@ class MergeTreeMarksLoader ~MergeTreeMarksLoader(); - void startAsyncLoad(); MergeTreeMarksGetterPtr loadMarks(); size_t getNumColumns() const { return num_columns_in_mark; } diff --git a/src/Storages/MergeTree/MergeTreeReaderCompact.cpp b/src/Storages/MergeTree/MergeTreeReaderCompact.cpp index 8810491b62ec..a22bff6b8d22 100644 --- a/src/Storages/MergeTree/MergeTreeReaderCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderCompact.cpp @@ -48,7 +48,6 @@ MergeTreeReaderCompact::MergeTreeReaderCompact( , profile_callback(profile_callback_) , clock_type(clock_type_) { - marks_loader->startAsyncLoad(); } void MergeTreeReaderCompact::fillColumnPositions() diff --git a/src/Storages/MergeTree/MergeTreeReaderStream.cpp b/src/Storages/MergeTree/MergeTreeReaderStream.cpp index 15ef02440cb3..40a16176c69f 100644 --- a/src/Storages/MergeTree/MergeTreeReaderStream.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderStream.cpp @@ -13,7 +13,6 @@ namespace ErrorCodes { extern const int ARGUMENT_OUT_OF_BOUND; extern const int CANNOT_READ_ALL_DATA; - extern const int LOGICAL_ERROR; } MergeTreeReaderStream::MergeTreeReaderStream( @@ -42,17 +41,14 @@ MergeTreeReaderStream::MergeTreeReaderStream( { } -void MergeTreeReaderStream::loadMarks() -{ - if (!marks_getter) - marks_getter = marks_loader->loadMarks(); -} - void MergeTreeReaderStream::init() { if (initialized) return; + initialized = true; + marks_getter = marks_loader->loadMarks(); + /// Compute the size of the buffer. 
auto [max_mark_range_bytes, sum_mark_range_bytes] = estimateMarkRangeBytes(all_mark_ranges); @@ -114,15 +110,11 @@ void MergeTreeReaderStream::init() data_buffer = non_cached_buffer.get(); compressed_data_buffer = non_cached_buffer.get(); } - - initialized = true; } void MergeTreeReaderStream::seekToMarkAndColumn(size_t row_index, size_t column_position) { init(); - loadMarks(); - const auto & mark = marks_getter->getMark(row_index, column_position); try @@ -201,7 +193,7 @@ CompressedReadBufferBase * MergeTreeReaderStream::getCompressedDataBuffer() return compressed_data_buffer; } -size_t MergeTreeReaderStreamSingleColumn::getRightOffset(size_t right_mark) +size_t MergeTreeReaderStreamSingleColumn::getRightOffset(size_t right_mark) const { /// NOTE: if we are reading the whole file, then right_mark == marks_count /// and we will use max_read_buffer_size for buffer size, thus avoiding the need to load marks. @@ -210,8 +202,7 @@ size_t MergeTreeReaderStreamSingleColumn::getRightOffset(size_t right_mark) if (marks_count == 0) return 0; - chassert(right_mark <= marks_count); - loadMarks(); + assert(right_mark <= marks_count); if (right_mark == 0) return marks_getter->getMark(right_mark, 0).offset_in_compressed_file; @@ -290,9 +281,9 @@ size_t MergeTreeReaderStreamSingleColumn::getRightOffset(size_t right_mark) return file_size; } -std::pair MergeTreeReaderStreamSingleColumn::estimateMarkRangeBytes(const MarkRanges & mark_ranges) +std::pair MergeTreeReaderStreamSingleColumn::estimateMarkRangeBytes(const MarkRanges & mark_ranges) const { - loadMarks(); + assert(marks_getter != nullptr); size_t max_range_bytes = 0; size_t sum_range_bytes = 0; @@ -311,34 +302,7 @@ std::pair MergeTreeReaderStreamSingleColumn::estimateMarkRangeBy return {max_range_bytes, sum_range_bytes}; } -size_t MergeTreeReaderStreamSingleColumnWholePart::getRightOffset(size_t right_mark) -{ - if (right_mark != marks_count) - { - throw Exception(ErrorCodes::LOGICAL_ERROR, - "Expected one right mark: {}, got: {}", - marks_count, right_mark); - } - return file_size; -} - -std::pair MergeTreeReaderStreamSingleColumnWholePart::estimateMarkRangeBytes(const MarkRanges & mark_ranges) -{ - if (!mark_ranges.isOneRangeForWholePart(marks_count)) - { - throw Exception(ErrorCodes::LOGICAL_ERROR, - "Expected one mark range that covers the whole part, got: {}", - mark_ranges.describe()); - } - return {file_size, file_size}; -} - -void MergeTreeReaderStreamSingleColumnWholePart::seekToMark(size_t) -{ - throw Exception(ErrorCodes::LOGICAL_ERROR, "MergeTreeReaderStreamSingleColumnWholePart cannot seek to marks"); -} - -size_t MergeTreeReaderStreamMultipleColumns::getRightOffsetOneColumn(size_t right_mark_non_included, size_t column_position) +size_t MergeTreeReaderStreamMultipleColumns::getRightOffsetOneColumn(size_t right_mark_non_included, size_t column_position) const { /// NOTE: if we are reading the whole file, then right_mark == marks_count /// and we will use max_read_buffer_size for buffer size, thus avoiding the need to load marks. 
@@ -347,8 +311,7 @@ size_t MergeTreeReaderStreamMultipleColumns::getRightOffsetOneColumn(size_t righ if (marks_count == 0) return 0; - chassert(right_mark_non_included <= marks_count); - loadMarks(); + assert(right_mark_non_included <= marks_count); if (right_mark_non_included == 0) return marks_getter->getMark(right_mark_non_included, column_position).offset_in_compressed_file; @@ -384,9 +347,9 @@ size_t MergeTreeReaderStreamMultipleColumns::getRightOffsetOneColumn(size_t righ } std::pair -MergeTreeReaderStreamMultipleColumns::estimateMarkRangeBytesOneColumn(const MarkRanges & mark_ranges, size_t column_position) +MergeTreeReaderStreamMultipleColumns::estimateMarkRangeBytesOneColumn(const MarkRanges & mark_ranges, size_t column_position) const { - loadMarks(); + assert(marks_getter != nullptr); /// As a maximal range we return the maximal size of a whole stripe. size_t max_range_bytes = 0; @@ -423,9 +386,8 @@ MergeTreeReaderStreamMultipleColumns::estimateMarkRangeBytesOneColumn(const Mark return {max_range_bytes, sum_range_bytes}; } -MarkInCompressedFile MergeTreeReaderStreamMultipleColumns::getStartOfNextStripeMark(size_t row_index, size_t column_position) +MarkInCompressedFile MergeTreeReaderStreamMultipleColumns::getStartOfNextStripeMark(size_t row_index, size_t column_position) const { - loadMarks(); const auto & current_mark = marks_getter->getMark(row_index, column_position); if (marks_getter->getNumColumns() == 1) @@ -472,27 +434,27 @@ MarkInCompressedFile MergeTreeReaderStreamMultipleColumns::getStartOfNextStripeM return marks_getter->getMark(mark_index + 1, column_position + 1); } -size_t MergeTreeReaderStreamOneOfMultipleColumns::getRightOffset(size_t right_mark_non_included) +size_t MergeTreeReaderStreamOneOfMultipleColumns::getRightOffset(size_t right_mark_non_included) const { return getRightOffsetOneColumn(right_mark_non_included, column_position); } -std::pair MergeTreeReaderStreamOneOfMultipleColumns::estimateMarkRangeBytes(const MarkRanges & mark_ranges) +std::pair MergeTreeReaderStreamOneOfMultipleColumns::estimateMarkRangeBytes(const MarkRanges & mark_ranges) const { return estimateMarkRangeBytesOneColumn(mark_ranges, column_position); } -size_t MergeTreeReaderStreamAllOfMultipleColumns::getRightOffset(size_t right_mark_non_included) +size_t MergeTreeReaderStreamAllOfMultipleColumns::getRightOffset(size_t right_mark_non_included) const { return getRightOffsetOneColumn(right_mark_non_included, marks_loader->getNumColumns() - 1); } -std::pair MergeTreeReaderStreamAllOfMultipleColumns::estimateMarkRangeBytes(const MarkRanges & mark_ranges) +std::pair MergeTreeReaderStreamAllOfMultipleColumns::estimateMarkRangeBytes(const MarkRanges & mark_ranges) const { size_t max_range_bytes = 0; size_t sum_range_bytes = 0; - for (size_t i = 0; i < marks_loader->getNumColumns(); ++i) + for (size_t i = 0; i < marks_getter->getNumColumns(); ++i) { auto [current_max, current_sum] = estimateMarkRangeBytesOneColumn(mark_ranges, i); diff --git a/src/Storages/MergeTree/MergeTreeReaderStream.h b/src/Storages/MergeTree/MergeTreeReaderStream.h index 05341cd8acc4..f3ca6953ceb9 100644 --- a/src/Storages/MergeTree/MergeTreeReaderStream.h +++ b/src/Storages/MergeTree/MergeTreeReaderStream.h @@ -40,7 +40,6 @@ class MergeTreeReaderStream /// Seeks to exact mark in file. void seekToMarkAndColumn(size_t row_index, size_t column_position); - /// Seeks to the start of the file. 
void seekToStart(); /** @@ -54,11 +53,11 @@ class MergeTreeReaderStream private: /// Returns offset in file up to which it's needed to read file to read all rows up to @right_mark mark. - virtual size_t getRightOffset(size_t right_mark) = 0; + virtual size_t getRightOffset(size_t right_mark) const = 0; /// Returns estimated max amount of bytes to read among mark ranges (which is used as size for read buffer) /// and total amount of bytes to read in all mark ranges. - virtual std::pair estimateMarkRangeBytes(const MarkRanges & mark_ranges) = 0; + virtual std::pair estimateMarkRangeBytes(const MarkRanges & mark_ranges) const = 0; const ReadBufferFromFileBase::ProfileCallback profile_callback; const clockid_t clock_type; @@ -81,7 +80,6 @@ class MergeTreeReaderStream protected: void init(); - void loadMarks(); const MergeTreeReaderSettings settings; const size_t marks_count; @@ -102,25 +100,11 @@ class MergeTreeReaderStreamSingleColumn : public MergeTreeReaderStream { } - size_t getRightOffset(size_t right_mark_non_included) override; - std::pair estimateMarkRangeBytes(const MarkRanges & mark_ranges) override; + size_t getRightOffset(size_t right_mark_non_included) const override; + std::pair estimateMarkRangeBytes(const MarkRanges & mark_ranges) const override; void seekToMark(size_t row_index) override { seekToMarkAndColumn(row_index, 0); } }; -class MergeTreeReaderStreamSingleColumnWholePart : public MergeTreeReaderStream -{ -public: - template - explicit MergeTreeReaderStreamSingleColumnWholePart(Args &&... args) - : MergeTreeReaderStream{std::forward(args)...} - { - } - - size_t getRightOffset(size_t right_mark_non_included) override; - std::pair estimateMarkRangeBytes(const MarkRanges & mark_ranges) override; - void seekToMark(size_t row_index) override; -}; - /// Base class for reading from file that contains multiple columns. /// It is used to read from compact parts. /// See more details about data layout in MergeTreeDataPartCompact.h. 
@@ -134,9 +118,9 @@ class MergeTreeReaderStreamMultipleColumns : public MergeTreeReaderStream } protected: - size_t getRightOffsetOneColumn(size_t right_mark_non_included, size_t column_position); - std::pair estimateMarkRangeBytesOneColumn(const MarkRanges & mark_ranges, size_t column_position); - MarkInCompressedFile getStartOfNextStripeMark(size_t row_index, size_t column_position); + size_t getRightOffsetOneColumn(size_t right_mark_non_included, size_t column_position) const; + std::pair estimateMarkRangeBytesOneColumn(const MarkRanges & mark_ranges, size_t column_position) const; + MarkInCompressedFile getStartOfNextStripeMark(size_t row_index, size_t column_position) const; }; /// Class for reading a single column from file that contains multiple columns @@ -151,8 +135,8 @@ class MergeTreeReaderStreamOneOfMultipleColumns : public MergeTreeReaderStreamMu { } - size_t getRightOffset(size_t right_mark_non_included) override; - std::pair estimateMarkRangeBytes(const MarkRanges & mark_ranges) override; + size_t getRightOffset(size_t right_mark_non_included) const override; + std::pair estimateMarkRangeBytes(const MarkRanges & mark_ranges) const override; void seekToMark(size_t row_index) override { seekToMarkAndColumn(row_index, column_position); } private: @@ -170,8 +154,8 @@ class MergeTreeReaderStreamAllOfMultipleColumns : public MergeTreeReaderStreamMu { } - size_t getRightOffset(size_t right_mark_non_included) override; - std::pair estimateMarkRangeBytes(const MarkRanges & mark_ranges) override; + size_t getRightOffset(size_t right_mark_non_included) const override; + std::pair estimateMarkRangeBytes(const MarkRanges & mark_ranges) const override; void seekToMark(size_t row_index) override { seekToMarkAndColumn(row_index, 0); } }; diff --git a/src/Storages/MergeTree/MergeTreeReaderWide.cpp b/src/Storages/MergeTree/MergeTreeReaderWide.cpp index d398668d5c83..394a22835f1f 100644 --- a/src/Storages/MergeTree/MergeTreeReaderWide.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderWide.cpp @@ -43,7 +43,6 @@ MergeTreeReaderWide::MergeTreeReaderWide( mark_ranges_, settings_, avg_value_size_hints_) - , read_whole_part(all_mark_ranges.isOneRangeForWholePart(data_part_info_for_read->getMarksCount())) { try { @@ -228,13 +227,12 @@ void MergeTreeReaderWide::addStreams( auto context = data_part_info_for_read->getContext(); auto * load_marks_threadpool = settings.read_settings.load_marks_asynchronously ? 
&context->getLoadMarksThreadpool() : nullptr; - size_t num_marks_in_part = data_part_info_for_read->getMarksCount(); auto marks_loader = std::make_shared( data_part_info_for_read, mark_cache, data_part_info_for_read->getIndexGranularityInfo().getMarksFilePath(*stream_name), - num_marks_in_part, + data_part_info_for_read->getMarksCount(), data_part_info_for_read->getIndexGranularityInfo(), settings.save_marks_in_cache, settings.read_settings, @@ -245,24 +243,11 @@ void MergeTreeReaderWide::addStreams( auto stream_settings = settings; stream_settings.is_low_cardinality_dictionary = substream_path.size() > 1 && substream_path[substream_path.size() - 2].type == ISerialization::Substream::Type::DictionaryKeys; - auto create_stream = [&]() - { - return std::make_unique( - data_part_info_for_read->getDataPartStorage(), *stream_name, DATA_FILE_EXTENSION, - num_marks_in_part, all_mark_ranges, stream_settings, - uncompressed_cache, data_part_info_for_read->getFileSizeOrZero(*stream_name + DATA_FILE_EXTENSION), - std::move(marks_loader), profile_callback, clock_type); - }; - - if (read_whole_part) - { - streams.emplace(*stream_name, create_stream.operator()()); - } - else - { - marks_loader->startAsyncLoad(); - streams.emplace(*stream_name, create_stream.operator()()); - } + streams.emplace(*stream_name, std::make_unique( + data_part_info_for_read->getDataPartStorage(), *stream_name, DATA_FILE_EXTENSION, + data_part_info_for_read->getMarksCount(), all_mark_ranges, stream_settings, + uncompressed_cache, data_part_info_for_read->getFileSizeOrZero(*stream_name + DATA_FILE_EXTENSION), + std::move(marks_loader), profile_callback, clock_type)); }; serialization->enumerateStreams(callback); @@ -340,8 +325,7 @@ void MergeTreeReaderWide::prefetchForColumn( if (stream_name && !prefetched_streams.contains(*stream_name)) { - bool seek_to_mark = !continue_reading && !read_whole_part; - + bool seek_to_mark = !continue_reading; if (ReadBuffer * buf = getStream(false, substream_path, data_part_info_for_read->getChecksums(), streams, name_and_type, from_mark, seek_to_mark, current_task_last_mark, cache)) { buf->prefetch(priority); @@ -365,7 +349,7 @@ void MergeTreeReaderWide::readData( deserialize_settings.getter = [&](const ISerialization::SubstreamPath & substream_path) { - bool seek_to_mark = !was_prefetched && !continue_reading && !read_whole_part; + bool seek_to_mark = !was_prefetched && !continue_reading; return getStream( /* seek_to_start = */false, substream_path, diff --git a/src/Storages/MergeTree/MergeTreeReaderWide.h b/src/Storages/MergeTree/MergeTreeReaderWide.h index 7ffe565d2623..a9a5526dd65b 100644 --- a/src/Storages/MergeTree/MergeTreeReaderWide.h +++ b/src/Storages/MergeTree/MergeTreeReaderWide.h @@ -73,7 +73,6 @@ class MergeTreeReaderWide : public IMergeTreeReader std::unordered_map caches; std::unordered_set prefetched_streams; ssize_t prefetched_from_mark = -1; - bool read_whole_part = false; }; } diff --git a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp index 47661a3ff93f..c022cfe38617 100644 --- a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp +++ b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp @@ -184,12 +184,12 @@ MergeTreeSequentialSource::MergeTreeSequentialSource( storage_snapshot, *mark_ranges, /*virtual_fields=*/ {}, - /*uncompressed_cache=*/ {}, + /*uncompressed_cache=*/{}, mark_cache.get(), alter_conversions, reader_settings, - /*avg_value_size_hints=*/ {}, - /*profile_callback=*/ {}); + {}, + {}); } static 
void fillBlockNumberColumns( @@ -230,7 +230,6 @@ try const auto & header = getPort().getHeader(); /// Part level is useful for next step for merging non-merge tree table bool add_part_level = storage.merging_params.mode != MergeTreeData::MergingParams::Ordinary; - size_t num_marks_in_part = data_part->getMarksCount(); if (!isCancelled() && current_row < data_part->rows_count) { @@ -239,7 +238,7 @@ try const auto & sample = reader->getColumns(); Columns columns(sample.size()); - size_t rows_read = reader->readRows(current_mark, num_marks_in_part, continue_reading, rows_to_read, columns); + size_t rows_read = reader->readRows(current_mark, data_part->getMarksCount(), continue_reading, rows_to_read, columns); if (rows_read) { diff --git a/tests/integration/test_merge_tree_load_marks/__init__.py b/tests/integration/test_merge_tree_load_marks/__init__.py deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/tests/integration/test_merge_tree_load_marks/configs/config.xml b/tests/integration/test_merge_tree_load_marks/configs/config.xml deleted file mode 100644 index 1c9ee8d698f1..000000000000 --- a/tests/integration/test_merge_tree_load_marks/configs/config.xml +++ /dev/null @@ -1,12 +0,0 @@ - - - system -
text_log
- 7500 - 1048576 - 8192 - 524288 - false - test - - diff --git a/tests/integration/test_merge_tree_load_marks/test.py b/tests/integration/test_merge_tree_load_marks/test.py deleted file mode 100644 index b066b2a6ec0d..000000000000 --- a/tests/integration/test_merge_tree_load_marks/test.py +++ /dev/null @@ -1,62 +0,0 @@ -import pytest -from helpers.cluster import ClickHouseCluster - -cluster = ClickHouseCluster(__file__) - -node = cluster.add_instance( - "node", - main_configs=["configs/config.xml"], -) - - -@pytest.fixture(scope="module") -def started_cluster(): - try: - cluster.start() - yield cluster - finally: - cluster.shutdown() - - -# This test is bad and it should be a functional test but S3 metrics -# are accounted incorrectly for merges in part_log and query_log. -# Also we have text_log with level 'trace' in functional tests -# but this test requeires text_log with level 'test'. - - -@pytest.mark.parametrize("min_bytes_for_wide_part", [0, 1000000000]) -def test_merge_load_marks(started_cluster, min_bytes_for_wide_part): - node.query( - f""" - DROP TABLE IF EXISTS t_load_marks; - - CREATE TABLE t_load_marks (a UInt64, b UInt64) - ENGINE = MergeTree ORDER BY a - SETTINGS min_bytes_for_wide_part = {min_bytes_for_wide_part}; - - INSERT INTO t_load_marks SELECT number, number FROM numbers(1000); - INSERT INTO t_load_marks SELECT number, number FROM numbers(1000); - - OPTIMIZE TABLE t_load_marks FINAL; - SYSTEM FLUSH LOGS; - """ - ) - - uuid = node.query( - "SELECT uuid FROM system.tables WHERE table = 't_prewarm_merge'" - ).strip() - - result = node.query( - f""" - SELECT count() - FROM system.text_log - WHERE (query_id LIKE '%{uuid}::all_1_2_1%') AND (message LIKE '%Loading marks%') - """ - ).strip() - - result = int(result) - - is_wide = min_bytes_for_wide_part == 0 - not_loaded = result == 0 - - assert is_wide == not_loaded diff --git a/tests/queries/0_stateless/02532_send_logs_level_test.reference b/tests/queries/0_stateless/02532_send_logs_level_test.reference index 7e51b888d9c7..dbd49cfc0a41 100644 --- a/tests/queries/0_stateless/02532_send_logs_level_test.reference +++ b/tests/queries/0_stateless/02532_send_logs_level_test.reference @@ -1,3 +1,2 @@ - MergeTreeMarksLoader: Loading marks from path data.cmrk3 MergeTreeRangeReader: First reader returned: num_rows: 1, columns: 1, total_rows_per_granule: 1, no filter, column[0]: Int32(size = 1), requested columns: key MergeTreeRangeReader: read() returned num_rows: 1, columns: 1, total_rows_per_granule: 1, no filter, column[0]: Int32(size = 1), sample block key diff --git a/tests/queries/0_stateless/02532_send_logs_level_test.sh b/tests/queries/0_stateless/02532_send_logs_level_test.sh index 4afc6d4496bd..f65d8705569b 100755 --- a/tests/queries/0_stateless/02532_send_logs_level_test.sh +++ b/tests/queries/0_stateless/02532_send_logs_level_test.sh @@ -9,7 +9,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) $CLICKHOUSE_CLIENT -nm -q " drop table if exists data; - create table data (key Int) engine=MergeTree order by tuple() settings min_bytes_for_wide_part = '1G', compress_marks = 1; + create table data (key Int) engine=MergeTree order by tuple(); insert into data values (1); " From 361d73f7a4baa2df437c524964483a886465f140 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Fri, 12 Apr 2024 00:07:53 +0200 Subject: [PATCH 75/90] Try to fix Bugfix validation job --- docker/test/stateless/run.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/docker/test/stateless/run.sh 
b/docker/test/stateless/run.sh index 271f30d187b5..898d82655004 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -16,8 +16,9 @@ ln -snf "/usr/share/zoneinfo/$TZ" /etc/localtime && echo "$TZ" > /etc/timezone dpkg -i package_folder/clickhouse-common-static_*.deb dpkg -i package_folder/clickhouse-common-static-dbg_*.deb -dpkg -i package_folder/clickhouse-odbc-bridge_*.deb -dpkg -i package_folder/clickhouse-library-bridge_*.deb +# Accept failure in the next 2 commands until 23.4 is released (for compatibility and Bugfix validation run) +dpkg -i package_folder/clickhouse-odbc-bridge_*.deb || true +dpkg -i package_folder/clickhouse-library-bridge_*.deb || true dpkg -i package_folder/clickhouse-server_*.deb dpkg -i package_folder/clickhouse-client_*.deb From 9664fb92499f3f4dfc8591baa98b70c007191825 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 12 Apr 2024 00:44:26 +0200 Subject: [PATCH 76/90] Highlight only when necessary --- src/Client/ClientBaseHelpers.cpp | 1 + src/Parsers/IParser.cpp | 3 +++ src/Parsers/IParser.h | 1 + 3 files changed, 5 insertions(+) diff --git a/src/Client/ClientBaseHelpers.cpp b/src/Client/ClientBaseHelpers.cpp index b1d29b34ffc6..8310aa67c226 100644 --- a/src/Client/ClientBaseHelpers.cpp +++ b/src/Client/ClientBaseHelpers.cpp @@ -128,6 +128,7 @@ void highlight(const String & query, std::vector & colors Tokens tokens(begin, end, 1000, true); IParser::Pos token_iterator(tokens, static_cast(1000), static_cast(10000)); Expected expected; + expected.enable_highlighting = true; /// We don't do highlighting for foreign dialects, such as PRQL and Kusto. /// Only normal ClickHouse SQL queries are highlighted. diff --git a/src/Parsers/IParser.cpp b/src/Parsers/IParser.cpp index eb4ddfa01d24..ddd210b01ece 100644 --- a/src/Parsers/IParser.cpp +++ b/src/Parsers/IParser.cpp @@ -44,6 +44,9 @@ static bool intersects(T a_begin, T a_end, T b_begin, T b_end) void Expected::highlight(HighlightedRange range) { + if (!enable_highlighting) + return; + auto it = highlights.lower_bound(range); while (it != highlights.end() && range.begin < it->end) { diff --git a/src/Parsers/IParser.h b/src/Parsers/IParser.h index f8146c0a4f6d..0ae862fee756 100644 --- a/src/Parsers/IParser.h +++ b/src/Parsers/IParser.h @@ -56,6 +56,7 @@ struct Expected absl::InlinedVector variants; const char * max_parsed_pos = nullptr; + bool enable_highlighting = false; std::set highlights; /// 'description' should be statically allocated string. 
From 20db642e7194cd985f9b7340c9cebf01fd0f0cff Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 12 Apr 2024 00:46:55 +0200 Subject: [PATCH 77/90] Fix a test --- .../0_stateless/01676_clickhouse_client_autocomplete.python | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.python b/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.python index 13160d4e561c..c62d2e9d98a2 100644 --- a/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.python +++ b/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.python @@ -128,6 +128,7 @@ if __name__ == "__main__": clickhouse_client = os.environ["CLICKHOUSE_CLIENT"] args = shlex.split(clickhouse_client) args.append("--wait_for_suggestions_to_load") + args.append("--highlight=0") [ run_with_timeout( test_completion, [args[0], args, comp_word], COMPLETION_TIMEOUT_SECONDS @@ -139,6 +140,7 @@ if __name__ == "__main__": clickhouse_local = os.environ["CLICKHOUSE_LOCAL"] args = shlex.split(clickhouse_local) args.append("--wait_for_suggestions_to_load") + args.append("--highlight=0") [ run_with_timeout( test_completion, [args[0], args, comp_word], COMPLETION_TIMEOUT_SECONDS From d903e189c43380e6e1956b9787d9dd2f3189c9a1 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 12 Apr 2024 01:01:04 +0200 Subject: [PATCH 78/90] Update docker/test/stateless/run.sh --- docker/test/stateless/run.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index 898d82655004..9497b7ecc197 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -16,7 +16,7 @@ ln -snf "/usr/share/zoneinfo/$TZ" /etc/localtime && echo "$TZ" > /etc/timezone dpkg -i package_folder/clickhouse-common-static_*.deb dpkg -i package_folder/clickhouse-common-static-dbg_*.deb -# Accept failure in the next 2 commands until 23.4 is released (for compatibility and Bugfix validation run) +# Accept failure in the next two commands until 24.4 is released (for compatibility and Bugfix validation run) dpkg -i package_folder/clickhouse-odbc-bridge_*.deb || true dpkg -i package_folder/clickhouse-library-bridge_*.deb || true dpkg -i package_folder/clickhouse-server_*.deb From 13283abce6e1c25e16bcc684ad3ffee9cd419251 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 12 Apr 2024 03:47:29 +0200 Subject: [PATCH 79/90] Apply review comments --- src/Coordination/KeeperAsynchronousMetrics.cpp | 2 +- src/Coordination/KeeperAsynchronousMetrics.h | 2 +- src/Interpreters/ServerAsynchronousMetrics.cpp | 4 ++-- src/Interpreters/ServerAsynchronousMetrics.h | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/Coordination/KeeperAsynchronousMetrics.cpp b/src/Coordination/KeeperAsynchronousMetrics.cpp index a5b4bc4af97c..86166ffe31bd 100644 --- a/src/Coordination/KeeperAsynchronousMetrics.cpp +++ b/src/Coordination/KeeperAsynchronousMetrics.cpp @@ -114,7 +114,7 @@ void updateKeeperInformation(KeeperDispatcher & keeper_dispatcher, AsynchronousM } KeeperAsynchronousMetrics::KeeperAsynchronousMetrics( - ContextPtr context_, int update_period_seconds, const ProtocolServerMetricsFunc & protocol_server_metrics_func_) + ContextPtr context_, unsigned update_period_seconds, const ProtocolServerMetricsFunc & protocol_server_metrics_func_) : AsynchronousMetrics(update_period_seconds, protocol_server_metrics_func_), context(std::move(context_)) { } diff --git a/src/Coordination/KeeperAsynchronousMetrics.h 
b/src/Coordination/KeeperAsynchronousMetrics.h index 33e8d6818d76..ec0e60cbb6ec 100644 --- a/src/Coordination/KeeperAsynchronousMetrics.h +++ b/src/Coordination/KeeperAsynchronousMetrics.h @@ -13,7 +13,7 @@ class KeeperAsynchronousMetrics : public AsynchronousMetrics { public: KeeperAsynchronousMetrics( - ContextPtr context_, int update_period_seconds, const ProtocolServerMetricsFunc & protocol_server_metrics_func_); + ContextPtr context_, unsigned update_period_seconds, const ProtocolServerMetricsFunc & protocol_server_metrics_func_); ~KeeperAsynchronousMetrics() override; private: diff --git a/src/Interpreters/ServerAsynchronousMetrics.cpp b/src/Interpreters/ServerAsynchronousMetrics.cpp index 7703a3521303..4f8b03a5eaa9 100644 --- a/src/Interpreters/ServerAsynchronousMetrics.cpp +++ b/src/Interpreters/ServerAsynchronousMetrics.cpp @@ -53,8 +53,8 @@ void calculateMaxAndSum(Max & max, Sum & sum, T x) ServerAsynchronousMetrics::ServerAsynchronousMetrics( ContextPtr global_context_, - int update_period_seconds, - int heavy_metrics_update_period_seconds, + unsigned update_period_seconds, + unsigned heavy_metrics_update_period_seconds, const ProtocolServerMetricsFunc & protocol_server_metrics_func_) : WithContext(global_context_) , AsynchronousMetrics(update_period_seconds, protocol_server_metrics_func_) diff --git a/src/Interpreters/ServerAsynchronousMetrics.h b/src/Interpreters/ServerAsynchronousMetrics.h index b0cf8efbfd7d..e3c83dc748e4 100644 --- a/src/Interpreters/ServerAsynchronousMetrics.h +++ b/src/Interpreters/ServerAsynchronousMetrics.h @@ -12,8 +12,8 @@ class ServerAsynchronousMetrics : WithContext, public AsynchronousMetrics public: ServerAsynchronousMetrics( ContextPtr global_context_, - int update_period_seconds, - int heavy_metrics_update_period_seconds, + unsigned update_period_seconds, + unsigned heavy_metrics_update_period_seconds, const ProtocolServerMetricsFunc & protocol_server_metrics_func_); ~ServerAsynchronousMetrics() override; From 7cd3c86d6ed25f56f614fc95a20c430fb2e1f2bf Mon Sep 17 00:00:00 2001 From: Blargian Date: Fri, 12 Apr 2024 12:36:12 +0200 Subject: [PATCH 80/90] Add missing leftXYZ and rightXYZ functions --- .../functions/string-functions.md | 188 ++++++++++++++++++ 1 file changed, 188 insertions(+) diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md index d4df3e0479a8..68b139a892c0 100644 --- a/docs/en/sql-reference/functions/string-functions.md +++ b/docs/en/sql-reference/functions/string-functions.md @@ -102,6 +102,100 @@ Alias: - `CHAR_LENGTH` - `CHARACTER_LENGTH` +## left + +Returns the substring of a string `s` which starts at the specified byte index `offset` from the left. + +**Syntax** + +``` sql +left(s, offset) +``` + +**Parameters** + +- `s`: The string to calculate a substring from. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). +- `offset`: The number of bytes of the offset. [UInt*](../data-types/int-uint). + +**Returned value** + +- For positive `offset`: A substring of `s` with `offset` many bytes, starting from the left of the string. +- For negative `offset`: A substring of `s` with `length(s) - |offset|` bytes, starting from the left of the string. +- An empty string if `length` is 0. 
+ +**Example** + +Query: + +```sql +SELECT left('Hello', 3); +``` + +Result: + +```response +Hel +``` + +Query: + +```sql +SELECT left('Hello', -3); +``` + +Result: + +```response +He +``` + +## leftUTF8 + +Returns the substring of a UTF-8 encoded string `s` which starts at the specified byte index `offset` from the left. + +**Syntax** + +``` sql +leftUTF8(s, offset) +``` + +**Parameters** + +- `s`: The UTF-8 encoded string to calculate a substring from. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). +- `offset`: The number of bytes of the offset. [UInt*](../data-types/int-uint). + +**Returned value** + +- For positive `offset`: A substring of `s` with `offset` many bytes, starting from the left of the string. +- For negative `offset`: A substring of `s` with `length(s) - |offset|` bytes, starting from the left of the string. +- An empty string if `length` is 0. + +**Example** + +Query: + +```sql +SELECT leftUTF8('Привет', 4); +``` + +Result: + +```response +Прив +``` + +Query: + +```sql +SELECT leftUTF8('Привет', -4); +``` + +Result: + +```response +Пр +``` + ## leftPad Pads a string from the left with spaces or with a specified string (multiple times, if needed) until the resulting string reaches the specified `length`. @@ -176,6 +270,100 @@ Result: └─────────────────────────────┴────────────────────────┘ ``` +## right + +Returns the substring of a string `s` which starts at the specified byte index `offset` from the right. + +**Syntax** + +``` sql +right(s, offset) +``` + +**Parameters** + +- `s`: The string to calculate a substring from. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). +- `offset`: The number of bytes of the offset. [UInt*](../data-types/int-uint). + +**Returned value** + +- For positive `offset`: A substring of `s` with `offset` many bytes, starting from the right of the string. +- For negative `offset`: A substring of `s` with `length(s) - |offset|` bytes, starting from the right of the string. +- An empty string if `length` is 0. + +**Example** + +Query: + +```sql +SELECT right('Hello', 3); +``` + +Result: + +```response +llo +``` + +Query: + +```sql +SELECT right('Hello', -3); +``` + +Result: + +```response +lo +``` + +## rightUTF8 + +Returns the substring of a UTF-8 encoded string `s` which starts at the specified byte index `offset` from the right. + +**Syntax** + +``` sql +rightUTF8(s, offset) +``` + +**Parameters** + +- `s`: The UTF-8 encoded string to calculate a substring from. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). +- `offset`: The number of bytes of the offset. [UInt*](../data-types/int-uint). + +**Returned value** + +- For positive `offset`: A substring of `s` with `offset` many bytes, starting from the right of the string. +- For negative `offset`: A substring of `s` with `length(s) - |offset|` bytes, starting from the right of the string. +- An empty string if `length` is 0. + +**Example** + +Query: + +```sql +SELECT rightUTF8('Привет', 4); +``` + +Result: + +```response +ивет +``` + +Query: + +```sql +SELECT rightUTF8('Привет', -4); +``` + +Result: + +```response +ет +``` + ## rightPad Pads a string from the right with spaces or with a specified string (multiple times, if needed) until the resulting string reaches the specified `length`. 
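A small side-by-side illustration of the byte-based versus UTF-8-aware variants documented in the patch above. This sketch is not part of the patch itself; it assumes the behaviour shown in the examples of the added docs, namely that `left`/`right` count bytes while `leftUTF8`/`rightUTF8` count characters:

```sql
-- 'Привет' is 6 Cyrillic characters, i.e. 12 bytes in UTF-8.
SELECT
    left('Привет', 4)     AS byte_based,  -- expected: 'Пр'   (4 bytes = 2 characters)
    leftUTF8('Привет', 4) AS utf8_aware;  -- expected: 'Прив' (4 characters)
```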
From b2a83d54f328f7c81405bfd67275af378fa5a680 Mon Sep 17 00:00:00 2001 From: Blargian Date: Fri, 12 Apr 2024 12:41:45 +0200 Subject: [PATCH 81/90] Make descriptions less ambiguous --- docs/en/sql-reference/functions/string-functions.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md index 68b139a892c0..92f0889563ba 100644 --- a/docs/en/sql-reference/functions/string-functions.md +++ b/docs/en/sql-reference/functions/string-functions.md @@ -104,7 +104,7 @@ Alias: ## left -Returns the substring of a string `s` which starts at the specified byte index `offset` from the left. +Returns a substring of string `s` with a specified `offset` starting from the left. **Syntax** @@ -151,7 +151,7 @@ He ## leftUTF8 -Returns the substring of a UTF-8 encoded string `s` which starts at the specified byte index `offset` from the left. +Returns a substring of a UTF-8 encoded string `s` with a specified `offset` starting from the left. **Syntax** @@ -272,7 +272,7 @@ Result: ## right -Returns the substring of a string `s` which starts at the specified byte index `offset` from the right. +Returns a substring of string `s` with a specified `offset` starting from the right. **Syntax** @@ -319,7 +319,7 @@ lo ## rightUTF8 -Returns the substring of a UTF-8 encoded string `s` which starts at the specified byte index `offset` from the right. +Returns a substring of UTF-8 encoded string `s` with a specified `offset` starting from the right. **Syntax** From 0d2e0e3131b9d97f03a13d9e7bb2f7208262d410 Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Fri, 12 Apr 2024 11:11:49 +0000 Subject: [PATCH 82/90] CI: MQ sync status check fix --- tests/ci/sync_pr.py | 31 ++++++++++++++++++++++++++++--- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/tests/ci/sync_pr.py b/tests/ci/sync_pr.py index acff7ba541bb..7240a07fb6e3 100644 --- a/tests/ci/sync_pr.py +++ b/tests/ci/sync_pr.py @@ -4,6 +4,7 @@ import argparse import sys +import time from get_robot_token import get_best_robot_token from pr_info import PRInfo @@ -53,12 +54,36 @@ def merge_sync_pr(gh, sync_pr): def set_sync_status(gh, pr_info, sync_pr): - if not sync_pr or not sync_pr.mergeable: + if not sync_pr: post_commit_status( - get_commit(gh, pr_info.sha), FAILURE, "", "Sync PR failure", "A Sync" + get_commit(gh, pr_info.sha), FAILURE, "", "Sync PR not found", "A Sync" ) - else: + return + + retries = 0 + while sync_pr.mergeable_state == "unknown" and retries < 3: + retries += 1 + print(f"Unknown status. 
Trying to fetch again [{retries}/3]")
+        time.sleep(5)
+        sync_pr = gh.get_pulls_from_search(
+            query=f"head:sync-upstream/pr/{sync_pr.number} org:ClickHouse type:pr",
+            repo="ClickHouse/clickhouse-private",
+        )
+
+    if sync_pr.mergeable_state == "clean":
+        print(f"Sync PR [{sync_pr.number}] is clean")
         post_commit_status(get_commit(gh, pr_info.sha), SUCCESS, "", "", "A Sync")
+    else:
+        print(
+            f"Sync PR [{sync_pr}] is not mergeable, state [{sync_pr.mergeable_state}]"
+        )
+        post_commit_status(
+            get_commit(gh, pr_info.sha),
+            FAILURE,
+            "",
+            f"state: {sync_pr.mergeable_state}",
+            "A Sync",
+        )


 def main():

From 6fff5723b78b972687a5c64c10f7658617317a9c Mon Sep 17 00:00:00 2001
From: Shaun Struwig <41984034+Blargian@users.noreply.github.com>
Date: Fri, 12 Apr 2024 13:31:34 +0200
Subject: [PATCH 83/90] Add leftUTF and rightUTF to aspell-dict

---
 utils/check-style/aspell-ignore/en/aspell-dict.txt | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt
index 697f93f78c9b..5d1d2b650fc8 100644
--- a/utils/check-style/aspell-ignore/en/aspell-dict.txt
+++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt
@@ -1833,6 +1833,7 @@ laravel
 largestTriangleThreeBuckets
 latencies
 ldap
+leftUTF
 leftPad
 leftPadUTF
 lemmatization
@@ -2306,6 +2307,7 @@ retriable
 reverseUTF
 rightPad
 rightPadUTF
+rightUTF
 risc
 riscv
 ro

From 3f10530c2871c1b8a8d2ee9001c76560f93e92ac Mon Sep 17 00:00:00 2001
From: Max Kainov
Date: Fri, 12 Apr 2024 11:23:43 +0000
Subject: [PATCH 84/90] remove A sync check from MQ for a while

---
 tests/ci/sync_pr.py | 43 ++++++++++++++++++++++---------------------
 1 file changed, 22 insertions(+), 21 deletions(-)

diff --git a/tests/ci/sync_pr.py b/tests/ci/sync_pr.py
index 7240a07fb6e3..0e1ab2994b9a 100644
--- a/tests/ci/sync_pr.py
+++ b/tests/ci/sync_pr.py
@@ -4,13 +4,12 @@
 import argparse
 import sys
-import time

 from get_robot_token import get_best_robot_token
 from pr_info import PRInfo
 from github_helper import GitHub
 from commit_status_helper import get_commit, post_commit_status
-from report import FAILURE, SUCCESS
+from report import SUCCESS


 def parse_args() -> argparse.Namespace:
@@ -54,21 +53,23 @@ def merge_sync_pr(gh, sync_pr):


 def set_sync_status(gh, pr_info, sync_pr):
+    # FIXME: uncomment posting red Sync status to prohibit merge in MQ if PR state fetching works good
     if not sync_pr:
-        post_commit_status(
-            get_commit(gh, pr_info.sha), FAILURE, "", "Sync PR not found", "A Sync"
-        )
+        # post_commit_status(
+        #     get_commit(gh, pr_info.sha), FAILURE, "", "Sync PR not found", "A Sync"
+        # )
         return

-    retries = 0
-    while sync_pr.mergeable_state == "unknown" and retries < 3:
-        retries += 1
-        print(f"Unknown status. Trying to fetch again [{retries}/3]")
-        time.sleep(5)
-        sync_pr = gh.get_pulls_from_search(
-            query=f"head:sync-upstream/pr/{sync_pr.number} org:ClickHouse type:pr",
-            repo="ClickHouse/clickhouse-private",
-        )
+    # FIXME: fetch sync pr in a proper way
+    # retries = 0
+    # while sync_pr.mergeable_state == "unknown" and retries < 3:
+    #     retries += 1
+    #     print(f"Unknown status. Trying to fetch again [{retries}/3]")
+    #     time.sleep(5)
+    #     sync_pr = gh.get_pulls_from_search(
+    #         query=f"head:sync-upstream/pr/{sync_pr.number} org:ClickHouse type:pr",
+    #         repo="ClickHouse/clickhouse-private",
+    #     )

     if sync_pr.mergeable_state == "clean":
         print(f"Sync PR [{sync_pr.number}] is clean")
@@ -77,13 +78,13 @@ def set_sync_status(gh, pr_info, sync_pr):
         print(
             f"Sync PR [{sync_pr}] is not mergeable, state [{sync_pr.mergeable_state}]"
         )
-        post_commit_status(
-            get_commit(gh, pr_info.sha),
-            FAILURE,
-            "",
-            f"state: {sync_pr.mergeable_state}",
-            "A Sync",
-        )
+        # post_commit_status(
+        #     get_commit(gh, pr_info.sha),
+        #     FAILURE,
+        #     "",
+        #     f"state: {sync_pr.mergeable_state}",
+        #     "A Sync",
+        # )


 def main():

From 2d6c51578a092c1b97d95bdbf3805950f50b7234 Mon Sep 17 00:00:00 2001
From: peter279k
Date: Fri, 12 Apr 2024 19:47:38 +0800
Subject: [PATCH 85/90] Add truncate and trunc functions usage

---
 .../functions/rounding-functions.md | 31 +++++++++++++++++++
 1 file changed, 31 insertions(+)

diff --git a/docs/en/sql-reference/functions/rounding-functions.md b/docs/en/sql-reference/functions/rounding-functions.md
index 3ede66cf3166..74b68a038204 100644
--- a/docs/en/sql-reference/functions/rounding-functions.md
+++ b/docs/en/sql-reference/functions/rounding-functions.md
@@ -26,6 +26,37 @@ Returns the smallest round number that is greater than or equal to `x`. In every

 Returns the round number with largest absolute value that has an absolute value less than or equal to `x`‘s. In every other way, it is the same as the ’floor’ function (see above).

+**Syntax**
+
+```sql
+trunc(input, precision)
+```
+
+Alias: `truncate`.
+
+**Parameters**
+
+- `input`: A float type [Float](/docs/en/sql-reference/data-types/float.md).
+- `precision`: A decimal type [Decimal](/docs/en/sql-reference/data-types/decimal.md).
+
+**Returned value**
+
+- A [Float64](/docs/en/sql-reference/data-types/float.md) value.
+
+**Example**
+
+Query:
+
+```sql
+SELECT trunc(123.45, 1) as res;
+```
+
+```response
+┌───res─┐
+│ 123.4 │
+└───────┘
+```
+
 ## round(x\[, N\])

 Rounds a value to a specified number of decimal places.

From 85cdecb12328860aea3a3bd14013657354027a8f Mon Sep 17 00:00:00 2001
From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com>
Date: Fri, 12 Apr 2024 13:57:42 +0200
Subject: [PATCH 86/90] Add a more illustrative example

---
 docs/en/sql-reference/functions/rounding-functions.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/en/sql-reference/functions/rounding-functions.md b/docs/en/sql-reference/functions/rounding-functions.md
index 74b68a038204..1ffe7807e4f4 100644
--- a/docs/en/sql-reference/functions/rounding-functions.md
+++ b/docs/en/sql-reference/functions/rounding-functions.md
@@ -48,7 +48,7 @@ Alias: `truncate`.
 Query:

 ```sql
-SELECT trunc(123.45, 1) as res;
+SELECT trunc(123.499, 1) as res;
 ```

 ```response

From 81f97921ce71892224ce2c181f075bf2701e0fd7 Mon Sep 17 00:00:00 2001
From: "Mikhail f. Shiryaev"
Date: Fri, 12 Apr 2024 14:17:08 +0200
Subject: [PATCH 87/90] Add requirement for ccache/sccache into dev docs

---
 docs/en/development/developer-instruction.md | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/docs/en/development/developer-instruction.md b/docs/en/development/developer-instruction.md
index 42c7e5ac2957..763485331790 100644
--- a/docs/en/development/developer-instruction.md
+++ b/docs/en/development/developer-instruction.md
@@ -83,11 +83,17 @@ ClickHouse uses CMake and Ninja for building.

 - Ninja - a smaller build system with a focus on the speed used to execute those cmake generated tasks.

-To install on Ubuntu, Debian or Mint run `sudo apt install cmake ninja-build`.
+- ccache - a compiler cache. It speeds up recompilation by caching previous compilations and detecting when the same compilation is being done again.

-On CentOS, RedHat run `sudo yum install cmake ninja-build`.
+:::tip
+As an alternative for ccache a distributed [sccache](https://github.com/mozilla/sccache) could be used. The system will chose it with higher priority.
+:::
+
+To install on Ubuntu, Debian or Mint run `sudo apt install cmake ninja-build ccache`.
+
+On CentOS, RedHat run `sudo yum install cmake ninja-build ccache`.

-If you use Arch or Gentoo, you probably know it yourself how to install CMake.
+If you use Arch or Gentoo, you probably know it yourself how to install CMake and others.

 ## C++ Compiler {#c-compiler}

From e00e0ad65199db0534381aeb9c279b7091a684c4 Mon Sep 17 00:00:00 2001
From: "Mikhail f. Shiryaev"
Date: Fri, 12 Apr 2024 14:32:59 +0200
Subject: [PATCH 88/90] Fix a false statement

---
 docs/en/development/developer-instruction.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/en/development/developer-instruction.md b/docs/en/development/developer-instruction.md
index 763485331790..6623c46fa9fa 100644
--- a/docs/en/development/developer-instruction.md
+++ b/docs/en/development/developer-instruction.md
@@ -86,7 +86,7 @@ ClickHouse uses CMake and Ninja for building.
 - ccache - a compiler cache. It speeds up recompilation by caching previous compilations and detecting when the same compilation is being done again.

 :::tip
-As an alternative for ccache a distributed [sccache](https://github.com/mozilla/sccache) could be used. The system will chose it with higher priority.
+As an alternative for ccache a distributed [sccache](https://github.com/mozilla/sccache) could be used. To prefer it, `-DCOMPILER_CACHE=sccache` CMake flag should be used.
 :::

 To install on Ubuntu, Debian or Mint run `sudo apt install cmake ninja-build ccache`.

From c10055eb788271fe0097fb5f1bd20dcd01c8442f Mon Sep 17 00:00:00 2001
From: "Mikhail f. Shiryaev"
Date: Fri, 12 Apr 2024 14:34:19 +0200
Subject: [PATCH 89/90] Add sccache to the spelling dictionary

---
 utils/check-style/aspell-ignore/en/aspell-dict.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt
index 9f7776f5201d..d191eb8b9ce1 100644
--- a/utils/check-style/aspell-ignore/en/aspell-dict.txt
+++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt
@@ -2348,6 +2348,7 @@ rw
 sasl
 satisfiable
 scala
+sccache
 schemas
 seekable
 seektable

From e2ff2f8a1fd2a660208111b6ba83849b24fefc5d Mon Sep 17 00:00:00 2001
From: Maksim Kita
Date: Sun, 14 Apr 2024 10:55:52 +0300
Subject: [PATCH 90/90] JOIN filter push down right stream filled crash fix

---
 .../Optimizations/filterPushDown.cpp          |  2 +-
 ...er_push_down_right_stream_filled.reference |  0
 ...n_filter_push_down_right_stream_filled.sql | 25 +++++++++++++++++++
 3 files changed, 26 insertions(+), 1 deletion(-)
 create mode 100644 tests/queries/0_stateless/03095_join_filter_push_down_right_stream_filled.reference
 create mode 100644 tests/queries/0_stateless/03095_join_filter_push_down_right_stream_filled.sql

diff --git a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp
index ebf780bb692e..5eab5e8f4a46 100644
--- a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp
+++ b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp
@@ -363,7 +363,7 @@ static size_t tryPushDownOverJoinStep(QueryPlan::Node * parent_node, QueryPlan::
             JoinKind::Left);
     }

-    if (join_filter_push_down_actions.right_stream_filter_to_push_down)
+    if (join_filter_push_down_actions.right_stream_filter_to_push_down && allow_push_down_to_right)
     {
         updated_steps += addNewFilterStepOrThrow(parent_node,
             nodes,
diff --git a/tests/queries/0_stateless/03095_join_filter_push_down_right_stream_filled.reference b/tests/queries/0_stateless/03095_join_filter_push_down_right_stream_filled.reference
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/tests/queries/0_stateless/03095_join_filter_push_down_right_stream_filled.sql b/tests/queries/0_stateless/03095_join_filter_push_down_right_stream_filled.sql
new file mode 100644
index 000000000000..4ce7657e1487
--- /dev/null
+++ b/tests/queries/0_stateless/03095_join_filter_push_down_right_stream_filled.sql
@@ -0,0 +1,25 @@
+DROP TABLE IF EXISTS t1__fuzz_0;
+CREATE TABLE t1__fuzz_0
+(
+    `x` UInt8,
+    `str` String
+)
+ENGINE = MergeTree ORDER BY x;
+
+INSERT INTO t1__fuzz_0 SELECT number, toString(number) FROM numbers(10);
+
+DROP TABLE IF EXISTS left_join__fuzz_2;
+CREATE TABLE left_join__fuzz_2
+(
+    `x` UInt32,
+    `s` LowCardinality(String)
+) ENGINE = Join(`ALL`, LEFT, x);
+
+INSERT INTO left_join__fuzz_2 SELECT number, toString(number) FROM numbers(10);
+
+SELECT 14 FROM t1__fuzz_0 LEFT JOIN left_join__fuzz_2 USING (x)
+WHERE pointInPolygon(materialize((-inf, 1023)), [(5, 0.9998999834060669), (1.1920928955078125e-7, 100.0000991821289), (1.000100016593933, 100.0000991821289)])
+ORDER BY toNullable('202.79.32.10') DESC NULLS LAST, toNullable(toLowCardinality(toUInt256(14))) ASC, x DESC NULLS LAST;
+
+DROP TABLE t1__fuzz_0;
+DROP TABLE left_join__fuzz_2;
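
For readers following the last fix: the crash concerned pushing a WHERE filter into the right-hand stream of a join whose right side is an already-filled `Join`-engine table. The sketch below is an editorial illustration only, not part of the patch series; the table and column names are made up, and the regression test added above exercises the same shape with a more involved query.

```sql
-- Hypothetical minimal sketch of the query shape guarded by the new
-- `allow_push_down_to_right` check: the right side of the LEFT JOIN is a
-- pre-filled Join-engine table, so a filter attributable to the right
-- stream has to stay above the join instead of being pushed into it.
CREATE TABLE t_left (x UInt32, s_left String) ENGINE = MergeTree ORDER BY x;
CREATE TABLE t_right (x UInt32, s_right String) ENGINE = Join(`ALL`, LEFT, x);

INSERT INTO t_left SELECT number, toString(number) FROM numbers(10);
INSERT INTO t_right SELECT number, toString(number) FROM numbers(10);

-- The condition on s_right references the filled right-hand table.
SELECT count()
FROM t_left
LEFT JOIN t_right USING (x)
WHERE s_right != '';
```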